This commit is contained in:
weixin_43297441 2025-03-05 17:48:40 +08:00
parent 24923b65f1
commit c806a931d1
1 changed files with 49 additions and 19 deletions

View File

# @@ -165,10 +165,16 @@ def main() — model setup and embedding accumulators
# (reconstructed new-side code of this diff hunk)
model = llama_iti.LlamaForCausalLM.from_pretrained(
    MODEL,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,
    device_map="auto",
).cuda()
# Module names whose outputs TraceDict captures: per-layer attention-head
# outputs and per-layer MLP outputs (one entry per hidden layer).
HEADS = [f"model.layers.{i}.self_attn.head_out" for i in range(model.config.num_hidden_layers)]
MLPS = [f"model.layers.{i}.mlp" for i in range(model.config.num_hidden_layers)]
# First collect the embeddings of the generated question/answer pairs.
# Suffixes: _t = true answers, _h = hallucinated answers;
# _loc1 = head-wise activations, _loc2 = mlp-wise activations.
embed_generated = []
embed_generated_h = []
embed_generated_t = []
embed_generated_t_loc2 = []
embed_generated_t_loc1 = []
embed_generated_h_loc2 = []
embed_generated_h_loc1 = []
if args.dataset_name == 'tydiqa':
    length = len(used_indices)
@ -224,12 +230,19 @@ def main():
f"Answer the question concisely. Q: {question}" + " A:" + tru, f"Answer the question concisely. Q: {question}" + " A:" + tru,
return_tensors='pt').input_ids.cuda() return_tensors='pt').input_ids.cuda()
with torch.no_grad(): with torch.no_grad():
hidden_states = model(prompt, output_hidden_states=True).hidden_states with TraceDict(model, HEADS + MLPS) as ret:
hidden_states = torch.stack(hidden_states, dim=0).squeeze() output = model(prompt, output_hidden_states=True)
hidden_states = hidden_states.detach().cpu().numpy()[:, -1, :] head_wise_hidden_states = [ret[head].output.squeeze().detach().cpu() for head in HEADS]
embed_generated_t.append(hidden_states) head_wise_hidden_states = torch.stack(head_wise_hidden_states, dim=0).squeeze().numpy()
embed_generated_t = np.asarray(np.stack(embed_generated_t), dtype=np.float32) mlp_wise_hidden_states = [ret[mlp].output.squeeze().detach().cpu() for mlp in MLPS]
np.save(f'save_for_eval/{args.dataset_name}/{args.model_name}_hal_det/' + info + f'{args.model_name}_gene_embeddings_t_layer_wise.npy', embed_generated_t) mlp_wise_hidden_states = torch.stack(mlp_wise_hidden_states, dim=0).squeeze().numpy()
embed_generated_t_loc2.append(mlp_wise_hidden_states[:, -1, :])
embed_generated_t_loc1.append(head_wise_hidden_states[:, -1, :])
embed_generated_t_loc2 = np.asarray(np.stack(embed_generated_t_loc2), dtype=np.float32)
embed_generated_t_loc1 = np.asarray(np.stack(embed_generated_t_loc1), dtype=np.float32)
np.save(f'save_for_eval/{args.dataset_name}/{args.model_name}_hal_det/' + info + f'{args.model_name}_gene_embeddings_t_head_wise.npy', embed_generated_t_loc2)
np.save(f'save_for_eval/{args.dataset_name}/{args.model_name}_hal_det/' + info + f'{args.model_name}_gene_embeddings_t_mlp_wise.npy', embed_generated_t_loc1)
for hal in hallucinations: for hal in hallucinations:
@ -245,15 +258,21 @@ def main():
f"Answer the question concisely. Q: {question}" + " A:" + hal, f"Answer the question concisely. Q: {question}" + " A:" + hal,
return_tensors='pt').input_ids.cuda() return_tensors='pt').input_ids.cuda()
with torch.no_grad(): with torch.no_grad():
hidden_states = model(prompt, output_hidden_states=True).hidden_states with TraceDict(model, HEADS + MLPS) as ret:
hidden_states = torch.stack(hidden_states, dim=0).squeeze() output = model(prompt, output_hidden_states=True)
hidden_states = hidden_states.detach().cpu().numpy()[:, -1, :] head_wise_hidden_states = [ret[head].output.squeeze().detach().cpu() for head in HEADS]
embed_generated_h.append(hidden_states) head_wise_hidden_states = torch.stack(head_wise_hidden_states, dim=0).squeeze().numpy()
embed_generated_h = np.asarray(np.stack(embed_generated_h), dtype=np.float32) mlp_wise_hidden_states = [ret[mlp].output.squeeze().detach().cpu() for mlp in MLPS]
np.save(f'save_for_eval/{args.dataset_name}/{args.model_name}_hal_det/' + info + f'{args.model_name}_gene_embeddings_h_layer_wise.npy', embed_generated_h) mlp_wise_hidden_states = torch.stack(mlp_wise_hidden_states, dim=0).squeeze().numpy()
embed_generated_h_loc2.append(mlp_wise_hidden_states[:, -1, :])
embed_generated_h_loc1.append(head_wise_hidden_states[:, -1, :])
embed_generated_h_loc2 = np.asarray(np.stack(embed_generated_t_loc2), dtype=np.float32)
embed_generated_h_loc1 = np.asarray(np.stack(embed_generated_t_loc1), dtype=np.float32)
np.save(f'save_for_eval/{args.dataset_name}/{args.model_name}_hal_det/' + info + f'{args.model_name}_gene_embeddings_h_head_wise.npy', embed_generated_h_loc2)
np.save(f'save_for_eval/{args.dataset_name}/{args.model_name}_hal_det/' + info + f'{args.model_name}_gene_embeddings_h_mlp_wise.npy', embed_generated_h_loc1)
HEADS = [f"model.layers.{i}.self_attn.head_out" for i in range(model.config.num_hidden_layers)]
MLPS = [f"model.layers.{i}.mlp" for i in range(model.config.num_hidden_layers)]
embed_generated_loc2 = [] embed_generated_loc2 = []
embed_generated_loc1 = [] embed_generated_loc1 = []
for i in tqdm(range(length)): for i in tqdm(range(length)):
# @@ -432,17 +451,28 @@ def main() — reload saved embeddings for evaluation
# (reconstructed new-side code of this diff hunk)
if args.most_likely:
    # feat_loc selects the feature location: 1 = attention-head outputs,
    # 2 = MLP outputs. Files were written by the capture phase above.
    if feat_loc == 1:
        embed_generated = np.load(f'save_for_eval/{args.dataset_name}/{args.model_name}_hal_det/' + info + f'{args.model_name}_gene_embeddings_head_wise.npy',
                                  allow_pickle=True)
        embed_generated_h = np.load(f'save_for_eval/{args.dataset_name}/{args.model_name}_hal_det/' + info + f'{args.model_name}_gene_embeddings_h_head_wise.npy',
                                    allow_pickle=True)
        embed_generated_t = np.load(f'save_for_eval/{args.dataset_name}/{args.model_name}_hal_det/' + info + f'{args.model_name}_gene_embeddings_t_head_wise.npy',
                                    allow_pickle=True)
    elif feat_loc == 2:
        embed_generated = np.load(
            f'save_for_eval/{args.dataset_name}/{args.model_name}_hal_det/' + info + f'{args.model_name}_gene_embeddings_mlp_wise.npy',
            allow_pickle=True)
        embed_generated_h = np.load(f'save_for_eval/{args.dataset_name}/{args.model_name}_hal_det/' + info + f'{args.model_name}_gene_embeddings_h_mlp_wise.npy',
                                    allow_pickle=True)
        embed_generated_t = np.load(f'save_for_eval/{args.dataset_name}/{args.model_name}_hal_det/' + info + f'{args.model_name}_gene_embeddings_t_mlp_wise.npy',
                                    allow_pickle=True)
    else:
        # BUG FIX: `assert "Not supported!"` asserts a non-empty string, which
        # is always truthy — the branch silently fell through and crashed later
        # with a NameError on embed_generated. Fail loudly instead.
        raise ValueError(f"feat_loc={feat_loc} is not supported (expected 1 or 2)")
feat_indices_wild = []
feat_indices_eval = []