diff --git a/a.sh b/a.sh
index af39add..b0cefda 100644
--- a/a.sh
+++ b/a.sh
@@ -1,2 +1,2 @@
 export HF_ENDPOINT=https://hf-mirror.com
-CUDA_VISIBLE_DEVICES=0 python hal_det_llama.py --dataset_name triviaqa --model_name llama2_chat_7B --most_likely 0 --use_rouge 0 --generate_gt 1
\ No newline at end of file
+CUDA_VISIBLE_DEVICES=9 python hal_generate.py
\ No newline at end of file
diff --git a/hal_det_llama.py b/hal_det_llama.py
index 3b30ad1..e56d6a2 100644
--- a/hal_det_llama.py
+++ b/hal_det_llama.py
@@ -168,6 +168,8 @@ def main():
                                              device_map="auto").cuda()
     # firstly get the embeddings of the generated question and answers.
     embed_generated = []
+    embed_generated_h = []
+    embed_generated_t = []
 
     if args.dataset_name == 'tydiqa':
         length = len(used_indices)
@@ -180,13 +182,16 @@ def main():
             question = dataset[i]['question']
         answers = np.load(
             f'save_for_eval/{args.dataset_name}_hal_det/answers/most_likely_hal_det_{args.model_name}_{args.dataset_name}_answers_index_{i}.npy')
-
+        truths = np.load(
+            f'save_for_eval/{args.dataset_name}_hal_det/answers/most_likely_hal_det_{args.model_name}_{args.dataset_name}_answers_index_{i}.npy')
+        hallucinations = np.load(
+            f'save_for_eval/{args.dataset_name}_hal_det/hallucinations/most_likely_hal_det_{args.model_name}_{args.dataset_name}_hallucinations_index_{i}.npy')
         for anw in answers:
             if args.dataset_name == 'tydiqa':
                 prompt = tokenizer(
                     "Concisely answer the following question based on the information in the given passage: \n" + \
-                    " Passage: " + dataset[int(used_indices[i])]['context'] + " \n Q: " + question + " \n A:",
+                    " Passage: " + dataset[int(used_indices[i])]['context'] + " \n Q: " + question + " \n A:" + anw,
                     return_tensors='pt').input_ids.cuda()
             elif args.dataset_name == 'coqa':
                 prompt = tokenizer(dataset[i]['prompt'] + anw, return_tensors='pt').input_ids.cuda()
@@ -202,6 +207,51 @@ def main():
     embed_generated = np.asarray(np.stack(embed_generated), dtype=np.float32)
     np.save(f'save_for_eval/{args.dataset_name}_hal_det/most_likely_{args.model_name}_gene_embeddings_layer_wise.npy',
             embed_generated)
+    for tru in truths:
+
+        if args.dataset_name == 'tydiqa':
+            prompt = tokenizer(
+                "Concisely answer the following question based on the information in the given passage: \n" + \
+                " Passage: " + dataset[int(used_indices[i])]['context'] + " \n Q: " + question + " \n A:" + tru,
+                return_tensors='pt').input_ids.cuda()
+        elif args.dataset_name == 'coqa':
+            prompt = tokenizer(dataset[i]['prompt'] + tru, return_tensors='pt').input_ids.cuda()
+        else:
+            prompt = tokenizer(
+                f"Answer the question concisely. Q: {question}" + " A:" + tru,
+                return_tensors='pt').input_ids.cuda()
+        with torch.no_grad():
+            hidden_states = model(prompt, output_hidden_states=True).hidden_states
+            hidden_states = torch.stack(hidden_states, dim=0).squeeze()
+            hidden_states = hidden_states.detach().cpu().numpy()[:, -1, :]
+        embed_generated_t.append(hidden_states)
+    embed_generated_t = np.asarray(np.stack(embed_generated_t), dtype=np.float32)
+    np.save(f'save_for_eval/{args.dataset_name}_hal_det/most_likely_{args.model_name}_gene_embeddings_t_layer_wise.npy', embed_generated_t)
+
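+    # Same pass over the answers flagged as hallucinations: embed each
+    # question + hallucinated answer, keep the last-token hidden state from
+    # every layer, and save them alongside the truthful embeddings.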
Q: {question}" + " A:" + tru, + return_tensors='pt').input_ids.cuda() + with torch.no_grad(): + hidden_states = model(prompt, output_hidden_states=True).hidden_states + hidden_states = torch.stack(hidden_states, dim=0).squeeze() + hidden_states = hidden_states.detach().cpu().numpy()[:, -1, :] + embed_generated_t.append(hidden_states) + embed_generated_t = np.asarray(np.stack(embed_generated_t), dtype=np.float32) + np.save(f'save_for_eval/{args.dataset_name}_hal_det/most_likely_{args.model_name}_gene_embeddings_t_layer_wise.npy', embed_generated_t) + + for hal in hallucinations: + + if args.dataset_name == 'tydiqa': + prompt = tokenizer( + "Concisely answer the following question based on the information in the given passage: \n" + \ + " Passage: " + dataset[int(used_indices[i])]['context'] + " \n Q: " + question + " \n A:" + hal, + return_tensors='pt').input_ids.cuda() + elif args.dataset_name == 'coqa': + prompt = tokenizer(dataset[i]['prompt'] + hal, return_tensors='pt').input_ids.cuda() + else: + prompt = tokenizer( + f"Answer the question concisely. Q: {question}" + " A:" + hal, + return_tensors='pt').input_ids.cuda() + with torch.no_grad(): + hidden_states = model(prompt, output_hidden_states=True).hidden_states + hidden_states = torch.stack(hidden_states, dim=0).squeeze() + hidden_states = hidden_states.detach().cpu().numpy()[:, -1, :] + embed_generated_h.append(hidden_states) + embed_generated_h = np.asarray(np.stack(embed_generated_h), dtype=np.float32) + np.save(f'save_for_eval/{args.dataset_name}_hal_det/most_likely_{args.model_name}_gene_embeddings_h_layer_wise.npy', embed_generated_h) + HEADS = [f"model.layers.{i}.self_attn.head_out" for i in range(model.config.num_hidden_layers)] MLPS = [f"model.layers.{i}.mlp" for i in range(model.config.num_hidden_layers)] embed_generated_loc2 = [] @@ -421,7 +468,7 @@ def main(): # returned_results = svd_embed_score(embed_generated_wild, gt_label_wild, # 1, 11, mean=0, svd=0, weight=args.weighted_svd) # get the best hyper-parameters on validation set - returned_results = svd_embed_score(embed_generated_eval, gt_label_val, + returned_results = svd_embed_score(embed_generated_eval, gt_label_val, 1, 11, mean=1, svd=1, wei1ght=args.weighted_svd) pca_model = PCA(n_components=returned_results['k'], whiten=False).fit(embed_generated_wild[:,returned_results['best_layer'],:]) diff --git a/hal_generate.py b/hal_generate.py index df71a3f..32476af 100644 --- a/hal_generate.py +++ b/hal_generate.py @@ -1,4 +1,5 @@ import os +import time import torch import torch.nn.functional as F import evaluate @@ -26,6 +27,10 @@ API={ 'gpt-3.5-turbo':{'base_url':"https://api.agicto.cn/v1",'key':''}, 'deepseek-chat':{'base_url':"https://api.deepseek.com/v1",'key':'sk-5f06261529bb44df86d9b2fdbae1a6b5'}, 'qwen-plus':{'base_url':"https://dashscope.aliyuncs.com/compatible-mode/v1",'key':'sk-5be20597fa574155a9e56d7df1acfc7f'}, + 'step-1-8k':{'base_url':"https://api.stepfun.com/v1",'key':'2hqEtnMCWe5cugi1mAVWRZat5hydLFG8tEJWPRW5XnxglpWxRBp5W0M0dvPAFXhC3'}, + 'moonshot-v1-8k':{'base_url':"https://api.moonshot.cn/v1",'key':'sk-8zjQm3CMAI7qQUWYLgFxSCCQxCOkVfuSkRcs6kNxUZY2L4aV'}, + 'ERNIE-3.5-8K':{'base_url':"https://api.agicto.cn/v1",'key':'sk-BmLsx7BClpqtmIwxLNB5pH5lJ36WJ7GxiV3nV5PiwF7Iwauf'}, + } def seed_everything(seed: int): @@ -56,11 +61,11 @@ def main(): parser = argparse.ArgumentParser() - parser.add_argument('--model_name', type=str, default='qwen-plus') + parser.add_argument('--model_name', type=str, default='step-1-8k') parser.add_argument('--dataset_name', type=str, 
     parser.add_argument('--num_gene', type=int, default=1)
     parser.add_argument('--use_api', type=bool, default=True)
-    parser.add_argument('--most_likely', type=bool, default=False)
+    parser.add_argument('--most_likely', type=bool, default=True)
     parser.add_argument("--model_dir", type=str, default=None, help='local directory with model data')
     parser.add_argument("--instruction", type=str, default='/home/liwenyun/code/haloscope/generation/qa/qa_one-turn_instruction.txt', help='local directory of instruction file.')
     args = parser.parse_args()
@@ -216,13 +224,6 @@ def main():
                 top_p=1,
                 temperature = 1,
             )
-            # openai.ChatCompletion.create(
-            #     model=args.model_name,
-            #     messages=prompt,
-            #     temperature=1,
-            #     max_tokens=256,
-            #     top_p=1
-            # )
             hallucination_response = client.chat.completions.create(
                 model = args.model_name,
                 messages = hallucination_prompt,
@@ -254,6 +255,7 @@ def main():
             )
             decoded=response.choices[0].message.content
             hallucination_decoded=hallucination_response.choices[0].message.content
+            time.sleep(20)  # throttle: wait between successive API request pairs to avoid provider rate limits
 
             # decoded = tokenizer.decode(generated[0, prompt.shape[-1]:],
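
Usage sketch (illustrative; not part of the patch): the loops added above save one
layer-wise last-token embedding array for the truthful answers ("_t_") and one for the
hallucinated answers ("_h_"). A minimal check of how linearly separable the two sets
are, assuming the triviaqa / llama2_chat_7B names from a.sh; the logistic-regression
probe is an assumption for illustration, not the repo's svd_embed_score / PCA pipeline.

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

base = 'save_for_eval/triviaqa_hal_det'
# arrays saved by hal_det_llama.py: shape (num_answers, num_layers + 1, hidden_dim)
emb_t = np.load(f'{base}/most_likely_llama2_chat_7B_gene_embeddings_t_layer_wise.npy')
emb_h = np.load(f'{base}/most_likely_llama2_chat_7B_gene_embeddings_h_layer_wise.npy')

X = np.concatenate([emb_t, emb_h], axis=0)
y = np.concatenate([np.zeros(len(emb_t)), np.ones(len(emb_h))])  # 1 = hallucinated

layer = 16  # probe one hidden layer; sweep 0..X.shape[1]-1 to find the most separable one
X_tr, X_te, y_tr, y_te = train_test_split(X[:, layer, :], y, test_size=0.2, random_state=0)
probe = LogisticRegression(max_iter=1000).fit(X_tr, y_tr)
print(f'layer {layer} probe accuracy: {probe.score(X_te, y_te):.3f}')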