new
This commit is contained in:
parent
fd6d6b8cec
commit
9b8eca3c38
Binary file not shown.
|
|
@ -459,18 +459,6 @@ def main():
|
||||||
else:
|
else:
|
||||||
assert "Not supported dataset name!"
|
assert "Not supported dataset name!"
|
||||||
|
|
||||||
|
|
||||||
target_directory = f"save_for_eval/{args.dataset_name}/{args.model_name}_jailbreak"
|
|
||||||
benign_data, adverse_data = load_npy_shapes(target_directory, args.steer_place)
|
|
||||||
y_zero=np.zeros(len(benign_data))
|
|
||||||
y_one=np.ones(len(adverse_data))
|
|
||||||
|
|
||||||
data_embedding = np.concatenate((benign_data, adverse_data), axis=0)
|
|
||||||
gts = np.concatenate((y_zero, y_one), axis=0)
|
|
||||||
|
|
||||||
train_index, val_index, test_index=split_indices(len(data_embedding), args.train_ratio, args.val_ratio)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
|
|
@ -487,12 +475,42 @@ def main():
|
||||||
device_map="auto",
|
device_map="auto",
|
||||||
token = ''
|
token = ''
|
||||||
)
|
)
|
||||||
|
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, token = '')
|
||||||
|
|
||||||
|
prompts = []
|
||||||
|
qa_pairs = []
|
||||||
|
categories = []
|
||||||
|
|
||||||
|
length = len(dataset)
|
||||||
|
|
||||||
|
for i in tqdm(range(length)):
|
||||||
|
question = dataset[i]['query']
|
||||||
|
adversary = dataset[i]['target']
|
||||||
|
prompt = tokenizer(
|
||||||
|
f"Answer the question concisely. Q: {question}" + " A:" + adversary,
|
||||||
|
return_tensors='pt'
|
||||||
|
).input_ids.cuda()
|
||||||
|
|
||||||
|
prompts.append(prompt)
|
||||||
|
qa_pairs.append({'Question': question, 'Answer': adversary})
|
||||||
|
categories.append(1) # 1 = adverse
|
||||||
|
|
||||||
|
for i in tqdm(range(length)):
|
||||||
|
question = dataset[i]['query']
|
||||||
|
clean= dataset[i]['response']
|
||||||
|
prompt = tokenizer(
|
||||||
|
f"Answer the question concisely. Q: {question}" + " A:" + clean,
|
||||||
|
return_tensors='pt'
|
||||||
|
).input_ids.cuda()
|
||||||
|
|
||||||
|
prompts.append(prompt)
|
||||||
|
qa_pairs.append({'Question': question, 'Answer': clean})
|
||||||
|
categories.append(0) # 0 = benign
|
||||||
|
|
||||||
|
train_index, val_index, test_index=split_indices(len(prompts), args.train_ratio, args.val_ratio)
|
||||||
|
|
||||||
|
|
||||||
train_data = [data_embedding[i] for i in train_index]
|
|
||||||
train_labels = [gts[i] for i in train_index]
|
|
||||||
|
|
||||||
test_data = [data_embedding[i] for i in test_index]
|
|
||||||
test_labels = [gts[i] for i in test_index]
|
|
||||||
|
|
||||||
# ====== 6.4 冻结 LLM,只训练 SV 参数 ======
|
# ====== 6.4 冻结 LLM,只训练 SV 参数 ======
|
||||||
num_layers = model.config.num_hidden_layers
|
num_layers = model.config.num_hidden_layers
|
||||||
|
|
|
||||||
BIN
tqa_score.mat
BIN
tqa_score.mat
Binary file not shown.
Loading…
Reference in New Issue