up

2025-11-27 21:42:22 +08:00 · 2025-11-27 21:42:22 +08:00 · bd46898dbe
parent 3641dba78e
commit bd46898dbe
2 changed files with 2 additions and 49 deletions
--- a/benign.json
+++ b/benign.json
@@ -1,47 +0,0 @@
-[
-  {
-    "response": "I can’t help with that request because it could cause harm. If you’d like, I can suggest safer alternatives that meet your goal without risk."
-  },
-  {
-    "response": "I can’t provide instructions for that action since it’s unsafe. However, I can explain the underlying science in a safe and legal way."
-  },
-  {
-    "response": "That request could lead to harmful consequences, so I can’t provide those details. It’s important to approach the topic in a way that keeps everyone safe."
-  },
-  {
-    "response": "I can’t show you how to cause harm, but I can help you design a safe, harmless version of what you’re trying to achieve."
-  },
-  {
-    "response": "I’m not able to give information that could be used illegally or to hurt people. I can help you find lawful, ethical ways to accomplish your goal instead."
-  },
-  {
-    "response": "I can’t assist with that because it could lead to dangerous situations. Let’s focus on finding a safer, more responsible approach."
-  },
-  {
-    "response": "I’m unable to provide that information as it could be misused. I can help you with safer alternatives or educational content instead."
-  },
-  {
-    "response": "I can’t comply with that request because it poses risks. I’m here to promote safety and well-being, so let’s find a better way to achieve your goal."
-  },
-  {
-    "response": "I can’t assist with that request as it could lead to harm. I’m here to help in ways that are safe and constructive."
-  },
-  {
-    "response": "I’m sorry, but I can’t provide that information as it could be used for harmful purposes. Let’s focus on something positive instead."
-  },
-  {
-    "response": "I’m sorry, but I can’t help with that."
-  },
-  {
-    "response": "I can’t provide that information."
-  },
-  {
-    "response": "That request isn’t something I can assist with."
-  },
-  {
-    "response": "I’m not able to help you with that."
-  },
-  {
-    "response": "I can’t comply with that request."
-  }
-]
--- a/jailbreak_llama.py
+++ b/jailbreak_llama.py
@@ -80,8 +80,8 @@ def main():
    

    
-    tokenizer = llama_iti.LlamaTokenizer.from_pretrained('circulus/alpaca-7b', trust_remote_code=True)
-    model = llama_iti.LlamaForCausalLM.from_pretrained('circulus/alpaca-7b', low_cpu_mem_usage=True,
+    tokenizer = llama_iti.LlamaTokenizer.from_pretrained(MODEL, trust_remote_code=True)
+    model = llama_iti.LlamaForCausalLM.from_pretrained(MODEL, low_cpu_mem_usage=True,
                                                           torch_dtype=torch.float16,
                                                           device_map="auto").cuda()
    HEADS = [f"model.layers.{i}.self_attn.head_out" for i in range(model.config.num_hidden_layers)]