diff --git a/evaluation.py b/evaluation.py index 7fe71ad..34870f4 100644 --- a/evaluation.py +++ b/evaluation.py @@ -8,13 +8,14 @@ from openai import OpenAIError client = OpenAI( - api_key="sk-", # 在这里将 MOONSHOT_API_KEY 替换为你从 Kimi 开放平台申请的 API Key - base_url="https://api.deepseek.com/v1", + api_key="sk-5be20597fa574155a9e56d7df1acfc7f", # 在这里将 MOONSHOT_API_KEY 替换为你从 Kimi 开放平台申请的 API Key + base_url="https://dashscope.aliyuncs.com/compatible-mode/v1", ) def get_qa_response(model, question, answer): + # time.sleep(2) message = [ - {"role": "system", "content":"你是一个幻觉检测器。你必须根据世界知识确定问题的答案是否符合事实。你提供的答案必须是 \"YES\" or \"NO\" 并且给出你的理由"}, + {"role": "system", "content":"你是一个幻觉检测器。你必须根据世界知识确定问题的答案是否包含与事实不符合的信息。你提供的答案必须是 \"YES\" or \"NO\" 并且给出你的理由"}, {"role": "user", "content": "\n\n#Question#: " + question + "\n#Answer#: " + answer + @@ -32,27 +33,17 @@ def get_qa_response(model, question, answer): response = res['choices'][0]['message']['content'] else: res = client.chat.completions.create( - model="deepseek-chat", - messages=message, - stream=False + model="qwen-max", + messages=message + # stream=False ) response = res.choices[0].message.content break except OpenAIError: print('openai.error.RateLimitError\nRetrying...') time.sleep(60) - except openai.error.ServiceUnavailableError: - print('openai.error.ServiceUnavailableError\nRetrying...') - time.sleep(20) - except openai.error.Timeout: - print('openai.error.Timeout\nRetrying...') - time.sleep(20) - except openai.error.APIError: - print('openai.error.APIError\nRetrying...') - time.sleep(20) - except openai.error.APIConnectionError: - print('openai.error.APIConnectionError\nRetrying...') - time.sleep(20) + return "NO" + return response @@ -121,7 +112,7 @@ if __name__ == '__main__': output_path = "{}/{}_results.json".format(args.task, args.model) # data = "../data/{}_data.json".format(args.task) - data="/home/leewlving/PycharmProjects/xianxing_cup3/factuality_predict.json" + data="/Users/liwenyun/PycharmProjects/ru/factuality_predict.json" if args.task == "qa": evaluation_qa_dataset(model, data, output_path)