更新 evaluation.py
This commit is contained in:
parent
ea532373ec
commit
988c37c803
|
|
@ -8,13 +8,14 @@ from openai import OpenAIError
|
||||||
|
|
||||||
|
|
||||||
client = OpenAI(
|
client = OpenAI(
|
||||||
api_key="sk-", # 在这里将 MOONSHOT_API_KEY 替换为你从 Kimi 开放平台申请的 API Key
|
api_key="sk-5be20597fa574155a9e56d7df1acfc7f", # 在这里将 MOONSHOT_API_KEY 替换为你从 Kimi 开放平台申请的 API Key
|
||||||
base_url="https://api.deepseek.com/v1",
|
base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_qa_response(model, question, answer):
|
def get_qa_response(model, question, answer):
|
||||||
|
# time.sleep(2)
|
||||||
message = [
|
message = [
|
||||||
{"role": "system", "content":"你是一个幻觉检测器。你必须根据世界知识确定问题的答案是否符合事实。你提供的答案必须是 \"YES\" or \"NO\" 并且给出你的理由"},
|
{"role": "system", "content":"你是一个幻觉检测器。你必须根据世界知识确定问题的答案是否包含与事实不符合的信息。你提供的答案必须是 \"YES\" or \"NO\" 并且给出你的理由"},
|
||||||
{"role": "user", "content":
|
{"role": "user", "content":
|
||||||
"\n\n#Question#: " + question +
|
"\n\n#Question#: " + question +
|
||||||
"\n#Answer#: " + answer +
|
"\n#Answer#: " + answer +
|
||||||
|
|
@ -32,27 +33,17 @@ def get_qa_response(model, question, answer):
|
||||||
response = res['choices'][0]['message']['content']
|
response = res['choices'][0]['message']['content']
|
||||||
else:
|
else:
|
||||||
res = client.chat.completions.create(
|
res = client.chat.completions.create(
|
||||||
model="deepseek-chat",
|
model="qwen-max",
|
||||||
messages=message,
|
messages=message
|
||||||
stream=False
|
# stream=False
|
||||||
)
|
)
|
||||||
response = res.choices[0].message.content
|
response = res.choices[0].message.content
|
||||||
break
|
break
|
||||||
except OpenAIError:
|
except OpenAIError:
|
||||||
print('openai.error.RateLimitError\nRetrying...')
|
print('openai.error.RateLimitError\nRetrying...')
|
||||||
time.sleep(60)
|
time.sleep(60)
|
||||||
except openai.error.ServiceUnavailableError:
|
return "NO"
|
||||||
print('openai.error.ServiceUnavailableError\nRetrying...')
|
|
||||||
time.sleep(20)
|
|
||||||
except openai.error.Timeout:
|
|
||||||
print('openai.error.Timeout\nRetrying...')
|
|
||||||
time.sleep(20)
|
|
||||||
except openai.error.APIError:
|
|
||||||
print('openai.error.APIError\nRetrying...')
|
|
||||||
time.sleep(20)
|
|
||||||
except openai.error.APIConnectionError:
|
|
||||||
print('openai.error.APIConnectionError\nRetrying...')
|
|
||||||
time.sleep(20)
|
|
||||||
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
@ -121,7 +112,7 @@ if __name__ == '__main__':
|
||||||
output_path = "{}/{}_results.json".format(args.task, args.model)
|
output_path = "{}/{}_results.json".format(args.task, args.model)
|
||||||
|
|
||||||
# data = "../data/{}_data.json".format(args.task)
|
# data = "../data/{}_data.json".format(args.task)
|
||||||
data="/home/leewlving/PycharmProjects/xianxing_cup3/factuality_predict.json"
|
data="/Users/liwenyun/PycharmProjects/ru/factuality_predict.json"
|
||||||
|
|
||||||
if args.task == "qa":
|
if args.task == "qa":
|
||||||
evaluation_qa_dataset(model, data, output_path)
|
evaluation_qa_dataset(model, data, output_path)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue