From f9110098164b2df31ed5717df4a46b1d7ce548b6 Mon Sep 17 00:00:00 2001
From: Yang Dejian <dejianyang@deepseek.com>
Date: Thu, 9 Nov 2023 22:48:51 +0800
Subject: [PATCH] eval_instruct.py: use full language name in prompt, bump max_new_tokens, set pad_token_id

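Build the instruction prompt with the language's full name looked up from
languge_settings instead of the raw language key, raise max_new_tokens from
512 to 1024, set pad_token_id alongside eos_token_id for greedy decoding,
and load the model with device_map="auto".
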
---
 Evaluation/HumanEval/eval_instruct.py | 28 ++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/Evaluation/HumanEval/eval_instruct.py b/Evaluation/HumanEval/eval_instruct.py
index f09a37d..71874fe 100644
--- a/Evaluation/HumanEval/eval_instruct.py
+++ b/Evaluation/HumanEval/eval_instruct.py
@@ -7,22 +7,20 @@ from tqdm import tqdm
 
 data_abs_dir = Path(__file__).parent / "data"
 
-from utils.utils import extract_generation_code
+from utils.utils import extract_generation_code, languge_settings
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from human_eval.evaluation import evaluate_functional_correctness
 
 def build_deepseekcoder_instruction(languge: str, question: str):
     return '''
-Please help me to complete the function. Use the given packages only and DO NOT refer any new package. Please return all completed function in a codeblock. 
-Here is the given code to do completion:
+Please continue to complete the function. You are not allowed to modify the given code and do the completion only. Please return all completed function in a codeblock. Here is the given code to do completion:
 ```{}
 {}
 ```
-'''.strip().format(languge.lower(), question)
-
+'''.strip().format(languge.lower(), question.strip())
 
 def generate_one(example, lang, tokenizer, model):
-    prompt = build_deepseekcoder_instruction(lang, example['prompt'])
+    prompt = build_deepseekcoder_instruction(languge_settings[lang]['full_name'], example['prompt'])
     inputs = tokenizer.apply_chat_template(
         [{'role': 'user', 'content': prompt }],
         return_tensors="pt"
@@ -33,11 +31,14 @@ def generate_one(example, lang, tokenizer, model):
 
     outputs = model.generate(
         inputs, 
-        max_new_tokens=512,
-        do_sample=False, 
-        top_p=0.95,
+        max_new_tokens=1024,
+        do_sample=False,
+        # top_p=0.95,
+        # temperature=temperature,
+        pad_token_id=stop_id,
         eos_token_id=stop_id
     )
+
     output = tokenizer.decode(outputs[0][len(inputs[0]):], skip_special_tokens=True)
     example['output'] = output
     
@@ -49,17 +50,18 @@ def generate_main(args):
     saved_path = args.output_path
     temp_dir = args.temp_dir
     os.makedirs(temp_dir, exist_ok=True)
+    problem_file = os.path.join(data_abs_dir, f"humaneval-{lang}.jsonl")
 
+    print("model", model_name_or_path)
     tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
     print("load tokenizer {} from {} over.".format(tokenizer.__class__, model_name_or_path))
     model = AutoModelForCausalLM.from_pretrained(
         model_name_or_path,
         torch_dtype=torch.bfloat16,
-        device_map="cuda"
+        device_map="auto",
+        #use_flash_attention_2=True
     )
-    
     model.eval()
-    problem_file = os.path.join(data_abs_dir, f"humaneval-{lang}.jsonl")
     examples = [json.loads(x) for x in open(problem_file) if x.strip()]
     print("Read {} examples for evaluation over.".format(len(examples)))
 
@@ -67,7 +69,7 @@ def generate_main(args):
     for ex in tqdm(examples, desc='Generating'):
         gen_example = generate_one(ex, lang, tokenizer, model)
         generated_examples.append(gen_example)
-    
+
     print("Generate all over!!!")
     with open(saved_path, 'w', encoding='utf-8') as fw:
         for ex in generated_examples:
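
Note: the patch imports languge_settings from utils.utils and reads
languge_settings[lang]['full_name'] when building the prompt. As a minimal
sketch only, here is the mapping shape that usage implies; the actual table in
utils/utils.py may carry more languages and fields.

```python
# Hypothetical sketch of the structure languge_settings is assumed to have in
# utils/utils.py; only the 'full_name' field is read by this patch, and the
# real table in the repository may differ.
languge_settings = {
    'python': {'full_name': 'Python'},
    'cpp':    {'full_name': 'C++'},
    'java':   {'full_name': 'Java'},
}

# The prompt is then built from the full language name rather than the short key, e.g.:
# build_deepseekcoder_instruction(languge_settings['python']['full_name'], example['prompt'])
```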