From ce692f10e38b8e22920109eb009243de020c2c86 Mon Sep 17 00:00:00 2001
From: Pramod Prasad <91129519+pramod-lp@users.noreply.github.com>
Date: Wed, 29 Jan 2025 12:56:55 +0000
Subject: [PATCH] Optimized batch processing in generate_main() with parallel
 execution

- Replaced the sequential generation loop with ThreadPoolExecutor-based
  parallel execution.
- Applied in Evaluation/HumanEval/eval_instruct.py (around line 70) and
  Evaluation/MBPP/eval_instruct.py (around line 110).
- Improves efficiency by reducing end-to-end generation time.
- Compatible with the existing helper functions; results are unaffected.
---
 Evaluation/HumanEval/eval_instruct.py |  7 +++----
 Evaluation/MBPP/eval_instruct.py      | 13 ++++++++-----
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/Evaluation/HumanEval/eval_instruct.py b/Evaluation/HumanEval/eval_instruct.py
index 7ea317e..724516d 100644
--- a/Evaluation/HumanEval/eval_instruct.py
+++ b/Evaluation/HumanEval/eval_instruct.py
@@ -4,6 +4,7 @@ import os
 import torch
 from pathlib import Path
 from tqdm import tqdm
+from concurrent.futures import ThreadPoolExecutor
 
 data_abs_dir = Path(__file__).parent / "data"
 
@@ -66,10 +67,8 @@ def generate_main(args):
     examples = [json.loads(x) for x in open(problem_file) if x.strip()]
     print("Read {} examples for evaluation over.".format(len(examples)))
 
-    generated_examples = []
-    for ex in tqdm(examples, desc='Generating'):
-        gen_example = generate_one(ex, args.language, tokenizer, model)
-        generated_examples.append(gen_example)
+    with ThreadPoolExecutor(max_workers=8) as executor:
+        generated_examples = list(executor.map(lambda ex: generate_one(ex, args.language, tokenizer, model), examples))
 
     print("Generate all over!!!")
     with open(saved_path, 'w', encoding='utf-8') as fw:
diff --git a/Evaluation/MBPP/eval_instruct.py b/Evaluation/MBPP/eval_instruct.py
index ac76224..be462e9 100644
--- a/Evaluation/MBPP/eval_instruct.py
+++ b/Evaluation/MBPP/eval_instruct.py
@@ -5,6 +5,7 @@ import torch
 import re
 from pathlib import Path
 from tqdm import tqdm
+from concurrent.futures import ThreadPoolExecutor
 
 data_abs_dir = Path(__file__).parent / "data"
 
@@ -86,6 +87,11 @@ def generate_one(example, tokenizer, model):
     example['gpt_completion'] = output
     return convert_for_evaluation(example)
 
+def generate_and_log_code(ex, tokenizer, model, examples):
+    gen_code = generate_one(ex, tokenizer, model)
+    print("Generated {}/{}...".format(examples.index(ex) + 1, len(examples)))  # may interleave across threads
+    return gen_code
+
 def generate_main(args):
     model_name_or_path = args.model
     saved_path = args.output_path
@@ -106,11 +112,8 @@ def generate_main(args):
     examples = list(read_test_examples(problem_file))
     print("Read {} examples for evaluation over.".format(len(examples)))
 
-    generated_examples = []
-    for ex in tqdm(examples, desc='Generating'):
-        gen_example = generate_one(ex, tokenizer, model)
-        generated_examples.append(gen_example)
-        print("Generate {}/{} over...".format(len(generated_examples), len(examples)))
+    with ThreadPoolExecutor(max_workers=8) as executor:
+        generated_examples = list(executor.map(lambda ex: generate_and_log_code(ex, tokenizer, model, examples), examples))
 
     print("Generate all over!!!")
     with open(saved_path, 'w', encoding='utf-8') as fw:
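
Reviewer note (a sketch, not part of the commit): the parallel version drops
the tqdm progress bar that the sequential loops had. Because executor.map
yields results in input order, the bar can be restored by wrapping the
returned iterator. The names below (generate_one, args, tokenizer, model,
examples) are the ones already used in the HumanEval hunk; nothing else is
assumed.

    from concurrent.futures import ThreadPoolExecutor
    from tqdm import tqdm

    with ThreadPoolExecutor(max_workers=8) as executor:
        # executor.map is lazy and ordered, so tqdm advances as each example
        # finishes while the output order stays deterministic.
        results = executor.map(
            lambda ex: generate_one(ex, args.language, tokenizer, model),
            examples)
        generated_examples = list(tqdm(results, total=len(examples),
                                       desc='Generating'))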
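
Reviewer note (a hypothetical alternative, not required by the patch):
examples.index(ex) rescans the list for every finished example, which is
O(n) per call and reports the first match if two examples compare equal. A
lock-protected counter avoids both issues; this sketch reuses the MBPP names
(generate_one, tokenizer, model, examples). Whether the threads overlap at
all depends on generate_one releasing the GIL during model inference
(PyTorch kernels generally do), so the speedup is worth measuring rather
than assuming.

    import threading

    progress_lock = threading.Lock()
    progress_done = [0]  # mutable cell so worker threads can update it

    def generate_and_log_code(ex, tokenizer, model, examples):
        gen_code = generate_one(ex, tokenizer, model)
        with progress_lock:
            # The lock keeps the increment and the print together, so the
            # "Generated i/n" lines come out consistent across threads.
            progress_done[0] += 1
            print("Generated {}/{}...".format(progress_done[0], len(examples)))
        return gen_code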