From bc50a43521f017311a772f067eb8fc6f3be6960b Mon Sep 17 00:00:00 2001 From: sherwin6180 Date: Thu, 26 Jun 2025 00:27:13 -0400 Subject: [PATCH] support for Scala --- Evaluation/HumanEval/eval.sh | 2 +- Evaluation/HumanEval/human_eval/evaluation.py | 2 + Evaluation/HumanEval/human_eval/execution.py | 50 ++++++++++++++++++ .../utils/__pycache__/utils.cpython-39.pyc | Bin 3468 -> 3570 bytes Evaluation/HumanEval/utils/utils.py | 7 +++ 5 files changed, 60 insertions(+), 1 deletion(-) diff --git a/Evaluation/HumanEval/eval.sh b/Evaluation/HumanEval/eval.sh index 7a9e9ff..d1020f9 100755 --- a/Evaluation/HumanEval/eval.sh +++ b/Evaluation/HumanEval/eval.sh @@ -1,4 +1,4 @@ MODEL_NAME_OR_PATH="/scratch/shared_dir/xinyu/deepseek-1.3b" DATASET_ROOT="data/" -LANGUAGE="rust" +LANGUAGE="scala" CUDA_VISIBLE_DEVICES=1,2,3 python -m accelerate.commands.launch --config_file test_config.yaml eval_pal.py --logdir ${MODEL_NAME_OR_PATH} --language ${LANGUAGE} --dataroot ${DATASET_ROOT} diff --git a/Evaluation/HumanEval/human_eval/evaluation.py b/Evaluation/HumanEval/human_eval/evaluation.py index ef2e184..a3a30fa 100644 --- a/Evaluation/HumanEval/human_eval/evaluation.py +++ b/Evaluation/HumanEval/human_eval/evaluation.py @@ -174,6 +174,8 @@ def process_humaneval_test(sample, problems, example_test=False, is_mbpp=False, if code[:5] != "" + elif language == "scala": + test_string = code + "\n" + test return test_string diff --git a/Evaluation/HumanEval/human_eval/execution.py b/Evaluation/HumanEval/human_eval/execution.py index 14e19db..a79ac35 100644 --- a/Evaluation/HumanEval/human_eval/execution.py +++ b/Evaluation/HumanEval/human_eval/execution.py @@ -34,6 +34,14 @@ def check_correctness( """ def unsafe_execute(tmp_dir): + import os + import shutil + import tempfile + import random + import subprocess + from contextlib import redirect_stderr, redirect_stdout, suppress + from .execution import time_limit, swallow_io, create_tempdir, reliability_guard, TimeoutException + random_id = random.randint(1, 100000) if "python" in language_type.lower(): with create_tempdir(): @@ -546,6 +554,48 @@ def check_correctness( os.chdir(origin_path) shutil.rmtree(tmp_dir) + elif "scala" in language_type.lower(): + tmp_dir_scala = os.path.join(tempfile.gettempdir(), f"scala-eval-{random.randint(1, 100000)}") + os.makedirs(tmp_dir_scala, exist_ok=True) + + file_path = os.path.join(tmp_dir_scala, "Problem.scala") + + try: + with open(file_path, "w", encoding="utf-8") as f: + f.write(sample["test_code"]) + + compile_result = subprocess.run( + ["scalac", file_path], + cwd=tmp_dir_scala, + timeout=30.0, + capture_output=True + ) + + if compile_result.returncode != 0: + error_output = compile_result.stderr.decode("utf-8", "ignore") + result.append(f"failed: compilation error: {error_output}") + else: + run_result = subprocess.run( + ["scala", "-cp", ".", "Problem"], + cwd=tmp_dir_scala, + timeout=timeout, + capture_output=True + ) + + if run_result.returncode == 0: + result.append("passed") + else: + error_output = run_result.stderr.decode("utf-8", "ignore") + result.append(f"failed: {error_output}") + + except subprocess.TimeoutExpired: + result.append("timed out") + except Exception as e: + result.append(f"failed: {e}") + finally: + if os.path.exists(tmp_dir_scala): + shutil.rmtree(tmp_dir_scala) + manager = multiprocessing.Manager() result = manager.list() diff --git a/Evaluation/HumanEval/utils/__pycache__/utils.cpython-39.pyc b/Evaluation/HumanEval/utils/__pycache__/utils.cpython-39.pyc index 4c5ea486c7310b729e0a426ce0298907b2a34db6..448c3196056db8fd50a6d3636041092adf49a50f 100644 GIT binary patch delta 974 zcmZ8fy-yTD6rZ;@`*}+u#{tJT1jJj=6BChy_?ZYOf{CH>LcAO|3!a|0m)S)T4=ton zLqo;_lPlEDLJKIYER2ouFEATB3u9wpsYHFV5+TmZ{^q^+`_0bV_jaxAd)r{XuFfL3 zytlJgClZ8wmciZ?$KW(R{^t7N3}Z1yYYMBcnOSfZS78Z^8d!3bW@D^dW?ageYBAQ# zQmblKWq{VWK;~)0Nw%d%Su1NpR%7j~W0hoe*2%hXHyF$ir)NEUWoFfxLcWxDOz{ZP zdZ_v`+S-I}(2`gTPl}Jq708H@*xH#UOoUK;0a}e$h|bz`nZV2>Q}~?(Cp!+pUK62M_Yk*#H0l delta 918 zcmZ8eOHUI~6rMYszJ_UHOR+$$5Hz&|iVGtqg!o7RMbT(*!=!OC?Y*rHWu`eZC`Js4 zvLr@wVOS8v%cOY~w^=#`(5 zk8NJ+LqW(|FPWt`R@}PXZn?sT$gDuN{RpS%Gl1~vC!VjY0t}guJv0-LmxEe4flVrq zle1cRByuegPyS)HDFT}!2QAOFU1wRC@~KwDdUmweDjSrw?kKriZe6psp!v+7tDWvZmlZ;!@RLXl4vXdIm__K@9sJDgVu5@flxrD|(wW;(NOk*h|11YcSD$L7j&vxZB>dbhU z1kq_bbpcbzLmna28aUo)xLC@JEE%Qzzm38inmv{`i~^gJ4WpBzMZir2GY(PJ*W^dT zT>sDODjIC!^$i3is!3=4EWv;@CYv?WFo8cfix9^A03ceoR`a|zOp|7wfU-v}2j>5G o45_JQkFT`Fc}5HNZ_DSI$