diff --git a/Evaluation/HumanEval/human_eval/execution.py b/Evaluation/HumanEval/human_eval/execution.py
index 14e19db..2726fd5 100644
--- a/Evaluation/HumanEval/human_eval/execution.py
+++ b/Evaluation/HumanEval/human_eval/execution.py
@@ -61,7 +61,13 @@ def check_correctness(
                             # does not perform destructive actions on their host or network.
                             # Once you have read this disclaimer and taken appropriate precautions,
                             # uncomment the following line and proceed at your own risk:
-                            exec(sample["test_code"], exec_globals)
+                            # check=True turns a non-zero exit into CalledProcessError; it is
+                            # deliberately not caught here so a failing test never reaches "passed".
+                            try:
+                                subprocess.run(["python", "-c", sample["test_code"]], timeout=5, check=True)
+                            except subprocess.TimeoutExpired as e:
+                                # Route subprocess timeouts to the existing "timed out" branch.
+                                raise TimeoutException() from e
                         result.append("passed")
                 except TimeoutException:
                     result.append("timed out")
diff --git a/Evaluation/LeetCode/human_eval/execution.py b/Evaluation/LeetCode/human_eval/execution.py
index 14e19db..2726fd5 100644
--- a/Evaluation/LeetCode/human_eval/execution.py
+++ b/Evaluation/LeetCode/human_eval/execution.py
@@ -61,7 +61,13 @@ def check_correctness(
                             # does not perform destructive actions on their host or network.
                             # Once you have read this disclaimer and taken appropriate precautions,
                             # uncomment the following line and proceed at your own risk:
-                            exec(sample["test_code"], exec_globals)
+                            # check=True turns a non-zero exit into CalledProcessError; it is
+                            # deliberately not caught here so a failing test never reaches "passed".
+                            try:
+                                subprocess.run(["python", "-c", sample["test_code"]], timeout=5, check=True)
+                            except subprocess.TimeoutExpired as e:
+                                # Route subprocess timeouts to the existing "timed out" branch.
+                                raise TimeoutException() from e
                         result.append("passed")
                 except TimeoutException:
                     result.append("timed out")
diff --git a/Evaluation/MBPP/human_eval/execution.py b/Evaluation/MBPP/human_eval/execution.py
index 14e19db..2726fd5 100644
--- a/Evaluation/MBPP/human_eval/execution.py
+++ b/Evaluation/MBPP/human_eval/execution.py
@@ -61,7 +61,13 @@ def check_correctness(
                             # does not perform destructive actions on their host or network.
                             # Once you have read this disclaimer and taken appropriate precautions,
                             # uncomment the following line and proceed at your own risk:
-                            exec(sample["test_code"], exec_globals)
+                            # check=True turns a non-zero exit into CalledProcessError; it is
+                            # deliberately not caught here so a failing test never reaches "passed".
+                            try:
+                                subprocess.run(["python", "-c", sample["test_code"]], timeout=5, check=True)
+                            except subprocess.TimeoutExpired as e:
+                                # Route subprocess timeouts to the existing "timed out" branch.
+                                raise TimeoutException() from e
                         result.append("passed")
                 except TimeoutException:
                     result.append("timed out")
diff --git a/Evaluation/PAL-Math/utils/python_executor.py b/Evaluation/PAL-Math/utils/python_executor.py
index 833bc1e..bc95cee 100755
--- a/Evaluation/PAL-Math/utils/python_executor.py
+++ b/Evaluation/PAL-Math/utils/python_executor.py
@@ -34,7 +34,12 @@ class GenericRuntime:
     def exec_code(self, code_piece: str) -> None:
         if regex.search(r'(\s|^)?input\(', code_piece) or regex.search(r'(\s|^)?os.system\(', code_piece):
             raise RuntimeError()
+        # SECURITY: code_piece is run with exec(); the regex check above is only a
+        # best-effort filter, so run untrusted code in an isolated process.
+        # Keep a single namespace: with separate globals/locals, exec() scopes the
+        # code like a class body and top-level functions cannot see each other,
+        # and an empty __builtins__ also removes __import__, print, range, etc.
         exec(code_piece, self._global_vars)
 
     def eval_code(self, expr: str) -> Any:
         return eval(expr, self._global_vars)