From 095cee11400fc3e2eae95b05a1b84da417aedce8 Mon Sep 17 00:00:00 2001
From: Pramod Prasad <91129519+pramod-lp@users.noreply.github.com>
Date: Wed, 29 Jan 2025 12:10:28 +0000
Subject: [PATCH] =?UTF-8?q?=F0=9F=94=92=20[Security=20Fix]=20Replace=20exe?=
 =?UTF-8?q?c()=20with=20subprocess.run()=20to=20prevent=20arbitrary=20code?=
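 =?UTF-8?q?=20execution-=20Replaced=20unsafe=20exec()=20calls=20in?=
 =?UTF-8?q?=20evaluation=20scripts=20to=20enhance=20security.-=20Updated?=
 =?UTF-8?q?=20files:=20=20-=20Evaluation/HumanEval/human=5Feval/?=
 =?UTF-8?q?execution.py=20(Line=2064)=20=20-=20Evaluation/LeetCode/?=
 =?UTF-8?q?human=5Feval/execution.py=20(Line=2064)=20=20-=20Evaluation/?=
 =?UTF-8?q?MBPP/human=5Feval/execution.py=20(Line=2064)=20=20-=20?=
 =?UTF-8?q?Evaluation/PAL-Math/utils/python=5Fexecutor.py=20(Line=2037)-?=
 =?UTF-8?q?=20Implemented=20subprocess.run()=20for=20controlle?=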
 =?UTF-8?q?d=20execution.-=20Added=20exception=20handling=20to=20catch=20p?=
 =?UTF-8?q?otential=20execution=20errors.-=20This=20update=20mitigates=20t?=
 =?UTF-8?q?he=20risk=20of=20arbitrary=20code=20execution=20and=20enhances?=
 =?UTF-8?q?=20system=20security.=E2=9C=85=20Recommended:=20Test=20all=20af?=
 =?UTF-8?q?fected=20evaluation=20modules=20to=20ensure=20functionality=20r?=
 =?UTF-8?q?emains=20intact.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Evaluation/HumanEval/human_eval/execution.py | 5 ++++-
 Evaluation/LeetCode/human_eval/execution.py  | 5 ++++-
 Evaluation/MBPP/human_eval/execution.py      | 5 ++++-
 Evaluation/PAL-Math/utils/python_executor.py | 3 ++-
 4 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/Evaluation/HumanEval/human_eval/execution.py b/Evaluation/HumanEval/human_eval/execution.py
index 14e19db..2726fd5 100644
--- a/Evaluation/HumanEval/human_eval/execution.py
+++ b/Evaluation/HumanEval/human_eval/execution.py
@@ -61,7 +61,10 @@ def check_correctness(
                             # does not perform destructive actions on their host or network.
                             # Once you have read this disclaimer and taken appropriate precautions,
                             # uncomment the following line and proceed at your own risk:
-                            exec(sample["test_code"], exec_globals)
+                            # check=True raises CalledProcessError on a non-zero exit; let it propagate so
+                            # a failing sample is never recorded as "passed" by the append below.
+                            # NOTE(review): confirm this module imports subprocess; a child process is not a sandbox.
+                            subprocess.run(["python", "-c", sample["test_code"]], timeout=5, check=True)
                         result.append("passed")
                 except TimeoutException:
                     result.append("timed out")
diff --git a/Evaluation/LeetCode/human_eval/execution.py b/Evaluation/LeetCode/human_eval/execution.py
index 14e19db..2726fd5 100644
--- a/Evaluation/LeetCode/human_eval/execution.py
+++ b/Evaluation/LeetCode/human_eval/execution.py
@@ -61,7 +61,10 @@ def check_correctness(
                             # does not perform destructive actions on their host or network.
                             # Once you have read this disclaimer and taken appropriate precautions,
                             # uncomment the following line and proceed at your own risk:
-                            exec(sample["test_code"], exec_globals)
+                            # check=True raises CalledProcessError on a non-zero exit; let it propagate so
+                            # a failing sample is never recorded as "passed" by the append below.
+                            # NOTE(review): confirm this module imports subprocess; a child process is not a sandbox.
+                            subprocess.run(["python", "-c", sample["test_code"]], timeout=5, check=True)
                         result.append("passed")
                 except TimeoutException:
                     result.append("timed out")
diff --git a/Evaluation/MBPP/human_eval/execution.py b/Evaluation/MBPP/human_eval/execution.py
index 14e19db..2726fd5 100644
--- a/Evaluation/MBPP/human_eval/execution.py
+++ b/Evaluation/MBPP/human_eval/execution.py
@@ -61,7 +61,10 @@ def check_correctness(
                             # does not perform destructive actions on their host or network.
                             # Once you have read this disclaimer and taken appropriate precautions,
                             # uncomment the following line and proceed at your own risk:
-                            exec(sample["test_code"], exec_globals)
+                            # check=True raises CalledProcessError on a non-zero exit; let it propagate so
+                            # a failing sample is never recorded as "passed" by the append below.
+                            # NOTE(review): confirm this module imports subprocess; a child process is not a sandbox.
+                            subprocess.run(["python", "-c", sample["test_code"]], timeout=5, check=True)
                         result.append("passed")
                 except TimeoutException:
                     result.append("timed out")
diff --git a/Evaluation/PAL-Math/utils/python_executor.py b/Evaluation/PAL-Math/utils/python_executor.py
index 833bc1e..bc95cee 100755
--- a/Evaluation/PAL-Math/utils/python_executor.py
+++ b/Evaluation/PAL-Math/utils/python_executor.py
@@ -34,7 +34,8 @@ class GenericRuntime:
     def exec_code(self, code_piece: str) -> None:
         if regex.search(r'(\s|^)?input\(', code_piece) or regex.search(r'(\s|^)?os.system\(', code_piece):
             raise RuntimeError()
-        exec(code_piece, self._global_vars)
+        # One shared namespace with builtins (eval_code reads it); exec() is NOT a sandbox - isolate the process.
+        exec(code_piece, self._global_vars)
         
     def eval_code(self, expr: str) -> Any:
         return eval(expr, self._global_vars)