Spaces:
Build error
Build error
Commit
·
0e7922f
1
Parent(s):
ea58aa2
Add JS
Browse files- code_eval.py +2 -2
- execute.py +43 -2
code_eval.py
CHANGED
|
@@ -152,7 +152,7 @@ class CodeEval(evaluate.Metric):
|
|
| 152 |
license=_LICENSE,
|
| 153 |
)
|
| 154 |
|
| 155 |
-
def _compute(self, predictions, references, k=[1, 10, 100], num_workers=4, timeout=3.0):
|
| 156 |
"""Returns the scores"""
|
| 157 |
|
| 158 |
if os.getenv("HF_ALLOW_CODE_EVAL", 0) != "1":
|
|
@@ -170,7 +170,7 @@ class CodeEval(evaluate.Metric):
|
|
| 170 |
for task_id, (candidates, test_case) in enumerate(zip(predictions, references)):
|
| 171 |
for candidate in candidates:
|
| 172 |
test_program = candidate + "\n" + test_case
|
| 173 |
-
args = (test_program, timeout, task_id, completion_id[task_id])
|
| 174 |
future = executor.submit(check_correctness, *args)
|
| 175 |
futures.append(future)
|
| 176 |
completion_id[task_id] += 1
|
|
|
|
| 152 |
license=_LICENSE,
|
| 153 |
)
|
| 154 |
|
| 155 |
+
def _compute(self, predictions, references, k=[1, 10, 100], num_workers=4, timeout=3.0, language="python"):
|
| 156 |
"""Returns the scores"""
|
| 157 |
|
| 158 |
if os.getenv("HF_ALLOW_CODE_EVAL", 0) != "1":
|
|
|
|
| 170 |
for task_id, (candidates, test_case) in enumerate(zip(predictions, references)):
|
| 171 |
for candidate in candidates:
|
| 172 |
test_program = candidate + "\n" + test_case
|
| 173 |
+
args = (test_program, timeout, task_id, completion_id[task_id], language)
|
| 174 |
future = executor.submit(check_correctness, *args)
|
| 175 |
futures.append(future)
|
| 176 |
completion_id[task_id] += 1
|
execute.py
CHANGED
|
@@ -24,8 +24,12 @@ import platform
|
|
| 24 |
import signal
|
| 25 |
import tempfile
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
-
def check_correctness(check_program, timeout, task_id, completion_id):
|
| 29 |
"""
|
| 30 |
Evaluates the functional correctness of a completion by running the test
|
| 31 |
suite provided in the problem.
|
|
@@ -36,7 +40,8 @@ def check_correctness(check_program, timeout, task_id, completion_id):
|
|
| 36 |
manager = multiprocessing.Manager()
|
| 37 |
result = manager.list()
|
| 38 |
|
| 39 |
-
p = multiprocessing.Process(target=unsafe_execute, args=(check_program, result, timeout))
|
|
|
|
| 40 |
p.start()
|
| 41 |
p.join(timeout=timeout + 1)
|
| 42 |
if p.is_alive():
|
|
@@ -85,6 +90,42 @@ def unsafe_execute(check_program, result, timeout):
|
|
| 85 |
os.rmdir = rmdir
|
| 86 |
os.chdir = chdir
|
| 87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
|
| 89 |
@contextlib.contextmanager
|
| 90 |
def time_limit(seconds):
|
|
|
|
| 24 |
import signal
|
| 25 |
import tempfile
|
| 26 |
|
| 27 |
+
def _execute_python(check_program, result, timeout):
    """Forwards to `unsafe_execute`, which is looked up only when called."""
    unsafe_execute(check_program, result, timeout)


def _execute_javascript(check_program, result, timeout):
    """Forwards to `unsafe_execute_js`, which is looked up only when called."""
    unsafe_execute_js(check_program, result, timeout)


# Maps the `language` argument of `check_correctness` to the function that
# runs a test program written in that language.
#
# This table is built at import time, above the definitions of the executor
# functions. Referencing `unsafe_execute` / `unsafe_execute_js` directly here
# would raise NameError while the module is still loading, so the entries are
# small module-level forwarders that resolve the real executors at call time.
# They are plain named functions (not lambdas) so they can still be handed to
# `multiprocessing.Process(target=...)`.
LANGUAGE_TO_FUNC = {
    "python": _execute_python,
    "javascript": _execute_javascript,
}
|
| 31 |
|
| 32 |
+
def check_correctness(check_program, timeout, task_id, completion_id, language):
|
| 33 |
"""
|
| 34 |
Evaluates the functional correctness of a completion by running the test
|
| 35 |
suite provided in the problem.
|
|
|
|
| 40 |
manager = multiprocessing.Manager()
|
| 41 |
result = manager.list()
|
| 42 |
|
| 43 |
+
p = multiprocessing.Process(target=LANGUAGE_TO_FUNC[language], args=(check_program, result, timeout))
|
| 44 |
+
|
| 45 |
p.start()
|
| 46 |
p.join(timeout=timeout + 1)
|
| 47 |
if p.is_alive():
|
|
|
|
| 90 |
os.rmdir = rmdir
|
| 91 |
os.chdir = chdir
|
| 92 |
|
| 93 |
+
def unsafe_execute_js(check_program, result, timeout):
    """
    Runs a JavaScript test program with Node.js and records the outcome.

    Args:
        check_program: JavaScript source text; it is written to `test.js`
            and run with `node`.
        result: list-like object with `append`; one status string is added
            to it: "passed", "timed out" or "failed: <details>".
        timeout: time budget in seconds, applied both through `time_limit`
            and as the `subprocess.run` timeout.

    A run counts as passed only when node writes nothing to stderr and
    nothing to stdout; any output is reported as the failure details.
    """
    # Imported here so the function does not rely on a module-level import.
    import subprocess

    # NOTE(review): presumably create_tempdir() makes a fresh scratch
    # directory the current working directory, which is why the script is
    # written with a relative path — confirm against its definition.
    with create_tempdir():
        # Node reads source files as UTF-8, so write the script as UTF-8
        # regardless of the locale, and close the file before running it.
        with open("test.js", "w", encoding="utf-8") as script:
            script.write(check_program)

        # Run program.
        try:
            with time_limit(timeout):
                exec_result = subprocess.run(["node", "test.js"], timeout=timeout, capture_output=True)
        except (TimeoutException, subprocess.TimeoutExpired):
            # Either guard may fire first: time_limit raises TimeoutException,
            # subprocess.run raises TimeoutExpired after killing node.
            result.append("timed out")
        except OSError as exc:
            # e.g. the `node` executable is missing or cannot be started.
            result.append(f"failed: {exc}")
        else:
            # stderr takes precedence over stdout when both have content.
            output = exec_result.stderr or exec_result.stdout
            if output:
                # Replace undecodable bytes rather than crash on odd output.
                err = output.decode(errors="replace")
                result.append(f"failed: {err}")
            else:
                result.append("passed")
|
| 128 |
+
|
| 129 |
|
| 130 |
@contextlib.contextmanager
|
| 131 |
def time_limit(seconds):
|