Skip to content

Commit 2f4fdca

Browse files
committed
Add unit tests for plot capture, upload, and error handling in evaluation function
Introduces tests for single and multiple plots, graceful handling of upload failures, and image capture in `_run_code`. Adjusts `_run_code` to append an explicit `_capture_plots()` call to the end of the generated script, replacing the previous `atexit` registration in the preamble.
1 parent 8e74cb8 commit 2f4fdca

2 files changed

Lines changed: 56 additions & 6 deletions

File tree

evaluation_function/evaluation.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313

1414
_PREAMBLE_TEMPLATE = """\
1515
import os as _os
16-
import atexit as _atexit
1716
1817
_plot_dir = {plot_dir!r}
1918
_plot_idx = [0]
@@ -27,16 +26,16 @@ def _capture_plots():
2726
_plot_idx[0] += 1
2827
_plt.figure(num).savefig(
2928
_os.path.join(_plot_dir, str(_plot_idx[0]).zfill(4) + '.png'))
30-
31-
_atexit.register(_capture_plots)
3229
"""
3330

31+
_CAPTURE_CALL = "_capture_plots()\n"
32+
3433

3534
def _run_code(code: str, stdin: str) -> tuple[str, str, bool, list[Image.Image]]:
3635
plot_dir = tempfile.mkdtemp()
3736
preamble = _PREAMBLE_TEMPLATE.format(plot_dir=plot_dir)
3837
with tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False) as f:
39-
f.write(preamble + "\n" + code)
38+
f.write(preamble + "\n" + code + "\n" + _CAPTURE_CALL)
4039
tmpfile = f.name
4140
try:
4241
proc = subprocess.run(

evaluation_function/evaluation_test.py

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
import unittest
2+
from unittest.mock import patch
23

3-
from .evaluation import evaluation_function
4+
from lf_toolkit.evaluation.image_upload import ImageUploadError
5+
6+
from .evaluation import evaluation_function, _run_code
47

58
_SQUARE_CODE = "n = int(input())\nprint(n * n)"
69
_CRASH_CODE = "raise ValueError('oops')"
@@ -54,4 +57,52 @@ def test_no_tests(self):
5457
result = evaluation_function(_SQUARE_CODE, None, {}).to_dict()
5558

5659
self.assertFalse(result["is_correct"])
57-
self.assertIn("```", result["feedback"])
60+
self.assertIn("```", result["feedback"])
61+
62+
63+
_PLOT_CODE = "import matplotlib.pyplot as plt\nplt.plot([1, 2, 3])\n"
64+
_MULTI_PLOT_CODE = (
65+
"import matplotlib.pyplot as plt\n"
66+
"plt.figure(1); plt.plot([1, 2])\n"
67+
"plt.figure(2); plt.plot([3, 4])\n"
68+
)
69+
_FAKE_URL = "https://example.com/plot.png"
70+
71+
72+
class TestImageGeneration(unittest.TestCase):
73+
74+
@patch("evaluation_function.evaluation.upload_image", return_value=_FAKE_URL)
75+
def test_single_plot_demo_mode(self, mock_upload):
76+
result = evaluation_function(_PLOT_CODE, None, {}).to_dict()
77+
mock_upload.assert_called_once()
78+
self.assertIn("![Plot 1]", result["feedback"])
79+
80+
@patch("evaluation_function.evaluation.upload_image", return_value=_FAKE_URL)
81+
def test_single_plot_passing_test(self, mock_upload):
82+
params = _params(_test("", ""))
83+
result = evaluation_function(_PLOT_CODE, None, params).to_dict()
84+
self.assertIn("![Plot 1]", result["feedback"])
85+
86+
@patch("evaluation_function.evaluation.upload_image", return_value=_FAKE_URL)
87+
def test_single_plot_failing_test(self, mock_upload):
88+
params = _params(_test("", "wrong"))
89+
result = evaluation_function(_PLOT_CODE, None, params).to_dict()
90+
self.assertIn("![Plot 1]", result["feedback"])
91+
92+
@patch("evaluation_function.evaluation.upload_image", return_value=_FAKE_URL)
93+
def test_multiple_plots(self, mock_upload):
94+
result = evaluation_function(_MULTI_PLOT_CODE, None, {}).to_dict()
95+
self.assertEqual(mock_upload.call_count, 2)
96+
self.assertIn("![Plot 1]", result["feedback"])
97+
self.assertIn("![Plot 2]", result["feedback"])
98+
99+
@patch("evaluation_function.evaluation.upload_image", side_effect=ImageUploadError)
100+
def test_upload_failure_graceful(self, mock_upload):
101+
result = evaluation_function(_PLOT_CODE, None, {}).to_dict()
102+
self.assertNotIn("![Plot", result["feedback"])
103+
104+
def test_run_code_captures_images(self):
105+
_, _, timed_out, images = _run_code(_PLOT_CODE, "")
106+
self.assertFalse(timed_out)
107+
self.assertEqual(len(images), 1)
108+
self.assertEqual(images[0].format, "PNG")

0 commit comments

Comments
 (0)