Add inject-based test case support to evaluation function and docs

m-messer · m-messer · commit f17d9ffe421e · 2026-05-26T08:42:59.000+01:00
Implemented support for `inject`-based test cases, allowing variables to be pre-set before student code execution instead of relying on stdin. Updated the evaluation function, added unit tests for `inject` mode, and revised documentation (`CLAUDE.md`, `docs/dev.md`, `docs/user.md`).
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -36,9 +36,16 @@ All source lives in `evaluation_function/`:
     "mode": "io_test",
     "tests": [
         {
+            # stdin-based: student code calls input()
             "input": "5\n",            # stdin fed to student code
             "expected_output": "25\n", # expected stdout
             "hidden": False            # True = suppress input/output in feedback
+        },
+        {
+            # inject-based: variables are set before student code runs (no input() needed)
+            "inject": {"n": 5},        # dict of {variable_name: value} to inject
+            "expected_output": "25\n",
+            "hidden": False
         }
     ]
 }
diff --git a/docs/dev.md b/docs/dev.md
@@ -43,21 +43,36 @@ Feedback tags produced: `output` (stdout + any plots), or `error` (timeout / run
 
 Run student code against a list of stdin/stdout test cases.
 
+Each test case uses either `input` (stdin-based) or `inject` (variable injection):
+
 ```json
 {
   "mode": "io_test",
   "tests": [
     {
-      "input":           "5\n",   // stdin fed to the process
-      "expected_output": "25\n",  // expected stdout (trailing whitespace stripped before comparison)
-      "hidden":          false    // true = suppress input/output values from feedback
+      "input":           "5\n",   // stdin — student code calls input()
+      "expected_output": "25\n",
+      "hidden":          false
+    },
+    {
+      "inject":          {"n": 5}, // variables set before student code runs — no input() needed
+      "expected_output": "25\n",
+      "hidden":          false
     }
   ]
 }
 ```
 
+| Field | Description |
+|-------|-------------|
+| `input` | Text piped to stdin. Mutually exclusive with `inject`. |
+| `inject` | Dict of `{variable_name: value}` prepended as assignments before student code. Values can be any JSON type. Mutually exclusive with `input`. |
+| `expected_output` | Expected stdout; trailing whitespace stripped before comparison. |
+| `hidden` | `true` = suppress input/variables and expected output from feedback. |
+
 - `tests` is required; an empty list sets `is_correct = true` with `0/0 tests passed`.
-- `hidden: true` replaces input/output details with `"Hidden test N: failed."` so students cannot reverse-engineer the answer.
+- `hidden: true` replaces details with `"Hidden test N: failed."` so students cannot reverse-engineer the answer.
+- With `inject`, feedback shows a "Variables:" block (e.g. `n = 5`) instead of "Input:".
 - Matplotlib figures generated during a test are uploaded to S3 and embedded in the feedback.
 
 Feedback tags produced per test: `pass`, `fail`, or `hidden_fail`. Global: `summary`, `error` (timeout / runtime error).
diff --git a/docs/user.md b/docs/user.md
@@ -44,11 +44,14 @@ Runs the student's code once per test case, feeding it a string via stdin and co
 
 ### Test case fields
 
-| Field | Required | Description |
-|-------|----------|-------------|
-| `input` | No | Text sent to the program's stdin. Use `\n` for newlines. Omit or use `""` if the program reads no input. |
-| `expected_output` | Yes | The exact stdout the program should produce. Trailing whitespace is ignored during comparison. |
-| `hidden` | No | Set to `true` to hide the input and expected output from the student. They see only "Hidden test N: passed/failed." |
+Each test case uses **either** `input` (student reads via `input()`) **or** `inject` (variables are pre-set, no `input()` needed):
+
+| Field | Description |
+|-------|-------------|
+| `input` | Text sent to stdin. Student code reads it with `input()`. Use `\n` for newlines. |
+| `inject` | Dict of variable names and values injected before student code runs. Student uses the variables directly — no `input()` required. Values can be numbers, strings, lists, or dicts. |
+| `expected_output` | The exact stdout the program should produce. Trailing whitespace is ignored. |
+| `hidden` | `true` = hide the input/variables and expected output from the student. They see only "Hidden test N: passed/failed." |
 
 ### Tips
 
@@ -57,7 +60,7 @@ Runs the student's code once per test case, feeding it a string via stdin and co
 - Matplotlib figures produced during a passing or failing test are shown to the student.
 - A 25-second per-test timeout applies; timed-out tests count as failures.
 
-### Example — square a number
+### Example — square a number (stdin-based)
 
 Student code:
 ```python
@@ -77,6 +80,27 @@ Params:
 }
 ```
 
+### Example — square a number (inject-based)
+
+Use `inject` when students shouldn't need to handle input themselves — they use the named variables directly and print the result:
+
+Student code:
+```python
+print(n * n)
+```
+
+Params:
+```json
+{
+  "mode": "io_test",
+  "tests": [
+    { "inject": {"n": 5},  "expected_output": "25\n" },
+    { "inject": {"n": 0},  "expected_output": "0\n"  },
+    { "inject": {"n": -3}, "expected_output": "9\n", "hidden": true }
+  ]
+}
+```
+
 ---
 
 ## Mode: `unit_test`
diff --git a/evaluation_function/evaluation.py b/evaluation_function/evaluation.py
@@ -138,11 +138,22 @@ def _evaluate_io(response: str, tests: list, result: Result) -> Result:
     passed = 0
 
     for i, test in enumerate(tests, 1):
+        inject = test.get("inject")
         stdin = test.get("input", "")
         expected = test.get("expected_output", "").rstrip()
         hidden = test.get("hidden", False)
 
-        stdout, stderr, timed_out, images = _run_code(response, stdin)
+        if inject:
+            prefix = "".join(f"{k} = {v!r}\n" for k, v in inject.items())
+            run_code = prefix + response
+            run_stdin = ""
+            input_block = _code_block("Variables", "\n".join(f"{k} = {v!r}" for k, v in inject.items()))
+        else:
+            run_code = response
+            run_stdin = stdin
+            input_block = _code_block("Input", stdin.rstrip()) if stdin.strip() else None
+
+        stdout, stderr, timed_out, images = _run_code(run_code, run_stdin)
         actual = stdout.rstrip()
         label = f"Hidden test {i}" if hidden else f"Test {i}"
 
@@ -155,8 +166,8 @@ def _evaluate_io(response: str, tests: list, result: Result) -> Result:
                 result.add_feedback(tag, f"{label}: runtime error.")
             else:
                 parts = [f"{label}: runtime error."]
-                if stdin.strip():
-                    parts.append(_code_block("Input", stdin.rstrip()))
+                if input_block:
+                    parts.append(input_block)
                 parts.append(_code_block("Error", stderr.strip()))
                 result.add_feedback(tag, "\n\n".join(parts))
         elif actual == expected:
@@ -165,8 +176,8 @@ def _evaluate_io(response: str, tests: list, result: Result) -> Result:
                 result.add_feedback("pass", f"{label}: passed.")
             else:
                 parts = [f"{label}: passed."]
-                if stdin.strip():
-                    parts.append(_code_block("Input", stdin.rstrip()))
+                if input_block:
+                    parts.append(input_block)
                 parts.append(_code_block("Output", actual or "(no output)"))
                 parts.extend(_upload_plots(images))
                 result.add_feedback("pass", "\n\n".join(parts))
@@ -176,8 +187,8 @@ def _evaluate_io(response: str, tests: list, result: Result) -> Result:
                 result.add_feedback(tag, f"{label}: failed.")
             else:
                 parts = [f"{label}: failed."]
-                if stdin.strip():
-                    parts.append(_code_block("Input", stdin.rstrip()))
+                if input_block:
+                    parts.append(input_block)
                 parts.append(_code_block("Your output", actual or "(no output)"))
                 parts.append(_code_block("Expected", expected))
                 parts.extend(_upload_plots(images))
diff --git a/evaluation_function/evaluation_test.py b/evaluation_function/evaluation_test.py
@@ -18,6 +18,10 @@ def _test(inp, expected, hidden=False):
     return {"input": inp, "expected_output": expected, "hidden": hidden}
 
 
+def _inject_test(inject, expected, hidden=False):
+    return {"inject": inject, "expected_output": expected, "hidden": hidden}
+
+
 class TestEvaluationFunction(unittest.TestCase):
 
     def test_all_pass(self):
@@ -66,6 +70,45 @@ def test_missing_mode(self):
         self.assertIn("mode", result["feedback"])
 
 
+class TestInjectMode(unittest.TestCase):
+
+    def test_inject_pass(self):
+        params = _params(_inject_test({"n": 5}, "25\n"))
+        result = evaluation_function("print(n * n)", None, params).to_dict()
+
+        self.assertTrue(result["is_correct"])
+        self.assertIn("1/1 tests passed", result["feedback"])
+        self.assertIn("Variables", result["feedback"])
+        self.assertIn("n = 5", result["feedback"])
+
+    def test_inject_fail_shows_variables(self):
+        params = _params(_inject_test({"n": 5}, "999\n"))
+        result = evaluation_function("print(n * n)", None, params).to_dict()
+
+        self.assertFalse(result["is_correct"])
+        self.assertIn("Variables", result["feedback"])
+        self.assertIn("n = 5", result["feedback"])
+
+    def test_inject_multiple_vars(self):
+        params = _params(_inject_test({"a": 3, "b": 4}, "7\n"))
+        result = evaluation_function("print(a + b)", None, params).to_dict()
+
+        self.assertTrue(result["is_correct"])
+
+    def test_inject_hidden_suppresses_variables(self):
+        params = _params(_inject_test({"n": 5}, "999\n", hidden=True))
+        result = evaluation_function("print(n * n)", None, params).to_dict()
+
+        self.assertFalse(result["is_correct"])
+        self.assertNotIn("n = 5", result["feedback"])
+
+    def test_inject_string_value(self):
+        params = _params(_inject_test({"name": "Alice"}, "Hello, Alice\n"))
+        result = evaluation_function('print(f"Hello, {name}")', None, params).to_dict()
+
+        self.assertTrue(result["is_correct"])
+
+
 _PLOT_CODE = "import matplotlib.pyplot as plt\nplt.plot([1, 2, 3])\n"
 _MULTI_PLOT_CODE = (
     "import matplotlib.pyplot as plt\n"

Original file line number	Diff line number	Diff line change
@@ -36,9 +36,16 @@ All source lives in `evaluation_function/`:
`36`	`36`	`"mode": "io_test",`
`37`	`37`	`"tests": [`
`38`	`38`	`{`
	`39`	`+ # stdin-based: student code calls input()`
`39`	`40`	`"input": "5\n", # stdin fed to student code`
`40`	`41`	`"expected_output": "25\n", # expected stdout`
`41`	`42`	`"hidden": False # True = suppress input/output in feedback`
	`43`	`+ },`
	`44`	`+ {`
	`45`	`+ # inject-based: variables are set before student code runs (no input() needed)`
	`46`	`+ "inject": {"n": 5}, # dict of {variable_name: value} to inject`
	`47`	`+ "expected_output": "25\n",`
	`48`	`+ "hidden": False`
`42`	`49`	`}`
`43`	`50`	`]`
`44`	`51`	`}`