lambda-feedback · HongleiGu · Jan 27, 2026 · Jan 27, 2026
diff --git a/Dockerfile b/Dockerfile
@@ -31,7 +31,9 @@ ENV FUNCTION_COMMAND="python"
 # Args to start the evaluation function with
 ENV FUNCTION_ARGS="-m,evaluation_function.main"
 
-# The transport to use for the RPC server
-ENV FUNCTION_RPC_TRANSPORT="ipc"
+# Use file-based communication interface instead of RPC
+# This handles larger payloads better (shimmy writes input to file, reads output from file)
+# shimmy will append input/output file paths as the last two arguments
+ENV FUNCTION_INTERFACE="file"
 
 ENV LOG_LEVEL="debug"
diff --git a/evaluation_function/correction/correction.py b/evaluation_function/correction/correction.py
@@ -33,11 +33,12 @@ def _check_minimality(fsa: FSA) -> Tuple[bool, Optional[ValidationError]]:
     try:
         minimized = hopcroft_minimization(fsa)
         if len(minimized.states) < len(fsa.states):
+            diff = len(fsa.states) - len(minimized.states)
             return False, ValidationError(
-                message=f"FSA is not minimal: has {len(fsa.states)} states but can be reduced to {len(minimized.states)}",
+                message=f"Your FSA works correctly, but it's not minimal! You have {len(fsa.states)} states, but only {len(minimized.states)} are needed. You could remove {diff} state(s).",
                 code=ErrorCode.NOT_MINIMAL,
                 severity="error",
-                suggestion="Minimize your FSA by merging equivalent states"
+                suggestion="Look for states that behave identically (same transitions and acceptance) - these can be merged into one"
             )
         return True, None
     except Exception:
@@ -69,9 +70,11 @@ def _build_feedback(
     hints = [e.suggestion for e in all_errors if e.suggestion]
     if structural_info:
         if structural_info.unreachable_states:
-            hints.append("Consider removing unreachable states")
+            unreachable = ", ".join(structural_info.unreachable_states)
+            hints.append(f"Tip: States {{{unreachable}}} can't be reached from your start state - you might want to remove them or add transitions to them")
         if structural_info.dead_states:
-            hints.append("Dead states can never lead to acceptance")
+            dead = ", ".join(structural_info.dead_states)
+            hints.append(f"Tip: States {{{dead}}} can never lead to acceptance - this might be intentional (trap states) or a bug")
 
     # Build language comparison
     language = LanguageComparison(are_equivalent=len(equivalence_errors) == 0)
@@ -92,17 +95,20 @@ def _summarize_errors(errors: List[ValidationError]) -> str:
     for error in errors:
         msg = error.message.lower()
         if "alphabet" in msg:
-            error_types.add("alphabet mismatch")
-        elif "state" in msg and "count" in msg:
-            error_types.add("state count mismatch")
-        elif "accepting" in msg or "incorrectly marked" in msg:
-            error_types.add("acceptance error")
-        elif "transition" in msg:
-            error_types.add("transition error")
+            error_types.add("alphabet issue")
+        elif "states" in msg and ("many" in msg or "few" in msg or "needed" in msg):
+            error_types.add("incorrect number of states")
+        elif "accepting" in msg or "accept" in msg:
+            error_types.add("accepting states issue")
+        elif "transition" in msg or "reading" in msg:
+            error_types.add("transition issue")
 
-    if error_types:
-        return f"Languages differ: {', '.join(error_types)}"
-    return f"Languages differ: {len(errors)} issue(s)"
+    if len(error_types) == 1:
+        issue = list(error_types)[0]
+        return f"Almost there! Your FSA has an {issue}. Check the details below."
+    elif error_types:
+        return f"Your FSA doesn't quite match the expected language. Issues found: {', '.join(error_types)}"
+    return f"Your FSA doesn't accept the correct language. Found {len(errors)} issue(s) to fix."
 
 
 # =============================================================================
@@ -134,7 +140,11 @@ def analyze_fsa_correction(
     # Step 1: Validate student FSA structure
     student_errors = is_valid_fsa(student_fsa)
     if student_errors:
-        summary = "FSA has structural errors"
+        num_errors = len(student_errors)
+        if num_errors == 1:
+            summary = "Your FSA has a structural problem that needs to be fixed first. See the details below."
+        else:
+            summary = f"Your FSA has {num_errors} structural problems that need to be fixed first. See the details below."
         return Result(
             is_correct=False,
             feedback=summary,
@@ -146,7 +156,7 @@ def analyze_fsa_correction(
     if expected_errors:
         return Result(
             is_correct=False,
-            feedback="Internal error: expected FSA is invalid"
+            feedback="Oops! There's an issue with the expected answer. Please contact your instructor."
         )
 
     # Step 3: Check minimality if required
@@ -162,15 +172,18 @@ def analyze_fsa_correction(
     equivalence_errors = fsas_accept_same_language(student_fsa, expected_fsa)
 
     if not equivalence_errors and not validation_errors:
+        # Success message with some stats
+        state_count = len(student_fsa.states)
+        feedback = f"Correct! Your FSA with {state_count} state(s) accepts exactly the right language. Well done!"
         return Result(
             is_correct=True,
-            feedback="Correct! FSA accepts the expected language.",
-            fsa_feedback=_build_feedback("FSA is correct", [], [], structural_info)
+            feedback=feedback,
+            fsa_feedback=_build_feedback("Your FSA is correct!", [], [], structural_info)
         )
 
     # Build result with errors
     is_correct = len(equivalence_errors) == 0 and len(validation_errors) == 0
-    summary = _summarize_errors(equivalence_errors) if equivalence_errors else "FSA has issues"
+    summary = _summarize_errors(equivalence_errors) if equivalence_errors else "Your FSA has some issues to address."
 
     return Result(
         is_correct=is_correct,

diff --git a/evaluation_function/main.py b/evaluation_function/main.py
@@ -1,18 +1,127 @@
+"""
+Main entry point for the FSA evaluation function.
+
+Supports two communication modes with shimmy:
+1. File-based (recommended for large payloads): shimmy passes input/output file paths as args
+2. RPC/IPC (default): Uses lf_toolkit's server for stdio/IPC communication
+"""
+
+import sys
+import json
+from typing import Any, Dict
 
 from lf_toolkit import create_server, run
+from lf_toolkit.evaluation import Params, Result as LFResult
 
 from .evaluation import evaluation_function
 from .preview import preview_function
 
+
+def handle_file_based_communication(input_path: str, output_path: str) -> None:
+    """
+    Serve one shimmy request through the file-based interface.
+
+    The JSON request stored at input_path is dispatched on its 'command'
+    field ('eval' when absent) to the evaluation or preview function, and
+    the JSON reply is written to output_path. A '$id' found in the request
+    is echoed back in the reply. An exception raised while handling the
+    command is reported in the reply's 'result' rather than propagated;
+    failures reading the input or writing the output are not caught here.
+
+    Args:
+        input_path: Path to the input JSON file
+        output_path: Path to write the output JSON file
+    """
+    def to_params(raw: Any) -> Any:
+        # A missing or empty 'params' entry yields a default Params object.
+        return Params(**raw) if raw else Params()
+
+    def to_payload(outcome: Any, wrap: Any) -> Any:
+        # Prefer the result's own to_dict(), then a plain dict; anything
+        # else is stringified and wrapped by the command-specific fallback.
+        if hasattr(outcome, 'to_dict'):
+            return outcome.to_dict()
+        return outcome if isinstance(outcome, dict) else wrap(str(outcome))
+
+    # Load the request
+    with open(input_path, 'r', encoding='utf-8') as source:
+        request = json.load(source)
+
+    # Extract the command name and the optional request identifier.
+    command = request.get('command', 'eval')
+    request_id = request.get('$id')
+
+    # The reply echoes '$id' (when given) ahead of the command name.
+    reply: Dict[str, Any] = {} if request_id is None else {'$id': request_id}
+    reply['command'] = command
+
+    try:
+        if command == 'eval':
+            # Pass the request's response, answer and params to evaluation_function.
+            params = to_params(request.get('params', {}))
+            outcome = evaluation_function(
+                request.get('response'),
+                request.get('answer'),
+                params,
+            )
+            reply['result'] = to_payload(
+                outcome,
+                lambda text: {'is_correct': False, 'feedback': text},
+            )
+
+        elif command == 'preview':
+            # Preview only receives the response and the params.
+            params = to_params(request.get('params', {}))
+            outcome = preview_function(request.get('response'), params)
+            reply['result'] = to_payload(
+                outcome,
+                lambda text: {'preview': text},
+            )
+
+        else:
+            # Any other command is answered with an explanatory failure.
+            reply['result'] = {
+                'is_correct': False,
+                'feedback': f'Unknown command: {command}'
+            }
+
+    except Exception as e:
+        # Surface the failure to the caller through the reply itself.
+        reply['result'] = {
+            'is_correct': False,
+            'feedback': f'Error processing request: {str(e)}'
+        }
+
+    # Persist the reply
+    with open(output_path, 'w', encoding='utf-8') as f:
+        json.dump(reply, f, ensure_ascii=False)
+
+
 def main():
-    """Run the IPC server with the evaluation and preview functions.
     """
+    Entry point: pick the communication mode and run it.
+
+    The mode is chosen from the command-line arguments:
+    - 2+ args, neither of the last two starting with '-': file-based mode,
+      treating the last two args as the input and output paths
+    - Otherwise: RPC/IPC server mode using lf_toolkit
+    """
+    # shimmy's file interface appends the input and output paths as the
+    # last two command-line arguments.
+    if len(sys.argv) >= 3:
+        input_path, output_path = sys.argv[-2:]
+
+        # Basic sanity check: neither value may look like an option flag.
+        if not (input_path.startswith('-') or output_path.startswith('-')):
+            handle_file_based_communication(input_path, output_path)
+            return
+
+    # Fall back to RPC/IPC server mode
     server = create_server()
-
     server.eval(evaluation_function)
     server.preview(preview_function)
-
     run(server)
 
+
 if __name__ == "__main__":
     main()