Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ ENV FUNCTION_COMMAND="python"
# Args to start the evaluation function with
ENV FUNCTION_ARGS="-m,evaluation_function.main"

# The transport to use for the RPC server
ENV FUNCTION_RPC_TRANSPORT="ipc"
# Use file-based communication interface instead of RPC
# This handles larger payloads better (shimmy writes input to file, reads output from file)
# shimmy will append input/output file paths as the last two arguments
ENV FUNCTION_INTERFACE="file"

ENV LOG_LEVEL="debug"
51 changes: 32 additions & 19 deletions evaluation_function/correction/correction.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,12 @@ def _check_minimality(fsa: FSA) -> Tuple[bool, Optional[ValidationError]]:
try:
minimized = hopcroft_minimization(fsa)
if len(minimized.states) < len(fsa.states):
diff = len(fsa.states) - len(minimized.states)
return False, ValidationError(
message=f"FSA is not minimal: has {len(fsa.states)} states but can be reduced to {len(minimized.states)}",
message=f"Your FSA works correctly, but it's not minimal! You have {len(fsa.states)} states, but only {len(minimized.states)} are needed. You could remove {diff} state(s).",
code=ErrorCode.NOT_MINIMAL,
severity="error",
suggestion="Minimize your FSA by merging equivalent states"
suggestion="Look for states that behave identically (same transitions and acceptance) - these can be merged into one"
)
return True, None
except Exception:
Expand Down Expand Up @@ -69,9 +70,11 @@ def _build_feedback(
hints = [e.suggestion for e in all_errors if e.suggestion]
if structural_info:
if structural_info.unreachable_states:
hints.append("Consider removing unreachable states")
unreachable = ", ".join(structural_info.unreachable_states)
hints.append(f"Tip: States {{{unreachable}}} can't be reached from your start state - you might want to remove them or add transitions to them")
if structural_info.dead_states:
hints.append("Dead states can never lead to acceptance")
dead = ", ".join(structural_info.dead_states)
hints.append(f"Tip: States {{{dead}}} can never lead to acceptance - this might be intentional (trap states) or a bug")

# Build language comparison
language = LanguageComparison(are_equivalent=len(equivalence_errors) == 0)
Expand All @@ -92,17 +95,20 @@ def _summarize_errors(errors: List[ValidationError]) -> str:
for error in errors:
msg = error.message.lower()
if "alphabet" in msg:
error_types.add("alphabet mismatch")
elif "state" in msg and "count" in msg:
error_types.add("state count mismatch")
elif "accepting" in msg or "incorrectly marked" in msg:
error_types.add("acceptance error")
elif "transition" in msg:
error_types.add("transition error")
error_types.add("alphabet issue")
elif "states" in msg and ("many" in msg or "few" in msg or "needed" in msg):
error_types.add("incorrect number of states")
elif "accepting" in msg or "accept" in msg:
error_types.add("accepting states issue")
elif "transition" in msg or "reading" in msg:
error_types.add("transition issue")

if error_types:
return f"Languages differ: {', '.join(error_types)}"
return f"Languages differ: {len(errors)} issue(s)"
if len(error_types) == 1:
issue = list(error_types)[0]
return f"Almost there! Your FSA has an {issue}. Check the details below."
elif error_types:
return f"Your FSA doesn't quite match the expected language. Issues found: {', '.join(error_types)}"
return f"Your FSA doesn't accept the correct language. Found {len(errors)} issue(s) to fix."


# =============================================================================
Expand Down Expand Up @@ -134,7 +140,11 @@ def analyze_fsa_correction(
# Step 1: Validate student FSA structure
student_errors = is_valid_fsa(student_fsa)
if student_errors:
summary = "FSA has structural errors"
num_errors = len(student_errors)
if num_errors == 1:
summary = "Your FSA has a structural problem that needs to be fixed first. See the details below."
else:
summary = f"Your FSA has {num_errors} structural problems that need to be fixed first. See the details below."
return Result(
is_correct=False,
feedback=summary,
Expand All @@ -146,7 +156,7 @@ def analyze_fsa_correction(
if expected_errors:
return Result(
is_correct=False,
feedback="Internal error: expected FSA is invalid"
feedback="Oops! There's an issue with the expected answer. Please contact your instructor."
)

# Step 3: Check minimality if required
Expand All @@ -162,15 +172,18 @@ def analyze_fsa_correction(
equivalence_errors = fsas_accept_same_language(student_fsa, expected_fsa)

if not equivalence_errors and not validation_errors:
# Success message with some stats
state_count = len(student_fsa.states)
feedback = f"Correct! Your FSA with {state_count} state(s) accepts exactly the right language. Well done!"
return Result(
is_correct=True,
feedback="Correct! FSA accepts the expected language.",
fsa_feedback=_build_feedback("FSA is correct", [], [], structural_info)
feedback=feedback,
fsa_feedback=_build_feedback("Your FSA is correct!", [], [], structural_info)
)

# Build result with errors
is_correct = len(equivalence_errors) == 0 and len(validation_errors) == 0
summary = _summarize_errors(equivalence_errors) if equivalence_errors else "FSA has issues"
summary = _summarize_errors(equivalence_errors) if equivalence_errors else "Your FSA has some issues to address."

return Result(
is_correct=is_correct,
Expand Down
115 changes: 112 additions & 3 deletions evaluation_function/main.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,127 @@
"""
Main entry point for the FSA evaluation function.

Supports two communication modes with shimmy:
1. File-based (recommended for large payloads): shimmy passes input/output file paths as args
2. RPC/IPC (default): Uses lf_toolkit's server for stdio/IPC communication
"""

import sys
import json
from typing import Any, Dict

from lf_toolkit import create_server, run
from lf_toolkit.evaluation import Params, Result as LFResult

from .evaluation import evaluation_function
from .preview import preview_function


def _result_to_dict(result: Any, fallback_key: str, **fallback_fields: Any) -> Dict[str, Any]:
    """Convert a handler's return value into a plain dict for the response.

    Args:
        result: Value returned by the evaluation or preview function.
        fallback_key: Key under which ``str(result)`` is stored when the
            value is neither dict-like nor exposes ``to_dict()``.
        **fallback_fields: Extra fixed fields placed before ``fallback_key``
            in the fallback dict.

    Returns:
        ``result.to_dict()`` if available, ``result`` itself if it is a dict,
        otherwise the fallback dict.
    """
    if hasattr(result, 'to_dict'):
        return result.to_dict()
    if isinstance(result, dict):
        return result
    return {**fallback_fields, fallback_key: str(result)}


def handle_file_based_communication(input_path: str, output_path: str) -> None:
    """
    Handle file-based communication with shimmy.

    Reads input JSON from input_path, dispatches on its ``command`` field
    (``eval`` or ``preview``, defaulting to ``eval``), and writes the response
    JSON to output_path. The response echoes ``$id`` (when present) and
    ``command``, and carries the outcome under ``result``.

    Any failure while processing the request - including a request whose
    top-level JSON value is not an object, or a result that cannot be
    serialized to JSON - is reported as an error ``result`` in the output file
    instead of raising. Failures to read or parse the input file itself still
    propagate to the caller.

    Args:
        input_path: Path to the input JSON file
        output_path: Path to write the output JSON file
    """
    with open(input_path, 'r', encoding='utf-8') as f:
        input_data = json.load(f)

    response_data: Dict[str, Any] = {}

    try:
        # Validate inside the try so a malformed request produces an error
        # response rather than an AttributeError with no output written.
        if not isinstance(input_data, dict):
            raise TypeError(
                f'request must be a JSON object, got {type(input_data).__name__}'
            )

        command = input_data.get('command', 'eval')
        request_id = input_data.get('$id')
        if request_id is not None:
            response_data['$id'] = request_id
        response_data['command'] = command

        if command == 'eval':
            response = input_data.get('response')
            answer = input_data.get('answer')
            params_dict = input_data.get('params', {})

            # An absent, null or empty "params" all yield default Params.
            params = Params(**params_dict) if params_dict else Params()

            result = evaluation_function(response, answer, params)
            response_data['result'] = _result_to_dict(
                result, 'feedback', is_correct=False
            )

        elif command == 'preview':
            response = input_data.get('response')
            params_dict = input_data.get('params', {})

            params = Params(**params_dict) if params_dict else Params()

            result = preview_function(response, params)
            response_data['result'] = _result_to_dict(result, 'preview')

        else:
            response_data['result'] = {
                'is_correct': False,
                'feedback': f'Unknown command: {command}'
            }

    except Exception as e:
        response_data['result'] = {
            'is_correct': False,
            'feedback': f'Error processing request: {str(e)}'
        }

    # Serialize before opening the output file: json.dump() streams into the
    # file, so an unserializable result would leave truncated JSON behind.
    try:
        payload = json.dumps(response_data, ensure_ascii=False)
    except (TypeError, ValueError) as e:
        response_data['result'] = {
            'is_correct': False,
            'feedback': f'Error processing request: {str(e)}'
        }
        payload = json.dumps(response_data, ensure_ascii=False)

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(payload)


def main():
    """
    Run the evaluation function.

    Detects communication mode based on command-line arguments:
    - If at least two arguments follow the program name and neither of the
      last two starts with '-': file-based communication, where the last two
      arguments are taken as the input and output file paths.
    - Otherwise: RPC/IPC server mode using lf_toolkit.
    """
    # shimmy passes input and output file paths as the last two arguments
    if len(sys.argv) >= 3:
        input_path = sys.argv[-2]
        output_path = sys.argv[-1]

        # Basic sanity check only: anything that looks like an option flag
        # is not treated as a path, and we fall through to server mode.
        if not input_path.startswith('-') and not output_path.startswith('-'):
            handle_file_based_communication(input_path, output_path)
            return

    # Fall back to RPC/IPC server mode
    server = create_server()

    server.eval(evaluation_function)
    server.preview(preview_function)

    run(server)


# Run only when executed as a script/module, not when imported.
if __name__ == "__main__":
    main()
Loading