diff --git a/evaluation_function/evaluation.py b/evaluation_function/evaluation.py
index 8467d79..7ed9066 100755
--- a/evaluation_function/evaluation.py
+++ b/evaluation_function/evaluation.py
@@ -1,69 +1,53 @@
 from typing import Any
-from lf_toolkit.evaluation import Result as LFResult, Params
-
-# note: this file is a temperary workaround, if the frontend -> backend communication succeed, fix this file
-
-from .schemas import FSA#, FSAFrontend
+from lf_toolkit.evaluation import Result as LFResult
+from .schemas import FSA
 from .schemas.result import Result
 from .correction import analyze_fsa_correction
 
-# def evaluation_function(
-#     payload: Any
-# ) -> LFResult:
-#     return LFResult(
-#         is_correct=False,
-#         feedback_items=[("error", f"{payload}")]
-#     )
-
 
 def validate_fsa(value: str | dict) -> FSA:
     if isinstance(value, str):
         return FSA.model_validate_json(value)
     return FSA.model_validate(value)
 
 
-def evaluation_function(
-    response: Any,
-    answer: Any,
-    params: Params,
-) -> LFResult:
+def evaluation_function(payload: Any) -> LFResult:
     """
     Evaluate a student's FSA response against the expected answer.
 
     Args:
-        response: Student's FSA (dict with states, alphabet, transitions, etc.), since frontend constriants, this is FSAFrontend
-        answer: Expected FSA still, FSAFrontend for the same reason
-        params: Extra parameters (e.g., require_minimal)
+        payload: dict with keys 'response', 'answer', 'params' (front-end may wrap everything)
 
     Returns:
-        LFResult with is_correct and feedback
+        LFResult
     """
     try:
-        # Parse FSAs from input
-        # student_fsa_ = FSAFrontend.model_validate(response)
-        # expected_fsa_ = FSAFrontend.model_validate(answer)
+        # Extract response/answer from the payload
+        raw_response = payload.get("response") or payload.get("params", {}).get("response")
+        raw_answer = payload.get("answer") or payload.get("params", {}).get("answer")
+        params = payload.get("params", {})
 
-        # student_fsa = student_fsa_.from_flattened()
-        # expected_fsa = expected_fsa_.from_flattened()
+        if raw_response is None or raw_answer is None:
+            raise ValueError("Missing response or answer in payload")
 
-        # as a temporary workaround we assume the response and answer are all valid json strings
-        student_fsa = validate_fsa(response)
-        expected_fsa = validate_fsa(answer)
+        # Parse FSAs
+        student_fsa = validate_fsa(raw_response)
+        expected_fsa = validate_fsa(raw_answer)
+        require_minimal = params.get("require_minimal", False)
 
-
-        # Get require_minimal from params if present
-        require_minimal = params.get("require_minimal", False) if hasattr(params, "get") else False
-
-        # Run correction pipeline
+        # Run correction
         result: Result = analyze_fsa_correction(student_fsa, expected_fsa, require_minimal)
-
-        # Convert to lf_toolkit Result
+
+        # Convert to LFResult
         return LFResult(
             is_correct=result.is_correct,
             feedback_items=[("feedback", result.feedback)]
        )
-
+
     except Exception as e:
         return LFResult(
             is_correct=False,
-            feedback_items=[("error", f"Invalid FSA format: {str(e)}, received: \n\nresponse: {response}\n\n answer: {answer}, \n\nparams: {params}")]
+            feedback_items=[(
+                "error",
+                f"Invalid FSA format: {str(e)}\n\npayload received:\n{payload}"
+            )]
         )
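
For context, here is a minimal smoke-test sketch of the reworked entry point, assuming the module is importable under its repository path and that the grading front end passes a single payload dict, as the new signature expects. The toy machine and its field names ("states", "alphabet", "initial_state", "accepting_states", "transitions") are illustrative guesses, not part of this change; the real keys must match whatever the FSA schema in .schemas defines.

    # Hypothetical usage sketch -- not part of this diff.
    import json

    from evaluation_function.evaluation import evaluation_function  # assumed import path

    # Illustrative FSA dict; field names are assumptions, adjust to the real FSA schema.
    toy_fsa = {
        "states": ["q0", "q1"],
        "alphabet": ["a", "b"],
        "initial_state": "q0",
        "accepting_states": ["q1"],
        "transitions": [
            {"from": "q0", "symbol": "a", "to": "q1"},
            {"from": "q0", "symbol": "b", "to": "q0"},
            {"from": "q1", "symbol": "a", "to": "q1"},
            {"from": "q1", "symbol": "b", "to": "q0"},
        ],
    }

    payload = {
        "response": json.dumps(toy_fsa),   # validate_fsa accepts a JSON string or a dict
        "answer": toy_fsa,
        "params": {"require_minimal": False},
    }

    print(evaluation_function(payload))

Note that when validation of either FSA fails, the function now reports the whole payload in the error feedback instead of the separate response/answer/params values, which keeps the message consistent with the single-argument signature.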