Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 23 additions & 39 deletions evaluation_function/evaluation.py
Original file line number Diff line number Diff line change
@@ -1,69 +1,53 @@
from typing import Any
from lf_toolkit.evaluation import Result as LFResult, Params

# NOTE: this file is a temporary workaround; once frontend -> backend communication succeeds, fix this file

from .schemas import FSA#, FSAFrontend
from lf_toolkit.evaluation import Result as LFResult
from .schemas import FSA
from .schemas.result import Result
from .correction import analyze_fsa_correction

# def evaluation_function(
# payload: Any
# ) -> LFResult:
# return LFResult(
# is_correct=False,
# feedback_items=[("error", f"{payload}")]
# )

def validate_fsa(value: str | dict) -> FSA:
    """Build an ``FSA`` from a JSON string or from an already-parsed object.

    Args:
        value: Either a ``str`` (handed to ``FSA.model_validate_json``) or
            any other object (handed to ``FSA.model_validate``).

    Returns:
        Whatever the selected ``FSA`` validator returns.
    """
    # Choose the validator from the input type, then apply it exactly once.
    parse = FSA.model_validate_json if isinstance(value, str) else FSA.model_validate
    return parse(value)

def evaluation_function(
    payload: Any,
    answer: Any = None,
    params: Any = None,
) -> LFResult:
    """
    Evaluate a student's FSA response against the expected answer.

    Two call shapes are accepted so that both revisions of this entry point
    keep working:

    * ``evaluation_function(payload)`` -- ``payload`` is a dict-like object
      with keys 'response', 'answer' and 'params' (the front-end may wrap
      everything; 'response'/'answer' may also sit inside 'params').
    * ``evaluation_function(response, answer, params)`` -- the positional
      three-argument form; the first argument is then the response itself.

    Args:
        payload: Wrapped payload, or the student's FSA when ``answer`` is given.
        answer: Expected FSA (three-argument form only).
        params: Extra parameters, e.g. ``require_minimal`` (three-argument
            form only).

    Returns:
        LFResult with ``is_correct`` and a single feedback item. Any exception
        raised while extracting, parsing or comparing is reported as an
        "error" feedback item instead of propagating.
    """
    try:
        if answer is None and hasattr(payload, "get"):
            # Wrapped form: everything arrives inside one dict-like payload.
            # `or {}` also covers an explicit null 'params' entry.
            options = payload.get("params") or {}
            nested_get = options.get if hasattr(options, "get") else (lambda _key: None)
            raw_response = payload.get("response") or nested_get("response")
            raw_answer = payload.get("answer") or nested_get("answer")
        else:
            # Three-argument form: the first positional argument is the response.
            raw_response, raw_answer, options = payload, answer, params

        if raw_response is None or raw_answer is None:
            raise ValueError("Missing response or answer in payload")

        # Parse FSAs
        student_fsa = validate_fsa(raw_response)
        expected_fsa = validate_fsa(raw_answer)

        # Params may be absent or not dict-like; fall back to the default then.
        require_minimal = (
            options.get("require_minimal", False) if hasattr(options, "get") else False
        )

        # Run correction
        result: Result = analyze_fsa_correction(student_fsa, expected_fsa, require_minimal)

        # Convert to LFResult
        return LFResult(
            is_correct=result.is_correct,
            feedback_items=[("feedback", result.feedback)],
        )

    except Exception as e:
        # Echo back what was received, in the shape the caller used.
        if answer is None:
            received = f"payload received:\n{payload}"
        else:
            received = f"response: {payload}\n\nanswer: {answer}\n\nparams: {params}"
        return LFResult(
            is_correct=False,
            feedback_items=[(
                "error",
                f"Invalid FSA format: {str(e)}\n\n{received}",
            )],
        )
Loading