Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions src/google/adk/evaluation/local_eval_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,17 @@ async def _evaluate_single_inference_result(
f' {inference_result.eval_set_id}.'
)

if inference_result.inferences is None:
return inference_result, EvalCaseResult(
eval_set_file=inference_result.eval_set_id,
eval_set_id=inference_result.eval_set_id,
eval_id=inference_result.eval_case_id,
final_eval_status=EvalStatus.NOT_EVALUATED,
overall_eval_metric_results=[],
eval_metric_result_per_invocation=[],
session_id=inference_result.session_id or "",
)

# Metric results for each invocation
eval_metric_result_per_invocation = []

Expand Down
31 changes: 31 additions & 0 deletions tests/unittests/evaluation/test_local_eval_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,37 @@ async def test_evaluate_single_inference_result(
assert metric_result.eval_status == EvalStatus.PASSED


@pytest.mark.asyncio
async def test_evaluate_single_inference_result_inferences_none(
    eval_service, mock_eval_sets_manager, mocker
):
  """Checks that an inference result without inferences is not evaluated.

  When `inferences` is None the service is expected to hand back the original
  inference result together with an `EvalCaseResult` marked `NOT_EVALUATED`,
  carrying no metric results and preserving the identifying ids.
  """
  inference_result = InferenceResult(
      app_name="test_app",
      eval_set_id="test_eval_set",
      eval_case_id="case1",
      inferences=None,
      session_id="session1",
  )
  eval_metric = EvalMetric(metric_name="fake_metric", threshold=0.5)
  evaluate_config = EvaluateConfig(eval_metrics=[eval_metric], parallelism=1)

  # The eval sets manager must resolve the eval case; presumably the service
  # looks the case up before it inspects `inferences` -- confirm against the
  # service implementation.
  mock_eval_case = mocker.MagicMock(spec=EvalCase)
  mock_eval_case.conversation = []
  mock_eval_case.conversation_scenario = None
  mock_eval_case.session_input = None
  mock_eval_sets_manager.get_eval_case.return_value = mock_eval_case

  returned_inference_result, result = (
      await eval_service._evaluate_single_inference_result(
          inference_result=inference_result, evaluate_config=evaluate_config
      )
  )

  # The input must be passed through untouched as the first tuple element.
  assert returned_inference_result is inference_result

  assert isinstance(result, EvalCaseResult)
  assert result.eval_set_id == "test_eval_set"
  assert result.eval_id == "case1"
  assert result.session_id == "session1"
  assert result.final_eval_status == EvalStatus.NOT_EVALUATED
  assert result.overall_eval_metric_results == []
  assert result.eval_metric_result_per_invocation == []


@pytest.mark.asyncio
async def test_evaluate_single_inference_result_for_conversation_scenario(
eval_service, mock_eval_sets_manager, mocker
Expand Down