Skip to content

Commit 99376c1

Browse files
committed
fix(evaluation): handle None inferences in LocalEvalService
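When inference fails (e.g. MCP session drop, timeout, API error), inference_result.inferences is left as None. _evaluate_single_inference_result() then calls len(inference_result.inferences) without a None guard, which raises a TypeError. The method now returns early when inferences is None, yielding the inference result together with an EvalCaseResult whose final_eval_status is EvalStatus.NOT_EVALUATED and whose metric result lists are empty. Closes #6071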
1 parent 22adbe1 commit 99376c1

2 files changed

Lines changed: 42 additions & 0 deletions

File tree

src/google/adk/evaluation/local_eval_service.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,17 @@ async def _evaluate_single_inference_result(
260260
f' {inference_result.eval_set_id}.'
261261
)
262262

263+
if inference_result.inferences is None:
264+
return inference_result, EvalCaseResult(
265+
eval_set_file=inference_result.eval_set_id,
266+
eval_set_id=inference_result.eval_set_id,
267+
eval_id=inference_result.eval_case_id,
268+
final_eval_status=EvalStatus.NOT_EVALUATED,
269+
overall_eval_metric_results=[],
270+
eval_metric_result_per_invocation=[],
271+
session_id=inference_result.session_id or "",
272+
)
273+
263274
# Metric results for each invocation
264275
eval_metric_result_per_invocation = []
265276

tests/unittests/evaluation/test_local_eval_service.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,37 @@ async def test_evaluate_single_inference_result(
465465
assert metric_result.eval_status == EvalStatus.PASSED
466466

467467

468+
@pytest.mark.asyncio
469+
async def test_evaluate_single_inference_result_inferences_none(
470+
eval_service, mock_eval_sets_manager, mocker
471+
):
472+
inference_result = InferenceResult(
473+
app_name="test_app",
474+
eval_set_id="test_eval_set",
475+
eval_case_id="case1",
476+
inferences=None,
477+
session_id="session1",
478+
)
479+
eval_metric = EvalMetric(metric_name="fake_metric", threshold=0.5)
480+
evaluate_config = EvaluateConfig(eval_metrics=[eval_metric], parallelism=1)
481+
482+
mock_eval_case = mocker.MagicMock(spec=EvalCase)
483+
mock_eval_case.conversation = []
484+
mock_eval_case.conversation_scenario = None
485+
mock_eval_case.session_input = None
486+
mock_eval_sets_manager.get_eval_case.return_value = mock_eval_case
487+
488+
_, result = await eval_service._evaluate_single_inference_result(
489+
inference_result=inference_result, evaluate_config=evaluate_config
490+
)
491+
492+
assert isinstance(result, EvalCaseResult)
493+
assert result.eval_id == "case1"
494+
assert result.final_eval_status == EvalStatus.NOT_EVALUATED
495+
assert result.overall_eval_metric_results == []
496+
assert result.eval_metric_result_per_invocation == []
497+
498+
468499
@pytest.mark.asyncio
469500
async def test_evaluate_single_inference_result_for_conversation_scenario(
470501
eval_service, mock_eval_sets_manager, mocker

0 commit comments

Comments
 (0)