google · tcconnally · Jun 15, 2026 · Jun 15, 2026 · Jun 16, 2026
diff --git a/src/google/adk/evaluation/final_response_match_v1.py b/src/google/adk/evaluation/final_response_match_v1.py
@@ -14,6 +14,7 @@
 
 from __future__ import annotations
 
+import re
 from typing import Optional
 
 from google.genai import types as genai_types
@@ -92,6 +93,29 @@ def _get_eval_status(score: float, threshold: float):
   return EvalStatus.PASSED if score >= threshold else EvalStatus.FAILED
 
 
+class _UnicodeTokenizer:
+  """Tokenizer for RougeScorer that keeps non-ASCII text scoreable.
+
+  Lowercases text, then emits one token per character for scripts written
+  without spaces (CJK, kana, Thai, Lao, Myanmar, Khmer) and one token per
+  run of any other Unicode word characters (``\\w+``).
+
+  NOTE(review): RougeScorer seems to honor ``use_stemmer`` only in its
+  default tokenizer, so stemming may not apply here -- TODO confirm.
+  """
+
+  # Code-point ranges of scripts with no spaces between words.
+  _UNSEGMENTED = (
+      '\u0e00-\u0eff\u1000-\u109f\u1780-\u17ff\u3005-\u3007\u3040-\u30ff'
+      '\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\U00020000-\U0003134f'
+  )
+  _TOKEN_RE = re.compile(f'[{_UNSEGMENTED}]|[^\\W{_UNSEGMENTED}]+')
+
+  def tokenize(self, text: str) -> list[str]:
+    """Returns lowercased tokens of ``text``; [] if it has no word chars."""
+    return self._TOKEN_RE.findall(text.lower())
+
+
 def _calculate_rouge_1_scores(candidate: str, reference: str):
   """Calculates the ROUGE-1 score between a candidate and reference text.
 
@@ -110,7 +134,11 @@ def _calculate_rouge_1_scores(candidate: str, reference: str):
   Returns:
       A dictionary containing the ROUGE-1 precision, recall, and f-measure.
   """
-  scorer = rouge_scorer.RougeScorer(["rouge1"], use_stemmer=True)
+  scorer = rouge_scorer.RougeScorer(
+      ["rouge1"],
+      use_stemmer=True,
+      tokenizer=_UnicodeTokenizer(),
+  )
 
   # The score method returns a dictionary where keys are the ROUGE types
   # and values are Score objects (tuples) with precision, recall, and fmeasure.