openshift · fao89 · Jun 18, 2026
diff --git a/Makefile b/Makefile
@@ -1,7 +1,7 @@
 # Put targets here if there is a risk that a target name might conflict with a filename.
 # this list is probably overkill right now.
 # See: https://www.gnu.org/software/make/manual/html_node/Phony-Targets.html
-.PHONY: test test-unit test-e2e test-eval test-lseval-periodic images run format verify
+.PHONY: test test-unit test-e2e test-eval test-lseval-periodic test-cluster-updates images run format verify
 
 export PATH := $(HOME)/.local/bin:$(PATH)
 
@@ -100,6 +100,12 @@ test-lseval-periodic: ## Run LSEval periodic evaluation (full 797-question datas
 	uv run --extra lseval --extra evaluation pytest tests/e2e/evaluation -vv -s --durations=0 -o junit_suite_name="${SUITE_ID}" --junit-prefix="${SUITE_ID}" --junit-xml="${ARTIFACT_DIR}/junit_e2e_${SUITE_ID}.xml" \
 	--eval_out_dir ${ARTIFACT_DIR} -m lseval
 
+test-cluster-updates: ## Run cluster-updates evaluation (17 conversations, 18 test turns, 35 evaluations) - requires running OLS server with OpenAI keys
+	@echo "Running cluster-updates evaluation..."
+	@echo "Reports will be written to ${ARTIFACT_DIR}"
+	uv run --extra lseval --extra evaluation pytest tests/e2e/evaluation -vv -s --durations=0 -o junit_suite_name="${SUITE_ID}" --junit-prefix="${SUITE_ID}" --junit-xml="${ARTIFACT_DIR}/junit_e2e_${SUITE_ID}.xml" \
+	--eval_out_dir "${ARTIFACT_DIR}" -m cluster_updates
+
 coverage-report:	unit-tests-coverage-report integration-tests-coverage-report ## Export coverage reports into interactive HTML
 
 unit-tests-coverage-report:	test-unit ## Export unit test coverage report into interactive HTML

diff --git a/eval/README.md b/eval/README.md
@@ -50,17 +50,64 @@ lightspeed-eval --system-config system.yaml --eval-data eval_data_short.yaml --o
 
 # Full evaluation (797 questions)
 lightspeed-eval --system-config system.yaml --eval-data eval_data.yaml --output-dir ./results
+
+# Cluster-updates evaluation (17 conversations, 18 test turns) - uses optimized config
+lightspeed-eval --system-config system_cluster_updates.yaml \
+                --eval-data eval_data_cluster_updates.yaml \
+                --output-dir ./results
+
+# Run specific cluster-updates test category (e.g., critical tests)
+lightspeed-eval --system-config system_cluster_updates.yaml \
+                --eval-data eval_data_cluster_updates.yaml \
+                --tags cluster-updates-critical \
+                --output-dir ./results
 ```
 
 ## What's Included
 
 ### Datasets
-- **`eval_data_short.yaml`**: 10 conversations
-- **`eval_data.yaml`**: 797 conversations
-
-### Configuration
-- **`system.yaml`**: Pre-configured for OLS at `localhost:8080`
-- **Default metrics**: answer correctness
+- **`eval_data_short.yaml`**: 10 conversations (quick smoke test)
+- **`eval_data.yaml`**: 797 general OpenShift knowledge questions (conv_001-797)
+- **`eval_data_cluster_updates.yaml`**: 17 cluster-updates test conversations (conv_798-814, 18 test turns)
+
+### Test Categories (by tag)
+- **cluster-updates-scenarios**: Comprehensive health assessment with extensive constraints (conv_798-802, 5 conversations)
+- **cluster-updates-critical**: Condition status interpretation - MUST pass 100% (conv_803)
+- **cluster-updates-format**: Output format compliance (Summary + TL;DR) (conv_804)
+- **cluster-updates-blockers**: Admin-ack gates and upgrade blockers (conv_805)
+- **cluster-updates-risks**: Conditional update risk analysis (conv_806)
+- **cluster-updates-path**: Upgrade path validation (conv_807)
+- **cluster-updates-troubleshoot**: Upgrade failure diagnosis and remediation, including error extraction (conv_808)
+- **cluster-updates-conversation**: Multi-turn conversation handling (conv_809, 2 turns)
+- **cluster-updates-no-updates**: Handling clusters that are already at the latest version (conv_810)
+- **cluster-updates-channels**: Update channel understanding (conv_811)
+- **cluster-updates-mcp**: MachineConfigPool upgrade behavior (conv_812)
+- **cluster-updates-pdb**: PodDisruptionBudget impact on upgrades (conv_813)
+- **cluster-updates-eus**: Extended Update Support (EUS) upgrade paths (conv_814)
+
+### Configuration Files
+
+Two configuration files are available depending on your use case:
+
+#### `system.yaml` - Default Configuration
+- **Use for:** General OpenShift knowledge evaluation (conv_001-797)
+- **API Base:** `http://localhost:8080` (local development)
+- **Max Tokens:** 512 (standard responses)
+- **API Provider:** `openai`
+- **Metrics:** All standard metrics available (Ragas, DeepEval, custom)
+
+#### `system_cluster_updates.yaml` - Cluster-Updates Optimized
+- **Use for:** Cluster-updates evaluation (conv_798-814)
+- **API Base:** `http://localhost:8080` (same as default)
+- **Max Tokens:** 2048 (detailed cluster analysis - 4x larger for complex responses)
+- **API Provider:** `openai` (cluster-specific configuration)
+- **Output Directory:** `./results` (organized test output)
+- **Available Metrics:**
+  - `custom:answer_correctness` - Basic correctness evaluation
+  - `geval:condition_status_accuracy` - Kubernetes condition interpretation (threshold: 1.0 - CRITICAL!)
+  - `geval:output_format_compliance` - Response format validation (threshold: 0.95)
+  - `geval:technical_accuracy` - OpenShift/Kubernetes domain knowledge (threshold: 0.85)
+  - `geval:actionable_guidance` - Specific remediation steps (threshold: 0.7)
 
 
 ## Results
@@ -72,4 +126,4 @@ Results are saved in output directories:
 
 
 ## Data & Eval system setup
-Refer [Lightspeed Evaluation tool](https://github.com/lightspeed-core/lightspeed-evaluation#readme)
+Refer to the [Lightspeed Evaluation tool](https://github.com/lightspeed-core/lightspeed-evaluation#readme) README.