diff --git a/Makefile b/Makefile index 7f90924..009c5a5 100755 --- a/Makefile +++ b/Makefile @@ -219,7 +219,7 @@ test-helm: ## Test Helm charts (lint, template, validate) helm template test-release $(HELM_CHART_DIR)/ \ --set config.resourceType=nodepools \ --set config.pollInterval=10s \ - --set config.maxAgeReady=1h > /dev/null + --set config.conditions.rules[0].maxAge=1h > /dev/null @echo "Custom resource selector template OK" @echo "" @echo "All Helm chart tests passed!" diff --git a/README.md b/README.md index 5ad85c7..cd98079 100644 --- a/README.md +++ b/README.md @@ -124,8 +124,8 @@ Create a configuration file based on the examples in the `configs/` directory: | Field | Type | Default | Description | |-------|------|---------|-------------| | `poll_interval` | duration | `5s` | How often to poll the API for resource updates | -| `max_age_not_ready` | duration | `10s` | Max age interval for resources not ready | -| `max_age_ready` | duration | `30m` | Max age interval for ready resources | +| `conditions.reference_time` | string | `conditionTime(resource, "Ready")` | CEL expression for reference timestamp | +| `conditions.rules` | array | See below | CEL expression rules with name, expression, and max_age | | `hyperfleet_api.timeout` | duration | `5s` | Request timeout for API calls | | `resource_selector` | array | `[]` | Label selectors for filtering resources (enables sharding) | | `message_data` | map | `{}` | Template fields for CloudEvents data payload | @@ -229,8 +229,16 @@ This uses all defaults. Broker configuration is managed via `broker.yaml` or env ```yaml resource_type: clusters poll_interval: 5s -max_age_not_ready: 10s -max_age_ready: 30m + +conditions: + reference_time: 'conditionTime(resource, "Ready")' + rules: + - name: isReady + expression: 'status(resource, "Ready") == "True"' + max_age: 30m + - name: isNotReady + expression: 'status(resource, "Ready") == "False"' + max_age: 10s resource_selector: - label: shard diff --git a/charts/README.md b/charts/README.md index af794aa..fad2166 100644 --- a/charts/README.md +++ b/charts/README.md @@ -80,8 +80,8 @@ The following table lists the configurable parameters of the Sentinel chart and |-----------|-------------|---------| | `config.resourceType` | Resource type to watch | `clusters` | | `config.pollInterval` | Polling interval | `5s` | -| `config.maxAgeNotReady` | Max age for not ready resources | `10s` | -| `config.maxAgeReady` | Max age for ready resources | `30m` | +| `config.conditions.referenceTime` | CEL expression for reference timestamp | `conditionTime(resource, "Ready")` | +| `config.conditions.rules` | CEL expression rules with name, expression, and max_age | See values.yaml | | `config.resourceSelector` | Resource selector for sharding | See values.yaml | | `config.hyperfleetApi.baseUrl` | HyperFleet API base URL | `http://hyperfleet-api:8000` | | `config.hyperfleetApi.timeout` | API timeout | `5s` | diff --git a/charts/templates/configmap.yaml b/charts/templates/configmap.yaml index 5efa0a3..355a1a6 100644 --- a/charts/templates/configmap.yaml +++ b/charts/templates/configmap.yaml @@ -9,8 +9,14 @@ data: # Sentinel configuration resource_type: {{ .Values.config.resourceType }} poll_interval: {{ .Values.config.pollInterval }} - max_age_not_ready: {{ .Values.config.maxAgeNotReady }} - max_age_ready: {{ .Values.config.maxAgeReady }} + conditions: + reference_time: {{ .Values.config.conditions.referenceTime | quote }} + rules: + {{- range .Values.config.conditions.rules }} + - name: {{ .name | quote }} + expression: {{ .expression | quote }} + max_age: {{ .maxAge }} + {{- end }} {{- if .Values.config.resourceSelector }} # Resource selector for horizontal sharding diff --git a/charts/values.yaml b/charts/values.yaml index 8d7c0ad..854629a 100644 --- a/charts/values.yaml +++ b/charts/values.yaml @@ -81,11 +81,20 @@ config: # How often to poll the API for resource updates pollInterval: 5s - # Max age interval for resources not ready - maxAgeNotReady: 10s - - # Max age interval for ready resources - maxAgeReady: 30m + # Condition rules for decision making using CEL expressions + # Available CEL helper functions: + # condition(resource, type) → map - returns full condition map + # status(resource, type) → string - returns condition status + # conditionTime(resource, type) → string - returns last_updated_time (RFC3339) + conditions: + referenceTime: 'conditionTime(resource, "Ready")' + rules: + - name: isReady + expression: 'status(resource, "Ready") == "True"' + maxAge: 30m + - name: isNotReady + expression: 'status(resource, "Ready") == "False"' + maxAge: 10s # Resource selector for horizontal sharding # Deploy multiple Sentinel instances with different shard values diff --git a/cmd/sentinel/main.go b/cmd/sentinel/main.go index 3149d4c..31f32eb 100755 --- a/cmd/sentinel/main.go +++ b/cmd/sentinel/main.go @@ -171,7 +171,12 @@ func runServe(cfg *config.SentinelConfig, logCfg *logger.LogConfig, healthBindAd return fmt.Errorf("failed to initialize OpenAPI client: %w", err) } - decisionEngine := engine.NewDecisionEngine(cfg.MaxAgeNotReady, cfg.MaxAgeReady) + compiledConditions, err := engine.CompileConditions(cfg.Conditions) + if err != nil { + log.Errorf(ctx, "Failed to compile condition expressions: %v", err) + return fmt.Errorf("failed to compile condition expressions: %w", err) + } + decisionEngine := engine.NewDecisionEngine(compiledConditions) // Initialize broker metrics recorder // Broker metrics (messages_published_total, errors_total, etc.) are registered diff --git a/configs/dev-example.yaml b/configs/dev-example.yaml index 5111b3d..00e9d84 100644 --- a/configs/dev-example.yaml +++ b/configs/dev-example.yaml @@ -22,9 +22,22 @@ resource_type: clusters # Faster polling for development - see changes quickly. poll_interval: 2s -# Shorter max age intervals for development. -max_age_not_ready: 5s -max_age_ready: 2m +# Condition rules for development - shorter intervals for faster feedback. +# Uses CEL expressions for flexible condition matching. +# +# Available CEL helper functions: +# condition(resource, type) → map - returns full condition map +# status(resource, type) → string - returns condition status +# conditionTime(resource, type) → string - returns last_updated_time (RFC3339) +conditions: + reference_time: 'conditionTime(resource, "Ready")' + rules: + - name: isReady + expression: 'status(resource, "Ready") == "True"' + max_age: 2m + - name: isNotReady + expression: 'status(resource, "Ready") == "False"' + max_age: 5s # No resource selector - watch all resources in development. # resource_selector: [] diff --git a/configs/gcp-pubsub-example.yaml b/configs/gcp-pubsub-example.yaml index 2ebf6fc..d8570dc 100644 --- a/configs/gcp-pubsub-example.yaml +++ b/configs/gcp-pubsub-example.yaml @@ -18,13 +18,22 @@ resource_type: clusters # Accepts Go duration format: ns, us/µs, ms, s, m, h. poll_interval: 5s -# Max age interval for resources that are not ready. -# Resources in transitional states are re-checked more frequently. -max_age_not_ready: 10s - -# Max age interval for resources that are ready and stable. -# Stable resources are checked less frequently to reduce API load. -max_age_ready: 30m +# Condition rules for decision making. +# Uses CEL expressions for flexible condition matching. +# +# Available CEL helper functions: +# condition(resource, type) → map - returns full condition map +# status(resource, type) → string - returns condition status +# conditionTime(resource, type) → string - returns last_updated_time (RFC3339) +conditions: + reference_time: 'conditionTime(resource, "Ready")' + rules: + - name: isReady + expression: 'status(resource, "Ready") == "True"' + max_age: 30m # Stable resources checked less frequently + - name: isNotReady + expression: 'status(resource, "Ready") == "False"' + max_age: 10s # Transitional resources re-checked more frequently # Resource selector (optional) - filter resources by labels. # If empty or omitted, all resources of the specified type are watched. diff --git a/configs/rabbitmq-example.yaml b/configs/rabbitmq-example.yaml index d194330..04dae7a 100644 --- a/configs/rabbitmq-example.yaml +++ b/configs/rabbitmq-example.yaml @@ -18,13 +18,22 @@ resource_type: clusters # Accepts Go duration format: ns, us/µs, ms, s, m, h. poll_interval: 5s -# Max age interval for resources that are not ready. -# Resources in transitional states are re-checked more frequently. -max_age_not_ready: 10s - -# Max age interval for resources that are ready and stable. -# Stable resources are checked less frequently to reduce API load. -max_age_ready: 30m +# Condition rules for decision making. +# Uses CEL expressions for flexible condition matching. +# +# Available CEL helper functions: +# condition(resource, type) → map - returns full condition map +# status(resource, type) → string - returns condition status +# conditionTime(resource, type) → string - returns last_updated_time (RFC3339) +conditions: + reference_time: 'conditionTime(resource, "Ready")' + rules: + - name: isReady + expression: 'status(resource, "Ready") == "True"' + max_age: 30m # Stable resources checked less frequently + - name: isNotReady + expression: 'status(resource, "Ready") == "False"' + max_age: 10s # Transitional resources re-checked more frequently # Resource selector (optional) - filter resources by labels. # If empty or omitted, all resources of the specified type are watched. diff --git a/docs/sentinel-operator-guide.md b/docs/sentinel-operator-guide.md index 0cdb0f4..7e7b1eb 100644 --- a/docs/sentinel-operator-guide.md +++ b/docs/sentinel-operator-guide.md @@ -139,7 +139,7 @@ Time-based reconciliation ensures **eventual consistency** by publishing events **How It Works:** -Sentinel uses two configurable max age intervals based on the resource's status (`Ready` condition): +Sentinel uses configurable **CEL expressions** to evaluate conditions and determine max age intervals. The default configuration uses the `Ready` condition: | Resource State | Default Interval | Rationale | |----------------|------------------|----------------------------------------------------------------------------------------| @@ -150,13 +150,14 @@ Sentinel uses two configurable max age intervals based on the resource's status When the resource's `generation` matches the `Ready` condition's `ObservedGeneration` (indicating the condition reflects the current state), Sentinel checks if enough time has elapsed: -1. Calculate reference timestamp: - - If `status.last_updated` exists → use it (adapter has processed resource) - - Otherwise → use `created_time` (new resource never processed) +1. Calculate reference timestamp using the `reference_time` CEL expression: + - Default: `conditionTime(resource, "Ready")` returns the condition's `last_updated_time` + - If evaluation fails or returns zero → falls back to `created_time` -2. Determine max age interval: - - If resource is ready (`Ready` condition status == True) → use `max_age_ready` (default: 30m) - - If resource is not ready (`Ready` condition status == False) → use `max_age_not_ready` (default: 10s) +2. Determine max age interval using `conditions.rules`: + - Evaluate each rule's CEL `expression` in order against the resource + - First matching rule's `max_age` is used + - If no rule matches, uses the smallest `max_age` (most conservative fallback) 3. Calculate next event time: ```text @@ -171,13 +172,13 @@ When the resource's `generation` matches the `Ready` condition's `ObservedGenera ```mermaid graph TD - A[Determine Reference Time] --> B{last_updated exists?} - B -->|Yes| C[Use last_updated] - B -->|No| D[Use created_time] - C --> E{Resource Ready?} + A[Evaluate reference_time CEL] --> B{Evaluation succeeded?} + B -->|Yes| C[Use result as reference time] + B -->|No| D[Use created_time as fallback] + C --> E{Evaluate rules in order} D --> E - E -->|Yes| F[Max Age = 30m] - E -->|No| G[Max Age = 10s] + E -->|First match| F[Use matching rule's max_age] + E -->|No match| G[Use smallest max_age as fallback] F --> H{now >= reference + max_age?} G --> H H -->|Yes| I[Publish: max age exceeded] @@ -273,10 +274,19 @@ Sentinel uses YAML-based configuration with environment variable overrides for s # Required: Resource type to watch resource_type: clusters -# Optional: Polling and age intervals +# Optional: Polling interval poll_interval: 5s -max_age_not_ready: 10s -max_age_ready: 30m + +# Optional: Condition rules for decision making (CEL expressions) +conditions: + reference_time: 'conditionTime(resource, "Ready")' + rules: + - name: isReady + expression: 'status(resource, "Ready") == "True"' + max_age: 30m + - name: isNotReady + expression: 'status(resource, "Ready") == "False"' + max_age: 10s # Optional: Resource filtering resource_selector: @@ -318,8 +328,8 @@ These fields have sensible defaults and can be omitted: | Field | Type | Default | Description | |-------|------|---------|-------------| | `poll_interval` | duration | `5s` | How often to poll the API for resource updates | -| `max_age_not_ready` | duration | `10s` | Max age interval for not-ready resources | -| `max_age_ready` | duration | `30m` | Max age interval for ready resources | +| `conditions.reference_time` | string | `conditionTime(resource, "Ready")` | CEL expression for reference timestamp | +| `conditions.rules` | array | See defaults | CEL expression rules with name, expression, and max_age | | `hyperfleet_api.timeout` | duration | `5s` | Request timeout for API calls | | `resource_selector` | array | `[]` | Label selectors for filtering (empty = all resources) | | `message_data` | map | `{}` | CEL expressions for CloudEvents payload | @@ -563,8 +573,7 @@ Follow this checklist to ensure successful Sentinel deployment and operation. - [ ] Review and adjust polling intervals: - `poll_interval` - How often to poll the HyperFleet API (default: `5s`) - - `max_age_not_ready` - Reconciliation interval for not-ready resources (default: `10s`) - - `max_age_ready` - Reconciliation interval for ready resources (default: `30m`) + - `conditions` - CEL-based condition rules (default: Ready condition, True→30m, False→10s) - Reference: [Optional Fields](#optional-fields) **Design CloudEvents Payload** @@ -678,7 +687,7 @@ For detailed deployment guidance, see [docs/running-sentinel.md](running-sentine | Symptom | Likely Cause | Solution | |------------------------------------------------------------------------------------------------------------------------------|--------------|----------| | **Events not published, resources not found** | Resource selector mismatch | Verify `resource_selector` matches resource labels. Empty selector watches ALL resources. Check logs: `kubectl logs -n hyperfleet-system -l app.kubernetes.io/name=sentinel` | -| **Events not published, resources found but skipped** | Max age not exceeded | Normal behavior. Events publish when `generation > observed_generation` OR max age interval elapsed (`max_age_ready`: 30m, `max_age_not_ready`: 10s). | +| **Events not published, resources found but skipped** | Max age not exceeded | Normal behavior. Events publish when `generation > observed_generation` OR max age interval elapsed (configured via `conditions`). | | **API connection errors, DNS lookup fails** | Wrong service name or namespace | Verify endpoint format: `http://..svc.cluster.local:8080`. Check API is running: `kubectl get pods -n hyperfleet-system -l app=hyperfleet-api` | | **API returns 401 Unauthorized** | Missing authentication | Add auth headers to `hyperfleet_api` config if API requires authentication. | | **API returns 404 Not Found** | Wrong API version in path | Verify endpoint uses correct API version: `/api/v1/clusters` or `/api/hyperfleet/v1/clusters` | diff --git a/internal/config/config.go b/internal/config/config.go index 3aae158..b397e7f 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -20,12 +20,24 @@ type LabelSelector struct { // LabelSelectorList is a list of label selectors type LabelSelectorList []LabelSelector +// ConditionRule defines a CEL expression that, when matched, determines the max age for a resource +type ConditionRule struct { + Name string `mapstructure:"name"` + Expression string `mapstructure:"expression"` + MaxAge time.Duration `mapstructure:"max_age"` +} + +// Conditions configures CEL-based condition evaluation for the decision engine +type Conditions struct { + ReferenceTime string `mapstructure:"reference_time"` + Rules []ConditionRule `mapstructure:"rules"` +} + // SentinelConfig represents the Sentinel configuration type SentinelConfig struct { ResourceType string `mapstructure:"resource_type"` PollInterval time.Duration `mapstructure:"poll_interval"` - MaxAgeNotReady time.Duration `mapstructure:"max_age_not_ready"` - MaxAgeReady time.Duration `mapstructure:"max_age_ready"` + Conditions Conditions `mapstructure:"conditions"` ResourceSelector LabelSelectorList `mapstructure:"resource_selector"` HyperFleetAPI *HyperFleetAPIConfig `mapstructure:"hyperfleet_api"` MessageData map[string]interface{} `mapstructure:"message_data"` @@ -57,9 +69,14 @@ func (ls LabelSelectorList) ToMap() map[string]string { func NewSentinelConfig() *SentinelConfig { return &SentinelConfig{ // ResourceType is required and must be set in config file - PollInterval: 5 * time.Second, - MaxAgeNotReady: 10 * time.Second, - MaxAgeReady: 30 * time.Minute, + PollInterval: 5 * time.Second, + Conditions: Conditions{ + ReferenceTime: `conditionTime(resource, "Ready")`, + Rules: []ConditionRule{ + {Name: "isReady", Expression: `status(resource, "Ready") == "True"`, MaxAge: 30 * time.Minute}, + {Name: "isNotReady", Expression: `status(resource, "Ready") == "False"`, MaxAge: 10 * time.Second}, + }, + }, ResourceSelector: []LabelSelector{}, // Empty means watch all resources HyperFleetAPI: &HyperFleetAPIConfig{ // Endpoint is required and must be set in config file @@ -138,12 +155,24 @@ func (c *SentinelConfig) Validate() error { return fmt.Errorf("poll_interval must be positive") } - if c.MaxAgeNotReady <= 0 { - return fmt.Errorf("max_age_not_ready must be positive") + if strings.TrimSpace(c.Conditions.ReferenceTime) == "" { + return fmt.Errorf("conditions.reference_time is required") } - if c.MaxAgeReady <= 0 { - return fmt.Errorf("max_age_ready must be positive") + if len(c.Conditions.Rules) == 0 { + return fmt.Errorf("conditions.rules must have at least one rule") + } + + for i, rule := range c.Conditions.Rules { + if strings.TrimSpace(rule.Name) == "" { + return fmt.Errorf("conditions.rules[%d].name is required", i) + } + if strings.TrimSpace(rule.Expression) == "" { + return fmt.Errorf("conditions.rules[%d].expression is required", i) + } + if rule.MaxAge <= 0 { + return fmt.Errorf("conditions.rules[%d].max_age must be positive", i) + } } if c.MessageData == nil { diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 83b1549..1d01101 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -41,11 +41,19 @@ func TestLoadConfig_ValidComplete(t *testing.T) { if cfg.PollInterval != 5*time.Second { t.Errorf("Expected poll_interval 5s, got %v", cfg.PollInterval) } - if cfg.MaxAgeNotReady != 10*time.Second { - t.Errorf("Expected max_age_not_ready 10s, got %v", cfg.MaxAgeNotReady) + + // Verify conditions + if cfg.Conditions.ReferenceTime != `conditionTime(resource, "Ready")` { + t.Errorf("Expected reference_time 'conditionTime(resource, \"Ready\")', got '%s'", cfg.Conditions.ReferenceTime) + } + if len(cfg.Conditions.Rules) != 2 { + t.Fatalf("Expected 2 condition rules, got %d", len(cfg.Conditions.Rules)) } - if cfg.MaxAgeReady != 30*time.Minute { - t.Errorf("Expected max_age_ready 30m, got %v", cfg.MaxAgeReady) + if cfg.Conditions.Rules[0].Name != "isReady" || cfg.Conditions.Rules[0].MaxAge != 30*time.Minute { + t.Errorf("Expected first rule isReady/30m, got %s/%v", cfg.Conditions.Rules[0].Name, cfg.Conditions.Rules[0].MaxAge) + } + if cfg.Conditions.Rules[1].Name != "isNotReady" || cfg.Conditions.Rules[1].MaxAge != 10*time.Second { + t.Errorf("Expected second rule isNotReady/10s, got %s/%v", cfg.Conditions.Rules[1].Name, cfg.Conditions.Rules[1].MaxAge) } // Verify resource selector @@ -86,11 +94,12 @@ func TestLoadConfig_Minimal(t *testing.T) { if cfg.PollInterval != 5*time.Second { t.Errorf("Expected default poll_interval 5s, got %v", cfg.PollInterval) } - if cfg.MaxAgeNotReady != 10*time.Second { - t.Errorf("Expected default max_age_not_ready 10s, got %v", cfg.MaxAgeNotReady) + // Verify default conditions + if cfg.Conditions.ReferenceTime != `conditionTime(resource, "Ready")` { + t.Errorf("Expected default reference_time, got '%s'", cfg.Conditions.ReferenceTime) } - if cfg.MaxAgeReady != 30*time.Minute { - t.Errorf("Expected default max_age_ready 30m, got %v", cfg.MaxAgeReady) + if len(cfg.Conditions.Rules) != 2 { + t.Fatalf("Expected 2 default condition rules, got %d", len(cfg.Conditions.Rules)) } } @@ -139,14 +148,21 @@ func TestNewSentinelConfig_Defaults(t *testing.T) { if cfg.PollInterval != 5*time.Second { t.Errorf("Expected default poll_interval 5s, got %v", cfg.PollInterval) } - if cfg.MaxAgeNotReady != 10*time.Second { - t.Errorf("Expected default max_age_not_ready 10s, got %v", cfg.MaxAgeNotReady) + // Verify default conditions + if cfg.Conditions.ReferenceTime != `conditionTime(resource, "Ready")` { + t.Errorf("Expected default reference_time, got '%s'", cfg.Conditions.ReferenceTime) + } + if len(cfg.Conditions.Rules) != 2 { + t.Fatalf("Expected 2 default condition rules, got %d", len(cfg.Conditions.Rules)) } - if cfg.MaxAgeReady != 30*time.Minute { - t.Errorf("Expected default max_age_ready 30m, got %v", cfg.MaxAgeReady) + if cfg.Conditions.Rules[0].Name != "isReady" || cfg.Conditions.Rules[0].MaxAge != 30*time.Minute { + t.Errorf("Expected first default rule isReady/30m, got %s/%v", cfg.Conditions.Rules[0].Name, cfg.Conditions.Rules[0].MaxAge) + } + if cfg.Conditions.Rules[1].Name != "isNotReady" || cfg.Conditions.Rules[1].MaxAge != 10*time.Second { + t.Errorf("Expected second default rule isNotReady/10s, got %s/%v", cfg.Conditions.Rules[1].Name, cfg.Conditions.Rules[1].MaxAge) } if cfg.HyperFleetAPI.Timeout != 5*time.Second { - t.Errorf("Expected default timeout 30s, got %v", cfg.HyperFleetAPI.Timeout) + t.Errorf("Expected default timeout 5s, got %v", cfg.HyperFleetAPI.Timeout) } // Endpoint has no default - must be set in config file if cfg.HyperFleetAPI.Endpoint != "" { @@ -258,15 +274,15 @@ func TestValidate_NegativeDurations(t *testing.T) { }, }, { - name: "negative max_age_not_ready", + name: "negative conditions max_age", modifier: func(c *SentinelConfig) { - c.MaxAgeNotReady = -10 * time.Second + c.Conditions.Rules[0].MaxAge = -10 * time.Second }, }, { - name: "zero max_age_ready", + name: "zero conditions max_age", modifier: func(c *SentinelConfig) { - c.MaxAgeReady = 0 + c.Conditions.Rules[0].MaxAge = 0 }, }, } @@ -274,7 +290,9 @@ func TestValidate_NegativeDurations(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { cfg := NewSentinelConfig() + cfg.ResourceType = "clusters" cfg.HyperFleetAPI.Endpoint = "http://api.example.com" + cfg.MessageData = map[string]interface{}{"id": "resource.id"} tt.modifier(cfg) err := cfg.Validate() @@ -285,6 +303,78 @@ func TestValidate_NegativeDurations(t *testing.T) { } } +// ============================================================================ +// Conditions Validation Tests +// ============================================================================ + +func TestValidate_MissingReferenceTime(t *testing.T) { + cfg := NewSentinelConfig() + cfg.ResourceType = "clusters" + cfg.HyperFleetAPI.Endpoint = "http://api.example.com" + cfg.MessageData = map[string]interface{}{"id": "resource.id"} + cfg.Conditions.ReferenceTime = "" + + err := cfg.Validate() + if err == nil { + t.Fatal("Expected error for missing reference_time, got nil") + } + if err.Error() != "conditions.reference_time is required" { + t.Errorf("Expected 'conditions.reference_time is required' error, got: %v", err) + } +} + +func TestValidate_EmptyConditionRules(t *testing.T) { + cfg := NewSentinelConfig() + cfg.ResourceType = "clusters" + cfg.HyperFleetAPI.Endpoint = "http://api.example.com" + cfg.MessageData = map[string]interface{}{"id": "resource.id"} + cfg.Conditions.Rules = []ConditionRule{} + + err := cfg.Validate() + if err == nil { + t.Fatal("Expected error for empty rules, got nil") + } + if err.Error() != "conditions.rules must have at least one rule" { + t.Errorf("Expected 'conditions.rules must have at least one rule' error, got: %v", err) + } +} + +func TestValidate_EmptyRuleName(t *testing.T) { + cfg := NewSentinelConfig() + cfg.ResourceType = "clusters" + cfg.HyperFleetAPI.Endpoint = "http://api.example.com" + cfg.MessageData = map[string]interface{}{"id": "resource.id"} + cfg.Conditions.Rules = []ConditionRule{ + {Name: "", Expression: `status(resource, "Ready") == "True"`, MaxAge: 10 * time.Second}, + } + + err := cfg.Validate() + if err == nil { + t.Fatal("Expected error for empty rule name, got nil") + } + if !strings.Contains(err.Error(), "name is required") { + t.Errorf("Expected error about name, got: %v", err) + } +} + +func TestValidate_EmptyRuleExpression(t *testing.T) { + cfg := NewSentinelConfig() + cfg.ResourceType = "clusters" + cfg.HyperFleetAPI.Endpoint = "http://api.example.com" + cfg.MessageData = map[string]interface{}{"id": "resource.id"} + cfg.Conditions.Rules = []ConditionRule{ + {Name: "test", Expression: "", MaxAge: 10 * time.Second}, + } + + err := cfg.Validate() + if err == nil { + t.Fatal("Expected error for empty rule expression, got nil") + } + if !strings.Contains(err.Error(), "expression is required") { + t.Errorf("Expected error about expression, got: %v", err) + } +} + // ============================================================================ // Label Selector Tests // ============================================================================ diff --git a/internal/config/testdata/full-workflow.yaml b/internal/config/testdata/full-workflow.yaml index ce8c07b..ad2762d 100644 --- a/internal/config/testdata/full-workflow.yaml +++ b/internal/config/testdata/full-workflow.yaml @@ -1,7 +1,15 @@ resource_type: nodepools poll_interval: 3s -max_age_not_ready: 5s -max_age_ready: 15m + +conditions: + reference_time: 'conditionTime(resource, "Ready")' + rules: + - name: isReady + expression: 'status(resource, "Ready") == "True"' + max_age: 15m + - name: isNotReady + expression: 'status(resource, "Ready") == "False"' + max_age: 5s resource_selector: - label: cluster-id diff --git a/internal/config/testdata/message-data-blank-id.yaml b/internal/config/testdata/message-data-blank-id.yaml index 7b9c69a..aeb6c01 100644 --- a/internal/config/testdata/message-data-blank-id.yaml +++ b/internal/config/testdata/message-data-blank-id.yaml @@ -2,8 +2,17 @@ # it has a nil in the message_data.id property resource_type: clusters poll_interval: 2s -max_age_not_ready: 5s -max_age_ready: 2m + +conditions: + reference_time: 'conditionTime(resource, "Ready")' + rules: + - name: isReady + expression: 'status(resource, "Ready") == "True"' + max_age: 2m + - name: isNotReady + expression: 'status(resource, "Ready") == "False"' + max_age: 5s + hyperfleet_api: endpoint: http://localhost:8000 timeout: 10s diff --git a/internal/config/testdata/valid-complete.yaml b/internal/config/testdata/valid-complete.yaml index 62dee5f..f06699c 100644 --- a/internal/config/testdata/valid-complete.yaml +++ b/internal/config/testdata/valid-complete.yaml @@ -1,7 +1,15 @@ resource_type: clusters poll_interval: 5s -max_age_not_ready: 10s -max_age_ready: 30m + +conditions: + reference_time: 'conditionTime(resource, "Ready")' + rules: + - name: isReady + expression: 'status(resource, "Ready") == "True"' + max_age: 30m + - name: isNotReady + expression: 'status(resource, "Ready") == "False"' + max_age: 10s resource_selector: - label: region diff --git a/internal/engine/cel.go b/internal/engine/cel.go new file mode 100644 index 0000000..92f8437 --- /dev/null +++ b/internal/engine/cel.go @@ -0,0 +1,286 @@ +package engine + +import ( + "fmt" + "time" + + "github.com/google/cel-go/cel" + "github.com/google/cel-go/common/types" + "github.com/google/cel-go/common/types/ref" + "github.com/openshift-hyperfleet/hyperfleet-sentinel/internal/config" +) + +// compiledRule holds a pre-compiled CEL program for a condition rule +type compiledRule struct { + name string + program cel.Program + maxAge time.Duration +} + +// CompiledConditions holds pre-compiled CEL programs for the condition evaluation +type CompiledConditions struct { + referenceTime cel.Program + rules []compiledRule + minMaxAge time.Duration +} + +// newConditionsCELEnv creates a CEL environment with custom functions for condition evaluation. +// +// Variables: +// - resource (DynType) - the resource map +// +// Functions: +// - condition(resource, type) → map - returns the full condition map for the given type +// - status(resource, type) → string - returns the status string of a condition +// - conditionTime(resource, type) → string - returns the last_updated_time (RFC3339) of a condition +func newConditionsCELEnv() (*cel.Env, error) { + return cel.NewEnv( + cel.Variable("resource", cel.DynType), + + cel.Function("condition", + cel.Overload("condition_resource_type", + []*cel.Type{cel.DynType, cel.StringType}, + cel.DynType, + cel.BinaryBinding(celCondition), + ), + ), + + cel.Function("status", + cel.Overload("status_resource_type", + []*cel.Type{cel.DynType, cel.StringType}, + cel.StringType, + cel.BinaryBinding(celStatus), + ), + ), + + cel.Function("conditionTime", + cel.Overload("conditionTime_resource_type", + []*cel.Type{cel.DynType, cel.StringType}, + cel.StringType, + cel.BinaryBinding(celConditionTime), + ), + ), + ) +} + +// celCondition implements the condition(resource, type) CEL function. +// It finds the condition with the given type in resource.status.conditions and returns the full map. +func celCondition(lhs ref.Val, rhs ref.Val) ref.Val { + condType := string(rhs.(types.String)) + + cond, err := findCondition(lhs, condType) + if err != nil { + return types.NewErr("condition(%s): %s", condType, err) + } + + return types.DefaultTypeAdapter.NativeToValue(cond) +} + +// celStatus implements the status(resource, type) CEL function. +// It returns the status string of the condition with the given type. +func celStatus(lhs ref.Val, rhs ref.Val) ref.Val { + condType := string(rhs.(types.String)) + + cond, err := findCondition(lhs, condType) + if err != nil { + return types.NewErr("status(%s): %s", condType, err) + } + + statusVal, ok := cond["status"] + if !ok { + return types.NewErr("status(%s): condition has no status field", condType) + } + + statusStr, ok := statusVal.(string) + if !ok { + return types.NewErr("status(%s): status is not a string", condType) + } + + return types.String(statusStr) +} + +// celConditionTime implements the conditionTime(resource, type) CEL function. +// It returns the last_updated_time string of the condition with the given type. +func celConditionTime(lhs ref.Val, rhs ref.Val) ref.Val { + condType := string(rhs.(types.String)) + + cond, err := findCondition(lhs, condType) + if err != nil { + return types.NewErr("conditionTime(%s): %s", condType, err) + } + + timeVal, ok := cond["last_updated_time"] + if !ok { + return types.NewErr("conditionTime(%s): condition has no last_updated_time field", condType) + } + + timeStr, ok := timeVal.(string) + if !ok { + return types.NewErr("conditionTime(%s): last_updated_time is not a string", condType) + } + + return types.String(timeStr) +} + +// findCondition extracts the condition map matching the given type from a resource map. +func findCondition(resourceVal ref.Val, condType string) (map[string]interface{}, error) { + resourceMap, ok := resourceVal.Value().(map[string]interface{}) + if !ok { + return nil, fmt.Errorf("resource is not a map") + } + + statusVal, ok := resourceMap["status"] + if !ok { + return nil, fmt.Errorf("resource has no status field") + } + + statusMap, ok := statusVal.(map[string]interface{}) + if !ok { + return nil, fmt.Errorf("status is not a map") + } + + conditionsVal, ok := statusMap["conditions"] + if !ok { + return nil, fmt.Errorf("no conditions found") + } + + conditions, ok := conditionsVal.([]interface{}) + if !ok { + return nil, fmt.Errorf("conditions is not a list") + } + + for _, c := range conditions { + condMap, ok := c.(map[string]interface{}) + if !ok { + continue + } + if condMap["type"] == condType { + return condMap, nil + } + } + + return nil, fmt.Errorf("condition type %q not found", condType) +} + +// CompileConditions pre-compiles all CEL expressions from the conditions config. +// This should be called at startup and will fail fast on invalid expressions. +func CompileConditions(conditions config.Conditions) (*CompiledConditions, error) { + if len(conditions.Rules) == 0 { + return nil, fmt.Errorf("conditions.Rules must have at least one rule") + } + + env, err := newConditionsCELEnv() + if err != nil { + return nil, fmt.Errorf("failed to create CEL environment: %w", err) + } + + // Compile reference_time expression + refAst, issues := env.Compile(conditions.ReferenceTime) + if issues != nil && issues.Err() != nil { + return nil, fmt.Errorf("failed to compile reference_time expression %q: %w", conditions.ReferenceTime, issues.Err()) + } + // Enforce result type: reference_time must produce a string (or dyn) + if refOutType := refAst.OutputType(); !refOutType.IsEquivalentType(cel.StringType) && !refOutType.IsEquivalentType(cel.DynType) { + return nil, fmt.Errorf("reference_time expression %q must return string, got %s", conditions.ReferenceTime, refOutType) + } + refPrg, err := env.Program(refAst) + if err != nil { + return nil, fmt.Errorf("failed to create program for reference_time %q: %w", conditions.ReferenceTime, err) + } + + // Compile rule expressions + compiled := make([]compiledRule, len(conditions.Rules)) + var minAge time.Duration + for i, rule := range conditions.Rules { + if rule.MaxAge <= 0 { + return nil, fmt.Errorf("rule %q has non-positive max_age %v", rule.Name, rule.MaxAge) + } + ast, issues := env.Compile(rule.Expression) + if issues != nil && issues.Err() != nil { + return nil, fmt.Errorf("failed to compile rule %q expression %q: %w", rule.Name, rule.Expression, issues.Err()) + } + // Enforce result type: rule expressions must produce a bool (or dyn) + if outType := ast.OutputType(); !outType.IsEquivalentType(cel.BoolType) && !outType.IsEquivalentType(cel.DynType) { + return nil, fmt.Errorf("rule %q expression %q must return bool, got %s", rule.Name, rule.Expression, outType) + } + prg, err := env.Program(ast) + if err != nil { + return nil, fmt.Errorf("failed to create program for rule %q expression %q: %w", rule.Name, rule.Expression, err) + } + compiled[i] = compiledRule{ + name: rule.Name, + program: prg, + maxAge: rule.MaxAge, + } + if i == 0 || rule.MaxAge < minAge { + minAge = rule.MaxAge + } + } + + return &CompiledConditions{ + referenceTime: refPrg, + rules: compiled, + minMaxAge: minAge, + }, nil +} + +// EvalReferenceTime evaluates the reference_time expression against the resource map. +// Returns the parsed time and true if successful, or zero time and false if evaluation fails. +func (cc *CompiledConditions) EvalReferenceTime(resourceMap map[string]interface{}) (time.Time, bool) { + if cc.referenceTime == nil { + return time.Time{}, false + } + out, _, err := cc.referenceTime.Eval(map[string]interface{}{ + "resource": resourceMap, + }) + if err != nil { + return time.Time{}, false + } + + timeStr, ok := out.Value().(string) + if !ok { + return time.Time{}, false + } + + t, err := time.Parse(time.RFC3339Nano, timeStr) + if err != nil { + return time.Time{}, false + } + + return t, true +} + +// EvalMaxAge evaluates rules in order against the resource map. +// Returns the max_age of the first matching rule. +// If no rule matches, returns the smallest max_age (most conservative fallback). +func (cc *CompiledConditions) EvalMaxAge(resourceMap map[string]interface{}) time.Duration { + activation := map[string]interface{}{ + "resource": resourceMap, + } + + for _, rule := range cc.rules { + out, _, err := rule.program.Eval(activation) + if err != nil { + continue + } + if out.Value() == true { + return rule.maxAge + } + } + + return cc.minMaxAge +} + +// MinMaxAge returns the smallest max age across all rules (most conservative fallback). +func (cc *CompiledConditions) MinMaxAge() time.Duration { + return cc.minMaxAge +} + +// RuleNames returns the names of all compiled rules. +func (cc *CompiledConditions) RuleNames() []string { + names := make([]string, len(cc.rules)) + for i, r := range cc.rules { + names[i] = r.name + } + return names +} diff --git a/internal/engine/cel_test.go b/internal/engine/cel_test.go new file mode 100644 index 0000000..b73c193 --- /dev/null +++ b/internal/engine/cel_test.go @@ -0,0 +1,380 @@ +package engine + +import ( + "testing" + "time" + + "github.com/openshift-hyperfleet/hyperfleet-sentinel/internal/config" +) + +// ============================================================================ +// CEL Custom Function Tests +// ============================================================================ + +func TestCELFunctions_Status(t *testing.T) { + resourceMap := newTestResourceMap("True", time.Now()) + + env, err := newConditionsCELEnv() + if err != nil { + t.Fatalf("Failed to create CEL env: %v", err) + } + + ast, issues := env.Compile(`status(resource, "Ready")`) + if issues != nil && issues.Err() != nil { + t.Fatalf("Failed to compile: %v", issues.Err()) + } + prg, err := env.Program(ast) + if err != nil { + t.Fatalf("Failed to create program: %v", err) + } + + out, _, err := prg.Eval(map[string]interface{}{"resource": resourceMap}) + if err != nil { + t.Fatalf("Eval failed: %v", err) + } + + if out.Value() != "True" { + t.Errorf("Expected 'True', got %v", out.Value()) + } +} + +func TestCELFunctions_ConditionTime(t *testing.T) { + refTime := time.Date(2025, 6, 15, 10, 30, 0, 0, time.UTC) + resourceMap := newTestResourceMap("True", refTime) + + env, err := newConditionsCELEnv() + if err != nil { + t.Fatalf("Failed to create CEL env: %v", err) + } + + ast, issues := env.Compile(`conditionTime(resource, "Ready")`) + if issues != nil && issues.Err() != nil { + t.Fatalf("Failed to compile: %v", issues.Err()) + } + prg, err := env.Program(ast) + if err != nil { + t.Fatalf("Failed to create program: %v", err) + } + + out, _, err := prg.Eval(map[string]interface{}{"resource": resourceMap}) + if err != nil { + t.Fatalf("Eval failed: %v", err) + } + + parsed, parseErr := time.Parse(time.RFC3339Nano, out.Value().(string)) + if parseErr != nil { + t.Fatalf("Failed to parse time: %v", parseErr) + } + if !parsed.Equal(refTime) { + t.Errorf("Expected %v, got %v", refTime, parsed) + } +} + +func TestCELFunctions_Condition(t *testing.T) { + resourceMap := newTestResourceMap("False", time.Now()) + + env, err := newConditionsCELEnv() + if err != nil { + t.Fatalf("Failed to create CEL env: %v", err) + } + + ast, issues := env.Compile(`condition(resource, "Ready")`) + if issues != nil && issues.Err() != nil { + t.Fatalf("Failed to compile: %v", issues.Err()) + } + prg, err := env.Program(ast) + if err != nil { + t.Fatalf("Failed to create program: %v", err) + } + + out, _, err := prg.Eval(map[string]interface{}{"resource": resourceMap}) + if err != nil { + t.Fatalf("Eval failed: %v", err) + } + + condMap, ok := out.Value().(map[string]interface{}) + if !ok { + t.Fatalf("Expected map, got %T", out.Value()) + } + if condMap["type"] != "Ready" { + t.Errorf("Expected type 'Ready', got %v", condMap["type"]) + } + if condMap["status"] != "False" { + t.Errorf("Expected status 'False', got %v", condMap["status"]) + } +} + +func TestCELFunctions_ConditionNotFound(t *testing.T) { + resourceMap := newTestResourceMap("True", time.Now()) + + env, err := newConditionsCELEnv() + if err != nil { + t.Fatalf("Failed to create CEL env: %v", err) + } + + ast, issues := env.Compile(`status(resource, "NonExistent")`) + if issues != nil && issues.Err() != nil { + t.Fatalf("Failed to compile: %v", issues.Err()) + } + prg, err := env.Program(ast) + if err != nil { + t.Fatalf("Failed to create program: %v", err) + } + + _, _, err = prg.Eval(map[string]interface{}{"resource": resourceMap}) + if err == nil { + t.Fatal("Expected error for non-existent condition, got nil") + } +} + +// ============================================================================ +// CompileConditions Tests +// ============================================================================ + +func TestCompileConditions_Valid(t *testing.T) { + conditions := defaultTestConditions() + + compiled, err := CompileConditions(conditions) + if err != nil { + t.Fatalf("CompileConditions failed: %v", err) + } + if compiled == nil { + t.Fatal("Expected non-nil compiled conditions") + } + if compiled.MinMaxAge() != 10*time.Second { + t.Errorf("Expected min max age 10s, got %v", compiled.MinMaxAge()) + } + names := compiled.RuleNames() + if len(names) != 2 { + t.Fatalf("Expected 2 rule names, got %d", len(names)) + } + if names[0] != "isReady" || names[1] != "isNotReady" { + t.Errorf("Unexpected rule names: %v", names) + } +} + +func TestCompileConditions_InvalidReferenceTime(t *testing.T) { + conditions := config.Conditions{ + ReferenceTime: "invalid expression !!!", + Rules: []config.ConditionRule{ + {Name: "test", Expression: `status(resource, "Ready") == "True"`, MaxAge: 30 * time.Minute}, + }, + } + + _, err := CompileConditions(conditions) + if err == nil { + t.Fatal("Expected error for invalid reference_time expression") + } +} + +func TestCompileConditions_InvalidRuleExpression(t *testing.T) { + conditions := config.Conditions{ + ReferenceTime: `conditionTime(resource, "Ready")`, + Rules: []config.ConditionRule{ + {Name: "bad", Expression: "invalid expression !!!", MaxAge: 30 * time.Minute}, + }, + } + + _, err := CompileConditions(conditions) + if err == nil { + t.Fatal("Expected error for invalid rule expression") + } +} + +// ============================================================================ +// EvalReferenceTime Tests +// ============================================================================ + +func TestEvalReferenceTime_Success(t *testing.T) { + conditions := defaultTestConditions() + compiled, err := CompileConditions(conditions) + if err != nil { + t.Fatalf("CompileConditions failed: %v", err) + } + + refTime := time.Date(2025, 6, 15, 10, 30, 0, 0, time.UTC) + resourceMap := newTestResourceMap("True", refTime) + + result, ok := compiled.EvalReferenceTime(resourceMap) + if !ok { + t.Fatal("Expected EvalReferenceTime to succeed") + } + if !result.Equal(refTime) { + t.Errorf("Expected %v, got %v", refTime, result) + } +} + +func TestEvalReferenceTime_NoCondition(t *testing.T) { + conditions := defaultTestConditions() + compiled, err := CompileConditions(conditions) + if err != nil { + t.Fatalf("CompileConditions failed: %v", err) + } + + // Resource with no conditions + resourceMap := map[string]interface{}{ + "status": map[string]interface{}{}, + } + + _, ok := compiled.EvalReferenceTime(resourceMap) + if ok { + t.Fatal("Expected EvalReferenceTime to fail for resource without conditions") + } +} + +// ============================================================================ +// EvalMaxAge Tests +// ============================================================================ + +func TestEvalMaxAge_MatchReady(t *testing.T) { + conditions := defaultTestConditions() + compiled, err := CompileConditions(conditions) + if err != nil { + t.Fatalf("CompileConditions failed: %v", err) + } + + resourceMap := newTestResourceMap("True", time.Now()) + maxAge := compiled.EvalMaxAge(resourceMap) + + if maxAge != 30*time.Minute { + t.Errorf("Expected 30m for ready, got %v", maxAge) + } +} + +func TestEvalMaxAge_MatchNotReady(t *testing.T) { + conditions := defaultTestConditions() + compiled, err := CompileConditions(conditions) + if err != nil { + t.Fatalf("CompileConditions failed: %v", err) + } + + resourceMap := newTestResourceMap("False", time.Now()) + maxAge := compiled.EvalMaxAge(resourceMap) + + if maxAge != 10*time.Second { + t.Errorf("Expected 10s for not ready, got %v", maxAge) + } +} + +func TestEvalMaxAge_NoMatch_FallbackToMinAge(t *testing.T) { + conditions := defaultTestConditions() + compiled, err := CompileConditions(conditions) + if err != nil { + t.Fatalf("CompileConditions failed: %v", err) + } + + // Resource with Unknown status - no rule matches + resourceMap := newTestResourceMap("Unknown", time.Now()) + maxAge := compiled.EvalMaxAge(resourceMap) + + if maxAge != 10*time.Second { + t.Errorf("Expected 10s (min fallback), got %v", maxAge) + } +} + +func TestEvalMaxAge_CompoundExpression(t *testing.T) { + conditions := config.Conditions{ + ReferenceTime: `conditionTime(resource, "Ready")`, + Rules: []config.ConditionRule{ + { + Name: "readyAndAvailable", + Expression: `status(resource, "Ready") == "True" && status(resource, "Available") == "True"`, + MaxAge: 1 * time.Hour, + }, + { + Name: "readyOnly", + Expression: `status(resource, "Ready") == "True"`, + MaxAge: 30 * time.Minute, + }, + { + Name: "fallback", + Expression: `status(resource, "Ready") == "False"`, + MaxAge: 10 * time.Second, + }, + }, + } + compiled, err := CompileConditions(conditions) + if err != nil { + t.Fatalf("CompileConditions failed: %v", err) + } + + // Resource with both Ready=True and Available=True should match first rule + resourceMap := newTestResourceMapWithConditions([]testCondition{ + {Type: "Ready", Status: "True", LastUpdated: time.Now()}, + {Type: "Available", Status: "True", LastUpdated: time.Now()}, + }) + + maxAge := compiled.EvalMaxAge(resourceMap) + if maxAge != 1*time.Hour { + t.Errorf("Expected 1h for compound match, got %v", maxAge) + } + + // Resource with Ready=True but Available=False should match second rule + resourceMap2 := newTestResourceMapWithConditions([]testCondition{ + {Type: "Ready", Status: "True", LastUpdated: time.Now()}, + {Type: "Available", Status: "False", LastUpdated: time.Now()}, + }) + + maxAge2 := compiled.EvalMaxAge(resourceMap2) + if maxAge2 != 30*time.Minute { + t.Errorf("Expected 30m for readyOnly match, got %v", maxAge2) + } +} + +// ============================================================================ +// Test helpers +// ============================================================================ + +func defaultTestConditions() config.Conditions { + return config.Conditions{ + ReferenceTime: `conditionTime(resource, "Ready")`, + Rules: []config.ConditionRule{ + {Name: "isReady", Expression: `status(resource, "Ready") == "True"`, MaxAge: 30 * time.Minute}, + {Name: "isNotReady", Expression: `status(resource, "Ready") == "False"`, MaxAge: 10 * time.Second}, + }, + } +} + +type testCondition struct { + Type string + Status string + LastUpdated time.Time +} + +func newTestResourceMap(readyStatus string, lastUpdated time.Time) map[string]interface{} { + return newTestResourceMapWithConditions([]testCondition{ + {Type: "Ready", Status: readyStatus, LastUpdated: lastUpdated}, + }) +} + +func newTestResourceMapWithConditions(conditions []testCondition) map[string]interface{} { + condList := make([]interface{}, len(conditions)) + for i, c := range conditions { + condList[i] = map[string]interface{}{ + "type": c.Type, + "status": c.Status, + "last_updated_time": c.LastUpdated.Format(time.RFC3339Nano), + "last_transition_time": c.LastUpdated.Format(time.RFC3339Nano), + "observed_generation": int32(1), + } + } + + ready := false + if len(conditions) > 0 { + ready = conditions[0].Status == "True" + } + + return map[string]interface{}{ + "id": "test-resource-1", + "kind": "Cluster", + "href": "/api/v1/clusters/test-resource-1", + "created_time": time.Now().Add(-1 * time.Hour).Format(time.RFC3339Nano), + "updated_time": time.Now().Format(time.RFC3339Nano), + "generation": int32(1), + "status": map[string]interface{}{ + "ready": ready, + "observed_generation": int32(1), + "conditions": condList, + }, + } +} diff --git a/internal/engine/decision.go b/internal/engine/decision.go index 3b2a922..b3e4c2e 100644 --- a/internal/engine/decision.go +++ b/internal/engine/decision.go @@ -5,6 +5,7 @@ import ( "time" "github.com/openshift-hyperfleet/hyperfleet-sentinel/internal/client" + "github.com/openshift-hyperfleet/hyperfleet-sentinel/internal/resource" ) // Decision reasons @@ -17,18 +18,20 @@ const ( // DecisionEngine evaluates whether a resource needs an event published type DecisionEngine struct { - maxAgeNotReady time.Duration - maxAgeReady time.Duration + conditions *CompiledConditions } -// NewDecisionEngine creates a new decision engine -func NewDecisionEngine(maxAgeNotReady, maxAgeReady time.Duration) *DecisionEngine { +// NewDecisionEngine creates a new decision engine with pre-compiled CEL conditions. +// If conditions is nil, the engine will never publish (safe default). +func NewDecisionEngine(conditions *CompiledConditions) *DecisionEngine { return &DecisionEngine{ - maxAgeNotReady: maxAgeNotReady, - maxAgeReady: maxAgeReady, + conditions: conditions, } } +// emptyCompiledConditions is a safe sentinel for nil conditions +var emptyCompiledConditions = &CompiledConditions{} + // Decision represents the result of evaluating a resource type Decision struct { // ShouldPublish indicates whether an event should be published for the resource @@ -43,23 +46,21 @@ type Decision struct { // 1. Generation-based reconciliation: If resource.Generation > status.ObservedGeneration, // publish immediately (spec has changed, adapter needs to reconcile) // 2. Time-based reconciliation: If max age exceeded since last update, publish -// - Uses status.LastUpdated as reference timestamp -// - If LastUpdated is zero (never processed), falls back to created_time -// -// Max Age Intervals: -// - Resources with Ready=true: maxAgeReady (default 30m) -// - Resources with Ready=false: maxAgeNotReady (default 10s) +// - Evaluates the reference_time CEL expression to get the reference timestamp +// - If reference_time evaluation fails or returns zero, falls back to created_time +// - Evaluates rules in order; first matching rule's max_age is used +// - If no rule matches, uses the smallest max_age (most conservative) // // Adapter Contract: -// - Adapters MUST update status.LastUpdated on EVERY evaluation +// - Adapters MUST update condition LastUpdatedTime on EVERY evaluation // - Adapters MUST update status.ObservedGeneration to resource.Generation when processing // - This prevents infinite event loops when adapters skip work due to unmet preconditions // // Returns a Decision indicating whether to publish and why. Returns ShouldPublish=false // for invalid inputs (nil resource, zero now time). -func (e *DecisionEngine) Evaluate(resource *client.Resource, now time.Time) Decision { +func (e *DecisionEngine) Evaluate(r *client.Resource, now time.Time) Decision { // Validate inputs - if resource == nil { + if r == nil { return Decision{ ShouldPublish: false, Reason: ReasonNilResource, @@ -75,32 +76,33 @@ func (e *DecisionEngine) Evaluate(resource *client.Resource, now time.Time) Deci // Check for generation mismatch // This triggers immediate reconciliation regardless of max age - if resource.Generation > resource.Status.ObservedGeneration { + if r.Generation > r.Status.ObservedGeneration { return Decision{ ShouldPublish: true, Reason: ReasonGenerationChanged, } } - // Determine the reference timestamp for max age calculation - // Use LastUpdated if available (adapter has processed the resource) - // Otherwise fall back to created_time (resource is newly created) - referenceTime := resource.Status.LastUpdated - if referenceTime.IsZero() { - referenceTime = resource.CreatedTime + // Guard against nil conditions (safe default: never publish) + conditions := e.conditions + if conditions == nil { + conditions = emptyCompiledConditions } - // Determine the appropriate max age based on resource ready status - var maxAge time.Duration - if resource.Status.Ready { - maxAge = e.maxAgeReady - } else { - maxAge = e.maxAgeNotReady + // Convert resource to map for CEL evaluation + resourceMap := resource.ToMap(r) + + // Evaluate reference time from CEL expression + referenceTime, ok := conditions.EvalReferenceTime(resourceMap) + if !ok || referenceTime.IsZero() { + // Fall back to created_time if reference_time evaluation fails or returns zero + referenceTime = r.CreatedTime } + // Determine max age by evaluating rules (first match wins) + maxAge := conditions.EvalMaxAge(resourceMap) + // Calculate the next event time based on reference timestamp - // Adapters update LastUpdated on every check, enabling proper max age - // calculation even when resources stay in the same status nextEventTime := referenceTime.Add(maxAge) // Check if enough time has passed diff --git a/internal/engine/decision_test.go b/internal/engine/decision_test.go index 6ea800a..66668fd 100644 --- a/internal/engine/decision_test.go +++ b/internal/engine/decision_test.go @@ -6,12 +6,20 @@ import ( "time" "github.com/openshift-hyperfleet/hyperfleet-sentinel/internal/client" + "github.com/openshift-hyperfleet/hyperfleet-sentinel/internal/config" ) // Test helpers and factories -// newTestResource creates a test resource with the given parameters -// This follows TRex pattern of using test factories for consistent test data +// readyStatus converts a bool to a condition status string +func readyStatus(ready bool) string { + if ready { + return "True" + } + return "False" +} + +// newTestResource creates a test resource with a Ready condition func newTestResource(ready bool, lastUpdated time.Time) *client.Resource { return &client.Resource{ ID: testResourceID, @@ -22,6 +30,13 @@ func newTestResource(ready bool, lastUpdated time.Time) *client.Resource { Ready: ready, LastUpdated: lastUpdated, ObservedGeneration: 1, // Default: in sync with generation + Conditions: []client.Condition{ + { + Type: "Ready", + Status: readyStatus(ready), + LastUpdatedTime: lastUpdated, + }, + }, }, } } @@ -37,6 +52,13 @@ func newTestResourceWithCreatedTime(id, kind string, ready bool, createdTime, la Ready: ready, LastUpdated: lastUpdated, ObservedGeneration: 1, // Default: in sync with generation + Conditions: []client.Condition{ + { + Type: "Ready", + Status: readyStatus(ready), + LastUpdatedTime: lastUpdated, + }, + }, }, } } @@ -52,13 +74,25 @@ func newTestResourceWithGeneration(id, kind string, ready bool, lastUpdated time Ready: ready, LastUpdated: lastUpdated, ObservedGeneration: observedGeneration, + Conditions: []client.Condition{ + { + Type: "Ready", + Status: readyStatus(ready), + LastUpdatedTime: lastUpdated, + }, + }, }, } } -// newTestEngine creates a decision engine with standard test values -func newTestEngine() *DecisionEngine { - return NewDecisionEngine(testMaxAgeNotReady, testMaxAgeReady) +// newTestEngine creates a decision engine with standard test values using CEL conditions +func newTestEngine(t *testing.T) *DecisionEngine { + t.Helper() + compiled, err := CompileConditions(defaultTestConditions()) + if err != nil { + t.Fatalf("Failed to compile test conditions: %v", err) + } + return NewDecisionEngine(compiled) } // assertDecision verifies a decision matches expected values @@ -87,24 +121,24 @@ const ( ) func TestNewDecisionEngine(t *testing.T) { - engine := newTestEngine() + engine := newTestEngine(t) if engine == nil { t.Fatal("NewDecisionEngine returned nil") } - if engine.maxAgeNotReady != testMaxAgeNotReady { - t.Errorf("maxAgeNotReady = %v, want %v", engine.maxAgeNotReady, testMaxAgeNotReady) + if engine.conditions == nil { + t.Fatal("conditions should not be nil") } - if engine.maxAgeReady != testMaxAgeReady { - t.Errorf("maxAgeReady = %v, want %v", engine.maxAgeReady, testMaxAgeReady) + if len(engine.conditions.RuleNames()) != 2 { + t.Errorf("rules count = %d, want 2", len(engine.conditions.RuleNames())) } } func TestDecisionEngine_Evaluate(t *testing.T) { now := time.Now() - engine := newTestEngine() + engine := newTestEngine(t) tests := []struct { name string @@ -248,105 +282,91 @@ func TestDecisionEngine_Evaluate(t *testing.T) { } } -// TestDecisionEngine_Evaluate_ZeroMaxAge tests edge case with zero max age intervals -func TestDecisionEngine_Evaluate_ZeroMaxAge(t *testing.T) { - now := time.Now() - +// TestCompileConditions_RejectsZeroMaxAge tests that CompileConditions rejects zero max_age +func TestCompileConditions_RejectsZeroMaxAge(t *testing.T) { tests := []struct { - name string - maxAgeNotReady time.Duration - maxAgeReady time.Duration - ready bool - lastUpdated time.Time - wantShouldPublish bool + name string + conditions config.Conditions }{ { - name: "zero maxAgeNotReady - not ready", - maxAgeNotReady: 0, - maxAgeReady: 30 * time.Minute, - ready: false, - lastUpdated: now, // Even with now, should publish due to zero max age - wantShouldPublish: true, - }, - { - name: "zero maxAgeReady - ready", - maxAgeNotReady: 10 * time.Second, - maxAgeReady: 0, - ready: true, - lastUpdated: now, // Even with now, should publish due to zero max age - wantShouldPublish: true, - }, - { - name: "both zero max ages - ready", - maxAgeNotReady: 0, - maxAgeReady: 0, - ready: true, - lastUpdated: now, - wantShouldPublish: true, - }, - { - name: "both zero max ages - not ready", - maxAgeNotReady: 0, - maxAgeReady: 0, - ready: false, - lastUpdated: now, - wantShouldPublish: true, + name: "zero max age for False status", + conditions: config.Conditions{ + ReferenceTime: `conditionTime(resource, "Ready")`, + Rules: []config.ConditionRule{ + {Name: "isReady", Expression: `status(resource, "Ready") == "True"`, MaxAge: 30 * time.Minute}, + {Name: "isNotReady", Expression: `status(resource, "Ready") == "False"`, MaxAge: 0}, + }, + }, + }, + { + name: "zero max age for True status", + conditions: config.Conditions{ + ReferenceTime: `conditionTime(resource, "Ready")`, + Rules: []config.ConditionRule{ + {Name: "isReady", Expression: `status(resource, "Ready") == "True"`, MaxAge: 0}, + {Name: "isNotReady", Expression: `status(resource, "Ready") == "False"`, MaxAge: 10 * time.Second}, + }, + }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - engine := NewDecisionEngine(tt.maxAgeNotReady, tt.maxAgeReady) - resource := newTestResource(tt.ready, tt.lastUpdated) - decision := engine.Evaluate(resource, now) - - assertDecision(t, decision, tt.wantShouldPublish, "") + _, err := CompileConditions(tt.conditions) + if err == nil { + t.Fatal("Expected CompileConditions to reject zero max_age") + } + if !strings.Contains(err.Error(), "non-positive max_age") { + t.Errorf("Expected error about non-positive max_age, got: %v", err) + } }) } } -// TestDecisionEngine_Evaluate_NegativeMaxAge tests edge case with negative max age intervals -func TestDecisionEngine_Evaluate_NegativeMaxAge(t *testing.T) { - now := time.Now() - lastUpdated := now.Add(-5 * time.Second) - +// TestCompileConditions_RejectsNegativeMaxAge tests that CompileConditions rejects negative max_age +func TestCompileConditions_RejectsNegativeMaxAge(t *testing.T) { tests := []struct { - name string - maxAgeNotReady time.Duration - maxAgeReady time.Duration - ready bool - wantShouldPublish bool + name string + conditions config.Conditions }{ { - name: "negative maxAgeNotReady", - maxAgeNotReady: -10 * time.Second, - maxAgeReady: 30 * time.Minute, - ready: false, - wantShouldPublish: true, // Negative max age means nextEventTime is in the past - }, - { - name: "negative maxAgeReady", - maxAgeNotReady: 10 * time.Second, - maxAgeReady: -10 * time.Minute, - ready: true, - wantShouldPublish: true, + name: "negative max age for not ready", + conditions: config.Conditions{ + ReferenceTime: `conditionTime(resource, "Ready")`, + Rules: []config.ConditionRule{ + {Name: "isReady", Expression: `status(resource, "Ready") == "True"`, MaxAge: 30 * time.Minute}, + {Name: "isNotReady", Expression: `status(resource, "Ready") == "False"`, MaxAge: -10 * time.Second}, + }, + }, + }, + { + name: "negative max age for ready", + conditions: config.Conditions{ + ReferenceTime: `conditionTime(resource, "Ready")`, + Rules: []config.ConditionRule{ + {Name: "isReady", Expression: `status(resource, "Ready") == "True"`, MaxAge: -10 * time.Minute}, + {Name: "isNotReady", Expression: `status(resource, "Ready") == "False"`, MaxAge: 10 * time.Second}, + }, + }, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - engine := NewDecisionEngine(tt.maxAgeNotReady, tt.maxAgeReady) - resource := newTestResource(tt.ready, lastUpdated) - decision := engine.Evaluate(resource, now) - - assertDecision(t, decision, tt.wantShouldPublish, "") + _, err := CompileConditions(tt.conditions) + if err == nil { + t.Fatal("Expected CompileConditions to reject negative max_age") + } + if !strings.Contains(err.Error(), "non-positive max_age") { + t.Errorf("Expected error about non-positive max_age, got: %v", err) + } }) } } // TestDecisionEngine_Evaluate_ConsistentBehavior tests that multiple calls with same inputs produce same results func TestDecisionEngine_Evaluate_ConsistentBehavior(t *testing.T) { - engine := newTestEngine() + engine := newTestEngine(t) now := time.Now() resource := newTestResource(true, now.Add(-31*time.Minute)) @@ -366,7 +386,7 @@ func TestDecisionEngine_Evaluate_ConsistentBehavior(t *testing.T) { // TestDecisionEngine_Evaluate_InvalidInputs tests handling of invalid inputs func TestDecisionEngine_Evaluate_InvalidInputs(t *testing.T) { - engine := newTestEngine() + engine := newTestEngine(t) now := time.Now() tests := []struct { @@ -407,9 +427,9 @@ func TestDecisionEngine_Evaluate_InvalidInputs(t *testing.T) { } } -// TestDecisionEngine_Evaluate_CreatedTimeFallback tests that created_time is used when lastUpdated is zero +// TestDecisionEngine_Evaluate_CreatedTimeFallback tests that created_time is used when condition LastUpdatedTime is zero func TestDecisionEngine_Evaluate_CreatedTimeFallback(t *testing.T) { - engine := newTestEngine() + engine := newTestEngine(t) now := time.Now() tests := []struct { @@ -487,7 +507,7 @@ func TestDecisionEngine_Evaluate_CreatedTimeFallback(t *testing.T) { // TestDecisionEngine_Evaluate_GenerationBasedReconciliation tests generation-based reconciliation func TestDecisionEngine_Evaluate_GenerationBasedReconciliation(t *testing.T) { - engine := newTestEngine() + engine := newTestEngine(t) now := time.Now() tests := []struct { @@ -630,3 +650,194 @@ func TestDecisionEngine_Evaluate_GenerationBasedReconciliation(t *testing.T) { }) } } + +// TestDecisionEngine_Evaluate_NoMatchingRule tests fallback when no rule matches the condition status +func TestDecisionEngine_Evaluate_NoMatchingRule(t *testing.T) { + now := time.Now() + + compiled, err := CompileConditions(defaultTestConditions()) + if err != nil { + t.Fatalf("CompileConditions failed: %v", err) + } + engine := NewDecisionEngine(compiled) + + // Resource with Unknown status - no rule matches + resource := &client.Resource{ + ID: testResourceID, + Kind: testResourceKind, + Generation: 1, + CreatedTime: now.Add(-1 * time.Hour), + Status: client.ResourceStatus{ + ObservedGeneration: 1, + Conditions: []client.Condition{ + { + Type: "Ready", + Status: "Unknown", + LastUpdatedTime: now.Add(-15 * time.Second), // 15s ago + }, + }, + }, + } + + decision := engine.Evaluate(resource, now) + + // Should use smallest max age (10s) as fallback - 15s > 10s so should publish + assertDecision(t, decision, true, ReasonMaxAgeExceeded) +} + +// TestDecisionEngine_Evaluate_ConditionNotFound tests behavior when condition type is not found on resource +func TestDecisionEngine_Evaluate_ConditionNotFound(t *testing.T) { + now := time.Now() + + compiled, err := CompileConditions(defaultTestConditions()) + if err != nil { + t.Fatalf("CompileConditions failed: %v", err) + } + engine := NewDecisionEngine(compiled) + + // Resource with no Ready condition (only Available) + resource := &client.Resource{ + ID: testResourceID, + Kind: testResourceKind, + Generation: 1, + CreatedTime: now.Add(-20 * time.Second), // Created 20s ago + Status: client.ResourceStatus{ + ObservedGeneration: 1, + Conditions: []client.Condition{ + { + Type: "Available", + Status: "True", + LastUpdatedTime: now.Add(-5 * time.Second), + }, + }, + }, + } + + decision := engine.Evaluate(resource, now) + + // Condition not found → falls back to created_time (20s ago) with smallest max age (10s) + // 20s > 10s → should publish + assertDecision(t, decision, true, ReasonMaxAgeExceeded) +} + +// TestDecisionEngine_Evaluate_CustomConditionType tests using a non-Ready condition type +func TestDecisionEngine_Evaluate_CustomConditionType(t *testing.T) { + now := time.Now() + + conditions := config.Conditions{ + ReferenceTime: `conditionTime(resource, "Available")`, + Rules: []config.ConditionRule{ + {Name: "isAvailable", Expression: `status(resource, "Available") == "True"`, MaxAge: 1 * time.Hour}, + {Name: "isNotAvailable", Expression: `status(resource, "Available") == "False"`, MaxAge: 30 * time.Second}, + }, + } + compiled, err := CompileConditions(conditions) + if err != nil { + t.Fatalf("CompileConditions failed: %v", err) + } + engine := NewDecisionEngine(compiled) + + // Resource with both Ready and Available conditions + resource := &client.Resource{ + ID: testResourceID, + Kind: testResourceKind, + Generation: 1, + CreatedTime: now.Add(-2 * time.Hour), + Status: client.ResourceStatus{ + ObservedGeneration: 1, + Conditions: []client.Condition{ + { + Type: "Ready", + Status: "True", + LastUpdatedTime: now.Add(-5 * time.Minute), + }, + { + Type: "Available", + Status: "False", + LastUpdatedTime: now.Add(-45 * time.Second), // 45s ago + }, + }, + }, + } + + decision := engine.Evaluate(resource, now) + + // Should use Available condition (False → 30s max age) and its LastUpdatedTime (45s ago) + // 45s > 30s → should publish + assertDecision(t, decision, true, ReasonMaxAgeExceeded) +} + +// TestDecisionEngine_Evaluate_NoConditions tests resource with empty conditions list +func TestDecisionEngine_Evaluate_NoConditions(t *testing.T) { + now := time.Now() + engine := newTestEngine(t) + + resource := &client.Resource{ + ID: testResourceID, + Kind: testResourceKind, + Generation: 1, + CreatedTime: now.Add(-20 * time.Second), // Created 20s ago + Status: client.ResourceStatus{ + ObservedGeneration: 1, + Conditions: nil, // No conditions at all + }, + } + + decision := engine.Evaluate(resource, now) + + // No conditions → falls back to created_time (20s ago) with smallest max age (10s) + // 20s > 10s → should publish + assertDecision(t, decision, true, ReasonMaxAgeExceeded) +} + +// TestDecisionEngine_Evaluate_CompoundExpression tests compound CEL expressions +func TestDecisionEngine_Evaluate_CompoundExpression(t *testing.T) { + now := time.Now() + + conditions := config.Conditions{ + ReferenceTime: `conditionTime(resource, "Ready")`, + Rules: []config.ConditionRule{ + { + Name: "readyAndAvailable", + Expression: `status(resource, "Ready") == "True" && status(resource, "Available") == "True"`, + MaxAge: 1 * time.Hour, + }, + { + Name: "readyOnly", + Expression: `status(resource, "Ready") == "True"`, + MaxAge: 30 * time.Minute, + }, + { + Name: "notReady", + Expression: `status(resource, "Ready") == "False"`, + MaxAge: 10 * time.Second, + }, + }, + } + compiled, err := CompileConditions(conditions) + if err != nil { + t.Fatalf("CompileConditions failed: %v", err) + } + engine := NewDecisionEngine(compiled) + + // Resource with Ready=True and Available=True, updated 45m ago + resource := &client.Resource{ + ID: testResourceID, + Kind: testResourceKind, + Generation: 1, + CreatedTime: now.Add(-2 * time.Hour), + Status: client.ResourceStatus{ + Ready: true, + ObservedGeneration: 1, + Conditions: []client.Condition{ + {Type: "Ready", Status: "True", LastUpdatedTime: now.Add(-45 * time.Minute)}, + {Type: "Available", Status: "True", LastUpdatedTime: now.Add(-45 * time.Minute)}, + }, + }, + } + + decision := engine.Evaluate(resource, now) + + // First rule matches (readyAndAvailable, 1h max_age), 45m < 1h → should NOT publish + assertDecision(t, decision, false, "max age not exceeded") +} diff --git a/internal/payload/builder.go b/internal/payload/builder.go index 5b106f0..9c76c21 100644 --- a/internal/payload/builder.go +++ b/internal/payload/builder.go @@ -3,10 +3,10 @@ package payload import ( "context" "fmt" - "time" "github.com/google/cel-go/cel" "github.com/openshift-hyperfleet/hyperfleet-sentinel/internal/client" + "github.com/openshift-hyperfleet/hyperfleet-sentinel/internal/resource" "github.com/openshift-hyperfleet/hyperfleet-sentinel/pkg/logger" ) @@ -126,71 +126,8 @@ func compileNode(raw interface{}, env *cel.Env) (*compiledNode, error) { // BuildPayload builds an event payload map from the given resource and decision reason. // The reason is available to CEL expressions as the "reason" variable. // ctx is used for correlated warning logs if CEL evaluation fails. -func (b *Builder) BuildPayload(ctx context.Context, resource *client.Resource, reason string) map[string]interface{} { - return b.evalCompiledMap(ctx, b.compiled, resourceToMap(resource), reason) -} - -// resourceToMap converts a Resource into a plain map[string]interface{} for CEL evaluation. -// Time fields are formatted as RFC3339Nano strings to match their JSON representation. -func resourceToMap(r *client.Resource) map[string]interface{} { - status := map[string]interface{}{ - "ready": r.Status.Ready, - "last_transition_time": r.Status.LastTransitionTime.Format(time.RFC3339Nano), - "last_updated": r.Status.LastUpdated.Format(time.RFC3339Nano), - "observed_generation": r.Status.ObservedGeneration, - } - if len(r.Status.Conditions) > 0 { - conditions := make([]interface{}, len(r.Status.Conditions)) - for i, c := range r.Status.Conditions { - cond := map[string]interface{}{ - "type": c.Type, - "status": c.Status, - "last_transition_time": c.LastTransitionTime.Format(time.RFC3339Nano), - "last_updated_time": c.LastUpdatedTime.Format(time.RFC3339Nano), - "observed_generation": c.ObservedGeneration, - } - if c.Reason != "" { - cond["reason"] = c.Reason - } - if c.Message != "" { - cond["message"] = c.Message - } - conditions[i] = cond - } - status["conditions"] = conditions - } - - m := map[string]interface{}{ - "id": r.ID, - "href": r.Href, - "kind": r.Kind, - "created_time": r.CreatedTime.Format(time.RFC3339Nano), - "updated_time": r.UpdatedTime.Format(time.RFC3339Nano), - "generation": r.Generation, - "status": status, - } - - if len(r.Labels) > 0 { - labels := make(map[string]interface{}, len(r.Labels)) - for k, v := range r.Labels { - labels[k] = v - } - m["labels"] = labels - } - - if r.OwnerReferences != nil { - m["owner_references"] = map[string]interface{}{ - "id": r.OwnerReferences.ID, - "href": r.OwnerReferences.Href, - "kind": r.OwnerReferences.Kind, - } - } - - if r.Metadata != nil { - m["metadata"] = r.Metadata - } - - return m +func (b *Builder) BuildPayload(ctx context.Context, r *client.Resource, reason string) map[string]interface{} { + return b.evalCompiledMap(ctx, b.compiled, resource.ToMap(r), reason) } // evalCompiledMap evaluates a compiled map against the resource and reason. diff --git a/internal/resource/convert.go b/internal/resource/convert.go new file mode 100644 index 0000000..aa842ac --- /dev/null +++ b/internal/resource/convert.go @@ -0,0 +1,88 @@ +package resource + +import ( + "time" + + "github.com/openshift-hyperfleet/hyperfleet-sentinel/internal/client" +) + +// ToMap converts a Resource into a plain map[string]interface{} for CEL evaluation. +// Time fields are formatted as RFC3339Nano strings to match their JSON representation. +// Returns a zero-value map if r is nil. +func ToMap(r *client.Resource) map[string]interface{} { + if r == nil { + return map[string]interface{}{ + "id": "", + "href": "", + "kind": "", + "created_time": "", + "updated_time": "", + "generation": int32(0), + "status": map[string]interface{}{ + "ready": false, + "last_transition_time": "", + "last_updated": "", + "observed_generation": int32(0), + }, + } + } + + status := map[string]interface{}{ + "ready": r.Status.Ready, + "last_transition_time": r.Status.LastTransitionTime.Format(time.RFC3339Nano), + "last_updated": r.Status.LastUpdated.Format(time.RFC3339Nano), + "observed_generation": r.Status.ObservedGeneration, + } + if len(r.Status.Conditions) > 0 { + conditions := make([]interface{}, len(r.Status.Conditions)) + for i, c := range r.Status.Conditions { + cond := map[string]interface{}{ + "type": c.Type, + "status": c.Status, + "last_transition_time": c.LastTransitionTime.Format(time.RFC3339Nano), + "last_updated_time": c.LastUpdatedTime.Format(time.RFC3339Nano), + "observed_generation": c.ObservedGeneration, + } + if c.Reason != "" { + cond["reason"] = c.Reason + } + if c.Message != "" { + cond["message"] = c.Message + } + conditions[i] = cond + } + status["conditions"] = conditions + } + + m := map[string]interface{}{ + "id": r.ID, + "href": r.Href, + "kind": r.Kind, + "created_time": r.CreatedTime.Format(time.RFC3339Nano), + "updated_time": r.UpdatedTime.Format(time.RFC3339Nano), + "generation": r.Generation, + "status": status, + } + + if len(r.Labels) > 0 { + labels := make(map[string]interface{}, len(r.Labels)) + for k, v := range r.Labels { + labels[k] = v + } + m["labels"] = labels + } + + if r.OwnerReferences != nil { + m["owner_references"] = map[string]interface{}{ + "id": r.OwnerReferences.ID, + "href": r.OwnerReferences.Href, + "kind": r.OwnerReferences.Kind, + } + } + + if r.Metadata != nil { + m["metadata"] = r.Metadata + } + + return m +} diff --git a/internal/sentinel/sentinel.go b/internal/sentinel/sentinel.go index f0dcf60..74551fd 100644 --- a/internal/sentinel/sentinel.go +++ b/internal/sentinel/sentinel.go @@ -65,8 +65,12 @@ func (s *Sentinel) LastSuccessfulPoll() time.Time { // Start starts the polling loop func (s *Sentinel) Start(ctx context.Context) error { - s.logger.Infof(ctx, "Starting sentinel resource_type=%s poll_interval=%s max_age_not_ready=%s max_age_ready=%s", - s.config.ResourceType, s.config.PollInterval, s.config.MaxAgeNotReady, s.config.MaxAgeReady) + ruleNames := make([]string, len(s.config.Conditions.Rules)) + for i, r := range s.config.Conditions.Rules { + ruleNames[i] = r.Name + } + s.logger.Infof(ctx, "Starting sentinel resource_type=%s poll_interval=%s reference_time=%s rules=%v", + s.config.ResourceType, s.config.PollInterval, s.config.Conditions.ReferenceTime, ruleNames) ticker := time.NewTicker(s.config.PollInterval) defer ticker.Stop() diff --git a/internal/sentinel/sentinel_test.go b/internal/sentinel/sentinel_test.go index ba11af9..13e26f3 100644 --- a/internal/sentinel/sentinel_test.go +++ b/internal/sentinel/sentinel_test.go @@ -111,6 +111,25 @@ func (m *MockPublisherWithLogger) Close() error { return nil } func (m *MockPublisherWithLogger) Health(ctx context.Context) error { return nil } +// testConditions is the standard conditions used across sentinel tests +var testConditions = config.Conditions{ + ReferenceTime: `conditionTime(resource, "Ready")`, + Rules: []config.ConditionRule{ + {Name: "isReady", Expression: `status(resource, "Ready") == "True"`, MaxAge: 30 * time.Minute}, + {Name: "isNotReady", Expression: `status(resource, "Ready") == "False"`, MaxAge: 10 * time.Second}, + }, +} + +// newTestDecisionEngine creates a decision engine with standard test conditions +func newTestDecisionEngine(t *testing.T) *engine.DecisionEngine { + t.Helper() + compiled, err := engine.CompileConditions(testConditions) + if err != nil { + t.Fatalf("CompileConditions failed: %v", err) + } + return engine.NewDecisionEngine(compiled) +} + // TestTrigger_Success tests successful event publishing func TestTrigger_Success(t *testing.T) { ctx := context.Background() @@ -129,7 +148,7 @@ func TestTrigger_Success(t *testing.T) { // Setup components hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second) - decisionEngine := engine.NewDecisionEngine(10*time.Second, 30*time.Minute) + decisionEngine := newTestDecisionEngine(t) mockPublisher := &MockPublisher{} log := logger.NewHyperFleetLogger() @@ -140,8 +159,7 @@ func TestTrigger_Success(t *testing.T) { cfg := &config.SentinelConfig{ ResourceType: "clusters", Topic: "test-topic", - MaxAgeNotReady: 10 * time.Second, - MaxAgeReady: 30 * time.Minute, + Conditions: testConditions, MessageData: map[string]interface{}{ "id": "resource.id", "kind": "resource.kind", @@ -203,7 +221,7 @@ func TestTrigger_NoEventsPublished(t *testing.T) { // Setup components hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second) - decisionEngine := engine.NewDecisionEngine(10*time.Second, 30*time.Minute) + decisionEngine := newTestDecisionEngine(t) mockPublisher := &MockPublisher{} log := logger.NewHyperFleetLogger() @@ -214,8 +232,7 @@ func TestTrigger_NoEventsPublished(t *testing.T) { cfg := &config.SentinelConfig{ ResourceType: "clusters", Topic: "test-topic", - MaxAgeNotReady: 10 * time.Second, - MaxAgeReady: 30 * time.Minute, + Conditions: testConditions, MessageData: map[string]interface{}{ "id": "resource.id", "kind": "resource.kind", @@ -254,7 +271,7 @@ func TestTrigger_FetchError(t *testing.T) { // Setup components hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 1*time.Second) // Short timeout - decisionEngine := engine.NewDecisionEngine(10*time.Second, 30*time.Minute) + decisionEngine := newTestDecisionEngine(t) mockPublisher := &MockPublisher{} log := logger.NewHyperFleetLogger() @@ -265,8 +282,7 @@ func TestTrigger_FetchError(t *testing.T) { cfg := &config.SentinelConfig{ ResourceType: "clusters", Topic: "test-topic", - MaxAgeNotReady: 10 * time.Second, - MaxAgeReady: 30 * time.Minute, + Conditions: testConditions, MessageData: map[string]interface{}{ "id": "resource.id", "kind": "resource.kind", @@ -308,7 +324,7 @@ func TestTrigger_PublishError(t *testing.T) { // Setup components hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second) - decisionEngine := engine.NewDecisionEngine(10*time.Second, 30*time.Minute) + decisionEngine := newTestDecisionEngine(t) mockPublisher := &MockPublisher{ publishError: errors.New("broker connection failed"), } @@ -321,8 +337,7 @@ func TestTrigger_PublishError(t *testing.T) { cfg := &config.SentinelConfig{ ResourceType: "clusters", Topic: "test-topic", - MaxAgeNotReady: 10 * time.Second, - MaxAgeReady: 30 * time.Minute, + Conditions: testConditions, MessageData: map[string]interface{}{ "id": "resource.id", "kind": "resource.kind", @@ -365,7 +380,7 @@ func TestTrigger_MixedResources(t *testing.T) { // Setup components hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second) - decisionEngine := engine.NewDecisionEngine(10*time.Second, 30*time.Minute) + decisionEngine := newTestDecisionEngine(t) mockPublisher := &MockPublisher{} log := logger.NewHyperFleetLogger() @@ -376,8 +391,7 @@ func TestTrigger_MixedResources(t *testing.T) { cfg := &config.SentinelConfig{ ResourceType: "clusters", Topic: "test-topic", - MaxAgeNotReady: 10 * time.Second, - MaxAgeReady: 30 * time.Minute, + Conditions: testConditions, MessageData: map[string]interface{}{ "id": "resource.id", "kind": "resource.kind", @@ -428,7 +442,7 @@ func TestTrigger_WithMessageDataConfig(t *testing.T) { defer server.Close() hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second) - decisionEngine := engine.NewDecisionEngine(10*time.Second, 30*time.Minute) + decisionEngine := newTestDecisionEngine(t) mockPublisher := &MockPublisher{} log := logger.NewHyperFleetLogger() @@ -438,8 +452,7 @@ func TestTrigger_WithMessageDataConfig(t *testing.T) { cfg := &config.SentinelConfig{ ResourceType: "clusters", Topic: "test-topic", - MaxAgeNotReady: 10 * time.Second, - MaxAgeReady: 30 * time.Minute, + Conditions: testConditions, MessageData: map[string]interface{}{ "id": "resource.id", "kind": "resource.kind", @@ -494,7 +507,7 @@ func TestTrigger_WithNestedMessageData(t *testing.T) { defer server.Close() hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second) - decisionEngine := engine.NewDecisionEngine(10*time.Second, 30*time.Minute) + decisionEngine := newTestDecisionEngine(t) mockPublisher := &MockPublisher{} log := logger.NewHyperFleetLogger() @@ -504,8 +517,7 @@ func TestTrigger_WithNestedMessageData(t *testing.T) { cfg := &config.SentinelConfig{ ResourceType: "clusters", Topic: "test-topic", - MaxAgeNotReady: 10 * time.Second, - MaxAgeReady: 30 * time.Minute, + Conditions: testConditions, MessageData: map[string]interface{}{ "id": "resource.id", "resource": map[string]interface{}{ @@ -547,9 +559,8 @@ func TestTrigger_WithNestedMessageData(t *testing.T) { // TestBuildEventData_WithBuilder tests buildEventData directly with a configured builder. func TestBuildEventData_WithBuilder(t *testing.T) { cfg := &config.SentinelConfig{ - ResourceType: "clusters", - MaxAgeNotReady: 10 * time.Second, - MaxAgeReady: 30 * time.Minute, + ResourceType: "clusters", + Conditions: testConditions, MessageData: map[string]interface{}{ "id": "resource.id", "kind": "resource.kind", diff --git a/test/integration/integration_test.go b/test/integration/integration_test.go index 47c2327..90319c5 100644 --- a/test/integration/integration_test.go +++ b/test/integration/integration_test.go @@ -146,7 +146,18 @@ func TestIntegration_EndToEnd(t *testing.T) { // Setup components with real RabbitMQ broker hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second) - decisionEngine := engine.NewDecisionEngine(10*time.Second, 30*time.Minute) + conditions := config.Conditions{ + ReferenceTime: `conditionTime(resource, "Ready")`, + Rules: []config.ConditionRule{ + {Name: "isReady", Expression: `status(resource, "Ready") == "True"`, MaxAge: 30 * time.Minute}, + {Name: "isNotReady", Expression: `status(resource, "Ready") == "False"`, MaxAge: 10 * time.Second}, + }, + } + compiledConditions, err := engine.CompileConditions(conditions) + if err != nil { + t.Fatalf("CompileConditions failed: %v", err) + } + decisionEngine := engine.NewDecisionEngine(compiledConditions) log := logger.NewHyperFleetLogger() // Create metrics with a test registry @@ -156,8 +167,7 @@ func TestIntegration_EndToEnd(t *testing.T) { cfg := &config.SentinelConfig{ ResourceType: "clusters", PollInterval: 100 * time.Millisecond, // Short interval for testing - MaxAgeNotReady: 10 * time.Second, - MaxAgeReady: 30 * time.Minute, + Conditions: conditions, MessageData: map[string]interface{}{ "id": "resource.id", "kind": "resource.kind", @@ -220,7 +230,7 @@ func TestIntegration_LabelSelectorFiltering(t *testing.T) { 2, 2, true, - now.Add(-31*time.Minute), // Exceeds max_age_ready (30m) + now.Add(-31*time.Minute), // Exceeds max age for "True" status (30m) map[string]string{"shard": "1"}, ), // This cluster has shard:2 - should NOT match selector @@ -262,7 +272,18 @@ func TestIntegration_LabelSelectorFiltering(t *testing.T) { // Setup components with real RabbitMQ broker hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second) - decisionEngine := engine.NewDecisionEngine(10*time.Second, 30*time.Minute) + conditions := config.Conditions{ + ReferenceTime: `conditionTime(resource, "Ready")`, + Rules: []config.ConditionRule{ + {Name: "isReady", Expression: `status(resource, "Ready") == "True"`, MaxAge: 30 * time.Minute}, + {Name: "isNotReady", Expression: `status(resource, "Ready") == "False"`, MaxAge: 10 * time.Second}, + }, + } + compiledConditions, err := engine.CompileConditions(conditions) + if err != nil { + t.Fatalf("CompileConditions failed: %v", err) + } + decisionEngine := engine.NewDecisionEngine(compiledConditions) log := logger.NewHyperFleetLogger() // Create metrics with a test registry @@ -272,8 +293,7 @@ func TestIntegration_LabelSelectorFiltering(t *testing.T) { cfg := &config.SentinelConfig{ ResourceType: "clusters", PollInterval: 100 * time.Millisecond, - MaxAgeNotReady: 10 * time.Second, - MaxAgeReady: 30 * time.Minute, + Conditions: conditions, ResourceSelector: []config.LabelSelector{ {Label: "shard", Value: "1"}, }, @@ -373,7 +393,18 @@ func TestIntegration_TSLSyntaxMultipleLabels(t *testing.T) { // Setup components hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second) - decisionEngine := engine.NewDecisionEngine(10*time.Second, 30*time.Minute) + conditions := config.Conditions{ + ReferenceTime: `conditionTime(resource, "Ready")`, + Rules: []config.ConditionRule{ + {Name: "isReady", Expression: `status(resource, "Ready") == "True"`, MaxAge: 30 * time.Minute}, + {Name: "isNotReady", Expression: `status(resource, "Ready") == "False"`, MaxAge: 10 * time.Second}, + }, + } + compiledConditions, err := engine.CompileConditions(conditions) + if err != nil { + t.Fatalf("CompileConditions failed: %v", err) + } + decisionEngine := engine.NewDecisionEngine(compiledConditions) log := logger.NewHyperFleetLogger() registry := prometheus.NewRegistry() @@ -382,8 +413,7 @@ func TestIntegration_TSLSyntaxMultipleLabels(t *testing.T) { cfg := &config.SentinelConfig{ ResourceType: "clusters", PollInterval: 100 * time.Millisecond, - MaxAgeNotReady: 10 * time.Second, - MaxAgeReady: 30 * time.Minute, + Conditions: conditions, ResourceSelector: []config.LabelSelector{ {Label: "region", Value: "us-east"}, {Label: "env", Value: "production"}, @@ -464,9 +494,9 @@ func TestIntegration_BrokerLoggerContext(t *testing.T) { } } clusters := []map[string]interface{}{ - // This cluster will trigger max_age_ready exceeded event + // This cluster will trigger max age exceeded for "True" status createMockCluster("cluster-old", 2, 2, true, now.Add(-35*time.Minute)), // Exceeds 30min - // This cluster will trigger max_age_not_ready exceeded event + // This cluster will trigger max age exceeded for "False" status createMockCluster("cluster-not-ready", 1, 1, false, now.Add(-15*time.Second)), // Exceeds 10sec } response := createMockClusterList(clusters) @@ -476,14 +506,24 @@ func TestIntegration_BrokerLoggerContext(t *testing.T) { defer server.Close() hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second) - decisionEngine := engine.NewDecisionEngine(10*time.Second, 30*time.Minute) + conditions := config.Conditions{ + ReferenceTime: `conditionTime(resource, "Ready")`, + Rules: []config.ConditionRule{ + {Name: "isReady", Expression: `status(resource, "Ready") == "True"`, MaxAge: 30 * time.Minute}, + {Name: "isNotReady", Expression: `status(resource, "Ready") == "False"`, MaxAge: 10 * time.Second}, + }, + } + compiledConditions, err := engine.CompileConditions(conditions) + if err != nil { + t.Fatalf("CompileConditions failed: %v", err) + } + decisionEngine := engine.NewDecisionEngine(compiledConditions) sentinelConfig := &config.SentinelConfig{ ResourceType: "clusters", Topic: TEST_TOPIC, PollInterval: 100 * time.Millisecond, - MaxAgeNotReady: 10 * time.Second, - MaxAgeReady: 30 * time.Minute, + Conditions: conditions, ResourceSelector: []config.LabelSelector{ {Label: "region", Value: "us-east"}, {Label: "env", Value: "production"},