diff --git a/Makefile b/Makefile index 7f90924..ff90bac 100755 --- a/Makefile +++ b/Makefile @@ -195,7 +195,7 @@ test-helm: ## Test Helm charts (lint, template, validate) @echo "Testing template with Google Pub/Sub broker..." helm template test-release $(HELM_CHART_DIR)/ \ --set broker.type=googlepubsub \ - --set broker.googlepubsub.projectId=test-project > /dev/null + --set broker.googlepubsub.project_id=test-project > /dev/null @echo "Google Pub/Sub broker template OK" @echo "" @echo "Testing template with PodMonitoring enabled..." diff --git a/README.md b/README.md index 5ad85c7..3f96e69 100644 --- a/README.md +++ b/README.md @@ -190,9 +190,9 @@ Broker configuration is managed by the [hyperfleet-broker library](https://githu | Variable | Description | Example | |----------|-------------|---------| -| `BROKER_TOPIC` | Topic name for publishing events | `hyperfleet-dev-clusters` | +| `HYPERFLEET_BROKER_TOPIC` | Topic name for publishing events | `hyperfleet-dev-clusters` | -The `BROKER_TOPIC` environment variable sets the full topic name where events will be published. When using Helm, the default topic is `{namespace}-{resourceType}` (e.g., `hyperfleet-dev-clusters`, `hyperfleet-dev-nodepools`). This enables isolation between different environments or tenants sharing the same broker. See [Naming Strategy](https://github.com/openshift-hyperfleet/architecture/blob/main/hyperfleet/components/sentinel/sentinel-naming-strategy.md) for details. +The `HYPERFLEET_BROKER_TOPIC` environment variable sets the full topic name where events will be published. When using Helm, the default topic is `{namespace}-{resourceType}` (e.g., `hyperfleet-dev-clusters`, `hyperfleet-dev-nodepools`). This enables isolation between different environments or tenants sharing the same broker. See [Naming Strategy](https://github.com/openshift-hyperfleet/architecture/blob/main/hyperfleet/components/sentinel/sentinel-naming-strategy.md) for details. 
For detailed broker configuration options, see the [hyperfleet-broker documentation](https://github.com/openshift-hyperfleet/hyperfleet-broker). diff --git a/charts/README.md b/charts/README.md index af794aa..7e5d9f4 100644 --- a/charts/README.md +++ b/charts/README.md @@ -78,14 +78,14 @@ The following table lists the configurable parameters of the Sentinel chart and | Parameter | Description | Default | |-----------|-------------|---------| -| `config.resourceType` | Resource type to watch | `clusters` | -| `config.pollInterval` | Polling interval | `5s` | -| `config.maxAgeNotReady` | Max age for not ready resources | `10s` | -| `config.maxAgeReady` | Max age for ready resources | `30m` | -| `config.resourceSelector` | Resource selector for sharding | See values.yaml | -| `config.hyperfleetApi.baseUrl` | HyperFleet API base URL | `http://hyperfleet-api:8000` | -| `config.hyperfleetApi.timeout` | API timeout | `5s` | -| `config.messageData` | CloudEvents data payload fields | See values.yaml | +| `config.resource_type` | Resource type to watch | `clusters` | +| `config.poll_interval` | Polling interval | `5s` | +| `config.max_age_not_ready` | Max age for not ready resources | `10s` | +| `config.max_age_ready` | Max age for ready resources | `30m` | +| `config.resource_selector` | Resource selector for sharding | See values.yaml | +| `config.clients.hyperfleet_api.base_url` | HyperFleet API base URL | `http://hyperfleet-api:8000` | +| `config.clients.hyperfleet_api.timeout` | API timeout | `10s` | +| `config.message_data` | CloudEvents data payload fields | See values.yaml | ### Broker Configuration @@ -94,13 +94,13 @@ The following table lists the configurable parameters of the Sentinel chart and | Parameter | Description | Default | |-----------|-------------|---------| | `broker.type` | Broker type (`rabbitmq` or `googlepubsub`) | `rabbitmq` | -| `broker.topic` | Topic name for broker publishing (supports Helm templates) | `{{ .Release.Namespace }}-{{ 
.Values.config.resourceType }}` | +| `broker.topic` | Topic name for broker publishing (supports Helm templates) | `{{ .Release.Namespace }}-{{ .Values.config.resource_type }}` | | `broker.rabbitmq.url` | RabbitMQ connection URL (format: `amqp://user:pass@host:port/vhost`) | `amqp://sentinel-user:change-me-in-production@rabbitmq.hyperfleet-system.svc.cluster.local:5672/hyperfleet` | -| `broker.rabbitmq.exchangeType` | RabbitMQ exchange type | `topic` | -| `broker.googlepubsub.projectId` | GCP project ID (for Pub/Sub) | `your-gcp-project-id` | -| `broker.googlepubsub.maxOutstandingMessages` | Max outstanding messages (for Pub/Sub) | `1000` | -| `broker.googlepubsub.numGoroutines` | Number of goroutines (for Pub/Sub) | `10` | -| `broker.googlepubsub.createTopicIfMissing` | Auto-create topic if it doesn't exist (for Pub/Sub) | `false` | +| `broker.rabbitmq.exchange_type` | RabbitMQ exchange type | `topic` | +| `broker.googlepubsub.project_id` | GCP project ID (for Pub/Sub) | `your-gcp-project-id` | +| `broker.googlepubsub.max_outstanding_messages` | Max outstanding messages (for Pub/Sub) | `1000` | +| `broker.googlepubsub.num_goroutines` | Number of goroutines (for Pub/Sub) | `10` | +| `broker.googlepubsub.create_topic_if_missing` | Auto-create topic if it doesn't exist (for Pub/Sub) | `false` | | `subscriber.parallelism` | Number of parallel workers for message processing | `1` | | `existingSecret` | Use existing secret for broker credentials | `""` | @@ -135,10 +135,10 @@ broker: rabbitmq: # Connection URL with credentials, host, port, and vhost url: amqp://sentinel-prod:super-secret-password@rabbitmq.messaging.svc.cluster.local:5672/prod - exchangeType: topic + exchange_type: topic config: - resourceSelector: + resource_selector: - label: environment value: production ``` @@ -156,9 +156,9 @@ helm install sentinel ./charts \ broker: type: googlepubsub googlepubsub: - projectId: my-gcp-project - maxOutstandingMessages: 1000 - numGoroutines: 10 + project_id: 
my-gcp-project + max_outstanding_messages: 1000 + num_goroutines: 10 ``` ```bash @@ -179,7 +179,7 @@ kubectl create secret generic my-broker-credentials \ --from-literal=BROKER_RABBITMQ_URL=amqp://user:pass@rabbitmq.local:5672/ # Note: Google Pub/Sub doesn't require Secret -# projectId is configured in values.yaml (not sensitive) +# project_id is configured in values.yaml (not sensitive) # Authentication uses Workload Identity in GKE ``` @@ -190,7 +190,7 @@ Deploy multiple Sentinel instances watching different resource shards: ```yaml # values-shard-1.yaml config: - resourceSelector: + resource_selector: - label: shard value: "1" ``` @@ -198,7 +198,7 @@ config: ```yaml # values-shard-2.yaml config: - resourceSelector: + resource_selector: - label: shard value: "2" ``` diff --git a/charts/templates/configmap.yaml b/charts/templates/configmap.yaml index 5efa0a3..c8de8b7 100644 --- a/charts/templates/configmap.yaml +++ b/charts/templates/configmap.yaml @@ -6,30 +6,58 @@ metadata: {{- include "sentinel.labels" . | nindent 4 }} data: config.yaml: | + # Sentinel information + sentinel: + name: {{ tpl .Values.config.sentinel.name . 
}} + + # Debug configuration + debug_config: {{ .Values.config.debug_config }} + + # Logging configuration + log: + level: {{ .Values.config.log.level | quote }} + format: {{ .Values.config.log.format | quote }} + output: {{ .Values.config.log.output | quote }} + + # Client configurations + clients: + # HyperFleet API client + hyperfleet_api: + base_url: {{ .Values.config.clients.hyperfleet_api.base_url }} + version: {{ .Values.config.clients.hyperfleet_api.version | quote }} + timeout: {{ .Values.config.clients.hyperfleet_api.timeout }} + retry_attempts: {{ .Values.config.clients.hyperfleet_api.retry_attempts }} + retry_backoff: {{ .Values.config.clients.hyperfleet_api.retry_backoff | quote }} + base_delay: {{ .Values.config.clients.hyperfleet_api.base_delay }} + max_delay: {{ .Values.config.clients.hyperfleet_api.max_delay }} + {{- if .Values.config.clients.hyperfleet_api.default_headers }} + default_headers: + {{- toYaml .Values.config.clients.hyperfleet_api.default_headers | nindent 10 }} + {{- end }} + + # Broker client + broker: + topic: {{ tpl .Values.broker.topic . 
| quote }} + # Sentinel configuration - resource_type: {{ .Values.config.resourceType }} - poll_interval: {{ .Values.config.pollInterval }} - max_age_not_ready: {{ .Values.config.maxAgeNotReady }} - max_age_ready: {{ .Values.config.maxAgeReady }} + resource_type: {{ .Values.config.resource_type }} + poll_interval: {{ .Values.config.poll_interval }} + max_age_not_ready: {{ .Values.config.max_age_not_ready }} + max_age_ready: {{ .Values.config.max_age_ready }} - {{- if .Values.config.resourceSelector }} + {{- if .Values.config.resource_selector }} # Resource selector for horizontal sharding resource_selector: - {{- range .Values.config.resourceSelector }} + {{- range .Values.config.resource_selector }} - label: {{ .label }} value: {{ .value | quote }} {{- end }} {{- end }} - # HyperFleet API configuration - hyperfleet_api: - endpoint: {{ .Values.config.hyperfleetApi.baseUrl }} - timeout: {{ .Values.config.hyperfleetApi.timeout }} - - {{- if .Values.config.messageData }} + {{- if .Values.config.message_data }} # CloudEvents data payload configuration message_data: - {{- toYaml .Values.config.messageData | nindent 6 }} + {{- toYaml .Values.config.message_data | nindent 6 }} {{- end }} --- apiVersion: v1 @@ -46,24 +74,24 @@ data: {{- if eq .Values.broker.type "rabbitmq" }} rabbitmq: url: {{ .Values.broker.rabbitmq.url | quote }} - exchange_type: {{ .Values.broker.rabbitmq.exchangeType | default "topic" }} - {{- if .Values.broker.rabbitmq.prefetchCount }} - prefetch_count: {{ .Values.broker.rabbitmq.prefetchCount }} + exchange_type: {{ .Values.broker.rabbitmq.exchange_type | default "topic" }} + {{- if .Values.broker.rabbitmq.prefetch_count }} + prefetch_count: {{ .Values.broker.rabbitmq.prefetch_count }} {{- end }} - {{- if .Values.broker.rabbitmq.prefetchSize }} - prefetch_size: {{ .Values.broker.rabbitmq.prefetchSize }} + {{- if .Values.broker.rabbitmq.prefetch_size }} + prefetch_size: {{ .Values.broker.rabbitmq.prefetch_size }} {{- end }} {{- else if eq 
.Values.broker.type "googlepubsub" }} googlepubsub: - project_id: {{ .Values.broker.googlepubsub.projectId | quote }} - {{- if .Values.broker.googlepubsub.maxOutstandingMessages }} - max_outstanding_messages: {{ .Values.broker.googlepubsub.maxOutstandingMessages }} + project_id: {{ .Values.broker.googlepubsub.project_id | quote }} + {{- if .Values.broker.googlepubsub.max_outstanding_messages }} + max_outstanding_messages: {{ .Values.broker.googlepubsub.max_outstanding_messages }} {{- end }} - {{- if .Values.broker.googlepubsub.numGoroutines }} - num_goroutines: {{ .Values.broker.googlepubsub.numGoroutines }} + {{- if .Values.broker.googlepubsub.num_goroutines }} + num_goroutines: {{ .Values.broker.googlepubsub.num_goroutines }} {{- end }} - {{- if hasKey .Values.broker.googlepubsub "createTopicIfMissing" }} - create_topic_if_missing: {{ .Values.broker.googlepubsub.createTopicIfMissing }} + {{- if hasKey .Values.broker.googlepubsub "create_topic_if_missing" }} + create_topic_if_missing: {{ .Values.broker.googlepubsub.create_topic_if_missing }} {{- end }} {{- end }} diff --git a/charts/templates/deployment.yaml b/charts/templates/deployment.yaml index 643e137..57b896a 100644 --- a/charts/templates/deployment.yaml +++ b/charts/templates/deployment.yaml @@ -64,7 +64,7 @@ spec: - name: BROKER_CONFIG_FILE value: /etc/sentinel/broker.yaml # Topic name for broker publishing - - name: BROKER_TOPIC + - name: HYPERFLEET_BROKER_TOPIC value: {{ tpl .Values.broker.topic . 
| quote }} # Broker credentials can be overridden via environment variables from Secret {{- if eq .Values.broker.type "rabbitmq" }} diff --git a/charts/values.yaml b/charts/values.yaml index 8d7c0ad..ac43d55 100644 --- a/charts/values.yaml +++ b/charts/values.yaml @@ -75,32 +75,63 @@ podDisruptionBudget: # Sentinel configuration config: + # Sentinel information + sentinel: + # Sentinel component name - rendered as a Helm template; the default only includes the resource type + # Override per shard when using a resource selector, e.g.: hyperfleet-sentinel-clusters-shard-1 + name: hyperfleet-sentinel-{{ .Values.config.resource_type }} + + # Debug configuration - log merged config on startup + debug_config: false + + # Logging configuration + log: + level: info + format: json + output: stdout + + # Client configurations + clients: + # HyperFleet API client configuration + hyperfleet_api: + # Use in-cluster service name for API endpoint + base_url: http://hyperfleet-api:8000 + version: v1 + timeout: 10s + retry_attempts: 3 + retry_backoff: exponential + base_delay: 1s + max_delay: 30s + # Optional default headers + # default_headers: + # X-Custom-Header: "value" + + # Broker configuration + # Note: broker implementation details (RabbitMQ URL, etc.) 
are in broker section below + broker: + # Topic will be set from broker.topic template below + topic: "" + # Resource type to watch (clusters, nodepools) - resourceType: clusters + resource_type: clusters # How often to poll the API for resource updates - pollInterval: 5s + poll_interval: 5s # Max age interval for resources not ready - maxAgeNotReady: 10s + max_age_not_ready: 10s # Max age interval for ready resources - maxAgeReady: 30m + max_age_ready: 30m # Resource selector for horizontal sharding # Deploy multiple Sentinel instances with different shard values - resourceSelector: + resource_selector: - label: shard value: "1" - # HyperFleet API configuration - hyperfleetApi: - # Use in-cluster service name for API endpoint - baseUrl: http://hyperfleet-api:8000 - timeout: 5s - # CloudEvents data payload configuration - messageData: + message_data: id: resource.id kind: resource.kind href: resource.href @@ -125,25 +156,25 @@ broker: # Default uses Helm template: {namespace}-{resourceType} for multi-tenant isolation # Example result: hyperfleet-dev-clusters, hyperfleet-dev-nodepools # Override with a static value if needed: topic: "my-custom-topic" - topic: '{{ .Release.Namespace }}-{{ .Values.config.resourceType }}' + topic: '{{ .Release.Namespace }}-{{ .Values.config.resource_type }}' # RabbitMQ configuration # Uses BROKER_RABBITMQ_URL environment variable (single connection string) rabbitmq: # Connection URL format: amqp://user:password@host:port/vhost url: amqp://:@rabbitmq.hyperfleet-system.svc.cluster.local:5672/hyperfleet - exchangeType: topic + exchange_type: topic # Google Pub/Sub configuration (alternative to RabbitMQ) - # projectId is written to broker.yaml ConfigMap (not Secret - it's not sensitive) + # project_id is written to broker.yaml ConfigMap (not Secret - it's not sensitive) googlepubsub: #REQUIRED: Replace with your actual GCP project ID - projectId: your-gcp-project-id - maxOutstandingMessages: 1000 - numGoroutines: 10 + project_id: 
your-gcp-project-id + max_outstanding_messages: 1000 + num_goroutines: 10 # Auto-creation flags (default: false - manual creation required) # Set to true to automatically create topics/subscriptions if they don't exist - createTopicIfMissing: false + create_topic_if_missing: false # Subscriber configuration (optional) subscriber: diff --git a/cmd/sentinel/main.go b/cmd/sentinel/main.go index 3149d4c..192ce1e 100755 --- a/cmd/sentinel/main.go +++ b/cmd/sentinel/main.go @@ -13,6 +13,8 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/spf13/cobra" + "github.com/spf13/pflag" + "gopkg.in/yaml.v3" "github.com/openshift-hyperfleet/hyperfleet-broker/broker" "github.com/openshift-hyperfleet/hyperfleet-sentinel/internal/client" @@ -40,6 +42,7 @@ reconciliation events to a message broker based on configurable max age interval } rootCmd.AddCommand(newServeCommand()) + rootCmd.AddCommand(newConfigDumpCommand()) if err := rootCmd.Execute(); err != nil { // Print error to stderr since SilenceErrors is true and logging may not be initialized @@ -55,9 +58,6 @@ reconciliation events to a message broker based on configurable max age interval func newServeCommand() *cobra.Command { var ( configFile string - logLevel string - logFormat string - logOutput string healthBindAddress string metricsBindAddress string ) @@ -69,81 +69,122 @@ func newServeCommand() *cobra.Command { SilenceUsage: true, // Don't print usage on error SilenceErrors: true, // Don't print errors - we handle logging ourselves RunE: func(cmd *cobra.Command, args []string) error { - // Initialize logging configuration - // Precedence: flags → environment variables → defaults - logCfg, err := initLogging(logLevel, logFormat, logOutput) + // Load configuration with CLI flags, env vars, and file + // Precedence: flags → environment variables → config file → defaults + cfg, err := config.LoadConfig(configFile, cmd.Flags()) if err != nil { - 
return fmt.Errorf("failed to initialize logging: %w", err) + return err } - // Load and validate configuration from YAML and env vars - cfg, err := config.LoadConfig(configFile) + // Initialize logging with merged configuration + logCfg, err := initLogging(&cfg.Log) if err != nil { - return err + return fmt.Errorf("failed to initialize logging: %w", err) } + return runServe(cfg, logCfg, healthBindAddress, metricsBindAddress) }, } - // Add --config flag for YAML file path + // Config file path cmd.Flags().StringVarP(&configFile, "config", "c", "", "Path to configuration file (YAML)") - // Add logging flags per HyperFleet logging specification - cmd.Flags().StringVar(&logLevel, "log-level", "", "Log level: debug, info, warn, error (default: info)") - cmd.Flags().StringVar(&logFormat, "log-format", "", "Log format: text, json (default: text)") - cmd.Flags().StringVar(&logOutput, "log-output", "", "Log output: stdout, stderr (default: stdout)") - - // Server bind address flags (consistent with hyperfleet-api) + // Server bind address flags cmd.Flags().StringVar(&healthBindAddress, "health-server-bindaddress", ":8080", "Health server bind address") cmd.Flags().StringVar(&metricsBindAddress, "metrics-server-bindaddress", ":9090", "Metrics server bind address") + // Add config override flags + addConfigOverrideFlags(cmd) + return cmd } -// getConfigValue returns the flag value if set, otherwise falls back to the environment variable. -// This implements the precedence: flags → environment variables → defaults -func getConfigValue(flag, envVar string) string { - if flag != "" { - return flag +func newConfigDumpCommand() *cobra.Command { + var configFile string + + cmd := &cobra.Command{ + Use: "config-dump", + Short: "Load and print the merged sentinel configuration as YAML", + Long: `Load the sentinel configuration from config file, environment variables, +and CLI flags, then print the merged result as YAML to stdout. +Exits with code 0 on success, non-zero on error. 
+ +Priority order (lowest to highest): config file < env vars < CLI flags`, + SilenceUsage: true, + SilenceErrors: true, + RunE: func(cmd *cobra.Command, args []string) error { + return runConfigDump(configFile, cmd.Flags()) + }, } - return os.Getenv(envVar) + + cmd.Flags().StringVarP(&configFile, "config", "c", "", "Path to configuration file (YAML)") + addConfigOverrideFlags(cmd) + + return cmd } -// initLogging initializes the logging configuration following the precedence: -// flags → environment variables → defaults -func initLogging(flagLevel, flagFormat, flagOutput string) (*logger.LogConfig, error) { +// addConfigOverrideFlags adds CLI flags for overriding configuration values +func addConfigOverrideFlags(cmd *cobra.Command) { + // General + cmd.Flags().Bool("debug-config", false, "Log the full merged configuration after load. Env: HYPERFLEET_DEBUG_CONFIG") + + // Sentinel + cmd.Flags().String("sentinel-name", "", "Sentinel component name. Env: HYPERFLEET_SENTINEL_NAME") + + cmd.Flags().String("log-level", "", "Log level: debug, info, warn, error. Env: HYPERFLEET_LOG_LEVEL") + cmd.Flags().String("log-format", "", "Log format: text, json. Env: HYPERFLEET_LOG_FORMAT") + cmd.Flags().String("log-output", "", "Log output: stdout, stderr. Env: HYPERFLEET_LOG_OUTPUT") + + // HyperFleet API + cmd.Flags().String("hyperfleet-api-base-url", "", "HyperFleet API base URL. Env: HYPERFLEET_API_BASE_URL") + cmd.Flags().String("hyperfleet-api-version", "", "HyperFleet API version. Env: HYPERFLEET_API_VERSION") + cmd.Flags().String("hyperfleet-api-timeout", "", "HyperFleet API timeout (e.g., 10s). Env: HYPERFLEET_API_TIMEOUT") + cmd.Flags().Int("hyperfleet-api-retry-attempts", 0, "HyperFleet API retry attempts. Env: HYPERFLEET_API_RETRY_ATTEMPTS") + cmd.Flags().String("hyperfleet-api-retry-backoff", "", "HyperFleet API retry backoff strategy. Env: HYPERFLEET_API_RETRY_BACKOFF") + cmd.Flags().String("hyperfleet-api-base-delay", "", "HyperFleet API base retry delay. 
Env: HYPERFLEET_API_BASE_DELAY") + cmd.Flags().String("hyperfleet-api-max-delay", "", "HyperFleet API max retry delay. Env: HYPERFLEET_API_MAX_DELAY") + + // Broker + cmd.Flags().String("broker-topic", "", "Broker topic. Env: HYPERFLEET_BROKER_TOPIC") + + // Sentinel-specific + cmd.Flags().String("resource-type", "", "Resource type to watch (clusters, nodepools). Env: HYPERFLEET_RESOURCE_TYPE") + cmd.Flags().String("poll-interval", "", "Poll interval (e.g., 5s). Env: HYPERFLEET_POLL_INTERVAL") + cmd.Flags().String("max-age-not-ready", "", "Max age for not-ready resources. Env: HYPERFLEET_MAX_AGE_NOT_READY") + cmd.Flags().String("max-age-ready", "", "Max age for ready resources. Env: HYPERFLEET_MAX_AGE_READY") +} + +// initLogging initializes the logging configuration from the already-merged LogConfig. +// Precedence (config file < env vars < CLI flags) is resolved by LoadConfig via viper. +func initLogging(logCfg *config.LogConfig) (*logger.LogConfig, error) { cfg := logger.DefaultConfig() cfg.Version = version cfg.Component = "sentinel" - // Apply log level - if levelStr := getConfigValue(flagLevel, "LOG_LEVEL"); levelStr != "" { - level, err := logger.ParseLogLevel(levelStr) + if logCfg.Level != "" { + parsed, err := logger.ParseLogLevel(logCfg.Level) if err != nil { return nil, err } - cfg.Level = level + cfg.Level = parsed } - // Apply log format - if formatStr := getConfigValue(flagFormat, "LOG_FORMAT"); formatStr != "" { - format, err := logger.ParseLogFormat(formatStr) + if logCfg.Format != "" { + parsed, err := logger.ParseLogFormat(logCfg.Format) if err != nil { return nil, err } - cfg.Format = format + cfg.Format = parsed } - // Apply log output - if outputStr := getConfigValue(flagOutput, "LOG_OUTPUT"); outputStr != "" { - output, err := logger.ParseLogOutput(outputStr) + if logCfg.Output != "" { + parsed, err := logger.ParseLogOutput(logCfg.Output) if err != nil { return nil, err } - cfg.Output = output + cfg.Output = parsed } - // Set global config so 
all loggers use the same configuration logger.SetGlobalConfig(cfg) return cfg, nil @@ -159,13 +200,23 @@ func runServe(cfg *config.SentinelConfig, logCfg *logger.LogConfig, healthBindAd Extra("log_format", logCfg.Format.String()). Info(ctx, "Starting HyperFleet Sentinel") + // Log full merged configuration if debug_config is enabled; sensitive values are redacted + if cfg.DebugConfig { + data, err := yaml.Marshal(cfg.RedactedCopy()) + if err != nil { + log.Warnf(ctx, "Failed to marshal config for debug logging: %v", err) + } else { + log.Infof(ctx, "Debug config enabled - merged configuration:\n%s", string(data)) + } + } + // Initialize Prometheus metrics registry registry := prometheus.NewRegistry() // Register metrics once (uses sync.Once internally) metrics.NewSentinelMetrics(registry, version) // Initialize components - hyperfleetClient, err := client.NewHyperFleetClient(cfg.HyperFleetAPI.Endpoint, cfg.HyperFleetAPI.Timeout) + hyperfleetClient, err := client.NewHyperFleetClient(cfg.Clients.HyperfleetAPI.BaseURL, cfg.Clients.HyperfleetAPI.Timeout, cfg.Sentinel.Name, version) if err != nil { log.Errorf(ctx, "Failed to initialize OpenAPI client: %v", err) return fmt.Errorf("failed to initialize OpenAPI client: %w", err) @@ -292,3 +343,18 @@ func runServe(cfg *config.SentinelConfig, logCfg *logger.LogConfig, healthBindAd log.Info(ctx, "Sentinel stopped gracefully") return nil } + +// runConfigDump loads the full sentinel configuration and prints it as YAML to stdout. 
+func runConfigDump(configFile string, flags *pflag.FlagSet) error { + cfg, err := config.LoadConfig(configFile, flags) + if err != nil { + return err + } + + data, err := yaml.Marshal(cfg) + if err != nil { + return fmt.Errorf("failed to marshal config: %w", err) + } + fmt.Print(string(data)) + return nil +} diff --git a/configs/dev-example.yaml b/configs/dev-example.yaml index 5111b3d..0f1fa3c 100644 --- a/configs/dev-example.yaml +++ b/configs/dev-example.yaml @@ -10,34 +10,56 @@ # export BROKER_RABBITMQ_URL=amqp://guest:guest@localhost:5672/ # ./bin/sentinel serve --config=configs/dev-example.yaml # -# For multi-tenant isolation (optional): -# export BROKER_TOPIC_PREFIX=my-namespace -# Topics will be named: my-namespace-Cluster, my-namespace-NodePool -# # Or configure via broker.yaml (see broker.yaml in project root) -# Resource type to watch - must be one of: clusters, nodepools. +# Sentinel information +sentinel: + name: hyperfleet-sentinel-clusters + +# Debug configuration - log merged config on startup +debug_config: false + +# Logging configuration (can be overridden by HYPERFLEET_LOG_* env vars or --log-* flags) +log: + level: "info" + format: "text" + output: "stdout" + +# Client configurations +clients: + # HyperFleet API client configuration + hyperfleet_api: + # Local development API endpoint + # Adjust this to match your local HyperFleet API instance + base_url: http://localhost:8000 + version: "v1" + timeout: 10s + retry_attempts: 3 + retry_backoff: "exponential" + base_delay: "1s" + max_delay: "30s" + # Optional default headers + # default_headers: + # X-Custom-Header: "value" + + # Broker configuration + # Note: broker implementation details (RabbitMQ URL, etc.) are in broker.yaml + broker: + topic: "" # Can be overridden by HYPERFLEET_BROKER_TOPIC + +# Resource type to watch - must be one of: clusters, nodepools resource_type: clusters -# Faster polling for development - see changes quickly. 
+# Faster polling for development - see changes quickly poll_interval: 2s -# Shorter max age intervals for development. +# Shorter max age intervals for development max_age_not_ready: 5s max_age_ready: 2m -# No resource selector - watch all resources in development. +# No resource selector - watch all resources in development # resource_selector: [] -# HyperFleet API configuration. -hyperfleet_api: - # Local development API endpoint. - # Adjust this to match your local HyperFleet API instance. - endpoint: http://localhost:8000 - - # Shorter timeout for faster failure detection in dev. - timeout: 10s - # CloudEvent payload configuration. # # Each property value is a CEL expression evaluated against "resource" and "reason". diff --git a/configs/gcp-pubsub-example.yaml b/configs/gcp-pubsub-example.yaml index 2ebf6fc..7a14811 100644 --- a/configs/gcp-pubsub-example.yaml +++ b/configs/gcp-pubsub-example.yaml @@ -11,39 +11,62 @@ # # For more details, see: https://github.com/openshift-hyperfleet/hyperfleet-broker -# Resource type to watch - must be one of: clusters, nodepools. +# Sentinel information +sentinel: + name: hyperfleet-sentinel-clusters-shard-1 + +# Debug configuration - log merged config on startup +debug_config: false + +# Logging configuration (can be overridden by HYPERFLEET_LOG_* env vars or --log-* flags) +log: + level: "info" + format: "json" # Use JSON format for production + output: "stdout" + +# Client configurations +clients: + # HyperFleet API client configuration + hyperfleet_api: + # API base URL (required) + base_url: http://hyperfleet-api.hyperfleet-system.svc.cluster.local:8080 + version: "v1" + timeout: 10s + retry_attempts: 3 + retry_backoff: "exponential" + base_delay: "1s" + max_delay: "30s" + + # Broker configuration + # Note: broker implementation details (GCP project ID, etc.) 
are in broker.yaml + broker: + topic: "" # Can be overridden by HYPERFLEET_BROKER_TOPIC + +# Resource type to watch - must be one of: clusters, nodepools resource_type: clusters -# How often to poll the HyperFleet API for resource updates. -# Accepts Go duration format: ns, us/µs, ms, s, m, h. +# How often to poll the HyperFleet API for resource updates +# Accepts Go duration format: ns, us/µs, ms, s, m, h poll_interval: 5s -# Max age interval for resources that are not ready. -# Resources in transitional states are re-checked more frequently. +# Max age interval for resources that are not ready +# Resources in transitional states are re-checked more frequently max_age_not_ready: 10s -# Max age interval for resources that are ready and stable. -# Stable resources are checked less frequently to reduce API load. +# Max age interval for resources that are ready and stable +# Stable resources are checked less frequently to reduce API load max_age_ready: 30m -# Resource selector (optional) - filter resources by labels. -# If empty or omitted, all resources of the specified type are watched. -# This enables horizontal scaling by having multiple Sentinels watch different resource subsets. -# Multiple selectors use AND logic (all labels must match). +# Resource selector (optional) - filter resources by labels +# If empty or omitted, all resources of the specified type are watched +# This enables horizontal scaling by having multiple Sentinels watch different resource subsets +# Multiple selectors use AND logic (all labels must match) resource_selector: - label: shard value: "1" - label: region value: us-east-1 -# HyperFleet API configuration. -hyperfleet_api: - # API base URL (required). - endpoint: http://hyperfleet-api.hyperfleet-system.svc.cluster.local:8080 - - # Request timeout for API calls. - timeout: 5s - # CloudEvent payload configuration. # # Each property value is a CEL expression evaluated against "resource" and "reason". 
diff --git a/configs/rabbitmq-example.yaml b/configs/rabbitmq-example.yaml index d194330..b13154d 100644 --- a/configs/rabbitmq-example.yaml +++ b/configs/rabbitmq-example.yaml @@ -11,39 +11,62 @@ # # For more details, see: https://github.com/openshift-hyperfleet/hyperfleet-broker -# Resource type to watch - must be one of: clusters, nodepools. +# Sentinel information +sentinel: + name: hyperfleet-sentinel-clusters-shard-1 + +# Debug configuration - log merged config on startup +debug_config: false + +# Logging configuration (can be overridden by HYPERFLEET_LOG_* env vars or --log-* flags) +log: + level: "info" + format: "json" # Use JSON format for production + output: "stdout" + +# Client configurations +clients: + # HyperFleet API client configuration + hyperfleet_api: + # API base URL (required) + base_url: http://hyperfleet-api.hyperfleet-system.svc.cluster.local:8080 + version: "v1" + timeout: 10s + retry_attempts: 3 + retry_backoff: "exponential" + base_delay: "1s" + max_delay: "30s" + + # Broker configuration + # Note: broker implementation details (RabbitMQ URL, etc.) are in broker.yaml + broker: + topic: "" # Can be overridden by HYPERFLEET_BROKER_TOPIC + +# Resource type to watch - must be one of: clusters, nodepools resource_type: clusters -# How often to poll the HyperFleet API for resource updates. -# Accepts Go duration format: ns, us/µs, ms, s, m, h. +# How often to poll the HyperFleet API for resource updates +# Accepts Go duration format: ns, us/µs, ms, s, m, h poll_interval: 5s -# Max age interval for resources that are not ready. -# Resources in transitional states are re-checked more frequently. +# Max age interval for resources that are not ready +# Resources in transitional states are re-checked more frequently max_age_not_ready: 10s -# Max age interval for resources that are ready and stable. -# Stable resources are checked less frequently to reduce API load. 
+# Max age interval for resources that are ready and stable +# Stable resources are checked less frequently to reduce API load max_age_ready: 30m -# Resource selector (optional) - filter resources by labels. -# If empty or omitted, all resources of the specified type are watched. -# This enables horizontal scaling by having multiple Sentinels watch different resource subsets. -# Multiple selectors use AND logic (all labels must match). +# Resource selector (optional) - filter resources by labels +# If empty or omitted, all resources of the specified type are watched +# This enables horizontal scaling by having multiple Sentinels watch different resource subsets +# Multiple selectors use AND logic (all labels must match) resource_selector: - label: shard value: "1" - label: datacenter value: dc-east -# HyperFleet API configuration. -hyperfleet_api: - # API base URL (required). - endpoint: http://hyperfleet-api.hyperfleet-system.svc.cluster.local:8080 - - # Request timeout for API calls. - timeout: 5s - # CloudEvent payload configuration. # # Each property value is a CEL expression evaluated against "resource" and "reason". diff --git a/docs/configuration.md b/docs/configuration.md new file mode 100644 index 0000000..959da30 --- /dev/null +++ b/docs/configuration.md @@ -0,0 +1,203 @@ +# Sentinel Configuration Reference + +This document describes the Sentinel configuration options and how to set them +in three formats: YAML, command-line flags, and environment variables. + +Overrides are applied in this order: CLI flags > environment variables > YAML file > defaults. + +## Config file location + +You can point the sentinel at a config file with: + +- CLI: `--config` (or `-c`) +- Required for startup + +## YAML options (SentinelConfig) + +All fields use **snake_case** naming. 
+ +```yaml +sentinel: + name: hyperfleet-sentinel-clusters + +debug_config: false + +log: + level: "info" + format: "text" + output: "stdout" + +clients: + hyperfleet_api: + base_url: "http://hyperfleet-api:8000" + version: "v1" + timeout: "10s" + retry_attempts: 3 + retry_backoff: "exponential" + base_delay: "1s" + max_delay: "30s" + default_headers: + X-Example: "value" + broker: + topic: "" + +resource_type: "clusters" +poll_interval: "5s" +max_age_not_ready: "10s" +max_age_ready: "30m" + +resource_selector: + - label: shard + value: "1" + - label: region + value: us-east-1 + +message_data: + id: "resource.id" + kind: "resource.kind" +``` + +### Top-level fields + +- `sentinel.name` (string, required): Sentinel component name/identifier. +- `debug_config` (bool, optional): Log the merged config after load. Default: `false`. + +### Logging (`log`) + +- `log.level` (string, optional): Log level (`debug`, `info`, `warn`, `error`). Default: `info`. +- `log.format` (string, optional): Log format (`text`, `json`). Default: `text`. +- `log.output` (string, optional): Log output destination (`stdout`, `stderr`). Default: `stdout`. + +### HyperFleet API client (`clients.hyperfleet_api`) + +- `base_url` (string, required): Base URL for HyperFleet API requests. +- `version` (string, optional): API version. Default: `v1`. +- `timeout` (duration string, optional): HTTP client timeout. Default: `10s`. +- `retry_attempts` (int, optional): Retry attempts. Default: `3`. +- `retry_backoff` (string, optional): Backoff strategy (`exponential`, `linear`, `constant`). Default: `exponential`. +- `base_delay` (duration string, optional): Initial retry delay. Default: `1s`. +- `max_delay` (duration string, optional): Maximum retry delay. Default: `30s`. +- `default_headers` (map[string]string, optional): Headers added to all API requests. + +### Broker (`clients.broker`) + +- `topic` (string, optional): Broker topic for publishing events. 
+ +Note: Broker implementation details (RabbitMQ URL, GCP project ID, etc.) are configured +separately via `broker.yaml` or the hyperfleet-broker library environment variables. + +### Sentinel-specific + +- `resource_type` (string, required): Resource type to watch (`clusters`, `nodepools`). +- `poll_interval` (duration string, optional): How often to poll the API. Default: `5s`. +- `max_age_not_ready` (duration string, optional): Max age for not-ready resources. Default: `10s`. +- `max_age_ready` (duration string, optional): Max age for ready resources. Default: `30m`. +- `resource_selector` (list, optional): Label selectors to filter resources. Empty means watch all. +- `message_data` (map, required): CEL expressions defining the CloudEvent payload structure. + +## Command-line parameters + +The following CLI flags override YAML values: + +**General** + +- `--debug-config` -> `debug_config` +- `--sentinel-name` -> `sentinel.name` +- `--log-level` -> `log.level` +- `--log-format` -> `log.format` +- `--log-output` -> `log.output` + +**HyperFleet API** + +- `--hyperfleet-api-base-url` -> `clients.hyperfleet_api.base_url` +- `--hyperfleet-api-version` -> `clients.hyperfleet_api.version` +- `--hyperfleet-api-timeout` -> `clients.hyperfleet_api.timeout` +- `--hyperfleet-api-retry-attempts` -> `clients.hyperfleet_api.retry_attempts` +- `--hyperfleet-api-retry-backoff` -> `clients.hyperfleet_api.retry_backoff` +- `--hyperfleet-api-base-delay` -> `clients.hyperfleet_api.base_delay` +- `--hyperfleet-api-max-delay` -> `clients.hyperfleet_api.max_delay` + +**Broker** + +- `--broker-topic` -> `clients.broker.topic` + +**Sentinel** + +- `--resource-type` -> `resource_type` +- `--poll-interval` -> `poll_interval` +- `--max-age-not-ready` -> `max_age_not_ready` +- `--max-age-ready` -> `max_age_ready` + +## Environment variables + +All deployment overrides use the `HYPERFLEET_` prefix unless noted. 
+ +**General** + +- `HYPERFLEET_DEBUG_CONFIG` -> `debug_config` +- `HYPERFLEET_SENTINEL_NAME` -> `sentinel.name` +- `HYPERFLEET_LOG_LEVEL` -> `log.level` +- `HYPERFLEET_LOG_FORMAT` -> `log.format` +- `HYPERFLEET_LOG_OUTPUT` -> `log.output` + +**HyperFleet API** + +- `HYPERFLEET_API_BASE_URL` -> `clients.hyperfleet_api.base_url` +- `HYPERFLEET_API_VERSION` -> `clients.hyperfleet_api.version` +- `HYPERFLEET_API_TIMEOUT` -> `clients.hyperfleet_api.timeout` +- `HYPERFLEET_API_RETRY_ATTEMPTS` -> `clients.hyperfleet_api.retry_attempts` +- `HYPERFLEET_API_RETRY_BACKOFF` -> `clients.hyperfleet_api.retry_backoff` +- `HYPERFLEET_API_BASE_DELAY` -> `clients.hyperfleet_api.base_delay` +- `HYPERFLEET_API_MAX_DELAY` -> `clients.hyperfleet_api.max_delay` + +**Broker** + +- `HYPERFLEET_BROKER_TOPIC` -> `clients.broker.topic` + + +**Sentinel** + +- `HYPERFLEET_RESOURCE_TYPE` -> `resource_type` +- `HYPERFLEET_POLL_INTERVAL` -> `poll_interval` +- `HYPERFLEET_MAX_AGE_NOT_READY` -> `max_age_not_ready` +- `HYPERFLEET_MAX_AGE_READY` -> `max_age_ready` + +## Examples + +### Override API endpoint via environment variable + +```bash +export HYPERFLEET_API_BASE_URL=http://localhost:8080 +./bin/sentinel serve --config=config.yaml +``` + +### Override log level via CLI flag + +```bash +./bin/sentinel serve --config=config.yaml --log-level=debug +``` + +### Override multiple settings + +```bash +export HYPERFLEET_API_BASE_URL=http://api-staging:8000 +export HYPERFLEET_LOG_LEVEL=debug +export HYPERFLEET_LOG_FORMAT=json +./bin/sentinel serve --config=config.yaml --poll-interval=2s +``` + +### Precedence example + +Given this config file: +```yaml +log: + level: "info" +``` + +And these overrides: +```bash +export HYPERFLEET_LOG_LEVEL=warn +./bin/sentinel serve --config=config.yaml --log-level=debug +``` + +The final log level will be `debug` (CLI flag wins over env var and config file). 
diff --git a/docs/multi-instance-deployment.md b/docs/multi-instance-deployment.md index 75e850e..a44bff1 100644 --- a/docs/multi-instance-deployment.md +++ b/docs/multi-instance-deployment.md @@ -12,20 +12,20 @@ Deploy multiple Sentinel instances by installing the Helm chart multiple times w # Instance 1: Watch clusters in us-east region helm install sentinel-us-east ./charts \ --namespace hyperfleet-system \ - --set config.resourceSelector[0].label=region \ - --set config.resourceSelector[0].value=us-east + --set config.resource_selector[0].label=region \ + --set config.resource_selector[0].value=us-east # Instance 2: Watch clusters in us-west region helm install sentinel-us-west ./charts \ --namespace hyperfleet-system \ - --set config.resourceSelector[0].label=region \ - --set config.resourceSelector[0].value=us-west + --set config.resource_selector[0].label=region \ + --set config.resource_selector[0].value=us-west # Instance 3: Watch clusters in eu-central region helm install sentinel-eu-central ./charts \ --namespace hyperfleet-system \ - --set config.resourceSelector[0].label=region \ - --set config.resourceSelector[0].value=eu-central + --set config.resource_selector[0].label=region \ + --set config.resource_selector[0].value=eu-central ``` ## Using Values Files for Complex Configurations @@ -35,8 +35,8 @@ For more complex setups, create separate values files for each instance: **values-us-east.yaml:** ```yaml config: - resourceType: clusters - resourceSelector: + resource_type: clusters + resource_selector: - label: region value: us-east - label: environment @@ -46,8 +46,8 @@ config: **values-us-west.yaml:** ```yaml config: - resourceType: clusters - resourceSelector: + resource_type: clusters + resource_selector: - label: region value: us-west - label: environment @@ -84,15 +84,15 @@ When deploying multiple Sentinel instances, consider using separate broker topic # Instance for us-east with dedicated topic helm install sentinel-us-east ./charts \ 
--namespace hyperfleet-system \ - --set config.resourceSelector[0].label=region \ - --set config.resourceSelector[0].value=us-east \ + --set config.resource_selector[0].label=region \ + --set config.resource_selector[0].value=us-east \ --set broker.topic=hyperfleet-clusters-us-east # Instance for us-west with dedicated topic helm install sentinel-us-west ./charts \ --namespace hyperfleet-system \ - --set config.resourceSelector[0].label=region \ - --set config.resourceSelector[0].value=us-west \ + --set config.resource_selector[0].label=region \ + --set config.resource_selector[0].value=us-west \ --set broker.topic=hyperfleet-clusters-us-west ``` @@ -104,8 +104,8 @@ For initial deployments, start with a **single Sentinel instance** watching all ```yaml config: - resourceType: clusters - resourceSelector: [] # Empty = watch all resources + resource_type: clusters + resource_selector: [] # Empty = watch all resources ``` Scale to multiple instances as your cluster count grows or when you need regional isolation. diff --git a/docs/running-sentinel.md b/docs/running-sentinel.md index 2ef753c..90ccaeb 100644 --- a/docs/running-sentinel.md +++ b/docs/running-sentinel.md @@ -100,6 +100,7 @@ export BROKER_RABBITMQ_URL="amqp://guest:guest@localhost:5672/" **For Google Pub/Sub Emulator** (requires `broker.yaml` modification): 1. Edit `broker.yaml` to use `googlepubsub`: + ```yaml broker: type: googlepubsub @@ -108,6 +109,7 @@ export BROKER_RABBITMQ_URL="amqp://guest:guest@localhost:5672/" ``` 2. 
Set the emulator host (required for the Google SDK): + ```bash export PUBSUB_EMULATOR_HOST=localhost:8085 ``` @@ -118,10 +120,10 @@ Set the topic name for event publishing: ```bash # For clusters -export BROKER_TOPIC=hyperfleet-dev-${USER}-clusters +export HYPERFLEET_BROKER_TOPIC=hyperfleet-dev-${USER}-clusters # For nodepools -export BROKER_TOPIC=hyperfleet-dev-${USER}-nodepools +export HYPERFLEET_BROKER_TOPIC=hyperfleet-dev-${USER}-nodepools ``` This sets the full topic name where events will be published (e.g., `hyperfleet-dev-rafael-clusters`). See [Naming Strategy](https://github.com/openshift-hyperfleet/architecture/blob/main/hyperfleet/components/sentinel/sentinel-naming-strategy.md) for details. @@ -148,16 +150,16 @@ make build BROKER_CONFIG_FILE=broker.yaml go run ./cmd/sentinel serve --config=configs/dev-example.yaml # With environment variables for logging -LOG_LEVEL=debug LOG_FORMAT=json go run ./cmd/sentinel serve --config=configs/dev-example.yaml +HYPERFLEET_LOG_LEVEL=debug HYPERFLEET_LOG_FORMAT=json go run ./cmd/sentinel serve --config=configs/dev-example.yaml ``` #### Logging Configuration | Flag | Environment Variable | Values | Default | |------|---------------------|--------|---------| -| `--log-level` | `LOG_LEVEL` | debug, info, warn, error | info | -| `--log-format` | `LOG_FORMAT` | text, json | text | -| `--log-output` | `LOG_OUTPUT` | stdout, stderr | stdout | +| `--log-level` | `HYPERFLEET_LOG_LEVEL` | debug, info, warn, error | info | +| `--log-format` | `HYPERFLEET_LOG_FORMAT` | text, json | text | +| `--log-output` | `HYPERFLEET_LOG_OUTPUT` | stdout, stderr | stdout | **Precedence**: flags → environment variables → defaults @@ -222,7 +224,7 @@ Watch console output for startup and broker connection messages. 2025-12-17T14:07:30.137382Z INFO [sentinel] [dev] [hostname] Starting HyperFleet Sentinel ``` -> **Note**: Log format can be configured via `--log-format` flag or `LOG_FORMAT` environment variable. 
Use `json` for production (structured logging) and `text` for development (human-readable). +> **Note**: Log format can be configured via `--log-format` flag or `HYPERFLEET_LOG_FORMAT` environment variable. Use `json` for production (structured logging) and `text` for development (human-readable). **For RabbitMQ**, you should also see the broker connection log: @@ -282,6 +284,7 @@ gcloud container clusters get-credentials hyperfleet-dev --zone=us-central1-a -- ``` **Usage guidelines:** + - For personal work, create a namespace named after yourself to isolate resources - For team collaboration, use a designated namespace to separate resources among members @@ -372,7 +375,7 @@ helm upgrade --install sentinel-test ./charts \ --set image.repository=${USER}/hyperfleet-sentinel \ --set image.tag=dev-$(git rev-parse --short HEAD) \ --set broker.type=googlepubsub \ - --set broker.googlepubsub.projectId=${GCP_PROJECT} \ + --set broker.googlepubsub.project_id=${GCP_PROJECT} \ --set monitoring.podMonitoring.enabled=true ``` @@ -387,7 +390,7 @@ helm upgrade --install sentinel-test ./charts \ --set image.repository=gcr.io/${GCP_PROJECT}/sentinel \ --set image.tag=${IMAGE_TAG} \ --set broker.type=googlepubsub \ - --set broker.googlepubsub.projectId=${GCP_PROJECT} \ + --set broker.googlepubsub.project_id=${GCP_PROJECT} \ --set monitoring.podMonitoring.enabled=true # For Prometheus Operator environments (OpenShift, vanilla Kubernetes): @@ -397,7 +400,7 @@ helm upgrade --install sentinel-test ./charts \ --set image.repository=gcr.io/${GCP_PROJECT}/sentinel \ --set image.tag=${IMAGE_TAG} \ --set broker.type=googlepubsub \ - --set broker.googlepubsub.projectId=${GCP_PROJECT} \ + --set broker.googlepubsub.project_id=${GCP_PROJECT} \ --set monitoring.serviceMonitor.enabled=true \ --set monitoring.serviceMonitor.additionalLabels.release=prometheus ``` @@ -538,6 +541,7 @@ podman build --platform linux/amd64 -t gcr.io/${GCP_PROJECT}/sentinel:${IMAGE_TA **Cause**: Broker is not running or 
`broker.yaml` is configured for the wrong broker type **Solution**: + 1. Verify the broker is running (RabbitMQ or Pub/Sub emulator) 2. Ensure `broker.yaml` has the correct `type` (rabbitmq or googlepubsub) 3. For Pub/Sub emulator, ensure `PUBSUB_EMULATOR_HOST` is set @@ -550,15 +554,21 @@ podman build --platform linux/amd64 -t gcr.io/${GCP_PROJECT}/sentinel:${IMAGE_TA **Cause**: PodMonitoring not configured correctly or GMP collector not scraping **Solution**: + 1. Verify PodMonitoring is created: + ```bash kubectl get podmonitoring -n ${NAMESPACE} ``` + 2. Check GMP collector logs: + ```bash kubectl logs -n gmp-system -l app.kubernetes.io/name=collector ``` + 3. Ensure the metrics endpoint is accessible: + ```bash kubectl port-forward -n ${NAMESPACE} svc/sentinel-test 8080:8080 curl http://localhost:8080/metrics @@ -571,6 +581,7 @@ podman build --platform linux/amd64 -t gcr.io/${GCP_PROJECT}/sentinel:${IMAGE_TA **Cause**: Broker credentials must be set via environment variables, not ConfigMap **Solution**: Use `--set` flags or a values file to set broker credentials: + ```bash --set broker.rabbitmq.url="amqp://user:pass@host:5672/" ``` @@ -580,12 +591,14 @@ podman build --platform linux/amd64 -t gcr.io/${GCP_PROJECT}/sentinel:${IMAGE_TA **Problem**: Sentinel cannot connect to HyperFleet API **Solution**: + 1. Verify the API endpoint is correct in your config 2. For local execution, ensure the API is running 3. 
For GKE, use the in-cluster service name: + ```yaml hyperfleet_api: - endpoint: http://hyperfleet-api.hyperfleet-system.svc.cluster.local:8080 + base_url: http://hyperfleet-api.hyperfleet-system.svc.cluster.local:8080 ``` ### OpenAPI Client Not Generated @@ -595,6 +608,7 @@ podman build --platform linux/amd64 -t gcr.io/${GCP_PROJECT}/sentinel:${IMAGE_TA **Cause**: OpenAPI client was not generated **Solution**: Run the generate target before building: + ```bash make generate make build diff --git a/internal/client/client.go b/internal/client/client.go index 05e4272..44311ce 100644 --- a/internal/client/client.go +++ b/internal/client/client.go @@ -46,13 +46,22 @@ type HyperFleetClient struct { log logger.HyperFleetLogger } -// NewHyperFleetClient creates a new HyperFleet API client using OpenAPI-generated client -func NewHyperFleetClient(endpoint string, timeout time.Duration) (*HyperFleetClient, error) { +// NewHyperFleetClient creates a new HyperFleet API client using OpenAPI-generated client. +// sentinelName and version are used to build the User-Agent header sent with every request. 
+func NewHyperFleetClient(endpoint string, timeout time.Duration, sentinelName, version string) (*HyperFleetClient, error) { httpClient := &http.Client{ Timeout: timeout, } - client, err := openapi.NewClientWithResponses(endpoint, openapi.WithHTTPClient(httpClient)) + userAgent := fmt.Sprintf("hyperfleet-sentinel/%s (%s)", version, sentinelName) + + client, err := openapi.NewClientWithResponses(endpoint, + openapi.WithHTTPClient(httpClient), + openapi.WithRequestEditorFn(func(_ context.Context, req *http.Request) error { + req.Header.Set("User-Agent", userAgent) + return nil + }), + ) if err != nil { return nil, fmt.Errorf("failed to create OpenAPI client: %v", err) // This should only fail if the endpoint URL is invalid } diff --git a/internal/client/client_test.go b/internal/client/client_test.go index eb06dea..ac61fee 100644 --- a/internal/client/client_test.go +++ b/internal/client/client_test.go @@ -82,7 +82,7 @@ func TestFetchResources_Success(t *testing.T) { defer server.Close() // Create client - client, _ := NewHyperFleetClient(server.URL, 10*time.Second) + client, _ := NewHyperFleetClient(server.URL, 10*time.Second, "test-sentinel", "test") // Fetch resources ctx := context.Background() @@ -117,7 +117,7 @@ func TestFetchResources_EmptyList(t *testing.T) { })) defer server.Close() - client, _ := NewHyperFleetClient(server.URL, 10*time.Second) + client, _ := NewHyperFleetClient(server.URL, 10*time.Second, "test-sentinel", "test") resources, err := client.FetchResources(context.Background(), ResourceTypeClusters, nil) if err != nil { @@ -138,7 +138,7 @@ func TestFetchResources_404NotFound(t *testing.T) { })) defer server.Close() - client, _ := NewHyperFleetClient(server.URL, 10*time.Second) + client, _ := NewHyperFleetClient(server.URL, 10*time.Second, "test-sentinel", "test") _, err := client.FetchResources(context.Background(), ResourceTypeClusters, nil) @@ -168,7 +168,7 @@ func TestFetchResources_500ServerError(t *testing.T) { })) defer server.Close() - 
client, _ := NewHyperFleetClient(server.URL, 10*time.Second) + client, _ := NewHyperFleetClient(server.URL, 10*time.Second, "test-sentinel", "test") ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() @@ -213,7 +213,7 @@ func TestFetchResources_503ServiceUnavailable_ThenSuccess(t *testing.T) { })) defer server.Close() - client, _ := NewHyperFleetClient(server.URL, 10*time.Second) + client, _ := NewHyperFleetClient(server.URL, 10*time.Second, "test-sentinel", "test") resources, err := client.FetchResources(context.Background(), ResourceTypeClusters, nil) if err != nil { @@ -251,7 +251,7 @@ func TestFetchResources_429RateLimited(t *testing.T) { })) defer server.Close() - client, _ := NewHyperFleetClient(server.URL, 10*time.Second) + client, _ := NewHyperFleetClient(server.URL, 10*time.Second, "test-sentinel", "test") _, err := client.FetchResources(context.Background(), ResourceTypeClusters, nil) if err != nil { @@ -272,7 +272,7 @@ func TestFetchResources_Timeout(t *testing.T) { defer server.Close() // Create client with very short timeout - client, _ := NewHyperFleetClient(server.URL, 100*time.Millisecond) + client, _ := NewHyperFleetClient(server.URL, 100*time.Millisecond, "test-sentinel", "test") ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) defer cancel() @@ -295,7 +295,7 @@ func TestFetchResources_ContextCancellation(t *testing.T) { })) defer server.Close() - client, _ := NewHyperFleetClient(server.URL, 10*time.Second) + client, _ := NewHyperFleetClient(server.URL, 10*time.Second, "test-sentinel", "test") ctx, cancel := context.WithCancel(context.Background()) @@ -324,7 +324,7 @@ func TestFetchResources_MalformedJSON(t *testing.T) { })) defer server.Close() - client, _ := NewHyperFleetClient(server.URL, 10*time.Second) + client, _ := NewHyperFleetClient(server.URL, 10*time.Second, "test-sentinel", "test") _, err := client.FetchResources(context.Background(), ResourceTypeClusters, nil) @@ -337,7 +337,7 
@@ func TestFetchResources_MalformedJSON(t *testing.T) { // TestFetchResources_NilContext tests handling of nil context func TestFetchResources_NilContext(t *testing.T) { - client, _ := NewHyperFleetClient("http://localhost", 10*time.Second) + client, _ := NewHyperFleetClient("http://localhost", 10*time.Second, "test-sentinel", "test") // Intentionally pass nil context to test validation // nolint:staticcheck // Testing nil context validation @@ -354,7 +354,7 @@ func TestFetchResources_NilContext(t *testing.T) { // TestFetchResources_InvalidResourceType tests handling of invalid resource type func TestFetchResources_InvalidResourceType(t *testing.T) { - client, _ := NewHyperFleetClient("http://localhost", 10*time.Second) + client, _ := NewHyperFleetClient("http://localhost", 10*time.Second, "test-sentinel", "test") testCases := []struct { name string @@ -406,7 +406,7 @@ func TestFetchResources_NilStatus(t *testing.T) { })) defer server.Close() - client, _ := NewHyperFleetClient(server.URL, 10*time.Second) + client, _ := NewHyperFleetClient(server.URL, 10*time.Second, "test-sentinel", "test") // Note: A warning will be logged for cluster-1, but we can't easily // verify log output in tests. In production, logs are captured for monitoring. @@ -596,7 +596,7 @@ func TestFetchResources_NodePools(t *testing.T) { })) defer server.Close() - client, _ := NewHyperFleetClient(server.URL, 10*time.Second) + client, _ := NewHyperFleetClient(server.URL, 10*time.Second, "test-sentinel", "test") resources, err := client.FetchResources(context.Background(), ResourceTypeNodePools, nil) if err != nil { t.Fatalf("Expected no error, got %v", err) @@ -627,6 +627,36 @@ func TestFetchResources_NodePools(t *testing.T) { } } +// TestNewHyperFleetClient_UserAgent verifies that every request carries the expected +// User-Agent header built from the sentinel name and version. 
+func TestNewHyperFleetClient_UserAgent(t *testing.T) { + var receivedUA string + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + receivedUA = r.Header.Get("User-Agent") + response := createMockClusterList([]map[string]interface{}{}) + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(response); err != nil { + t.Errorf("Failed to encode response: %v", err) + } + })) + defer server.Close() + + c, err := NewHyperFleetClient(server.URL, 10*time.Second, "my-sentinel", "v1.2.3") + if err != nil { + t.Fatalf("NewHyperFleetClient: %v", err) + } + + if _, err := c.FetchResources(context.Background(), ResourceTypeClusters, nil); err != nil { + t.Fatalf("FetchResources: %v", err) + } + + expected := "hyperfleet-sentinel/v1.2.3 (my-sentinel)" + if receivedUA != expected { + t.Errorf("User-Agent = %q, want %q", receivedUA, expected) + } +} + // TestFetchResources_WithLabelSelector tests search parameter functionality func TestFetchResources_WithLabelSelector(t *testing.T) { var receivedSearchParam string @@ -644,7 +674,7 @@ func TestFetchResources_WithLabelSelector(t *testing.T) { })) defer server.Close() - client, _ := NewHyperFleetClient(server.URL, 10*time.Second) + client, _ := NewHyperFleetClient(server.URL, 10*time.Second, "test-sentinel", "test") labelSelector := map[string]string{ "region": "us-east", diff --git a/internal/config/config.go b/internal/config/config.go index 3aae158..403fa41 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -8,13 +8,17 @@ import ( "time" "github.com/openshift-hyperfleet/hyperfleet-sentinel/pkg/logger" + "github.com/spf13/pflag" "github.com/spf13/viper" ) +// EnvPrefix is the prefix for all environment variables that override sentinel config +const EnvPrefix = "HYPERFLEET" + // LabelSelector represents a label key-value pair for resource filtering type LabelSelector struct { - Label string `mapstructure:"label"` - Value string 
`mapstructure:"value"` + Label string `yaml:"label" mapstructure:"label"` + Value string `yaml:"value" mapstructure:"value"` } // LabelSelectorList is a list of label selectors @@ -22,20 +26,52 @@ type LabelSelectorList []LabelSelector // SentinelConfig represents the Sentinel configuration type SentinelConfig struct { - ResourceType string `mapstructure:"resource_type"` - PollInterval time.Duration `mapstructure:"poll_interval"` - MaxAgeNotReady time.Duration `mapstructure:"max_age_not_ready"` - MaxAgeReady time.Duration `mapstructure:"max_age_ready"` - ResourceSelector LabelSelectorList `mapstructure:"resource_selector"` - HyperFleetAPI *HyperFleetAPIConfig `mapstructure:"hyperfleet_api"` - MessageData map[string]interface{} `mapstructure:"message_data"` - Topic string `mapstructure:"topic"` + Sentinel SentinelInfo `yaml:"sentinel" mapstructure:"sentinel"` + DebugConfig bool `yaml:"debug_config,omitempty" mapstructure:"debug_config"` + Log LogConfig `yaml:"log,omitempty" mapstructure:"log"` + Clients ClientsConfig `yaml:"clients" mapstructure:"clients"` + ResourceType string `yaml:"resource_type" mapstructure:"resource_type"` + PollInterval time.Duration `yaml:"poll_interval" mapstructure:"poll_interval"` + MaxAgeNotReady time.Duration `yaml:"max_age_not_ready" mapstructure:"max_age_not_ready"` + MaxAgeReady time.Duration `yaml:"max_age_ready" mapstructure:"max_age_ready"` + ResourceSelector LabelSelectorList `yaml:"resource_selector,omitempty" mapstructure:"resource_selector"` + MessageData map[string]interface{} `yaml:"message_data,omitempty" mapstructure:"message_data"` +} + +// SentinelInfo contains basic sentinel information +type SentinelInfo struct { + Name string `yaml:"name" mapstructure:"name"` +} + +// LogConfig contains logging configuration. 
+// Priority (lowest to highest): config file < HYPERFLEET_LOG_* env vars < --log-* CLI flags +type LogConfig struct { + Level string `yaml:"level,omitempty" mapstructure:"level"` + Format string `yaml:"format,omitempty" mapstructure:"format"` + Output string `yaml:"output,omitempty" mapstructure:"output"` +} + +// ClientsConfig contains all client configurations +type ClientsConfig struct { + HyperfleetAPI *HyperFleetAPIConfig `yaml:"hyperfleet_api" mapstructure:"hyperfleet_api"` + Broker *BrokerConfig `yaml:"broker,omitempty" mapstructure:"broker"` } // HyperFleetAPIConfig defines the HyperFleet API client configuration type HyperFleetAPIConfig struct { - Endpoint string `mapstructure:"endpoint"` - Timeout time.Duration `mapstructure:"timeout"` + BaseURL string `yaml:"base_url" mapstructure:"base_url"` + Version string `yaml:"version,omitempty" mapstructure:"version"` + Timeout time.Duration `yaml:"timeout" mapstructure:"timeout"` + RetryAttempts int `yaml:"retry_attempts,omitempty" mapstructure:"retry_attempts"` + RetryBackoff string `yaml:"retry_backoff,omitempty" mapstructure:"retry_backoff"` + BaseDelay time.Duration `yaml:"base_delay,omitempty" mapstructure:"base_delay"` + MaxDelay time.Duration `yaml:"max_delay,omitempty" mapstructure:"max_delay"` + DefaultHeaders map[string]string `yaml:"default_headers,omitempty" mapstructure:"default_headers"` +} + +// BrokerConfig contains broker configuration +type BrokerConfig struct { + Topic string `yaml:"topic,omitempty" mapstructure:"topic"` } // ToMap converts label selectors to a map for filtering @@ -56,40 +92,131 @@ func (ls LabelSelectorList) ToMap() map[string]string { // NewSentinelConfig creates a new configuration with defaults func NewSentinelConfig() *SentinelConfig { return &SentinelConfig{ + Sentinel: SentinelInfo{ + Name: "hyperfleet-sentinel", + }, + DebugConfig: false, + Log: LogConfig{ + Level: "info", + Format: "text", + Output: "stdout", + }, + Clients: ClientsConfig{ + HyperfleetAPI: 
&HyperFleetAPIConfig{ + Version: "v1", + Timeout: 10 * time.Second, + RetryAttempts: 3, + RetryBackoff: "exponential", + BaseDelay: 1 * time.Second, + MaxDelay: 30 * time.Second, + }, + Broker: &BrokerConfig{}, + }, // ResourceType is required and must be set in config file PollInterval: 5 * time.Second, MaxAgeNotReady: 10 * time.Second, MaxAgeReady: 30 * time.Minute, ResourceSelector: []LabelSelector{}, // Empty means watch all resources - HyperFleetAPI: &HyperFleetAPIConfig{ - // Endpoint is required and must be set in config file - Timeout: 5 * time.Second, - }, } } -// LoadConfig loads configuration from YAML file and environment variables -// Precedence: Environment variables > YAML file > Defaults -func LoadConfig(configFile string) (*SentinelConfig, error) { +// viperKeyMappings defines mappings from config paths to env variable suffixes +// The full env var name is EnvPrefix + "_" + suffix +// Note: Uses "::" as key delimiter to avoid conflicts with dots in YAML keys +// Complex types (maps, slices) are intentionally excluded — they cannot be expressed as scalar env vars. 
+var viperKeyMappings = map[string]string{ + "debug_config": "DEBUG_CONFIG", + "sentinel::name": "SENTINEL_NAME", + "log::level": "LOG_LEVEL", + "log::format": "LOG_FORMAT", + "log::output": "LOG_OUTPUT", + "clients::hyperfleet_api::base_url": "API_BASE_URL", + "clients::hyperfleet_api::version": "API_VERSION", + "clients::hyperfleet_api::timeout": "API_TIMEOUT", + "clients::hyperfleet_api::retry_attempts": "API_RETRY_ATTEMPTS", + "clients::hyperfleet_api::retry_backoff": "API_RETRY_BACKOFF", + "clients::hyperfleet_api::base_delay": "API_BASE_DELAY", + "clients::hyperfleet_api::max_delay": "API_MAX_DELAY", + "clients::broker::topic": "BROKER_TOPIC", + "resource_type": "RESOURCE_TYPE", + "poll_interval": "POLL_INTERVAL", + "max_age_not_ready": "MAX_AGE_NOT_READY", + "max_age_ready": "MAX_AGE_READY", +} + +// cliFlags defines mappings from CLI flag names to config paths +// Note: Uses "::" as key delimiter to avoid conflicts with dots in YAML keys +var cliFlags = map[string]string{ + "debug-config": "debug_config", + "sentinel-name": "sentinel::name", + "hyperfleet-api-base-url": "clients::hyperfleet_api::base_url", + "hyperfleet-api-version": "clients::hyperfleet_api::version", + "hyperfleet-api-timeout": "clients::hyperfleet_api::timeout", + "hyperfleet-api-retry-attempts": "clients::hyperfleet_api::retry_attempts", + "hyperfleet-api-retry-backoff": "clients::hyperfleet_api::retry_backoff", + "hyperfleet-api-base-delay": "clients::hyperfleet_api::base_delay", + "hyperfleet-api-max-delay": "clients::hyperfleet_api::max_delay", + "broker-topic": "clients::broker::topic", + "resource-type": "resource_type", + "poll-interval": "poll_interval", + "max-age-not-ready": "max_age_not_ready", + "max-age-ready": "max_age_ready", + "log-level": "log::level", + "log-format": "log::format", + "log-output": "log::output", +} + +// LoadConfig loads configuration from YAML file with environment variable and CLI flag overrides +// Precedence: CLI flags > Environment variables > YAML 
file > Defaults +func LoadConfig(configFile string, flags *pflag.FlagSet) (*SentinelConfig, error) { cfg := NewSentinelConfig() - // Load from YAML file if configFile == "" { - return nil, fmt.Errorf("config file is required") + if env := os.Getenv("HYPERFLEET_CONFIG"); env != "" { + configFile = env + } else { + configFile = "/etc/sentinel/config.yaml" + } } log := logger.NewHyperFleetLogger() ctx := context.Background() log.Infof(ctx, "Loading configuration from %s", configFile) - v := viper.New() + // Use "::" as key delimiter to avoid conflicts with dots in YAML keys + v := viper.NewWithOptions(viper.KeyDelimiter("::")) v.SetConfigFile(configFile) + // Read the YAML file if err := v.ReadInConfig(); err != nil { return nil, fmt.Errorf("failed to read config file: %w", err) } - if err := v.Unmarshal(cfg); err != nil { + // Bind environment variables + v.SetEnvPrefix(EnvPrefix) + v.AutomaticEnv() + // Replace "::" (our key delimiter) and "-" with "_" for env var lookups + v.SetEnvKeyReplacer(strings.NewReplacer("::", "_", "-", "_")) + + // Bind specific environment variables with HYPERFLEET_ prefix + for configPath, envSuffix := range viperKeyMappings { + envVar := EnvPrefix + "_" + envSuffix + if val := os.Getenv(envVar); val != "" { + v.Set(configPath, val) + } + } + + // Bind CLI flags if provided + if flags != nil { + for flagName, configPath := range cliFlags { + if flag := flags.Lookup(flagName); flag != nil && flag.Changed { + v.Set(configPath, flag.Value.String()) + } + } + } + + // Unmarshal into SentinelConfig struct — ErrorUnused ensures unknown fields are rejected + if err := v.UnmarshalExact(cfg); err != nil { return nil, fmt.Errorf("failed to unmarshal config: %w", err) } @@ -102,24 +229,23 @@ func LoadConfig(configFile string) (*SentinelConfig, error) { } } - // Override topic from environment variable if explicitly provided - // Environment variable takes precedence over config file (including empty value to clear) - if topic, ok := 
os.LookupEnv("BROKER_TOPIC"); ok { - cfg.Topic = topic - } - // Validate configuration if err := cfg.Validate(); err != nil { return nil, fmt.Errorf("invalid config: %w", err) } - log.Infof(ctx, "Configuration loaded successfully: resource_type=%s", cfg.ResourceType) + log.Infof(ctx, "Configuration loaded successfully: name=%s resource_type=%s", + cfg.Sentinel.Name, cfg.ResourceType) return cfg, nil } // Validate validates the configuration func (c *SentinelConfig) Validate() error { + if c.Sentinel.Name == "" { + return fmt.Errorf("sentinel.name is required") + } + if c.ResourceType == "" { return fmt.Errorf("resource_type is required") } @@ -130,8 +256,20 @@ func (c *SentinelConfig) Validate() error { c.ResourceType, strings.Join(validResourceTypes, ", ")) } - if c.HyperFleetAPI.Endpoint == "" { - return fmt.Errorf("hyperfleet_api.endpoint is required") + if c.Clients.HyperfleetAPI == nil { + return fmt.Errorf("clients.hyperfleet_api is required") + } + + if c.Clients.HyperfleetAPI.BaseURL == "" { + return fmt.Errorf("clients.hyperfleet_api.base_url is required") + } + + if c.Clients.HyperfleetAPI.RetryBackoff != "" { + validBackoffs := []string{"exponential", "linear", "constant"} + if !contains(validBackoffs, c.Clients.HyperfleetAPI.RetryBackoff) { + return fmt.Errorf("invalid clients.hyperfleet_api.retry_backoff: %s (must be one of: %s)", + c.Clients.HyperfleetAPI.RetryBackoff, strings.Join(validBackoffs, ", ")) + } } if c.PollInterval <= 0 { @@ -189,3 +327,43 @@ func contains(slice []string, value string) bool { } return false } + +// RedactedCopy returns a copy of the config with its pointer, slice, and map fields +// duplicated one level deep (values nested in message_data stay shared). Use it when logging +// the merged config at startup so future sensitive fields are never shared by reference. 
+func (c *SentinelConfig) RedactedCopy() *SentinelConfig { + cp := *c + + if cp.Clients.HyperfleetAPI != nil { + api := *cp.Clients.HyperfleetAPI + if api.DefaultHeaders != nil { + headers := make(map[string]string, len(api.DefaultHeaders)) + for k, v := range api.DefaultHeaders { + headers[k] = v + } + api.DefaultHeaders = headers + } + cp.Clients.HyperfleetAPI = &api + } + + if cp.Clients.Broker != nil { + b := *cp.Clients.Broker + cp.Clients.Broker = &b + } + + if c.ResourceSelector != nil { + rs := make(LabelSelectorList, len(c.ResourceSelector)) + copy(rs, c.ResourceSelector) + cp.ResourceSelector = rs + } + + if c.MessageData != nil { + md := make(map[string]interface{}, len(c.MessageData)) + for k, v := range c.MessageData { + md[k] = v + } + cp.MessageData = md + } + + return &cp +} diff --git a/internal/config/config_loading_test.go b/internal/config/config_loading_test.go new file mode 100644 index 0000000..14666af --- /dev/null +++ b/internal/config/config_loading_test.go @@ -0,0 +1,451 @@ +package config + +import ( + "testing" + "time" + + "github.com/spf13/pflag" +) + +// baseConfig is a minimal valid YAML config used as the base for override tests. +// It sets explicit values for all 17 viperKeyMappings entries so that each +// subtest can verify that the override (env var or CLI flag) wins over the file. 
+const baseConfig = ` +sentinel: + name: test-sentinel +debug_config: false +log: + level: info + format: text + output: stdout +resource_type: clusters +poll_interval: 5s +max_age_not_ready: 10s +max_age_ready: 30m +message_data: + id: "resource.id" +clients: + hyperfleet_api: + base_url: https://api.example.com + version: v1 + timeout: 10s + retry_attempts: 3 + retry_backoff: exponential + base_delay: 1s + max_delay: 30s + broker: + topic: base-topic +` + +// makeFlags creates a pflag.FlagSet pre-populated with all config override flags +// (mirroring addConfigOverrideFlags in cmd/sentinel/main.go) and marks the +// given name→value pairs as Changed by calling Set on each. +func makeFlags(t *testing.T, pairs map[string]string) *pflag.FlagSet { + t.Helper() + fs := pflag.NewFlagSet("test", pflag.ContinueOnError) + + // General + fs.Bool("debug-config", false, "") + // Sentinel + fs.String("sentinel-name", "", "") + // Log + fs.String("log-level", "", "") + fs.String("log-format", "", "") + fs.String("log-output", "", "") + // HyperFleet API + fs.String("hyperfleet-api-base-url", "", "") + fs.String("hyperfleet-api-version", "", "") + fs.String("hyperfleet-api-timeout", "", "") + fs.Int("hyperfleet-api-retry-attempts", 0, "") + fs.String("hyperfleet-api-retry-backoff", "", "") + fs.String("hyperfleet-api-base-delay", "", "") + fs.String("hyperfleet-api-max-delay", "", "") + // Broker + fs.String("broker-topic", "", "") + // Sentinel-specific + fs.String("resource-type", "", "") + fs.String("poll-interval", "", "") + fs.String("max-age-not-ready", "", "") + fs.String("max-age-ready", "", "") + + for name, value := range pairs { + if err := fs.Set(name, value); err != nil { + t.Fatalf("failed to set flag %q=%q: %v", name, value, err) + } + } + return fs +} + +// ============================================================================ +// TestLoadConfig_EnvVarOverrides +// ============================================================================ + +func 
TestLoadConfig_EnvVarOverrides(t *testing.T) { + tests := []struct { + name string + envVar string + envValue string + check func(t *testing.T, cfg *SentinelConfig) + }{ + { + name: "log::level", + envVar: "HYPERFLEET_LOG_LEVEL", + envValue: "debug", + check: func(t *testing.T, cfg *SentinelConfig) { + if cfg.Log.Level != "debug" { + t.Errorf("expected Log.Level=%q, got %q", "debug", cfg.Log.Level) + } + }, + }, + { + name: "log::format", + envVar: "HYPERFLEET_LOG_FORMAT", + envValue: "json", + check: func(t *testing.T, cfg *SentinelConfig) { + if cfg.Log.Format != "json" { + t.Errorf("expected Log.Format=%q, got %q", "json", cfg.Log.Format) + } + }, + }, + { + name: "log::output", + envVar: "HYPERFLEET_LOG_OUTPUT", + envValue: "stderr", + check: func(t *testing.T, cfg *SentinelConfig) { + if cfg.Log.Output != "stderr" { + t.Errorf("expected Log.Output=%q, got %q", "stderr", cfg.Log.Output) + } + }, + }, + { + name: "sentinel::name", + envVar: "HYPERFLEET_SENTINEL_NAME", + envValue: "env-sentinel", + check: func(t *testing.T, cfg *SentinelConfig) { + if cfg.Sentinel.Name != "env-sentinel" { + t.Errorf("expected Sentinel.Name=%q, got %q", "env-sentinel", cfg.Sentinel.Name) + } + }, + }, + { + name: "debug_config", + envVar: "HYPERFLEET_DEBUG_CONFIG", + envValue: "true", + check: func(t *testing.T, cfg *SentinelConfig) { + if !cfg.DebugConfig { + t.Errorf("expected DebugConfig=true, got false") + } + }, + }, + { + name: "clients::hyperfleet_api::base_url", + envVar: "HYPERFLEET_API_BASE_URL", + envValue: "https://env.example.com", + check: func(t *testing.T, cfg *SentinelConfig) { + if cfg.Clients.HyperfleetAPI.BaseURL != "https://env.example.com" { + t.Errorf("expected BaseURL=%q, got %q", "https://env.example.com", cfg.Clients.HyperfleetAPI.BaseURL) + } + }, + }, + { + name: "clients::hyperfleet_api::version", + envVar: "HYPERFLEET_API_VERSION", + envValue: "v2", + check: func(t *testing.T, cfg *SentinelConfig) { + if cfg.Clients.HyperfleetAPI.Version != "v2" { + 
t.Errorf("expected Version=%q, got %q", "v2", cfg.Clients.HyperfleetAPI.Version) + } + }, + }, + { + name: "clients::hyperfleet_api::timeout", + envVar: "HYPERFLEET_API_TIMEOUT", + envValue: "20s", + check: func(t *testing.T, cfg *SentinelConfig) { + want := 20 * time.Second + if cfg.Clients.HyperfleetAPI.Timeout != want { + t.Errorf("expected Timeout=%v, got %v", want, cfg.Clients.HyperfleetAPI.Timeout) + } + }, + }, + { + name: "clients::hyperfleet_api::retry_attempts", + envVar: "HYPERFLEET_API_RETRY_ATTEMPTS", + envValue: "5", + check: func(t *testing.T, cfg *SentinelConfig) { + if cfg.Clients.HyperfleetAPI.RetryAttempts != 5 { + t.Errorf("expected RetryAttempts=5, got %d", cfg.Clients.HyperfleetAPI.RetryAttempts) + } + }, + }, + { + name: "clients::hyperfleet_api::retry_backoff", + envVar: "HYPERFLEET_API_RETRY_BACKOFF", + envValue: "linear", + check: func(t *testing.T, cfg *SentinelConfig) { + if cfg.Clients.HyperfleetAPI.RetryBackoff != "linear" { + t.Errorf("expected RetryBackoff=%q, got %q", "linear", cfg.Clients.HyperfleetAPI.RetryBackoff) + } + }, + }, + { + name: "clients::hyperfleet_api::base_delay", + envVar: "HYPERFLEET_API_BASE_DELAY", + envValue: "2s", + check: func(t *testing.T, cfg *SentinelConfig) { + want := 2 * time.Second + if cfg.Clients.HyperfleetAPI.BaseDelay != want { + t.Errorf("expected BaseDelay=%v, got %v", want, cfg.Clients.HyperfleetAPI.BaseDelay) + } + }, + }, + { + name: "clients::hyperfleet_api::max_delay", + envVar: "HYPERFLEET_API_MAX_DELAY", + envValue: "60s", + check: func(t *testing.T, cfg *SentinelConfig) { + want := 60 * time.Second + if cfg.Clients.HyperfleetAPI.MaxDelay != want { + t.Errorf("expected MaxDelay=%v, got %v", want, cfg.Clients.HyperfleetAPI.MaxDelay) + } + }, + }, + { + name: "clients::broker::topic", + envVar: "HYPERFLEET_BROKER_TOPIC", + envValue: "env-topic", + check: func(t *testing.T, cfg *SentinelConfig) { + if cfg.Clients.Broker.Topic != "env-topic" { + t.Errorf("expected Topic=%q, got %q", 
"env-topic", cfg.Clients.Broker.Topic) + } + }, + }, + { + name: "resource_type", + envVar: "HYPERFLEET_RESOURCE_TYPE", + envValue: "nodepools", + check: func(t *testing.T, cfg *SentinelConfig) { + if cfg.ResourceType != "nodepools" { + t.Errorf("expected ResourceType=%q, got %q", "nodepools", cfg.ResourceType) + } + }, + }, + { + name: "poll_interval", + envVar: "HYPERFLEET_POLL_INTERVAL", + envValue: "15s", + check: func(t *testing.T, cfg *SentinelConfig) { + want := 15 * time.Second + if cfg.PollInterval != want { + t.Errorf("expected PollInterval=%v, got %v", want, cfg.PollInterval) + } + }, + }, + { + name: "max_age_not_ready", + envVar: "HYPERFLEET_MAX_AGE_NOT_READY", + envValue: "20s", + check: func(t *testing.T, cfg *SentinelConfig) { + want := 20 * time.Second + if cfg.MaxAgeNotReady != want { + t.Errorf("expected MaxAgeNotReady=%v, got %v", want, cfg.MaxAgeNotReady) + } + }, + }, + { + name: "max_age_ready", + envVar: "HYPERFLEET_MAX_AGE_READY", + envValue: "1h", + check: func(t *testing.T, cfg *SentinelConfig) { + want := time.Hour + if cfg.MaxAgeReady != want { + t.Errorf("expected MaxAgeReady=%v, got %v", want, cfg.MaxAgeReady) + } + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + configPath := createTempConfigFile(t, baseConfig) + t.Setenv(tt.envVar, tt.envValue) + + cfg, err := LoadConfig(configPath, nil) + if err != nil { + t.Fatalf("LoadConfig failed: %v", err) + } + + tt.check(t, cfg) + }) + } +} + +// ============================================================================ +// TestLoadConfig_CLIFlagOverrides +// ============================================================================ + +func TestLoadConfig_CLIFlagOverrides(t *testing.T) { + tests := []struct { + name string + envVar string + envValue string + flagName string + flagValue string + check func(t *testing.T, cfg *SentinelConfig) + }{ + { + name: "sentinel-name beats env and file", + envVar: "HYPERFLEET_SENTINEL_NAME", + envValue: 
"env-sentinel", + flagName: "sentinel-name", + flagValue: "flag-sentinel", + check: func(t *testing.T, cfg *SentinelConfig) { + if cfg.Sentinel.Name != "flag-sentinel" { + t.Errorf("expected Sentinel.Name=%q (flag wins), got %q", "flag-sentinel", cfg.Sentinel.Name) + } + }, + }, + { + name: "hyperfleet-api-base-url beats file", + flagName: "hyperfleet-api-base-url", + flagValue: "https://flag.example.com", + check: func(t *testing.T, cfg *SentinelConfig) { + if cfg.Clients.HyperfleetAPI.BaseURL != "https://flag.example.com" { + t.Errorf("expected BaseURL=%q (flag wins), got %q", "https://flag.example.com", cfg.Clients.HyperfleetAPI.BaseURL) + } + }, + }, + { + name: "poll-interval beats file", + flagName: "poll-interval", + flagValue: "45s", + check: func(t *testing.T, cfg *SentinelConfig) { + want := 45 * time.Second + if cfg.PollInterval != want { + t.Errorf("expected PollInterval=%v (flag wins), got %v", want, cfg.PollInterval) + } + }, + }, + { + name: "log-level beats file", + flagName: "log-level", + flagValue: "warn", + check: func(t *testing.T, cfg *SentinelConfig) { + if cfg.Log.Level != "warn" { + t.Errorf("expected Log.Level=%q (flag wins), got %q", "warn", cfg.Log.Level) + } + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + configPath := createTempConfigFile(t, baseConfig) + if tt.envVar != "" { + t.Setenv(tt.envVar, tt.envValue) + } + flags := makeFlags(t, map[string]string{tt.flagName: tt.flagValue}) + + cfg, err := LoadConfig(configPath, flags) + if err != nil { + t.Fatalf("LoadConfig failed: %v", err) + } + + tt.check(t, cfg) + }) + } +} + +// ============================================================================ +// TestLoadConfig_LegacyBrokerEnvVars +// ============================================================================ + + +// ============================================================================ +// TestLoadConfig_FilePrecedence +// 
============================================================================ + +func TestLoadConfig_FilePrecedence(t *testing.T) { + configPath := createTempConfigFile(t, baseConfig) + + cfg, err := LoadConfig(configPath, nil) + if err != nil { + t.Fatalf("LoadConfig failed: %v", err) + } + + if cfg.Sentinel.Name != "test-sentinel" { + t.Errorf("expected Sentinel.Name=%q, got %q", "test-sentinel", cfg.Sentinel.Name) + } + if cfg.ResourceType != "clusters" { + t.Errorf("expected ResourceType=%q, got %q", "clusters", cfg.ResourceType) + } + if cfg.PollInterval != 5*time.Second { + t.Errorf("expected PollInterval=5s, got %v", cfg.PollInterval) + } + if cfg.MaxAgeNotReady != 10*time.Second { + t.Errorf("expected MaxAgeNotReady=10s, got %v", cfg.MaxAgeNotReady) + } + if cfg.MaxAgeReady != 30*time.Minute { + t.Errorf("expected MaxAgeReady=30m, got %v", cfg.MaxAgeReady) + } + if cfg.Clients.HyperfleetAPI.BaseURL != "https://api.example.com" { + t.Errorf("expected BaseURL=%q, got %q", "https://api.example.com", cfg.Clients.HyperfleetAPI.BaseURL) + } + if cfg.Log.Level != "info" { + t.Errorf("expected Log.Level=%q, got %q", "info", cfg.Log.Level) + } + if cfg.Clients.Broker.Topic != "base-topic" { + t.Errorf("expected Topic=%q, got %q", "base-topic", cfg.Clients.Broker.Topic) + } +} + +// ============================================================================ +// TestLoadConfig_PriorityChain +// ============================================================================ + +func TestLoadConfig_PriorityChain(t *testing.T) { + t.Run("flag beats env beats file", func(t *testing.T) { + configPath := createTempConfigFile(t, baseConfig) + t.Setenv("HYPERFLEET_POLL_INTERVAL", "10s") + flags := makeFlags(t, map[string]string{"poll-interval": "15s"}) + + cfg, err := LoadConfig(configPath, flags) + if err != nil { + t.Fatalf("LoadConfig failed: %v", err) + } + + want := 15 * time.Second + if cfg.PollInterval != want { + t.Errorf("expected PollInterval=%v (flag wins), got %v", 
want, cfg.PollInterval) + } + }) + + t.Run("env beats file when no flag", func(t *testing.T) { + configPath := createTempConfigFile(t, baseConfig) + t.Setenv("HYPERFLEET_POLL_INTERVAL", "10s") + + cfg, err := LoadConfig(configPath, nil) + if err != nil { + t.Fatalf("LoadConfig failed: %v", err) + } + + want := 10 * time.Second + if cfg.PollInterval != want { + t.Errorf("expected PollInterval=%v (env wins), got %v", want, cfg.PollInterval) + } + }) + + t.Run("file value used when no env or flag", func(t *testing.T) { + configPath := createTempConfigFile(t, baseConfig) + + cfg, err := LoadConfig(configPath, nil) + if err != nil { + t.Fatalf("LoadConfig failed: %v", err) + } + + want := 5 * time.Second + if cfg.PollInterval != want { + t.Errorf("expected PollInterval=%v (file value), got %v", want, cfg.PollInterval) + } + }) +} diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 83b1549..1d0353b 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -27,7 +27,7 @@ func createTempConfigFile(t *testing.T, content string) string { func TestLoadConfig_ValidComplete(t *testing.T) { configPath := filepath.Join("testdata", "valid-complete.yaml") - cfg, err := LoadConfig(configPath) + cfg, err := LoadConfig(configPath, nil) if err != nil { t.Fatalf("Expected no error, got: %v", err) } @@ -54,11 +54,11 @@ func TestLoadConfig_ValidComplete(t *testing.T) { } // Verify HyperFleet API config - if cfg.HyperFleetAPI.Endpoint != "https://api.hyperfleet.example.com" { - t.Errorf("Expected endpoint 'https://api.hyperfleet.example.com', got '%s'", cfg.HyperFleetAPI.Endpoint) + if cfg.Clients.HyperfleetAPI.BaseURL != "https://api.hyperfleet.example.com" { + t.Errorf("Expected base_url 'https://api.hyperfleet.example.com', got '%s'", cfg.Clients.HyperfleetAPI.BaseURL) } - if cfg.HyperFleetAPI.Timeout != 5*time.Second { - t.Errorf("Expected timeout 5s, got %v", cfg.HyperFleetAPI.Timeout) + if cfg.Clients.HyperfleetAPI.Timeout != 
10*time.Second { + t.Errorf("Expected timeout 10s, got %v", cfg.Clients.HyperfleetAPI.Timeout) } // Verify message data @@ -77,7 +77,7 @@ func TestLoadConfig_ValidComplete(t *testing.T) { func TestLoadConfig_Minimal(t *testing.T) { configPath := filepath.Join("testdata", "minimal.yaml") - cfg, err := LoadConfig(configPath) + cfg, err := LoadConfig(configPath, nil) if err != nil { t.Fatalf("Expected no error, got: %v", err) } @@ -95,19 +95,48 @@ func TestLoadConfig_Minimal(t *testing.T) { } func TestLoadConfig_FileNotFound(t *testing.T) { - _, err := LoadConfig("/nonexistent/path/config.yaml") + _, err := LoadConfig("/nonexistent/path/config.yaml", nil) if err == nil { t.Fatal("Expected error for nonexistent file, got nil") } } -func TestLoadConfig_EmptyPath(t *testing.T) { - _, err := LoadConfig("") +func TestLoadConfig_EmptyPath_FallsBackToDefault(t *testing.T) { + t.Setenv("HYPERFLEET_CONFIG", "") + _, err := LoadConfig("", nil) if err == nil { - t.Fatal("Expected error for empty config path, got nil") + t.Fatal("Expected error for missing default config file, got nil") } - if err.Error() != "config file is required" { - t.Errorf("Expected 'config file is required' error, got: %v", err) + if !strings.Contains(err.Error(), "/etc/sentinel/config.yaml") { + t.Errorf("Expected error to mention default path /etc/sentinel/config.yaml, got: %v", err) + } +} + +func TestLoadConfig_HyperfleetConfigEnvVar(t *testing.T) { + yaml := ` +sentinel: + name: env-var-sentinel +resource_type: clusters +message_data: + id: "resource.id" + kind: "resource.kind" +poll_interval: 5s +max_age_not_ready: 10s +max_age_ready: 30m +clients: + hyperfleet_api: + base_url: https://example.com + timeout: 10s +` + configPath := createTempConfigFile(t, yaml) + t.Setenv("HYPERFLEET_CONFIG", configPath) + + cfg, err := LoadConfig("", nil) + if err != nil { + t.Fatalf("Expected config to load from HYPERFLEET_CONFIG env var, got error: %v", err) + } + if cfg.Sentinel.Name != "env-var-sentinel" { + 
t.Errorf("Expected sentinel name 'env-var-sentinel', got: %s", cfg.Sentinel.Name) } } @@ -119,7 +148,7 @@ invalid yaml here: [ ` configPath := createTempConfigFile(t, yaml) - _, err := LoadConfig(configPath) + _, err := LoadConfig(configPath, nil) if err == nil { t.Fatal("Expected error for invalid YAML, got nil") } @@ -145,12 +174,12 @@ func TestNewSentinelConfig_Defaults(t *testing.T) { if cfg.MaxAgeReady != 30*time.Minute { t.Errorf("Expected default max_age_ready 30m, got %v", cfg.MaxAgeReady) } - if cfg.HyperFleetAPI.Timeout != 5*time.Second { - t.Errorf("Expected default timeout 30s, got %v", cfg.HyperFleetAPI.Timeout) + if cfg.Clients.HyperfleetAPI.Timeout != 10*time.Second { + t.Errorf("Expected default timeout 10s, got %v", cfg.Clients.HyperfleetAPI.Timeout) } - // Endpoint has no default - must be set in config file - if cfg.HyperFleetAPI.Endpoint != "" { - t.Errorf("Expected no default endpoint (empty string), got '%s'", cfg.HyperFleetAPI.Endpoint) + // BaseURL has no default - must be set in config file + if cfg.Clients.HyperfleetAPI.BaseURL != "" { + t.Errorf("Expected no default base_url (empty string), got '%s'", cfg.Clients.HyperfleetAPI.BaseURL) } if len(cfg.ResourceSelector) != 0 { t.Errorf("Expected empty resource_selector, got %d items", len(cfg.ResourceSelector)) @@ -164,10 +193,25 @@ func TestNewSentinelConfig_Defaults(t *testing.T) { // Validation Tests - Required Fields // ============================================================================ +func TestValidate_MissingSentinelName(t *testing.T) { + cfg := NewSentinelConfig() + cfg.Sentinel.Name = "" + cfg.ResourceType = "clusters" + cfg.Clients.HyperfleetAPI.BaseURL = "http://api.example.com" + + err := cfg.Validate() + if err == nil { + t.Fatal("Expected error for missing sentinel.name, got nil") + } + if err.Error() != "sentinel.name is required" { + t.Errorf("Expected 'sentinel.name is required' error, got: %v", err) + } +} + func TestValidate_MissingResourceType(t *testing.T) { cfg 
:= NewSentinelConfig() cfg.ResourceType = "" - cfg.HyperFleetAPI.Endpoint = "http://api.example.com" + cfg.Clients.HyperfleetAPI.BaseURL = "http://api.example.com" err := cfg.Validate() if err == nil { @@ -178,17 +222,17 @@ func TestValidate_MissingResourceType(t *testing.T) { } } -func TestValidate_MissingEndpoint(t *testing.T) { +func TestValidate_MissingBaseURL(t *testing.T) { cfg := NewSentinelConfig() - cfg.ResourceType = "clusters" // Set valid resource_type to test endpoint validation - cfg.HyperFleetAPI.Endpoint = "" + cfg.ResourceType = "clusters" // Set valid resource_type to test base_url validation + cfg.Clients.HyperfleetAPI.BaseURL = "" err := cfg.Validate() if err == nil { - t.Fatal("Expected error for missing endpoint, got nil") + t.Fatal("Expected error for missing base_url, got nil") } - if err.Error() != "hyperfleet_api.endpoint is required" { - t.Errorf("Expected 'hyperfleet_api.endpoint is required' error, got: %v", err) + if err.Error() != "clients.hyperfleet_api.base_url is required" { + t.Errorf("Expected 'clients.hyperfleet_api.base_url is required' error, got: %v", err) } } @@ -199,7 +243,7 @@ func TestValidate_MissingEndpoint(t *testing.T) { func TestValidate_InvalidResourceType(t *testing.T) { cfg := NewSentinelConfig() cfg.ResourceType = "invalid-type" - cfg.HyperFleetAPI.Endpoint = "http://api.example.com" + cfg.Clients.HyperfleetAPI.BaseURL = "http://api.example.com" err := cfg.Validate() if err == nil { @@ -226,7 +270,7 @@ func TestValidate_InvalidResourceTypes(t *testing.T) { t.Run(tt.name, func(t *testing.T) { cfg := NewSentinelConfig() cfg.ResourceType = tt.resourceType - cfg.HyperFleetAPI.Endpoint = "http://api.example.com" + cfg.Clients.HyperfleetAPI.BaseURL = "http://api.example.com" cfg.MessageData = map[string]interface{}{"id": "resource.id"} err := cfg.Validate() @@ -240,6 +284,40 @@ func TestValidate_InvalidResourceTypes(t *testing.T) { } } +func TestValidate_InvalidRetryBackoffs(t *testing.T) { + tests := []struct { + name 
string + retryBackoff string + shouldFail bool + }{ + {"valid exponential", "exponential", false}, + {"valid linear", "linear", false}, + {"valid constant", "constant", false}, + {"empty (uses default)", "", false}, + {"invalid random", "random", true}, + {"invalid jitter", "jitter", true}, + {"invalid none", "none", true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + cfg := NewSentinelConfig() + cfg.ResourceType = "clusters" + cfg.Clients.HyperfleetAPI.BaseURL = "http://api.example.com" + cfg.Clients.HyperfleetAPI.RetryBackoff = tt.retryBackoff + cfg.MessageData = map[string]interface{}{"id": "resource.id"} + + err := cfg.Validate() + if tt.shouldFail && err == nil { + t.Errorf("Expected error for retry_backoff '%s', got nil", tt.retryBackoff) + } + if !tt.shouldFail && err != nil { + t.Errorf("Expected no error for retry_backoff '%s', got: %v", tt.retryBackoff, err) + } + }) + } +} + func TestValidate_NegativeDurations(t *testing.T) { tests := []struct { name string @@ -274,7 +352,9 @@ func TestValidate_NegativeDurations(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { cfg := NewSentinelConfig() - cfg.HyperFleetAPI.Endpoint = "http://api.example.com" + cfg.ResourceType = "clusters" + cfg.Clients.HyperfleetAPI.BaseURL = "http://api.example.com" + cfg.MessageData = map[string]interface{}{"id": "resource.id"} tt.modifier(cfg) err := cfg.Validate() @@ -339,7 +419,7 @@ func TestLabelSelectorList_ToMap_EmptyLabel(t *testing.T) { func TestValidate_ValidMessageDataFlat(t *testing.T) { cfg := NewSentinelConfig() cfg.ResourceType = "clusters" - cfg.HyperFleetAPI.Endpoint = "http://api.example.com" + cfg.Clients.HyperfleetAPI.BaseURL = "http://api.example.com" cfg.MessageData = map[string]interface{}{ "id": "resource.id", "kind": "resource.kind", @@ -354,7 +434,7 @@ func TestValidate_ValidMessageDataFlat(t *testing.T) { func TestValidate_ValidMessageDataNested(t *testing.T) { cfg := NewSentinelConfig() 
cfg.ResourceType = "clusters" - cfg.HyperFleetAPI.Endpoint = "http://api.example.com" + cfg.Clients.HyperfleetAPI.BaseURL = "http://api.example.com" cfg.MessageData = map[string]interface{}{ "origin": `"sentinel"`, "ref": map[string]interface{}{ @@ -371,7 +451,7 @@ func TestValidate_ValidMessageDataNested(t *testing.T) { func TestValidate_NilMessageData(t *testing.T) { cfg := NewSentinelConfig() cfg.ResourceType = "clusters" - cfg.HyperFleetAPI.Endpoint = "http://api.example.com" + cfg.Clients.HyperfleetAPI.BaseURL = "http://api.example.com" // MessageData is nil by default — message_data is required so this must fail if err := cfg.Validate(); err == nil { @@ -383,7 +463,7 @@ func TestValidate_NilLeafInMessageData(t *testing.T) { // Mirrors YAML: `id:` — viper may drop the key, but if it doesn't the nil leaf must be rejected. cfg := NewSentinelConfig() cfg.ResourceType = "clusters" - cfg.HyperFleetAPI.Endpoint = "http://api.example.com" + cfg.Clients.HyperfleetAPI.BaseURL = "http://api.example.com" cfg.MessageData = map[string]interface{}{ "id": nil, "kind": "resource.kind", @@ -398,7 +478,7 @@ func TestValidate_EmptyStringLeafInMessageData(t *testing.T) { // Mirrors YAML: `id: ""` — an explicitly-set empty CEL expression. cfg := NewSentinelConfig() cfg.ResourceType = "clusters" - cfg.HyperFleetAPI.Endpoint = "http://api.example.com" + cfg.Clients.HyperfleetAPI.BaseURL = "http://api.example.com" cfg.MessageData = map[string]interface{}{ "id": "", "kind": "resource.kind", @@ -413,7 +493,7 @@ func TestValidate_NilLeafInNestedMessageData(t *testing.T) { // Ensures the recursive check reaches nested objects. cfg := NewSentinelConfig() cfg.ResourceType = "clusters" - cfg.HyperFleetAPI.Endpoint = "http://api.example.com" + cfg.Clients.HyperfleetAPI.BaseURL = "http://api.example.com" cfg.MessageData = map[string]interface{}{ "ref": map[string]interface{}{ "id": nil, @@ -435,7 +515,7 @@ func TestLoadConfig_BlankMessageDataLeafReturnsError(t *testing.T) { // parser. 
mapstructure then silently drops nil-valued keys during Unmarshal, // so the key disappears from cfg.MessageData before Validate() runs. // LoadConfig must catch this via the raw viper value. - _, err := LoadConfig(filepath.Join("testdata", "message-data-blank-id.yaml")) + _, err := LoadConfig(filepath.Join("testdata", "message-data-blank-id.yaml"), nil) if err == nil { t.Fatal("expected error for blank message_data leaf, got nil") } @@ -447,7 +527,7 @@ func TestLoadConfig_BlankMessageDataLeafReturnsError(t *testing.T) { func TestLoadConfig_FullWorkflow(t *testing.T) { configPath := filepath.Join("testdata", "full-workflow.yaml") - cfg, err := LoadConfig(configPath) + cfg, err := LoadConfig(configPath, nil) if err != nil { t.Fatalf("Expected no error, got: %v", err) } @@ -469,123 +549,143 @@ func TestLoadConfig_FullWorkflow(t *testing.T) { } // ============================================================================ -// Topic Tests +// RedactedCopy Tests // ============================================================================ -func TestLoadConfig_TopicFromEnvVar(t *testing.T) { - // Set environment variable (t.Setenv auto-cleans after test) - t.Setenv("BROKER_TOPIC", "test-namespace-clusters") +func TestRedactedCopy_NilBrokerHandled(t *testing.T) { + cfg := NewSentinelConfig() + cfg.Clients.Broker = nil - configPath := filepath.Join("testdata", "minimal.yaml") + redacted := cfg.RedactedCopy() - cfg, err := LoadConfig(configPath) - if err != nil { - t.Fatalf("Expected no error, got: %v", err) + if redacted.Clients.Broker != nil { + t.Errorf("Expected nil Broker to stay nil after redaction") } +} - if cfg.Topic != "test-namespace-clusters" { - t.Errorf("Expected topic 'test-namespace-clusters', got '%s'", cfg.Topic) +func TestRedactedCopy_DoesNotMutateOriginal(t *testing.T) { + cfg := NewSentinelConfig() + cfg.Clients.Broker = &BrokerConfig{Topic: "my-topic"} + + _ = cfg.RedactedCopy() + + if cfg.Clients.Broker.Topic != "my-topic" { + 
t.Errorf("RedactedCopy must not mutate the original; got '%s'", cfg.Clients.Broker.Topic) } } -func TestLoadConfig_TopicEnvVarOverridesConfig(t *testing.T) { - // Set environment variable (t.Setenv auto-cleans after test) - t.Setenv("BROKER_TOPIC", "env-topic") +// ============================================================================ +// Unknown Field Tests +// ============================================================================ + +func TestLoadConfig_UnknownFieldReturnsError(t *testing.T) { + _, err := LoadConfig(filepath.Join("testdata", "unknown-field.yaml"), nil) + if err == nil { + t.Fatal("Expected error for unknown field 'resouce_type', got nil") + } +} - // Create config with topic set +func TestLoadConfig_UnknownFieldInline(t *testing.T) { yaml := ` +sentinel: + name: test-sentinel +clients: + hyperfleet_api: + base_url: http://localhost:8000 resource_type: clusters -hyperfleet_api: - endpoint: http://localhost:8000 -topic: config-topic message_data: id: resource.id +hyperfleet_api: + endpoint: http://old-format.example.com ` configPath := createTempConfigFile(t, yaml) - cfg, err := LoadConfig(configPath) + _, err := LoadConfig(configPath, nil) + if err == nil { + t.Fatal("Expected error for unknown field 'hyperfleet_api', got nil") + } +} + +// ============================================================================ +// Topic Tests +// ============================================================================ + +func TestLoadConfig_TopicFromEnvVar(t *testing.T) { + t.Setenv("HYPERFLEET_BROKER_TOPIC", "test-namespace-clusters") + + configPath := filepath.Join("testdata", "minimal.yaml") + + cfg, err := LoadConfig(configPath, nil) if err != nil { t.Fatalf("Expected no error, got: %v", err) } - // Environment variable should override config file - if cfg.Topic != "env-topic" { - t.Errorf("Expected topic 'env-topic' (from env), got '%s'", cfg.Topic) + if cfg.Clients.Broker.Topic != "test-namespace-clusters" { + t.Errorf("Expected topic 
'test-namespace-clusters', got '%s'", cfg.Clients.Broker.Topic) } } -func TestLoadConfig_TopicFromConfigFile(t *testing.T) { - // Save and restore original value, then unset for test - origValue, wasSet := os.LookupEnv("BROKER_TOPIC") - if wasSet { - defer func() { _ = os.Setenv("BROKER_TOPIC", origValue) }() - } - _ = os.Unsetenv("BROKER_TOPIC") +func TestLoadConfig_TopicEnvVarOverridesConfig(t *testing.T) { + t.Setenv("HYPERFLEET_BROKER_TOPIC", "env-topic") yaml := ` +sentinel: + name: test-sentinel +clients: + hyperfleet_api: + base_url: http://localhost:8000 + broker: + topic: config-topic resource_type: clusters -hyperfleet_api: - endpoint: http://localhost:8000 -topic: my-namespace-clusters message_data: id: resource.id ` configPath := createTempConfigFile(t, yaml) - cfg, err := LoadConfig(configPath) + cfg, err := LoadConfig(configPath, nil) if err != nil { t.Fatalf("Expected no error, got: %v", err) } - if cfg.Topic != "my-namespace-clusters" { - t.Errorf("Expected topic 'my-namespace-clusters', got '%s'", cfg.Topic) + if cfg.Clients.Broker.Topic != "env-topic" { + t.Errorf("Expected topic 'env-topic' (from env), got '%s'", cfg.Clients.Broker.Topic) } } -func TestLoadConfig_TopicEmpty(t *testing.T) { - // Save and restore original value, then unset for test - origValue, wasSet := os.LookupEnv("BROKER_TOPIC") - if wasSet { - defer func() { _ = os.Setenv("BROKER_TOPIC", origValue) }() - } - _ = os.Unsetenv("BROKER_TOPIC") - - configPath := filepath.Join("testdata", "minimal.yaml") +func TestLoadConfig_TopicFromConfigFile(t *testing.T) { + yaml := ` +sentinel: + name: test-sentinel +clients: + hyperfleet_api: + base_url: http://localhost:8000 + broker: + topic: my-namespace-clusters +resource_type: clusters +message_data: + id: resource.id +` + configPath := createTempConfigFile(t, yaml) - cfg, err := LoadConfig(configPath) + cfg, err := LoadConfig(configPath, nil) if err != nil { t.Fatalf("Expected no error, got: %v", err) } - // Topic should be empty when 
not configured - if cfg.Topic != "" { - t.Errorf("Expected empty topic, got '%s'", cfg.Topic) + if cfg.Clients.Broker.Topic != "my-namespace-clusters" { + t.Errorf("Expected topic 'my-namespace-clusters', got '%s'", cfg.Clients.Broker.Topic) } } -func TestLoadConfig_TopicEnvVarEmptyClearsConfig(t *testing.T) { - // Set environment variable to empty string (explicitly clears config value) - // t.Setenv auto-cleans after test - t.Setenv("BROKER_TOPIC", "") - - // Create config with topic set - yaml := ` -resource_type: clusters -hyperfleet_api: - endpoint: http://localhost:8000 -topic: config-topic -message_data: - id: resource.id -` - configPath := createTempConfigFile(t, yaml) +func TestLoadConfig_TopicEmpty(t *testing.T) { + configPath := filepath.Join("testdata", "minimal.yaml") - cfg, err := LoadConfig(configPath) + cfg, err := LoadConfig(configPath, nil) if err != nil { t.Fatalf("Expected no error, got: %v", err) } - // Empty env var should clear the config value (using os.LookupEnv) - if cfg.Topic != "" { - t.Errorf("Expected empty topic (cleared by env var), got '%s'", cfg.Topic) + if cfg.Clients.Broker.Topic != "" { + t.Errorf("Expected empty topic, got '%s'", cfg.Clients.Broker.Topic) } } diff --git a/internal/config/testdata/full-workflow.yaml b/internal/config/testdata/full-workflow.yaml index ce8c07b..eb185d9 100644 --- a/internal/config/testdata/full-workflow.yaml +++ b/internal/config/testdata/full-workflow.yaml @@ -1,3 +1,25 @@ +sentinel: + name: hyperfleet-sentinel-nodepools + +debug_config: false + +log: + level: "info" + format: "text" + output: "stdout" + +clients: + hyperfleet_api: + base_url: http://hyperfleet-api.hyperfleet-system.svc.cluster.local:8080 + version: "v1" + timeout: 20s + retry_attempts: 3 + retry_backoff: "exponential" + base_delay: "1s" + max_delay: "30s" + broker: + topic: "" + resource_type: nodepools poll_interval: 3s max_age_not_ready: 5s @@ -9,10 +31,6 @@ resource_selector: - label: region value: eu-west -hyperfleet_api: - 
endpoint: http://hyperfleet-api.hyperfleet-system.svc.cluster.local:8080 - timeout: 20s - message_data: id: "resource.id" kind: "resource.kind" diff --git a/internal/config/testdata/message-data-blank-id.yaml b/internal/config/testdata/message-data-blank-id.yaml index 7b9c69a..fe647d0 100644 --- a/internal/config/testdata/message-data-blank-id.yaml +++ b/internal/config/testdata/message-data-blank-id.yaml @@ -1,12 +1,15 @@ # this is an invalid config for tests # it has a nil in the message_data.id property +sentinel: + name: hyperfleet-sentinel-test +clients: + hyperfleet_api: + base_url: http://localhost:8000 + timeout: 10s resource_type: clusters poll_interval: 2s max_age_not_ready: 5s max_age_ready: 2m -hyperfleet_api: - endpoint: http://localhost:8000 - timeout: 10s message_data: id: kind: "resource.kind" diff --git a/internal/config/testdata/minimal.yaml b/internal/config/testdata/minimal.yaml index 5fee87c..0ac461f 100644 --- a/internal/config/testdata/minimal.yaml +++ b/internal/config/testdata/minimal.yaml @@ -1,5 +1,11 @@ +sentinel: + name: hyperfleet-sentinel-minimal + +clients: + hyperfleet_api: + base_url: http://api.example.com + resource_type: clusters -hyperfleet_api: - endpoint: http://api.example.com + message_data: - id: resource.id + id: "resource.id" diff --git a/internal/config/testdata/unknown-field.yaml b/internal/config/testdata/unknown-field.yaml new file mode 100644 index 0000000..5d99114 --- /dev/null +++ b/internal/config/testdata/unknown-field.yaml @@ -0,0 +1,12 @@ +sentinel: + name: hyperfleet-sentinel-test + +clients: + hyperfleet_api: + base_url: http://api.example.com + +resource_type: clusters +resouce_type: clusters # typo — unknown field + +message_data: + id: "resource.id" diff --git a/internal/config/testdata/valid-complete.yaml b/internal/config/testdata/valid-complete.yaml index 62dee5f..caec3bf 100644 --- a/internal/config/testdata/valid-complete.yaml +++ b/internal/config/testdata/valid-complete.yaml @@ -1,3 +1,25 @@ 
+sentinel: + name: hyperfleet-sentinel-test + +debug_config: false + +log: + level: "info" + format: "json" + output: "stdout" + +clients: + hyperfleet_api: + base_url: https://api.hyperfleet.example.com + version: "v1" + timeout: 10s + retry_attempts: 3 + retry_backoff: "exponential" + base_delay: "1s" + max_delay: "30s" + broker: + topic: "test-topic" + resource_type: clusters poll_interval: 5s max_age_not_ready: 10s @@ -9,10 +31,6 @@ resource_selector: - label: environment value: production -hyperfleet_api: - endpoint: https://api.hyperfleet.example.com - timeout: 5s - message_data: id: "resource.id" kind: "resource.kind" diff --git a/internal/sentinel/sentinel.go b/internal/sentinel/sentinel.go index f0dcf60..b23fc37 100644 --- a/internal/sentinel/sentinel.go +++ b/internal/sentinel/sentinel.go @@ -96,7 +96,10 @@ func (s *Sentinel) trigger(ctx context.Context) error { // Get metric labels resourceType := s.config.ResourceType resourceSelector := metrics.GetResourceSelectorLabel(s.config.ResourceSelector) - topic := s.config.Topic + topic := "" + if s.config.Clients.Broker != nil { + topic = s.config.Clients.Broker.Topic + } // Add subset to context for structured logging ctx = logger.WithSubset(ctx, resourceType) diff --git a/internal/sentinel/sentinel_test.go b/internal/sentinel/sentinel_test.go index ba11af9..a016ce8 100644 --- a/internal/sentinel/sentinel_test.go +++ b/internal/sentinel/sentinel_test.go @@ -128,7 +128,7 @@ func TestTrigger_Success(t *testing.T) { defer server.Close() // Setup components - hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second) + hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second, "test-sentinel", "test") decisionEngine := engine.NewDecisionEngine(10*time.Second, 30*time.Minute) mockPublisher := &MockPublisher{} log := logger.NewHyperFleetLogger() @@ -138,8 +138,11 @@ func TestTrigger_Success(t *testing.T) { metrics.NewSentinelMetrics(registry, "test") cfg := 
&config.SentinelConfig{ - ResourceType: "clusters", - Topic: "test-topic", + ResourceType: "clusters", + Clients: config.ClientsConfig{ + HyperfleetAPI: &config.HyperFleetAPIConfig{}, + Broker: &config.BrokerConfig{Topic: "test-topic"}, + }, MaxAgeNotReady: 10 * time.Second, MaxAgeReady: 30 * time.Minute, MessageData: map[string]interface{}{ @@ -202,7 +205,7 @@ func TestTrigger_NoEventsPublished(t *testing.T) { defer server.Close() // Setup components - hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second) + hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second, "test-sentinel", "test") decisionEngine := engine.NewDecisionEngine(10*time.Second, 30*time.Minute) mockPublisher := &MockPublisher{} log := logger.NewHyperFleetLogger() @@ -212,8 +215,11 @@ func TestTrigger_NoEventsPublished(t *testing.T) { metrics.NewSentinelMetrics(registry, "test") cfg := &config.SentinelConfig{ - ResourceType: "clusters", - Topic: "test-topic", + ResourceType: "clusters", + Clients: config.ClientsConfig{ + HyperfleetAPI: &config.HyperFleetAPIConfig{}, + Broker: &config.BrokerConfig{Topic: "test-topic"}, + }, MaxAgeNotReady: 10 * time.Second, MaxAgeReady: 30 * time.Minute, MessageData: map[string]interface{}{ @@ -253,7 +259,7 @@ func TestTrigger_FetchError(t *testing.T) { defer server.Close() // Setup components - hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 1*time.Second) // Short timeout + hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 1*time.Second, "test-sentinel", "test") // Short timeout decisionEngine := engine.NewDecisionEngine(10*time.Second, 30*time.Minute) mockPublisher := &MockPublisher{} log := logger.NewHyperFleetLogger() @@ -263,8 +269,11 @@ func TestTrigger_FetchError(t *testing.T) { metrics.NewSentinelMetrics(registry, "test") cfg := &config.SentinelConfig{ - ResourceType: "clusters", - Topic: "test-topic", + ResourceType: "clusters", + Clients: config.ClientsConfig{ + HyperfleetAPI: 
&config.HyperFleetAPIConfig{}, + Broker: &config.BrokerConfig{Topic: "test-topic"}, + }, MaxAgeNotReady: 10 * time.Second, MaxAgeReady: 30 * time.Minute, MessageData: map[string]interface{}{ @@ -307,7 +316,7 @@ func TestTrigger_PublishError(t *testing.T) { defer server.Close() // Setup components - hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second) + hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second, "test-sentinel", "test") decisionEngine := engine.NewDecisionEngine(10*time.Second, 30*time.Minute) mockPublisher := &MockPublisher{ publishError: errors.New("broker connection failed"), @@ -319,8 +328,11 @@ func TestTrigger_PublishError(t *testing.T) { metrics.NewSentinelMetrics(registry, "test") cfg := &config.SentinelConfig{ - ResourceType: "clusters", - Topic: "test-topic", + ResourceType: "clusters", + Clients: config.ClientsConfig{ + HyperfleetAPI: &config.HyperFleetAPIConfig{}, + Broker: &config.BrokerConfig{Topic: "test-topic"}, + }, MaxAgeNotReady: 10 * time.Second, MaxAgeReady: 30 * time.Minute, MessageData: map[string]interface{}{ @@ -364,7 +376,7 @@ func TestTrigger_MixedResources(t *testing.T) { defer server.Close() // Setup components - hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second) + hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second, "test-sentinel", "test") decisionEngine := engine.NewDecisionEngine(10*time.Second, 30*time.Minute) mockPublisher := &MockPublisher{} log := logger.NewHyperFleetLogger() @@ -374,8 +386,11 @@ func TestTrigger_MixedResources(t *testing.T) { metrics.NewSentinelMetrics(registry, "test") cfg := &config.SentinelConfig{ - ResourceType: "clusters", - Topic: "test-topic", + ResourceType: "clusters", + Clients: config.ClientsConfig{ + HyperfleetAPI: &config.HyperFleetAPIConfig{}, + Broker: &config.BrokerConfig{Topic: "test-topic"}, + }, MaxAgeNotReady: 10 * time.Second, MaxAgeReady: 30 * time.Minute, MessageData: 
map[string]interface{}{ @@ -427,7 +442,7 @@ func TestTrigger_WithMessageDataConfig(t *testing.T) { })) defer server.Close() - hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second) + hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second, "test-sentinel", "test") decisionEngine := engine.NewDecisionEngine(10*time.Second, 30*time.Minute) mockPublisher := &MockPublisher{} log := logger.NewHyperFleetLogger() @@ -436,8 +451,11 @@ func TestTrigger_WithMessageDataConfig(t *testing.T) { metrics.NewSentinelMetrics(registry, "test") cfg := &config.SentinelConfig{ - ResourceType: "clusters", - Topic: "test-topic", + ResourceType: "clusters", + Clients: config.ClientsConfig{ + HyperfleetAPI: &config.HyperFleetAPIConfig{}, + Broker: &config.BrokerConfig{Topic: "test-topic"}, + }, MaxAgeNotReady: 10 * time.Second, MaxAgeReady: 30 * time.Minute, MessageData: map[string]interface{}{ @@ -493,7 +511,7 @@ func TestTrigger_WithNestedMessageData(t *testing.T) { })) defer server.Close() - hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second) + hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second, "test-sentinel", "test") decisionEngine := engine.NewDecisionEngine(10*time.Second, 30*time.Minute) mockPublisher := &MockPublisher{} log := logger.NewHyperFleetLogger() @@ -502,8 +520,11 @@ func TestTrigger_WithNestedMessageData(t *testing.T) { metrics.NewSentinelMetrics(registry, "test") cfg := &config.SentinelConfig{ - ResourceType: "clusters", - Topic: "test-topic", + ResourceType: "clusters", + Clients: config.ClientsConfig{ + HyperfleetAPI: &config.HyperFleetAPIConfig{}, + Broker: &config.BrokerConfig{Topic: "test-topic"}, + }, MaxAgeNotReady: 10 * time.Second, MaxAgeReady: 30 * time.Minute, MessageData: map[string]interface{}{ diff --git a/test/integration/integration_test.go b/test/integration/integration_test.go index 47c2327..c77a539 100644 --- a/test/integration/integration_test.go +++ 
b/test/integration/integration_test.go @@ -145,7 +145,7 @@ func TestIntegration_EndToEnd(t *testing.T) { defer server.Close() // Setup components with real RabbitMQ broker - hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second) + hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second, "test-sentinel", "test") decisionEngine := engine.NewDecisionEngine(10*time.Second, 30*time.Minute) log := logger.NewHyperFleetLogger() @@ -261,7 +261,7 @@ func TestIntegration_LabelSelectorFiltering(t *testing.T) { defer server.Close() // Setup components with real RabbitMQ broker - hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second) + hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second, "test-sentinel", "test") decisionEngine := engine.NewDecisionEngine(10*time.Second, 30*time.Minute) log := logger.NewHyperFleetLogger() @@ -372,7 +372,7 @@ func TestIntegration_TSLSyntaxMultipleLabels(t *testing.T) { defer server.Close() // Setup components - hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second) + hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second, "test-sentinel", "test") decisionEngine := engine.NewDecisionEngine(10*time.Second, 30*time.Minute) log := logger.NewHyperFleetLogger() @@ -475,12 +475,15 @@ func TestIntegration_BrokerLoggerContext(t *testing.T) { })) defer server.Close() - hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second) + hyperfleetClient, _ := client.NewHyperFleetClient(server.URL, 10*time.Second, "test-sentinel", "test") decisionEngine := engine.NewDecisionEngine(10*time.Second, 30*time.Minute) sentinelConfig := &config.SentinelConfig{ - ResourceType: "clusters", - Topic: TEST_TOPIC, + ResourceType: "clusters", + Clients: config.ClientsConfig{ + HyperfleetAPI: &config.HyperFleetAPIConfig{}, + Broker: &config.BrokerConfig{Topic: TEST_TOPIC}, + }, PollInterval: 100 * time.Millisecond, 
MaxAgeNotReady: 10 * time.Second, MaxAgeReady: 30 * time.Minute, diff --git a/test/integration/test-config-loading.sh b/test/integration/test-config-loading.sh new file mode 100755 index 0000000..a825a52 --- /dev/null +++ b/test/integration/test-config-loading.sh @@ -0,0 +1,322 @@ +#!/usr/bin/env bash +# test-config-loading.sh - Verifies that every sentinel config parameter loads correctly from +# all available sources: config file, environment variable, and CLI flag. +# +# Usage: +# ./test/integration/test-config-loading.sh [--verbose|-v] +# +# Output: one PASS/FAIL line per test, plus a summary at the end. +# Exit code: 0 if all tests pass, 1 if any fail. + +set -euo pipefail + +VERBOSE=0 +for arg in "$@"; do + [[ "$arg" == "--verbose" || "$arg" == "-v" ]] && VERBOSE=1 +done + +# ─── Colours ────────────────────────────────────────────────────────────────── +RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; CYAN='\033[0;36m'; NC='\033[0m' + +PASS=0; FAIL=0; declare -a ERRORS=() + +pass() { echo -e " ${GREEN}PASS${NC} $1"; PASS=$((PASS+1)); } +fail() { + local name="$1" pattern="$2" output="$3" + echo -e " ${RED}FAIL${NC} $name" + echo " expected pattern: ${pattern}" + FAIL=$((FAIL+1)); ERRORS+=("$name") + if [[ $VERBOSE -eq 1 ]]; then + echo " output:" + echo "$output" | sed 's/^/ /' + fi +} + +section() { echo -e "\n${CYAN}══ $1 ══${NC}"; } + +# assert_contains <name> <output> <pattern> — passes if <output> contains <pattern> as a literal string +assert_contains() { + local name="$1" output="$2" pattern="$3" + if echo "$output" | grep -qF "$pattern"; then + pass "$name" + else + fail "$name" "$pattern" "$output" + fi +} + +# assert_not_contains <name> <output> <pattern> — passes if <output> does not contain <pattern> as a literal string +assert_not_contains() { + local name="$1" output="$2" pattern="$3" + if echo "$output" | grep -qF "$pattern"; then + fail "$name" "NOT: $pattern" "$output" + else + pass "$name" + fi +} + +# ─── Setup ──────────────────────────────────────────────────────────────────── +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT_DIR="$(cd "$SCRIPT_DIR/../.."
&& pwd)" +TMPDIR_TEST="$(mktemp -d)" + +# If a pre-built binary is present at ROOT_DIR/bin/sentinel (placed by the test harness), +# use it directly; otherwise build from source using the local Go toolchain. +if [[ -x "$ROOT_DIR/bin/sentinel" ]]; then + echo -e "${YELLOW}Using pre-built sentinel binary...${NC}" + SENTINEL_BIN="$ROOT_DIR/bin/sentinel" + _SENTINEL_OWN=0 +else + SENTINEL_BIN="$(mktemp /tmp/sentinel-test-XXXXXX)" + _SENTINEL_OWN=1 + echo -e "${YELLOW}Building sentinel binary...${NC}" + (cd "$ROOT_DIR" && go build -o "$SENTINEL_BIN" ./cmd/sentinel) + echo " Built: $SENTINEL_BIN" +fi + +cleanup() { [[ ${_SENTINEL_OWN:-0} -eq 1 ]] && rm -f "$SENTINEL_BIN"; rm -rf "$TMPDIR_TEST"; } +trap cleanup EXIT + +# ─── Config-dump wrapper ─────────────────────────────────────────────────────── +# cfg_dump <config-file> [extra CLI flags...] +# Caller must set env vars in the calling environment (use subshells). +cfg_dump() { + local config="$1"; shift + "$SENTINEL_BIN" config-dump -c "$config" "$@" 2>/dev/null +} + +cfg_dump_no_flag() { + "$SENTINEL_BIN" config-dump "$@" 2>/dev/null +} + +# ─── Config file factory ────────────────────────────────────────────────────── +# sentinel_config <file> [extra yaml lines...] +# +# Creates a minimal sentinel config. The built-in template ends with "clients:" so that: +# - 2-space-indented extra args become children of clients +# - 0-space-indented extra args become root-level keys (e.g.
log:, debug_config:, poll_interval:) +# +# Required fields NOT included (must be passed as extra args): +# poll_interval, max_age_not_ready, max_age_ready, clients.hyperfleet_api.* +sentinel_config() { + local file="$1"; shift + { + cat <<'YAML' +sentinel: + name: test-sentinel +resource_type: clusters +message_data: + id: "resource.id" + kind: "resource.kind" +clients: +YAML + printf '%s\n' "$@" + } >"$file" +} + +CFG="$TMPDIR_TEST/sentinel.yaml" # reused across tests (overwritten each time) + +# ─── Base config fragments ───────────────────────────────────────────────────── +# Combine these as extra args to sentinel_config to satisfy validation. +# Indentation determines YAML placement: +# BASE_API, BASE_BROKER → children of clients: (2-space indent) +# BASE_TIMING → root-level keys (0-space indent) +BASE_API=( + " hyperfleet_api:" + " base_url: https://base.example.com" + " timeout: 10s" +) +BASE_BROKER=( + " broker:" + " topic: base-topic" +) +BASE_TIMING=( + "poll_interval: 5s" + "max_age_not_ready: 10s" + "max_age_ready: 30m" +) + +# ───────────────────────────────────────────────────────────────────────────── +section "Sentinel identity" +# ───────────────────────────────────────────────────────────────────────────── + +sentinel_config "$CFG" "${BASE_API[@]}" "${BASE_TIMING[@]}" +out=$(cfg_dump "$CFG") +assert_contains "sentinel.name [file]" "$out" "name: test-sentinel" + +assert_contains "sentinel.name [env]" "$(HYPERFLEET_SENTINEL_NAME=env-name cfg_dump "$CFG")" "name: env-name" +assert_contains "sentinel.name [cli]" "$(cfg_dump "$CFG" --sentinel-name=cli-name)" "name: cli-name" +assert_contains "sentinel.name [cli>env]" "$(HYPERFLEET_SENTINEL_NAME=env-name cfg_dump "$CFG" --sentinel-name=cli-name)" "name: cli-name" + +# ───────────────────────────────────────────────────────────────────────────── +section "HyperFleet API" +# ───────────────────────────────────────────────────────────────────────────── + +# base_url +sentinel_config "$CFG" " 
hyperfleet_api:" " base_url: https://file-api.example.com" " timeout: 10s" "${BASE_TIMING[@]}" +assert_contains "api.base_url [file]" "$(cfg_dump "$CFG")" "base_url: https://file-api.example.com" +assert_contains "api.base_url [env]" "$(HYPERFLEET_API_BASE_URL=https://env-api.example.com cfg_dump "$CFG")" "base_url: https://env-api.example.com" +assert_contains "api.base_url [cli]" "$(cfg_dump "$CFG" --hyperfleet-api-base-url=https://cli-api.example.com)" "base_url: https://cli-api.example.com" +assert_contains "api.base_url [cli>env]" "$(HYPERFLEET_API_BASE_URL=https://env-api.example.com cfg_dump "$CFG" --hyperfleet-api-base-url=https://cli-api.example.com)" "base_url: https://cli-api.example.com" + +# version +sentinel_config "$CFG" " hyperfleet_api:" " base_url: https://base.example.com" " timeout: 10s" " version: file-v99" "${BASE_TIMING[@]}" +assert_contains "api.version [file]" "$(cfg_dump "$CFG")" "version: file-v99" +assert_contains "api.version [env]" "$(HYPERFLEET_API_VERSION=env-v88 cfg_dump "$CFG")" "version: env-v88" +assert_contains "api.version [cli]" "$(cfg_dump "$CFG" --hyperfleet-api-version=cli-v77)" "version: cli-v77" +assert_contains "api.version [cli>env]" "$(HYPERFLEET_API_VERSION=env-v88 cfg_dump "$CFG" --hyperfleet-api-version=cli-v77)" "version: cli-v77" + +# timeout +sentinel_config "$CFG" " hyperfleet_api:" " base_url: https://base.example.com" " timeout: 11s" "${BASE_TIMING[@]}" +assert_contains "api.timeout [file]" "$(cfg_dump "$CFG")" "timeout: 11s" +assert_contains "api.timeout [env]" "$(HYPERFLEET_API_TIMEOUT=22s cfg_dump "$CFG")" "timeout: 22s" +assert_contains "api.timeout [cli]" "$(cfg_dump "$CFG" --hyperfleet-api-timeout=33s)" "timeout: 33s" +assert_contains "api.timeout [cli>env]" "$(HYPERFLEET_API_TIMEOUT=22s cfg_dump "$CFG" --hyperfleet-api-timeout=33s)" "timeout: 33s" + +# retry_attempts +sentinel_config "$CFG" " hyperfleet_api:" " base_url: https://base.example.com" " timeout: 10s" " retry_attempts: 11" "${BASE_TIMING[@]}" 
+assert_contains "api.retry_attempts [file]" "$(cfg_dump "$CFG")" "retry_attempts: 11" +assert_contains "api.retry_attempts [env]" "$(HYPERFLEET_API_RETRY_ATTEMPTS=22 cfg_dump "$CFG")" "retry_attempts: 22" +assert_contains "api.retry_attempts [cli]" "$(cfg_dump "$CFG" --hyperfleet-api-retry-attempts=33)" "retry_attempts: 33" +assert_contains "api.retry_attempts [cli>env]" "$(HYPERFLEET_API_RETRY_ATTEMPTS=22 cfg_dump "$CFG" --hyperfleet-api-retry-attempts=33)" "retry_attempts: 33" + +# retry_backoff +sentinel_config "$CFG" " hyperfleet_api:" " base_url: https://base.example.com" " timeout: 10s" " retry_backoff: linear" "${BASE_TIMING[@]}" +assert_contains "api.retry_backoff [file]" "$(cfg_dump "$CFG")" "retry_backoff: linear" +assert_contains "api.retry_backoff [env]" "$(HYPERFLEET_API_RETRY_BACKOFF=constant cfg_dump "$CFG")" "retry_backoff: constant" +assert_contains "api.retry_backoff [cli]" "$(cfg_dump "$CFG" --hyperfleet-api-retry-backoff=exponential)" "retry_backoff: exponential" +assert_contains "api.retry_backoff [cli>env]" "$(HYPERFLEET_API_RETRY_BACKOFF=constant cfg_dump "$CFG" --hyperfleet-api-retry-backoff=exponential)" "retry_backoff: exponential" + +# base_delay +sentinel_config "$CFG" " hyperfleet_api:" " base_url: https://base.example.com" " timeout: 10s" " base_delay: 11s" "${BASE_TIMING[@]}" +assert_contains "api.base_delay [file]" "$(cfg_dump "$CFG")" "base_delay: 11s" +assert_contains "api.base_delay [env]" "$(HYPERFLEET_API_BASE_DELAY=22s cfg_dump "$CFG")" "base_delay: 22s" +assert_contains "api.base_delay [cli]" "$(cfg_dump "$CFG" --hyperfleet-api-base-delay=33s)" "base_delay: 33s" +assert_contains "api.base_delay [cli>env]" "$(HYPERFLEET_API_BASE_DELAY=22s cfg_dump "$CFG" --hyperfleet-api-base-delay=33s)" "base_delay: 33s" + +# max_delay — use sub-60s values since time.Duration.String() reformats e.g. 
111s → 1m51s +sentinel_config "$CFG" " hyperfleet_api:" " base_url: https://base.example.com" " timeout: 10s" " max_delay: 51s" "${BASE_TIMING[@]}" +assert_contains "api.max_delay [file]" "$(cfg_dump "$CFG")" "max_delay: 51s" +assert_contains "api.max_delay [env]" "$(HYPERFLEET_API_MAX_DELAY=52s cfg_dump "$CFG")" "max_delay: 52s" +assert_contains "api.max_delay [cli]" "$(cfg_dump "$CFG" --hyperfleet-api-max-delay=53s)" "max_delay: 53s" +assert_contains "api.max_delay [cli>env]" "$(HYPERFLEET_API_MAX_DELAY=52s cfg_dump "$CFG" --hyperfleet-api-max-delay=53s)" "max_delay: 53s" + +# ───────────────────────────────────────────────────────────────────────────── +section "Broker" +# ───────────────────────────────────────────────────────────────────────────── + +sentinel_config "$CFG" "${BASE_API[@]}" " broker:" " topic: file-topic" "${BASE_TIMING[@]}" + +# topic — standard env var +assert_contains "broker.topic [file]" "$(cfg_dump "$CFG")" "topic: file-topic" +assert_contains "broker.topic [env]" "$(HYPERFLEET_BROKER_TOPIC=env-topic cfg_dump "$CFG")" "topic: env-topic" +assert_contains "broker.topic [cli]" "$(cfg_dump "$CFG" --broker-topic=cli-topic)" "topic: cli-topic" +assert_contains "broker.topic [cli>env]" "$(HYPERFLEET_BROKER_TOPIC=env-topic cfg_dump "$CFG" --broker-topic=cli-topic)" "topic: cli-topic" + +# ───────────────────────────────────────────────────────────────────────────── +section "Log" +# ───────────────────────────────────────────────────────────────────────────── + +# level +sentinel_config "$CFG" "${BASE_API[@]}" "${BASE_TIMING[@]}" "log:" " level: debug" +assert_contains "log.level [file]" "$(cfg_dump "$CFG")" "level: debug" +assert_contains "log.level [env]" "$(HYPERFLEET_LOG_LEVEL=warn cfg_dump "$CFG")" "level: warn" +assert_contains "log.level [cli]" "$(cfg_dump "$CFG" --log-level=error)" "level: error" +assert_contains "log.level [cli>env]" "$(HYPERFLEET_LOG_LEVEL=warn cfg_dump "$CFG" --log-level=error)" "level: error" +assert_contains 
"log.level [env>file]" "$(HYPERFLEET_LOG_LEVEL=warn cfg_dump "$CFG")" "level: warn" + +# format +sentinel_config "$CFG" "${BASE_API[@]}" "${BASE_TIMING[@]}" "log:" " format: json" +assert_contains "log.format [file]" "$(cfg_dump "$CFG")" "format: json" +assert_contains "log.format [env]" "$(HYPERFLEET_LOG_FORMAT=text cfg_dump "$CFG")" "format: text" +assert_contains "log.format [cli]" "$(cfg_dump "$CFG" --log-format=json)" "format: json" +assert_contains "log.format [cli>env]" "$(HYPERFLEET_LOG_FORMAT=text cfg_dump "$CFG" --log-format=json)" "format: json" + +# output +sentinel_config "$CFG" "${BASE_API[@]}" "${BASE_TIMING[@]}" "log:" " output: stderr" +assert_contains "log.output [file]" "$(cfg_dump "$CFG")" "output: stderr" +assert_contains "log.output [env]" "$(HYPERFLEET_LOG_OUTPUT=stdout cfg_dump "$CFG")" "output: stdout" +assert_contains "log.output [cli]" "$(cfg_dump "$CFG" --log-output=stderr)" "output: stderr" +assert_contains "log.output [cli>env]" "$(HYPERFLEET_LOG_OUTPUT=stdout cfg_dump "$CFG" --log-output=stderr)" "output: stderr" + +# ───────────────────────────────────────────────────────────────────────────── +section "Sentinel-specific parameters" +# ───────────────────────────────────────────────────────────────────────────── + +# resource_type +sentinel_config "$CFG" "${BASE_API[@]}" "${BASE_TIMING[@]}" +assert_contains "resource_type [file]" "$(cfg_dump "$CFG")" "resource_type: clusters" +assert_contains "resource_type [env]" "$(HYPERFLEET_RESOURCE_TYPE=nodepools cfg_dump "$CFG")" "resource_type: nodepools" +assert_contains "resource_type [cli]" "$(cfg_dump "$CFG" --resource-type=nodepools)" "resource_type: nodepools" +assert_contains "resource_type [cli>env]" "$(HYPERFLEET_RESOURCE_TYPE=nodepools cfg_dump "$CFG" --resource-type=clusters)" "resource_type: clusters" + +# poll_interval +sentinel_config "$CFG" "${BASE_API[@]}" "poll_interval: 11s" "max_age_not_ready: 10s" "max_age_ready: 30m" +assert_contains "poll_interval [file]" "$(cfg_dump 
"$CFG")" "poll_interval: 11s" +assert_contains "poll_interval [env]" "$(HYPERFLEET_POLL_INTERVAL=22s cfg_dump "$CFG")" "poll_interval: 22s" +assert_contains "poll_interval [cli]" "$(cfg_dump "$CFG" --poll-interval=33s)" "poll_interval: 33s" +assert_contains "poll_interval [cli>env]" "$(HYPERFLEET_POLL_INTERVAL=22s cfg_dump "$CFG" --poll-interval=33s)" "poll_interval: 33s" + +# max_age_not_ready +sentinel_config "$CFG" "${BASE_API[@]}" "poll_interval: 5s" "max_age_not_ready: 11s" "max_age_ready: 30m" +assert_contains "max_age_not_ready [file]" "$(cfg_dump "$CFG")" "max_age_not_ready: 11s" +assert_contains "max_age_not_ready [env]" "$(HYPERFLEET_MAX_AGE_NOT_READY=22s cfg_dump "$CFG")" "max_age_not_ready: 22s" +assert_contains "max_age_not_ready [cli]" "$(cfg_dump "$CFG" --max-age-not-ready=33s)" "max_age_not_ready: 33s" +assert_contains "max_age_not_ready [cli>env]" "$(HYPERFLEET_MAX_AGE_NOT_READY=22s cfg_dump "$CFG" --max-age-not-ready=33s)" "max_age_not_ready: 33s" + +# max_age_ready — use sub-60s values to avoid duration reformatting (e.g. 
111s → 1m51s) +sentinel_config "$CFG" "${BASE_API[@]}" "poll_interval: 5s" "max_age_not_ready: 10s" "max_age_ready: 51s" +assert_contains "max_age_ready [file]" "$(cfg_dump "$CFG")" "max_age_ready: 51s" +assert_contains "max_age_ready [env]" "$(HYPERFLEET_MAX_AGE_READY=52s cfg_dump "$CFG")" "max_age_ready: 52s" +assert_contains "max_age_ready [cli]" "$(cfg_dump "$CFG" --max-age-ready=53s)" "max_age_ready: 53s" +assert_contains "max_age_ready [cli>env]" "$(HYPERFLEET_MAX_AGE_READY=52s cfg_dump "$CFG" --max-age-ready=53s)" "max_age_ready: 53s" + +# ───────────────────────────────────────────────────────────────────────────── +section "debug_config flag" +# ───────────────────────────────────────────────────────────────────────────── + +sentinel_config "$CFG" "${BASE_API[@]}" "${BASE_TIMING[@]}" "debug_config: true" +assert_contains "debug_config [file=true]" "$(cfg_dump "$CFG")" "debug_config: true" + +sentinel_config "$CFG" "${BASE_API[@]}" "${BASE_TIMING[@]}" +assert_not_contains "debug_config [default=false]" "$(cfg_dump "$CFG")" "debug_config: true" +assert_contains "debug_config [env=true]" "$(HYPERFLEET_DEBUG_CONFIG=true cfg_dump "$CFG")" "debug_config: true" +assert_contains "debug_config [cli=true]" "$(cfg_dump "$CFG" --debug-config)" "debug_config: true" + +# ───────────────────────────────────────────────────────────────────────────── +section "Priority verification (cross-parameter)" +# ───────────────────────────────────────────────────────────────────────────── +# Use api.base_url as the representative parameter for all priority checks. 
+ +sentinel_config "$CFG" " hyperfleet_api:" " base_url: https://file.example.com" " timeout: 10s" "${BASE_TIMING[@]}" + +assert_contains "priority: file only → file value" "$(cfg_dump "$CFG")" "base_url: https://file.example.com" +assert_contains "priority: env > file" "$(HYPERFLEET_API_BASE_URL=https://env.example.com cfg_dump "$CFG")" "base_url: https://env.example.com" +assert_contains "priority: cli > file" "$(cfg_dump "$CFG" --hyperfleet-api-base-url=https://cli.example.com)" "base_url: https://cli.example.com" +assert_contains "priority: cli > env" "$(HYPERFLEET_API_BASE_URL=https://env.example.com cfg_dump "$CFG" --hyperfleet-api-base-url=https://cli.example.com)" "base_url: https://cli.example.com" +assert_contains "priority: env does not override cli" "$(HYPERFLEET_API_BASE_URL=https://env.example.com cfg_dump "$CFG" --hyperfleet-api-base-url=https://cli.example.com)" "base_url: https://cli.example.com" + +# ───────────────────────────────────────────────────────────────────────────── +section "Config file resolution" +# ───────────────────────────────────────────────────────────────────────────── + +sentinel_config "$CFG" "${BASE_API[@]}" "${BASE_TIMING[@]}" +assert_contains "HYPERFLEET_CONFIG [selects file]" \ + "$(HYPERFLEET_CONFIG="$CFG" cfg_dump_no_flag)" \ + "name: test-sentinel" + +# ───────────────────────────────────────────────────────────────────────────── +# Summary +# ───────────────────────────────────────────────────────────────────────────── +echo "" +echo "─────────────────────────────────────────" +TOTAL=$((PASS+FAIL)) +if [[ $FAIL -eq 0 ]]; then + echo -e "${GREEN}All $TOTAL tests passed.${NC}" +else + echo -e "${RED}$FAIL/$TOTAL tests FAILED:${NC}" + for e in "${ERRORS[@]}"; do + echo " - $e" + done +fi +echo "" +[[ $FAIL -eq 0 ]]