From abf9a04ac3749791671ab2005a5efcb5fdbf8587 Mon Sep 17 00:00:00 2001 From: Oystein Skogvold Date: Mon, 25 May 2026 16:32:05 +0200 Subject: [PATCH 01/17] feat(api): add GatewayClassParameters.spec.service ServiceConfig type (#70) --- api/v1alpha1/gatewayclassparameters_types.go | 66 +++++++++++++++++ api/v1alpha1/zz_generated.deepcopy.go | 54 ++++++++++++++ ...h-software.com_gatewayclassparameters.yaml | 70 +++++++++++++++++++ deploy/00-prereqs.yaml | 70 +++++++++++++++++++ 4 files changed, 260 insertions(+) diff --git a/api/v1alpha1/gatewayclassparameters_types.go b/api/v1alpha1/gatewayclassparameters_types.go index 5d2ac0a7..30134d50 100644 --- a/api/v1alpha1/gatewayclassparameters_types.go +++ b/api/v1alpha1/gatewayclassparameters_types.go @@ -89,6 +89,12 @@ type GatewayClassParametersSpec struct { // hash). // +optional TopologySpreadConstraints []corev1.TopologySpreadConstraint `json:"topologySpreadConstraints,omitempty"` + + // Service configures the data-plane Service. When omitted, the operator + // preserves the historical default (Type: LoadBalancer, no annotations). + // Service shape changes do NOT trigger pod restarts. + // +optional + Service *ServiceConfig `json:"service,omitempty"` } // PodDisruptionBudget configures a PodDisruptionBudget for Gateway pods. @@ -150,6 +156,66 @@ type ConfigMapReference struct { Key string `json:"key,omitempty"` } +// ServiceConfig configures the data-plane Service object. +// +// Fields that only make sense for specific Service types are validated by +// CEL rules at admission time. The Type field defaults to LoadBalancer when +// omitted (handled by the resolver, not by a kubebuilder default, to keep +// stored CRs minimal). +// +// Labels and Annotations are layered: GatewayClassParameters provides the +// defaults, and Gateway.spec.infrastructure.{labels,annotations} can override +// or add to them on a per-Gateway basis. The other fields (Type, +// LoadBalancerClass, LoadBalancerSourceRanges, ExternalTrafficPolicy) are +// class-level only — to use different values per Gateway, create separate +// GatewayClasses. +// +// +kubebuilder:validation:XValidation:rule="!has(self.loadBalancerClass) || !has(self.type) || self.type == 'LoadBalancer'",message="loadBalancerClass is only valid when type is LoadBalancer" +// +kubebuilder:validation:XValidation:rule="!has(self.loadBalancerSourceRanges) || !has(self.type) || self.type == 'LoadBalancer'",message="loadBalancerSourceRanges is only valid when type is LoadBalancer" +// +kubebuilder:validation:XValidation:rule="!has(self.externalTrafficPolicy) || !has(self.type) || self.type == 'LoadBalancer' || self.type == 'NodePort'",message="externalTrafficPolicy is only valid when type is LoadBalancer or NodePort" +type ServiceConfig struct { + // Type selects the Service type. Defaults to LoadBalancer when omitted. + // Headless services (clusterIP: None) and ExternalName are intentionally + // not supported. + // +optional + // +kubebuilder:validation:Enum=ClusterIP;NodePort;LoadBalancer + Type *corev1.ServiceType `json:"type,omitempty"` + + // Annotations are added to the Service's ObjectMeta. Keys set here are + // tracked via the varnish.io/managed-annotations sentinel and pruned on + // removal; annotations added by other actors (cloud controllers, etc.) + // are preserved. + // +optional + Annotations map[string]string `json:"annotations,omitempty"` + + // Labels are added to the Service's ObjectMeta. The operator's + // controller-managed labels (app.kubernetes.io/managed-by, + // gateway.networking.k8s.io/gateway-name, + // gateway.networking.k8s.io/gateway-namespace) cannot be overridden; + // user-supplied keys colliding with these are dropped with a log warning. + // Other keys are tracked via the varnish.io/managed-labels sentinel. + // +optional + Labels map[string]string `json:"labels,omitempty"` + + // LoadBalancerClass selects an implementation of load balancer + // (cluster-specific). Only valid when Type is LoadBalancer. + // +optional + LoadBalancerClass *string `json:"loadBalancerClass,omitempty"` + + // LoadBalancerSourceRanges restricts traffic to the LoadBalancer to the + // listed CIDRs. Only valid when Type is LoadBalancer. CIDR syntax is + // validated by the Kubernetes API server. + // +optional + LoadBalancerSourceRanges []string `json:"loadBalancerSourceRanges,omitempty"` + + // ExternalTrafficPolicy selects how nodes route external traffic to the + // Service. Local preserves the client source IP at the cost of imbalanced + // load. Only valid when Type is LoadBalancer or NodePort. + // +optional + // +kubebuilder:validation:Enum=Cluster;Local + ExternalTrafficPolicy *corev1.ServiceExternalTrafficPolicy `json:"externalTrafficPolicy,omitempty"` +} + // GatewayClassParametersList contains a list of GatewayClassParameters. // // +kubebuilder:object:root=true diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index df8e04ee..97760582 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -184,6 +184,55 @@ func (in *ConfigMapReference) DeepCopy() *ConfigMapReference { return out } +// DeepCopyInto copies the receiver into out. +func (in *ServiceConfig) DeepCopyInto(out *ServiceConfig) { + *out = *in + if in.Type != nil { + in, out := &in.Type, &out.Type + *out = new(corev1.ServiceType) + **out = **in + } + if in.Annotations != nil { + in, out := &in.Annotations, &out.Annotations + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.Labels != nil { + in, out := &in.Labels, &out.Labels + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.LoadBalancerClass != nil { + in, out := &in.LoadBalancerClass, &out.LoadBalancerClass + *out = new(string) + **out = **in + } + if in.LoadBalancerSourceRanges != nil { + in, out := &in.LoadBalancerSourceRanges, &out.LoadBalancerSourceRanges + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.ExternalTrafficPolicy != nil { + in, out := &in.ExternalTrafficPolicy, &out.ExternalTrafficPolicy + *out = new(corev1.ServiceExternalTrafficPolicy) + **out = **in + } +} + +// DeepCopy creates a deep copy of ServiceConfig. +func (in *ServiceConfig) DeepCopy() *ServiceConfig { + if in == nil { + return nil + } + out := new(ServiceConfig) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto copies the receiver into out. func (in *VarnishLogging) DeepCopyInto(out *VarnishLogging) { *out = *in @@ -255,6 +304,11 @@ func (in *GatewayClassParametersSpec) DeepCopyInto(out *GatewayClassParametersSp (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.Service != nil { + in, out := &in.Service, &out.Service + *out = new(ServiceConfig) + (*in).DeepCopyInto(*out) + } } // DeepCopyInto copies the receiver into out. diff --git a/charts/varnish-gateway/crds/gateway.varnish-software.com_gatewayclassparameters.yaml b/charts/varnish-gateway/crds/gateway.varnish-software.com_gatewayclassparameters.yaml index 4efbd575..6e879816 100644 --- a/charts/varnish-gateway/crds/gateway.varnish-software.com_gatewayclassparameters.yaml +++ b/charts/varnish-gateway/crds/gateway.varnish-software.com_gatewayclassparameters.yaml @@ -3677,6 +3677,76 @@ spec: More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ type: object type: object + service: + description: |- + Service configures the data-plane Service. When omitted, the operator + preserves the historical default (Type: LoadBalancer, no annotations). + Service shape changes do NOT trigger pod restarts. + properties: + annotations: + additionalProperties: + type: string + description: |- + Annotations are added to the Service's ObjectMeta. Keys set here are + tracked via the varnish.io/managed-annotations sentinel and pruned on + removal; annotations added by other actors (cloud controllers, etc.) + are preserved. + type: object + externalTrafficPolicy: + description: |- + ExternalTrafficPolicy selects how nodes route external traffic to the + Service. Local preserves the client source IP at the cost of imbalanced + load. Only valid when Type is LoadBalancer or NodePort. + enum: + - Cluster + - Local + type: string + labels: + additionalProperties: + type: string + description: |- + Labels are added to the Service's ObjectMeta. The operator's + controller-managed labels (app.kubernetes.io/managed-by, + gateway.networking.k8s.io/gateway-name, + gateway.networking.k8s.io/gateway-namespace) cannot be overridden; + user-supplied keys colliding with these are dropped with a log warning. + Other keys are tracked via the varnish.io/managed-labels sentinel. + type: object + loadBalancerClass: + description: |- + LoadBalancerClass selects an implementation of load balancer + (cluster-specific). Only valid when Type is LoadBalancer. + type: string + loadBalancerSourceRanges: + description: |- + LoadBalancerSourceRanges restricts traffic to the LoadBalancer to the + listed CIDRs. Only valid when Type is LoadBalancer. CIDR syntax is + validated by the Kubernetes API server. + items: + type: string + type: array + type: + description: |- + Type selects the Service type. Defaults to LoadBalancer when omitted. + Headless services (clusterIP: None) and ExternalName are intentionally + not supported. + enum: + - ClusterIP + - NodePort + - LoadBalancer + type: string + type: object + x-kubernetes-validations: + - message: loadBalancerClass is only valid when type is LoadBalancer + rule: '!has(self.loadBalancerClass) || !has(self.type) || self.type + == ''LoadBalancer''' + - message: loadBalancerSourceRanges is only valid when type is LoadBalancer + rule: '!has(self.loadBalancerSourceRanges) || !has(self.type) || + self.type == ''LoadBalancer''' + - message: externalTrafficPolicy is only valid when type is LoadBalancer + or NodePort + rule: '!has(self.externalTrafficPolicy) || !has(self.type) || self.type + == ''LoadBalancer'' || self.type == ''NodePort''' topologySpreadConstraints: description: |- TopologySpreadConstraints lets you pin Gateway pods across failure diff --git a/deploy/00-prereqs.yaml b/deploy/00-prereqs.yaml index 10970cd6..9a141019 100644 --- a/deploy/00-prereqs.yaml +++ b/deploy/00-prereqs.yaml @@ -3686,6 +3686,76 @@ spec: More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ type: object type: object + service: + description: |- + Service configures the data-plane Service. When omitted, the operator + preserves the historical default (Type: LoadBalancer, no annotations). + Service shape changes do NOT trigger pod restarts. + properties: + annotations: + additionalProperties: + type: string + description: |- + Annotations are added to the Service's ObjectMeta. Keys set here are + tracked via the varnish.io/managed-annotations sentinel and pruned on + removal; annotations added by other actors (cloud controllers, etc.) + are preserved. + type: object + externalTrafficPolicy: + description: |- + ExternalTrafficPolicy selects how nodes route external traffic to the + Service. Local preserves the client source IP at the cost of imbalanced + load. Only valid when Type is LoadBalancer or NodePort. + enum: + - Cluster + - Local + type: string + labels: + additionalProperties: + type: string + description: |- + Labels are added to the Service's ObjectMeta. The operator's + controller-managed labels (app.kubernetes.io/managed-by, + gateway.networking.k8s.io/gateway-name, + gateway.networking.k8s.io/gateway-namespace) cannot be overridden; + user-supplied keys colliding with these are dropped with a log warning. + Other keys are tracked via the varnish.io/managed-labels sentinel. + type: object + loadBalancerClass: + description: |- + LoadBalancerClass selects an implementation of load balancer + (cluster-specific). Only valid when Type is LoadBalancer. + type: string + loadBalancerSourceRanges: + description: |- + LoadBalancerSourceRanges restricts traffic to the LoadBalancer to the + listed CIDRs. Only valid when Type is LoadBalancer. CIDR syntax is + validated by the Kubernetes API server. + items: + type: string + type: array + type: + description: |- + Type selects the Service type. Defaults to LoadBalancer when omitted. + Headless services (clusterIP: None) and ExternalName are intentionally + not supported. + enum: + - ClusterIP + - NodePort + - LoadBalancer + type: string + type: object + x-kubernetes-validations: + - message: loadBalancerClass is only valid when type is LoadBalancer + rule: '!has(self.loadBalancerClass) || !has(self.type) || self.type + == ''LoadBalancer''' + - message: loadBalancerSourceRanges is only valid when type is LoadBalancer + rule: '!has(self.loadBalancerSourceRanges) || !has(self.type) || + self.type == ''LoadBalancer''' + - message: externalTrafficPolicy is only valid when type is LoadBalancer + or NodePort + rule: '!has(self.externalTrafficPolicy) || !has(self.type) || self.type + == ''LoadBalancer'' || self.type == ''NodePort''' topologySpreadConstraints: description: |- TopologySpreadConstraints lets you pin Gateway pods across failure From cff91b026e64400a1c7278dccf079c99acc0300e Mon Sep 17 00:00:00 2001 From: Oystein Skogvold Date: Mon, 25 May 2026 16:47:12 +0200 Subject: [PATCH 02/17] docs(api): clarify LoadBalancerSourceRanges CIDR validation timing --- api/v1alpha1/gatewayclassparameters_types.go | 5 +++-- .../gateway.varnish-software.com_gatewayclassparameters.yaml | 5 +++-- deploy/00-prereqs.yaml | 5 +++-- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/api/v1alpha1/gatewayclassparameters_types.go b/api/v1alpha1/gatewayclassparameters_types.go index 30134d50..9da14669 100644 --- a/api/v1alpha1/gatewayclassparameters_types.go +++ b/api/v1alpha1/gatewayclassparameters_types.go @@ -203,8 +203,9 @@ type ServiceConfig struct { LoadBalancerClass *string `json:"loadBalancerClass,omitempty"` // LoadBalancerSourceRanges restricts traffic to the LoadBalancer to the - // listed CIDRs. Only valid when Type is LoadBalancer. CIDR syntax is - // validated by the Kubernetes API server. + // listed CIDRs. Only valid when Type is LoadBalancer. CIDR syntax is not + // validated at admission time for this CRD; Kubernetes validates it when + // the Service is reconciled. // +optional LoadBalancerSourceRanges []string `json:"loadBalancerSourceRanges,omitempty"` diff --git a/charts/varnish-gateway/crds/gateway.varnish-software.com_gatewayclassparameters.yaml b/charts/varnish-gateway/crds/gateway.varnish-software.com_gatewayclassparameters.yaml index 6e879816..6c602208 100644 --- a/charts/varnish-gateway/crds/gateway.varnish-software.com_gatewayclassparameters.yaml +++ b/charts/varnish-gateway/crds/gateway.varnish-software.com_gatewayclassparameters.yaml @@ -3720,8 +3720,9 @@ spec: loadBalancerSourceRanges: description: |- LoadBalancerSourceRanges restricts traffic to the LoadBalancer to the - listed CIDRs. Only valid when Type is LoadBalancer. CIDR syntax is - validated by the Kubernetes API server. + listed CIDRs. Only valid when Type is LoadBalancer. CIDR syntax is not + validated at admission time for this CRD; Kubernetes validates it when + the Service is reconciled. items: type: string type: array diff --git a/deploy/00-prereqs.yaml b/deploy/00-prereqs.yaml index 9a141019..8521b59c 100644 --- a/deploy/00-prereqs.yaml +++ b/deploy/00-prereqs.yaml @@ -3729,8 +3729,9 @@ spec: loadBalancerSourceRanges: description: |- LoadBalancerSourceRanges restricts traffic to the LoadBalancer to the - listed CIDRs. Only valid when Type is LoadBalancer. CIDR syntax is - validated by the Kubernetes API server. + listed CIDRs. Only valid when Type is LoadBalancer. CIDR syntax is not + validated at admission time for this CRD; Kubernetes validates it when + the Service is reconciled. items: type: string type: array From 8a18c79110193db84f21ccb83b6cf8664ceb8437 Mon Sep 17 00:00:00 2001 From: Oystein Skogvold Date: Mon, 25 May 2026 16:50:46 +0200 Subject: [PATCH 03/17] feat(controller): add resolveServiceConfig with class/gateway overlay Pure function resolving GatewayClassParameters.spec.service + Gateway.spec.infrastructure into a fully-defaulted ResolvedServiceConfig. Defaults Type to LoadBalancer when omitted; overlays Gateway-level annotations/labels on top of class-level defaults without aliasing inputs. Co-Authored-By: Claude Sonnet 4.6 --- internal/controller/service_resolver.go | 80 ++++++++ internal/controller/service_resolver_test.go | 192 +++++++++++++++++++ 2 files changed, 272 insertions(+) create mode 100644 internal/controller/service_resolver.go create mode 100644 internal/controller/service_resolver_test.go diff --git a/internal/controller/service_resolver.go b/internal/controller/service_resolver.go new file mode 100644 index 00000000..56b0f3cf --- /dev/null +++ b/internal/controller/service_resolver.go @@ -0,0 +1,80 @@ +package controller + +import ( + corev1 "k8s.io/api/core/v1" + gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" + + gatewayparamsv1alpha1 "github.com/varnish/gateway/api/v1alpha1" +) + +// Sentinel annotation keys recording which annotation and label keys the +// operator manages on the Service. Pruning on update consults these to avoid +// stomping on annotations added by cloud controllers, service-mesh sidecar +// injectors, observability tooling, etc. +const ( + AnnotationManagedAnnotations = "varnish.io/managed-annotations" + AnnotationManagedLabels = "varnish.io/managed-labels" +) + +// ResolvedServiceConfig is the fully-defaulted, fully-merged Service shape +// the controller will apply to the Service object. Type is always non-nil +// (defaults to LoadBalancer); other pointer fields stay nil when omitted. +// +// Annotations and Labels are the merged result of GatewayClassParameters +// defaults + Gateway.spec.infrastructure overlay. The maps are never nil +// after resolution — they may be empty. +type ResolvedServiceConfig struct { + Type corev1.ServiceType + Annotations map[string]string + Labels map[string]string + LoadBalancerClass *string + LoadBalancerSourceRanges []string + ExternalTrafficPolicy *corev1.ServiceExternalTrafficPolicy +} + +// resolveServiceConfig computes the desired Service shape from the +// GatewayClass-level parameters and Gateway-level infrastructure overlay. +// Both inputs may be nil; the function always returns a usable config. +func resolveServiceConfig(gateway *gatewayv1.Gateway, params *gatewayparamsv1alpha1.GatewayClassParameters) ResolvedServiceConfig { + out := ResolvedServiceConfig{ + Type: corev1.ServiceTypeLoadBalancer, + Annotations: map[string]string{}, + Labels: map[string]string{}, + } + + if params != nil && params.Spec.Service != nil { + svc := params.Spec.Service + if svc.Type != nil { + out.Type = *svc.Type + } + for k, v := range svc.Annotations { + out.Annotations[k] = v + } + for k, v := range svc.Labels { + out.Labels[k] = v + } + if svc.LoadBalancerClass != nil { + lb := *svc.LoadBalancerClass + out.LoadBalancerClass = &lb + } + if len(svc.LoadBalancerSourceRanges) > 0 { + out.LoadBalancerSourceRanges = append([]string{}, svc.LoadBalancerSourceRanges...) + } + if svc.ExternalTrafficPolicy != nil { + etp := *svc.ExternalTrafficPolicy + out.ExternalTrafficPolicy = &etp + } + } + + // Gateway.spec.infrastructure overlay applies only to annotations and labels. + if gateway != nil && gateway.Spec.Infrastructure != nil { + for k, v := range gateway.Spec.Infrastructure.Annotations { + out.Annotations[string(k)] = string(v) + } + for k, v := range gateway.Spec.Infrastructure.Labels { + out.Labels[string(k)] = string(v) + } + } + + return out +} diff --git a/internal/controller/service_resolver_test.go b/internal/controller/service_resolver_test.go new file mode 100644 index 00000000..72067977 --- /dev/null +++ b/internal/controller/service_resolver_test.go @@ -0,0 +1,192 @@ +package controller + +import ( + "testing" + + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" + + gatewayparamsv1alpha1 "github.com/varnish/gateway/api/v1alpha1" +) + +func TestResolveServiceConfig_NoParams_DefaultsToLoadBalancer(t *testing.T) { + gw := &gatewayv1.Gateway{ObjectMeta: metav1.ObjectMeta{Name: "gw", Namespace: "default"}} + + got := resolveServiceConfig(gw, nil) + + if got.Type != corev1.ServiceTypeLoadBalancer { + t.Errorf("Type = %v, want LoadBalancer", got.Type) + } + if len(got.Annotations) != 0 { + t.Errorf("Annotations = %v, want empty", got.Annotations) + } + if len(got.Labels) != 0 { + t.Errorf("Labels = %v, want empty", got.Labels) + } + if got.LoadBalancerClass != nil { + t.Errorf("LoadBalancerClass = %v, want nil", *got.LoadBalancerClass) + } + if got.ExternalTrafficPolicy != nil { + t.Errorf("ExternalTrafficPolicy = %v, want nil", *got.ExternalTrafficPolicy) + } +} + +func TestResolveServiceConfig_NilServiceField_DefaultsToLoadBalancer(t *testing.T) { + gw := &gatewayv1.Gateway{ObjectMeta: metav1.ObjectMeta{Name: "gw", Namespace: "default"}} + params := &gatewayparamsv1alpha1.GatewayClassParameters{} + + got := resolveServiceConfig(gw, params) + + if got.Type != corev1.ServiceTypeLoadBalancer { + t.Errorf("Type = %v, want LoadBalancer", got.Type) + } +} + +func TestResolveServiceConfig_NilTypeInService_DefaultsToLoadBalancer(t *testing.T) { + gw := &gatewayv1.Gateway{ObjectMeta: metav1.ObjectMeta{Name: "gw", Namespace: "default"}} + params := &gatewayparamsv1alpha1.GatewayClassParameters{ + Spec: gatewayparamsv1alpha1.GatewayClassParametersSpec{ + Service: &gatewayparamsv1alpha1.ServiceConfig{ + Annotations: map[string]string{"foo": "bar"}, + }, + }, + } + + got := resolveServiceConfig(gw, params) + + if got.Type != corev1.ServiceTypeLoadBalancer { + t.Errorf("Type = %v, want LoadBalancer", got.Type) + } + if got.Annotations["foo"] != "bar" { + t.Errorf("Annotations[foo] = %q, want bar", got.Annotations["foo"]) + } +} + +func TestResolveServiceConfig_ClassOnly(t *testing.T) { + gw := &gatewayv1.Gateway{ObjectMeta: metav1.ObjectMeta{Name: "gw", Namespace: "default"}} + etp := corev1.ServiceExternalTrafficPolicyLocal + lbClass := "service.k8s.io/cloud-provider-mock" + params := &gatewayparamsv1alpha1.GatewayClassParameters{ + Spec: gatewayparamsv1alpha1.GatewayClassParametersSpec{ + Service: &gatewayparamsv1alpha1.ServiceConfig{ + Type: ptr(corev1.ServiceTypeClusterIP), + Annotations: map[string]string{"a": "1", "b": "2"}, + Labels: map[string]string{"team": "edge"}, + LoadBalancerClass: &lbClass, + LoadBalancerSourceRanges: []string{"10.0.0.0/8"}, + ExternalTrafficPolicy: &etp, + }, + }, + } + + got := resolveServiceConfig(gw, params) + + if got.Type != corev1.ServiceTypeClusterIP { + t.Errorf("Type = %v, want ClusterIP", got.Type) + } + if got.Annotations["a"] != "1" || got.Annotations["b"] != "2" { + t.Errorf("Annotations = %v", got.Annotations) + } + if got.Labels["team"] != "edge" { + t.Errorf("Labels = %v", got.Labels) + } + if got.LoadBalancerClass == nil || *got.LoadBalancerClass != lbClass { + t.Errorf("LoadBalancerClass = %v, want %q", got.LoadBalancerClass, lbClass) + } + if len(got.LoadBalancerSourceRanges) != 1 || got.LoadBalancerSourceRanges[0] != "10.0.0.0/8" { + t.Errorf("LoadBalancerSourceRanges = %v", got.LoadBalancerSourceRanges) + } + if got.ExternalTrafficPolicy == nil || *got.ExternalTrafficPolicy != etp { + t.Errorf("ExternalTrafficPolicy = %v", got.ExternalTrafficPolicy) + } +} + +func TestResolveServiceConfig_GatewayInfraOverlay_AnnotationsAndLabels(t *testing.T) { + gw := &gatewayv1.Gateway{ + ObjectMeta: metav1.ObjectMeta{Name: "gw", Namespace: "default"}, + Spec: gatewayv1.GatewaySpec{ + Infrastructure: &gatewayv1.GatewayInfrastructure{ + Annotations: map[gatewayv1.AnnotationKey]gatewayv1.AnnotationValue{ + "a": "overridden", // collides with class + "new": "value", // new key + }, + Labels: map[gatewayv1.LabelKey]gatewayv1.LabelValue{ + "team": "core", // overrides class + }, + }, + }, + } + params := &gatewayparamsv1alpha1.GatewayClassParameters{ + Spec: gatewayparamsv1alpha1.GatewayClassParametersSpec{ + Service: &gatewayparamsv1alpha1.ServiceConfig{ + Annotations: map[string]string{"a": "class", "kept": "yes"}, + Labels: map[string]string{"team": "edge", "tier": "cache"}, + }, + }, + } + + got := resolveServiceConfig(gw, params) + + if got.Annotations["a"] != "overridden" { + t.Errorf("Annotations[a] = %q, want overridden", got.Annotations["a"]) + } + if got.Annotations["new"] != "value" { + t.Errorf("Annotations[new] = %q, want value", got.Annotations["new"]) + } + if got.Annotations["kept"] != "yes" { + t.Errorf("Annotations[kept] = %q, want yes", got.Annotations["kept"]) + } + if got.Labels["team"] != "core" { + t.Errorf("Labels[team] = %q, want core", got.Labels["team"]) + } + if got.Labels["tier"] != "cache" { + t.Errorf("Labels[tier] = %q, want cache", got.Labels["tier"]) + } +} + +func TestResolveServiceConfig_GatewayInfraOnly_NoClassConfig(t *testing.T) { + gw := &gatewayv1.Gateway{ + ObjectMeta: metav1.ObjectMeta{Name: "gw", Namespace: "default"}, + Spec: gatewayv1.GatewaySpec{ + Infrastructure: &gatewayv1.GatewayInfrastructure{ + Annotations: map[gatewayv1.AnnotationKey]gatewayv1.AnnotationValue{ + "per-gw": "ok", + }, + }, + }, + } + + got := resolveServiceConfig(gw, nil) + + if got.Type != corev1.ServiceTypeLoadBalancer { + t.Errorf("Type = %v, want LoadBalancer", got.Type) + } + if got.Annotations["per-gw"] != "ok" { + t.Errorf("Annotations[per-gw] = %q, want ok", got.Annotations["per-gw"]) + } +} + +func TestResolveServiceConfig_GatewayInfraDoesNotOverrideType(t *testing.T) { + // Only labels/annotations layer from Gateway.spec.infrastructure. + // Type, LBClass, SourceRanges, ETP stay class-level. + gw := &gatewayv1.Gateway{ + ObjectMeta: metav1.ObjectMeta{Name: "gw", Namespace: "default"}, + Spec: gatewayv1.GatewaySpec{ + Infrastructure: &gatewayv1.GatewayInfrastructure{}, + }, + } + params := &gatewayparamsv1alpha1.GatewayClassParameters{ + Spec: gatewayparamsv1alpha1.GatewayClassParametersSpec{ + Service: &gatewayparamsv1alpha1.ServiceConfig{ + Type: ptr(corev1.ServiceTypeNodePort), + }, + }, + } + + got := resolveServiceConfig(gw, params) + + if got.Type != corev1.ServiceTypeNodePort { + t.Errorf("Type = %v, want NodePort", got.Type) + } +} From 81b17dc1c605d02d3ec2aa094be9f914cdf67960 Mon Sep 17 00:00:00 2001 From: Oystein Skogvold Date: Mon, 25 May 2026 16:55:35 +0200 Subject: [PATCH 04/17] test(controller): cover nil-gateway and defensive-copy paths for resolveServiceConfig --- internal/controller/service_resolver_test.go | 37 ++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/internal/controller/service_resolver_test.go b/internal/controller/service_resolver_test.go index 72067977..9209f0d1 100644 --- a/internal/controller/service_resolver_test.go +++ b/internal/controller/service_resolver_test.go @@ -167,6 +167,43 @@ func TestResolveServiceConfig_GatewayInfraOnly_NoClassConfig(t *testing.T) { } } +func TestResolveServiceConfig_NilGateway(t *testing.T) { + // The resolver guards on nil gateway. This pins the contract so a + // future refactor cannot remove the guard without a test failure. + got := resolveServiceConfig(nil, nil) + + if got.Type != corev1.ServiceTypeLoadBalancer { + t.Errorf("Type = %v, want LoadBalancer", got.Type) + } + if got.Annotations == nil { + t.Errorf("Annotations should be non-nil empty map, got nil") + } + if got.Labels == nil { + t.Errorf("Labels should be non-nil empty map, got nil") + } +} + +func TestResolveServiceConfig_DefensiveCopy_SourceRanges(t *testing.T) { + // Mutating the input slice after resolution must not affect the output. + // Pins the defensive-copy behavior against future regressions. + gw := &gatewayv1.Gateway{ObjectMeta: metav1.ObjectMeta{Name: "gw", Namespace: "default"}} + ranges := []string{"10.0.0.0/8"} + params := &gatewayparamsv1alpha1.GatewayClassParameters{ + Spec: gatewayparamsv1alpha1.GatewayClassParametersSpec{ + Service: &gatewayparamsv1alpha1.ServiceConfig{ + LoadBalancerSourceRanges: ranges, + }, + }, + } + + got := resolveServiceConfig(gw, params) + ranges[0] = "mutated" + + if got.LoadBalancerSourceRanges[0] != "10.0.0.0/8" { + t.Errorf("LoadBalancerSourceRanges[0] = %q, want 10.0.0.0/8 (input mutation leaked into output)", got.LoadBalancerSourceRanges[0]) + } +} + func TestResolveServiceConfig_GatewayInfraDoesNotOverrideType(t *testing.T) { // Only labels/annotations layer from Gateway.spec.infrastructure. // Type, LBClass, SourceRanges, ETP stay class-level. From c9ad53e53ed2b860a1da3a40219badc812750bb4 Mon Sep 17 00:00:00 2001 From: Oystein Skogvold Date: Mon, 25 May 2026 16:58:57 +0200 Subject: [PATCH 05/17] feat(controller): add mergeWithManaged sentinel-based annotation/label merger Implements the sentinel-based merge function that writes desired keys onto the existing map, prunes previously-managed keys no longer in desired, leaves unmanaged external keys untouched, and enforces protected-key policy. Eight tests cover first-apply, drift-add, drift-remove, external key preservation, empty desired pruning, sorted sentinel, protected key dropping, and nil existing map handling. Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/controller/service_resolver.go | 64 ++++++++ internal/controller/service_resolver_test.go | 146 +++++++++++++++++++ 2 files changed, 210 insertions(+) diff --git a/internal/controller/service_resolver.go b/internal/controller/service_resolver.go index 56b0f3cf..3b0aae85 100644 --- a/internal/controller/service_resolver.go +++ b/internal/controller/service_resolver.go @@ -1,6 +1,9 @@ package controller import ( + "sort" + "strings" + corev1 "k8s.io/api/core/v1" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" @@ -78,3 +81,64 @@ func resolveServiceConfig(gateway *gatewayv1.Gateway, params *gatewayparamsv1alp return out } + +// mergeWithManaged writes desired keys onto a copy of existing, prunes any +// previously-managed keys (per sentinel) that are no longer desired, and +// returns the merged map plus a fresh sentinel string listing the keys the +// operator now owns. Keys present in `protected` are silently dropped from +// desired before merging — they belong to the operator and cannot be +// overridden via spec. +// +// The sentinel is a comma-separated, lexically-sorted list of the keys the +// operator manages. Empty sentinel = nothing managed yet. The sentinel key +// itself (e.g. AnnotationManagedAnnotations) is NEVER recorded as managed — +// it's operator metadata and is always present when the feature is in use. +// +// existing may be nil; the function never mutates inputs. +func mergeWithManaged(desired, existing map[string]string, sentinel string, protected map[string]struct{}) (map[string]string, string) { + merged := make(map[string]string, len(existing)+len(desired)) + for k, v := range existing { + merged[k] = v + } + + // Parse previously-managed keys from the sentinel. + previouslyManaged := map[string]struct{}{} + if sentinel != "" { + for _, k := range strings.Split(sentinel, ",") { + if k != "" { + previouslyManaged[k] = struct{}{} + } + } + } + + // Filter protected keys out of desired (silently — the resolver logs the + // collision before calling us, so we just enforce the policy here). + filtered := make(map[string]string, len(desired)) + for k, v := range desired { + if _, isProtected := protected[k]; isProtected { + continue + } + filtered[k] = v + } + + // Prune managed-but-no-longer-desired keys. + for k := range previouslyManaged { + if _, stillDesired := filtered[k]; !stillDesired { + delete(merged, k) + } + } + + // Apply desired keys. + for k, v := range filtered { + merged[k] = v + } + + // Build new sentinel from the keys we just applied. + keys := make([]string, 0, len(filtered)) + for k := range filtered { + keys = append(keys, k) + } + sort.Strings(keys) + + return merged, strings.Join(keys, ",") +} diff --git a/internal/controller/service_resolver_test.go b/internal/controller/service_resolver_test.go index 9209f0d1..c098ff23 100644 --- a/internal/controller/service_resolver_test.go +++ b/internal/controller/service_resolver_test.go @@ -227,3 +227,149 @@ func TestResolveServiceConfig_GatewayInfraDoesNotOverrideType(t *testing.T) { t.Errorf("Type = %v, want NodePort", got.Type) } } + +func TestMergeWithManaged_FirstApply_NoSentinel(t *testing.T) { + existing := map[string]string{} + sentinel := "" + desired := map[string]string{"a": "1", "b": "2"} + + merged, newSentinel := mergeWithManaged(desired, existing, sentinel, nil) + + if merged["a"] != "1" || merged["b"] != "2" { + t.Errorf("merged = %v", merged) + } + if newSentinel != "a,b" { + t.Errorf("sentinel = %q, want %q", newSentinel, "a,b") + } +} + +func TestMergeWithManaged_DriftAdd(t *testing.T) { + existing := map[string]string{"a": "1"} + sentinel := "a" + desired := map[string]string{"a": "1", "b": "2"} + + merged, newSentinel := mergeWithManaged(desired, existing, sentinel, nil) + + if merged["a"] != "1" || merged["b"] != "2" { + t.Errorf("merged = %v", merged) + } + if newSentinel != "a,b" { + t.Errorf("sentinel = %q, want %q", newSentinel, "a,b") + } +} + +func TestMergeWithManaged_DriftRemove(t *testing.T) { + // Key "b" was previously managed (listed in sentinel) and is no longer + // in desired — must be pruned from the output map. + existing := map[string]string{"a": "1", "b": "2"} + sentinel := "a,b" + desired := map[string]string{"a": "1"} + + merged, newSentinel := mergeWithManaged(desired, existing, sentinel, nil) + + if merged["a"] != "1" { + t.Errorf("merged[a] = %q, want 1", merged["a"]) + } + if _, ok := merged["b"]; ok { + t.Errorf("merged[b] should be pruned, got %v", merged) + } + if newSentinel != "a" { + t.Errorf("sentinel = %q, want %q", newSentinel, "a") + } +} + +func TestMergeWithManaged_ExternalKeyUntouched(t *testing.T) { + // "cloud.k8s.io/foo" was never managed by us — must survive across reconciles. + existing := map[string]string{ + "a": "1", + "cloud.k8s.io/foo": "bar", + } + sentinel := "a" + desired := map[string]string{"a": "1"} + + merged, newSentinel := mergeWithManaged(desired, existing, sentinel, nil) + + if merged["cloud.k8s.io/foo"] != "bar" { + t.Errorf("external key was modified: %v", merged) + } + if newSentinel != "a" { + t.Errorf("sentinel = %q", newSentinel) + } +} + +func TestMergeWithManaged_EmptyDesired_PrunesAllManaged(t *testing.T) { + existing := map[string]string{ + "a": "1", + "b": "2", + "cloud.k8s.io/foo": "bar", + } + sentinel := "a,b" + desired := map[string]string{} + + merged, newSentinel := mergeWithManaged(desired, existing, sentinel, nil) + + if _, ok := merged["a"]; ok { + t.Errorf("a not pruned: %v", merged) + } + if _, ok := merged["b"]; ok { + t.Errorf("b not pruned: %v", merged) + } + if merged["cloud.k8s.io/foo"] != "bar" { + t.Errorf("external key was modified: %v", merged) + } + if newSentinel != "" { + t.Errorf("sentinel = %q, want empty", newSentinel) + } +} + +func TestMergeWithManaged_SentinelIsSorted(t *testing.T) { + // Sorted sentinel keys are required for deterministic reconciliation. + existing := map[string]string{} + desired := map[string]string{"z": "1", "a": "2", "m": "3"} + + _, newSentinel := mergeWithManaged(desired, existing, "", nil) + + if newSentinel != "a,m,z" { + t.Errorf("sentinel = %q, want %q", newSentinel, "a,m,z") + } +} + +func TestMergeWithManaged_ProtectedKey_DroppedFromDesired(t *testing.T) { + // User tried to override a controller-managed label. + existing := map[string]string{ + LabelManagedBy: ManagedByValue, // already set by buildLabels + } + sentinel := "" + desired := map[string]string{ + LabelManagedBy: "evil-actor", // user trying to hijack + "team": "edge", // legitimate user label + } + protected := map[string]struct{}{LabelManagedBy: {}} + + merged, newSentinel := mergeWithManaged(desired, existing, sentinel, protected) + + if merged[LabelManagedBy] != ManagedByValue { + t.Errorf("protected key was overridden: %v", merged) + } + if merged["team"] != "edge" { + t.Errorf("legitimate key not applied: %v", merged) + } + // Sentinel must NOT contain the protected key — it was never managed via spec. + if newSentinel != "team" { + t.Errorf("sentinel = %q, want %q", newSentinel, "team") + } +} + +func TestMergeWithManaged_NilExistingMap(t *testing.T) { + // A freshly-built Service may have nil annotations/labels — must not panic. + desired := map[string]string{"a": "1"} + + merged, newSentinel := mergeWithManaged(desired, nil, "", nil) + + if merged["a"] != "1" { + t.Errorf("merged = %v", merged) + } + if newSentinel != "a" { + t.Errorf("sentinel = %q", newSentinel) + } +} From debc33ad8fc9e8d46651756fdd8699c579e689b8 Mon Sep 17 00:00:00 2001 From: Oystein Skogvold Date: Mon, 25 May 2026 17:03:21 +0200 Subject: [PATCH 06/17] test(controller): cover desired-wins and idempotency for mergeWithManaged --- internal/controller/service_resolver.go | 6 ++-- internal/controller/service_resolver_test.go | 38 ++++++++++++++++++++ 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/internal/controller/service_resolver.go b/internal/controller/service_resolver.go index 3b0aae85..4defaef9 100644 --- a/internal/controller/service_resolver.go +++ b/internal/controller/service_resolver.go @@ -94,7 +94,7 @@ func resolveServiceConfig(gateway *gatewayv1.Gateway, params *gatewayparamsv1alp // itself (e.g. AnnotationManagedAnnotations) is NEVER recorded as managed — // it's operator metadata and is always present when the feature is in use. // -// existing may be nil; the function never mutates inputs. +// existing and protected may be nil; the function never mutates inputs. func mergeWithManaged(desired, existing map[string]string, sentinel string, protected map[string]struct{}) (map[string]string, string) { merged := make(map[string]string, len(existing)+len(desired)) for k, v := range existing { @@ -111,8 +111,8 @@ func mergeWithManaged(desired, existing map[string]string, sentinel string, prot } } - // Filter protected keys out of desired (silently — the resolver logs the - // collision before calling us, so we just enforce the policy here). + // Filter protected keys out of desired. Callers should log the collision + // before calling us; this function only enforces the policy. filtered := make(map[string]string, len(desired)) for k, v := range desired { if _, isProtected := protected[k]; isProtected { diff --git a/internal/controller/service_resolver_test.go b/internal/controller/service_resolver_test.go index c098ff23..0677f6fb 100644 --- a/internal/controller/service_resolver_test.go +++ b/internal/controller/service_resolver_test.go @@ -373,3 +373,41 @@ func TestMergeWithManaged_NilExistingMap(t *testing.T) { t.Errorf("sentinel = %q", newSentinel) } } + +func TestMergeWithManaged_DesiredValueWinsOverExisting(t *testing.T) { + // Pin the apply-order: desired must overwrite existing values for the + // same key. A regression that inverted this would silently leak stale + // annotations across reconciles. + existing := map[string]string{"a": "old-value"} + desired := map[string]string{"a": "new-value"} + + merged, _ := mergeWithManaged(desired, existing, "a", nil) + + if merged["a"] != "new-value" { + t.Errorf("merged[a] = %q, want new-value (desired must win)", merged["a"]) + } +} + +func TestMergeWithManaged_Idempotent(t *testing.T) { + // The function is called on every reconcile; feeding its output back as + // the next call's existing must yield identical results. Catches subtle + // bugs like sentinel keys leaking into the managed set or non-deterministic + // sentinel ordering. + desired := map[string]string{"x": "1", "y": "2"} + protected := map[string]struct{}{"forbidden": {}} + + merged1, sentinel1 := mergeWithManaged(desired, nil, "", protected) + merged2, sentinel2 := mergeWithManaged(desired, merged1, sentinel1, protected) + + if sentinel1 != sentinel2 { + t.Errorf("sentinel changed between calls: %q vs %q", sentinel1, sentinel2) + } + if len(merged1) != len(merged2) { + t.Errorf("merged map size changed: %d vs %d", len(merged1), len(merged2)) + } + for k, v := range merged1 { + if merged2[k] != v { + t.Errorf("merged[%q] = %q on second call, want %q", k, merged2[k], v) + } + } +} From fa69e68a05a11b06d71e0a6c8e8083bbd496fe6e Mon Sep 17 00:00:00 2001 From: Oystein Skogvold Date: Mon, 25 May 2026 17:06:46 +0200 Subject: [PATCH 07/17] feat(controller): apply ResolvedServiceConfig in buildService Wire the ResolvedServiceConfig produced by resolveServiceConfig through buildService: service type, LoadBalancer knobs, ExternalTrafficPolicy, and user annotations/labels (via managed-keys sentinel). Selector is built from controller-managed labels only so user labels can never break pod selection. Update all five call sites (4 in resources_test, 1 in gateway_controller_test) and add a placeholder in reconcileResources (Task 6 replaces with real resolver invocation). Add controllerManagedLabelKeys helper and 5 new TestBuildService_* tests. Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/controller/gateway_controller.go | 2 +- .../controller/gateway_controller_test.go | 2 +- internal/controller/resources.go | 67 +++++++---- internal/controller/resources_test.go | 108 +++++++++++++++++- internal/controller/service_resolver.go | 10 ++ 5 files changed, 158 insertions(+), 31 deletions(-) diff --git a/internal/controller/gateway_controller.go b/internal/controller/gateway_controller.go index fdee1d41..b513399e 100644 --- a/internal/controller/gateway_controller.go +++ b/internal/controller/gateway_controller.go @@ -258,7 +258,7 @@ func (r *GatewayReconciler) reconcileResources(ctx context.Context, gateway *gat r.buildServiceAccount(gateway), r.buildClusterRoleBinding(gateway), r.buildDeployment(gateway, effectiveImage, varnishdExtraArgs, logging, infraHash, extraVolumes, extraVolumeMounts, extraInitContainers, containerResources, topologySpread, hasBackendTLS), - r.buildService(gateway), + r.buildService(gateway, ResolvedServiceConfig{Type: corev1.ServiceTypeLoadBalancer, Annotations: map[string]string{}, Labels: map[string]string{}}), ) // PDB is opt-in. Create one only if the user asked for it. if pdbSpec != nil { diff --git a/internal/controller/gateway_controller_test.go b/internal/controller/gateway_controller_test.go index 74a4869e..91e27ff9 100644 --- a/internal/controller/gateway_controller_test.go +++ b/internal/controller/gateway_controller_test.go @@ -332,7 +332,7 @@ func TestBuildService(t *testing.T) { }, } - svc := r.buildService(gateway) + svc := r.buildService(gateway, ResolvedServiceConfig{Type: corev1.ServiceTypeLoadBalancer, Annotations: map[string]string{}, Labels: map[string]string{}}) if len(svc.Spec.Ports) != tc.expectedPorts { t.Errorf("expected %d ports, got %d", tc.expectedPorts, len(svc.Spec.Ports)) diff --git a/internal/controller/resources.go b/internal/controller/resources.go index ea3ac9c6..dad6a18f 100644 --- a/internal/controller/resources.go +++ b/internal/controller/resources.go @@ -555,14 +555,20 @@ func (r *GatewayReconciler) buildGatewayContainer(gateway *gatewayv1.Gateway, ef } } -// buildService creates the Service for the Gateway. -func (r *GatewayReconciler) buildService(gateway *gatewayv1.Gateway) *corev1.Service { - labels := r.buildLabels(gateway) +// buildService creates the Service for the Gateway. The resolved config +// supplies Type, LB knobs, traffic policy, plus user-supplied annotations +// and labels that are layered onto the controller-managed labels via the +// managed-keys sentinel. +func (r *GatewayReconciler) buildService(gateway *gatewayv1.Gateway, resolved ResolvedServiceConfig) *corev1.Service { + controllerLabels := r.buildLabels(gateway) + + // Selector mirrors the controller-managed labels — never the user-supplied set. + selector := make(map[string]string, len(controllerLabels)) + for k, v := range controllerLabels { + selector[k] = v + } // Map Gateway listeners to Service ports, deduplicating by port number. - // Multiple listeners can share the same port (differentiated by hostname), - // but a Service only needs one entry per unique port. - // Container ports = listener ports (no translation). var ports []corev1.ServicePort seenPorts := make(map[int32]bool) for i := range gateway.Spec.Listeners { @@ -579,35 +585,46 @@ func (r *GatewayReconciler) buildService(gateway *gatewayv1.Gateway) *corev1.Ser Protocol: corev1.ProtocolTCP, }) } - - // Default to http-80 on port 80 if no listeners if len(ports) == 0 { ports = []corev1.ServicePort{ - { - Name: "http-80", - Port: 80, - TargetPort: intstr.FromInt(80), - Protocol: corev1.ProtocolTCP, - }, + {Name: "http-80", Port: 80, TargetPort: intstr.FromInt(80), Protocol: corev1.ProtocolTCP}, } } - return &corev1.Service{ - TypeMeta: metav1.TypeMeta{ - APIVersion: "v1", - Kind: "Service", - }, + // Merge user labels onto controller labels via the sentinel. Existing + // state is the controller-label baseline; protected keys can't be + // overridden by user input. + mergedLabels, labelSentinel := mergeWithManaged(resolved.Labels, controllerLabels, "", controllerManagedLabelKeys()) + mergedLabels[AnnotationManagedLabels] = labelSentinel + + mergedAnnotations, annoSentinel := mergeWithManaged(resolved.Annotations, nil, "", nil) + mergedAnnotations[AnnotationManagedAnnotations] = annoSentinel + + svc := &corev1.Service{ + TypeMeta: metav1.TypeMeta{APIVersion: "v1", Kind: "Service"}, ObjectMeta: metav1.ObjectMeta{ - Name: gateway.Name, - Namespace: gateway.Namespace, - Labels: labels, + Name: gateway.Name, + Namespace: gateway.Namespace, + Labels: mergedLabels, + Annotations: mergedAnnotations, }, Spec: corev1.ServiceSpec{ - Type: corev1.ServiceTypeLoadBalancer, - Selector: labels, - Ports: ports, + Type: resolved.Type, + Selector: selector, + Ports: ports, + LoadBalancerSourceRanges: resolved.LoadBalancerSourceRanges, }, } + + if resolved.LoadBalancerClass != nil { + lb := *resolved.LoadBalancerClass + svc.Spec.LoadBalancerClass = &lb + } + if resolved.ExternalTrafficPolicy != nil { + svc.Spec.ExternalTrafficPolicy = *resolved.ExternalTrafficPolicy + } + + return svc } // buildPodDisruptionBudget creates a PodDisruptionBudget matching the Gateway's diff --git a/internal/controller/resources_test.go b/internal/controller/resources_test.go index 81774d52..8e7f4a8e 100644 --- a/internal/controller/resources_test.go +++ b/internal/controller/resources_test.go @@ -254,7 +254,7 @@ func TestBuildService_SingleListener(t *testing.T) { r := testReconcilerSimple() gw := testGateway("my-gw", "default") - svc := r.buildService(gw) + svc := r.buildService(gw, ResolvedServiceConfig{Type: corev1.ServiceTypeLoadBalancer, Annotations: map[string]string{}, Labels: map[string]string{}}) if svc.Spec.Type != corev1.ServiceTypeLoadBalancer { t.Errorf("type = %v, want LoadBalancer", svc.Spec.Type) @@ -277,7 +277,7 @@ func TestBuildService_MultipleListeners(t *testing.T) { gatewayv1.Listener{Name: "https", Port: 443, Protocol: gatewayv1.HTTPSProtocolType}, ) - svc := r.buildService(gw) + svc := r.buildService(gw, ResolvedServiceConfig{Type: corev1.ServiceTypeLoadBalancer, Annotations: map[string]string{}, Labels: map[string]string{}}) if len(svc.Spec.Ports) != 2 { t.Fatalf("expected 2 ports, got %d", len(svc.Spec.Ports)) @@ -291,7 +291,7 @@ func TestBuildService_DeduplicatesPorts(t *testing.T) { gatewayv1.Listener{Name: "web2", Port: 80, Protocol: gatewayv1.HTTPProtocolType, Hostname: new(gatewayv1.Hostname("b.example.com"))}, ) - svc := r.buildService(gw) + svc := r.buildService(gw, ResolvedServiceConfig{Type: corev1.ServiceTypeLoadBalancer, Annotations: map[string]string{}, Labels: map[string]string{}}) if len(svc.Spec.Ports) != 1 { t.Errorf("expected 1 deduplicated port, got %d", len(svc.Spec.Ports)) @@ -305,7 +305,7 @@ func TestBuildService_NoListenersFallback(t *testing.T) { Spec: gatewayv1.GatewaySpec{GatewayClassName: "varnish"}, } - svc := r.buildService(gw) + svc := r.buildService(gw, ResolvedServiceConfig{Type: corev1.ServiceTypeLoadBalancer, Annotations: map[string]string{}, Labels: map[string]string{}}) if len(svc.Spec.Ports) != 1 { t.Fatalf("expected 1 fallback port, got %d", len(svc.Spec.Ports)) @@ -315,6 +315,106 @@ func TestBuildService_NoListenersFallback(t *testing.T) { } } +func TestBuildService_AppliesResolvedType(t *testing.T) { + r := testReconcilerSimple() + gw := testGateway("my-gw", "default") + + svc := r.buildService(gw, ResolvedServiceConfig{ + Type: corev1.ServiceTypeClusterIP, + Annotations: map[string]string{}, + Labels: map[string]string{}, + }) + + if svc.Spec.Type != corev1.ServiceTypeClusterIP { + t.Errorf("Type = %v, want ClusterIP", svc.Spec.Type) + } +} + +func TestBuildService_AppliesLoadBalancerFields(t *testing.T) { + r := testReconcilerSimple() + gw := testGateway("my-gw", "default") + etp := corev1.ServiceExternalTrafficPolicyLocal + lbClass := "service.k8s.io/example" + + svc := r.buildService(gw, ResolvedServiceConfig{ + Type: corev1.ServiceTypeLoadBalancer, + Annotations: map[string]string{}, + Labels: map[string]string{}, + LoadBalancerClass: &lbClass, + LoadBalancerSourceRanges: []string{"10.0.0.0/8", "192.168.0.0/16"}, + ExternalTrafficPolicy: &etp, + }) + + if svc.Spec.LoadBalancerClass == nil || *svc.Spec.LoadBalancerClass != lbClass { + t.Errorf("LoadBalancerClass = %v", svc.Spec.LoadBalancerClass) + } + if len(svc.Spec.LoadBalancerSourceRanges) != 2 { + t.Errorf("LoadBalancerSourceRanges = %v", svc.Spec.LoadBalancerSourceRanges) + } + if svc.Spec.ExternalTrafficPolicy != etp { + t.Errorf("ExternalTrafficPolicy = %v, want %v", svc.Spec.ExternalTrafficPolicy, etp) + } +} + +func TestBuildService_AppliesAnnotationsWithSentinel(t *testing.T) { + r := testReconcilerSimple() + gw := testGateway("my-gw", "default") + + svc := r.buildService(gw, ResolvedServiceConfig{ + Type: corev1.ServiceTypeLoadBalancer, + Annotations: map[string]string{"metallb.universe.tf/loadBalancerIPs": "10.0.0.1"}, + Labels: map[string]string{}, + }) + + if svc.Annotations["metallb.universe.tf/loadBalancerIPs"] != "10.0.0.1" { + t.Errorf("annotation not applied: %v", svc.Annotations) + } + if svc.Annotations[AnnotationManagedAnnotations] != "metallb.universe.tf/loadBalancerIPs" { + t.Errorf("sentinel = %q", svc.Annotations[AnnotationManagedAnnotations]) + } +} + +func TestBuildService_AppliesLabelsWithSentinel_ProtectsControllerLabels(t *testing.T) { + r := testReconcilerSimple() + gw := testGateway("my-gw", "default") + + svc := r.buildService(gw, ResolvedServiceConfig{ + Type: corev1.ServiceTypeLoadBalancer, + Annotations: map[string]string{}, + Labels: map[string]string{ + LabelManagedBy: "evil", // protected — must be ignored + "team": "edge", // applied + }, + }) + + if svc.Labels[LabelManagedBy] != ManagedByValue { + t.Errorf("protected label was overridden: %q", svc.Labels[LabelManagedBy]) + } + if svc.Labels["team"] != "edge" { + t.Errorf("user label = %q, want edge", svc.Labels["team"]) + } + if svc.Labels[AnnotationManagedLabels] != "team" { + t.Errorf("sentinel = %q, want team", svc.Labels[AnnotationManagedLabels]) + } +} + +func TestBuildService_NoUserAnnotations_NoSentinel(t *testing.T) { + r := testReconcilerSimple() + gw := testGateway("my-gw", "default") + + svc := r.buildService(gw, ResolvedServiceConfig{ + Type: corev1.ServiceTypeLoadBalancer, + Annotations: map[string]string{}, + Labels: map[string]string{}, + }) + + // When no user-supplied annotations exist, the sentinel still gets + // written but is empty (consistent with explicit "manages nothing"). + if v, ok := svc.Annotations[AnnotationManagedAnnotations]; ok && v != "" { + t.Errorf("sentinel = %q, want empty or absent", v) + } +} + // --- buildVolumes --- func TestBuildVolumes_HTTPOnly(t *testing.T) { diff --git a/internal/controller/service_resolver.go b/internal/controller/service_resolver.go index 4defaef9..c76de2e3 100644 --- a/internal/controller/service_resolver.go +++ b/internal/controller/service_resolver.go @@ -82,6 +82,16 @@ func resolveServiceConfig(gateway *gatewayv1.Gateway, params *gatewayparamsv1alp return out } +// controllerManagedLabelKeys returns the label keys that buildLabels always +// sets on resources. Users cannot override these via ServiceConfig.Labels. +func controllerManagedLabelKeys() map[string]struct{} { + return map[string]struct{}{ + LabelManagedBy: {}, + LabelGatewayName: {}, + LabelGatewayNamespace: {}, + } +} + // mergeWithManaged writes desired keys onto a copy of existing, prunes any // previously-managed keys (per sentinel) that are no longer desired, and // returns the merged map plus a fresh sentinel string listing the keys the From bfda2a188d6200f9e6c3ca64eb8740eb8c336a5e Mon Sep 17 00:00:00 2001 From: Oystein Skogvold Date: Mon, 25 May 2026 17:11:10 +0200 Subject: [PATCH 08/17] test(controller): pin selector invariant and tighten sentinel test --- internal/controller/gateway_controller.go | 4 +++ internal/controller/resources.go | 3 +++ internal/controller/resources_test.go | 30 +++++++++++++++++++---- 3 files changed, 32 insertions(+), 5 deletions(-) diff --git a/internal/controller/gateway_controller.go b/internal/controller/gateway_controller.go index b513399e..2c997662 100644 --- a/internal/controller/gateway_controller.go +++ b/internal/controller/gateway_controller.go @@ -258,6 +258,10 @@ func (r *GatewayReconciler) reconcileResources(ctx context.Context, gateway *gat r.buildServiceAccount(gateway), r.buildClusterRoleBinding(gateway), r.buildDeployment(gateway, effectiveImage, varnishdExtraArgs, logging, infraHash, extraVolumes, extraVolumeMounts, extraInitContainers, containerResources, topologySpread, hasBackendTLS), + // TODO(#70 Task 6): replace this hardcoded default with + // resolveServiceConfig(gateway, params) once params is hoisted into + // scope. Until then, the reconcile path produces the same LoadBalancer + // Service it always did — backwards compatible. r.buildService(gateway, ResolvedServiceConfig{Type: corev1.ServiceTypeLoadBalancer, Annotations: map[string]string{}, Labels: map[string]string{}}), ) // PDB is opt-in. Create one only if the user asked for it. diff --git a/internal/controller/resources.go b/internal/controller/resources.go index dad6a18f..6765356d 100644 --- a/internal/controller/resources.go +++ b/internal/controller/resources.go @@ -569,6 +569,9 @@ func (r *GatewayReconciler) buildService(gateway *gatewayv1.Gateway, resolved Re } // Map Gateway listeners to Service ports, deduplicating by port number. + // Multiple listeners can share the same port (differentiated by hostname), + // but a Service only needs one entry per unique port. Container ports + // equal listener ports — no translation. var ports []corev1.ServicePort seenPorts := make(map[int32]bool) for i := range gateway.Spec.Listeners { diff --git a/internal/controller/resources_test.go b/internal/controller/resources_test.go index 8e7f4a8e..2d6316a3 100644 --- a/internal/controller/resources_test.go +++ b/internal/controller/resources_test.go @@ -396,9 +396,17 @@ func TestBuildService_AppliesLabelsWithSentinel_ProtectsControllerLabels(t *test if svc.Labels[AnnotationManagedLabels] != "team" { t.Errorf("sentinel = %q, want team", svc.Labels[AnnotationManagedLabels]) } + // CRITICAL invariant: user labels MUST NOT leak into the selector. + // If they did, the Service would fail to find any pods. + if _, present := svc.Spec.Selector["team"]; present { + t.Error("user label leaked into Spec.Selector — pod selection will break") + } + if svc.Spec.Selector[LabelManagedBy] != ManagedByValue { + t.Errorf("controller label missing from selector: %v", svc.Spec.Selector) + } } -func TestBuildService_NoUserAnnotations_NoSentinel(t *testing.T) { +func TestBuildService_EmptyUserMaps_WritesEmptySentinel(t *testing.T) { r := testReconcilerSimple() gw := testGateway("my-gw", "default") @@ -408,10 +416,22 @@ func TestBuildService_NoUserAnnotations_NoSentinel(t *testing.T) { Labels: map[string]string{}, }) - // When no user-supplied annotations exist, the sentinel still gets - // written but is empty (consistent with explicit "manages nothing"). - if v, ok := svc.Annotations[AnnotationManagedAnnotations]; ok && v != "" { - t.Errorf("sentinel = %q, want empty or absent", v) + // When no user-supplied annotations/labels exist, the sentinel keys + // are still written with empty values, signalling "this Service was + // touched by this version of the operator, currently managing nothing." + v, ok := svc.Annotations[AnnotationManagedAnnotations] + if !ok { + t.Error("AnnotationManagedAnnotations sentinel missing") + } + if v != "" { + t.Errorf("annotation sentinel = %q, want empty string", v) + } + lv, lok := svc.Labels[AnnotationManagedLabels] + if !lok { + t.Error("AnnotationManagedLabels sentinel missing") + } + if lv != "" { + t.Errorf("label sentinel = %q, want empty string", lv) } } From bce2fb40158adff64e268d2b52c67cd306d0c794 Mon Sep 17 00:00:00 2001 From: Oystein Skogvold Date: Mon, 25 May 2026 17:15:07 +0200 Subject: [PATCH 09/17] feat(controller): extend needsServiceUpdate and update path for Service shape Extend needsServiceUpdate to compare Type, LoadBalancerClass, LoadBalancerSourceRanges, ExternalTrafficPolicy, and managed annotations/labels (via sentinel) in addition to ports. Add stringPtrEqual, stringSliceEqual, and managedMapEqual helpers. Replace the Service update branch in reconcileResource to apply all operator-owned fields when drift is detected, using desiredAnnotationsFromSentinel, desiredLabelsFromSentinel, and buildLabelsForService helpers to preserve cloud-controller-managed state during updates. Co-Authored-By: Claude Sonnet 4.6 --- internal/controller/gateway_controller.go | 181 +++++++++++++++++- .../controller/gateway_controller_test.go | 122 ++++++++++++ 2 files changed, 299 insertions(+), 4 deletions(-) diff --git a/internal/controller/gateway_controller.go b/internal/controller/gateway_controller.go index 2c997662..c0b6c78f 100644 --- a/internal/controller/gateway_controller.go +++ b/internal/controller/gateway_controller.go @@ -383,15 +383,50 @@ func (r *GatewayReconciler) reconcileResource(ctx context.Context, gateway *gate } } - // For Services, update if ports changed (e.g., adding HTTPS listener) + // For Services, re-apply the fields we own when drift is detected. + // Cloud-controller-managed fields (clusterIP, NodePort assignments, + // loadBalancer.ingress) and non-managed annotations/labels are deliberately + // left alone. K8s API server handles Type transitions natively. if desiredSvc, ok := desired.(*corev1.Service); ok { existingSvc := existing.(*corev1.Service) if needsServiceUpdate(existingSvc, desiredSvc) { existingSvc.Spec.Ports = desiredSvc.Spec.Ports + existingSvc.Spec.Type = desiredSvc.Spec.Type + existingSvc.Spec.LoadBalancerClass = desiredSvc.Spec.LoadBalancerClass + existingSvc.Spec.LoadBalancerSourceRanges = desiredSvc.Spec.LoadBalancerSourceRanges + existingSvc.Spec.ExternalTrafficPolicy = desiredSvc.Spec.ExternalTrafficPolicy + + // Re-merge annotations/labels using the operator's sentinels. + // Existing state may contain cloud-controller annotations we + // must preserve; mergeWithManaged handles the bookkeeping. + existingAnnoSentinel := "" + if existingSvc.Annotations != nil { + existingAnnoSentinel = existingSvc.Annotations[AnnotationManagedAnnotations] + } + desiredAnnos := desiredAnnotationsFromSentinel(desiredSvc) + mergedAnnos, newAnnoSentinel := mergeWithManaged(desiredAnnos, existingSvc.Annotations, existingAnnoSentinel, nil) + mergedAnnos[AnnotationManagedAnnotations] = newAnnoSentinel + existingSvc.Annotations = mergedAnnos + + existingLabelSentinel := "" + if existingSvc.Labels != nil { + existingLabelSentinel = existingSvc.Labels[AnnotationManagedLabels] + } + desiredLabels := desiredLabelsFromSentinel(desiredSvc) + mergedLabels, newLabelSentinel := mergeWithManaged(desiredLabels, existingSvc.Labels, existingLabelSentinel, controllerManagedLabelKeys()) + mergedLabels[AnnotationManagedLabels] = newLabelSentinel + // Re-apply controller-managed labels unconditionally (mergeWithManaged + // drops them from desired; we put them back here to defend against + // existing being missing them on a hand-edited Service). + for k, v := range r.buildLabelsForService(desiredSvc) { + mergedLabels[k] = v + } + existingSvc.Labels = mergedLabels + if err := r.Update(ctx, existingSvc); err != nil { return fmt.Errorf("r.Update(%s): %w", desired.GetName(), err) } - r.Logger.Info("updated service ports", + r.Logger.Info("updated service", "name", desired.GetName()) } return nil @@ -464,9 +499,26 @@ func needsDeploymentUpdate(existing, desired *appsv1.Deployment) (bool, string) return false, "" } -// needsServiceUpdate checks if the Service ports need to be updated. -// Compares only the fields we control (name, port, targetPort, protocol). +// needsServiceUpdate checks whether the Service needs to be re-applied. +// Compares the fields the operator owns: ports, type, LB knobs, traffic +// policy, and the managed subset of annotations/labels (per sentinel). +// Cloud-controller-managed fields (clusterIP, NodePort, loadBalancer.ingress) +// and annotations/labels outside the managed sentinel are deliberately +// ignored. func needsServiceUpdate(existing, desired *corev1.Service) bool { + if existing.Spec.Type != desired.Spec.Type { + return true + } + if !stringPtrEqual(existing.Spec.LoadBalancerClass, desired.Spec.LoadBalancerClass) { + return true + } + if !stringSliceEqual(existing.Spec.LoadBalancerSourceRanges, desired.Spec.LoadBalancerSourceRanges) { + return true + } + if existing.Spec.ExternalTrafficPolicy != desired.Spec.ExternalTrafficPolicy { + return true + } + if len(existing.Spec.Ports) != len(desired.Spec.Ports) { return true } @@ -483,9 +535,71 @@ func needsServiceUpdate(existing, desired *corev1.Service) bool { return true } } + + if !managedMapEqual(existing.Annotations, desired.Annotations, AnnotationManagedAnnotations) { + return true + } + if !managedMapEqual(existing.Labels, desired.Labels, AnnotationManagedLabels) { + return true + } + return false } +// stringPtrEqual returns true if a and b point to equal strings, treating +// nil == nil as equal. +func stringPtrEqual(a, b *string) bool { + if a == nil || b == nil { + return a == b + } + return *a == *b +} + +// stringSliceEqual reports whether two string slices have equal length and +// element-wise equal contents (order-sensitive — Kubernetes preserves +// LoadBalancerSourceRanges order). +func stringSliceEqual(a, b []string) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } + } + return true +} + +// managedMapEqual compares only the keys listed in the sentinel annotation +// (on the desired object). External keys not in the sentinel are ignored. +// If existing's sentinel differs from desired's sentinel, that itself is a +// drift signal. +func managedMapEqual(existing, desired map[string]string, sentinelKey string) bool { + desiredSentinel := "" + if desired != nil { + desiredSentinel = desired[sentinelKey] + } + existingSentinel := "" + if existing != nil { + existingSentinel = existing[sentinelKey] + } + if existingSentinel != desiredSentinel { + return false + } + if desiredSentinel == "" { + return true + } + for _, k := range strings.Split(desiredSentinel, ",") { + if k == "" { + continue + } + if existing[k] != desired[k] { + return false + } + } + return true +} + // needsPDBUpdate checks if the PodDisruptionBudget spec needs to be updated. // Compares the two fields the operator owns: MinAvailable/MaxUnavailable and // the selector. Any other fields are left alone. @@ -1728,3 +1842,62 @@ func (r *GatewayReconciler) SetupWithManager(mgr ctrl.Manager) error { func ptr[T any](v T) *T { return &v } + +// desiredAnnotationsFromSentinel reconstructs the user-supplied annotation +// map by reading the sentinel on the desired Service. The Service object +// generated by buildService contains the merged map; we extract just the +// keys this operator manages so update-path merge logic stays symmetric +// with the create path. +func desiredAnnotationsFromSentinel(svc *corev1.Service) map[string]string { + out := map[string]string{} + if svc.Annotations == nil { + return out + } + sentinel := svc.Annotations[AnnotationManagedAnnotations] + if sentinel == "" { + return out + } + for _, k := range strings.Split(sentinel, ",") { + if k == "" { + continue + } + if v, ok := svc.Annotations[k]; ok { + out[k] = v + } + } + return out +} + +// desiredLabelsFromSentinel is the label counterpart to desiredAnnotationsFromSentinel. +func desiredLabelsFromSentinel(svc *corev1.Service) map[string]string { + out := map[string]string{} + if svc.Labels == nil { + return out + } + sentinel := svc.Labels[AnnotationManagedLabels] + if sentinel == "" { + return out + } + for _, k := range strings.Split(sentinel, ",") { + if k == "" { + continue + } + if v, ok := svc.Labels[k]; ok { + out[k] = v + } + } + return out +} + +// buildLabelsForService returns the controller-managed label set for a +// Service. We can't call r.buildLabels(gateway) on the reconcile-time +// existing Service because we don't have the Gateway in scope here — but +// the labels are derived solely from the Service's own metadata. +func (r *GatewayReconciler) buildLabelsForService(svc *corev1.Service) map[string]string { + // The Service's name and namespace match the Gateway's. Mirror buildLabels. + return map[string]string{ + LabelManagedBy: ManagedByValue, + LabelGatewayName: svc.Name, + LabelGatewayNamespace: svc.Namespace, + } +} diff --git a/internal/controller/gateway_controller_test.go b/internal/controller/gateway_controller_test.go index 91e27ff9..f0e5cb7e 100644 --- a/internal/controller/gateway_controller_test.go +++ b/internal/controller/gateway_controller_test.go @@ -1460,6 +1460,126 @@ func TestNeedsServiceUpdate(t *testing.T) { }, expectUpdate: false, }, + { + name: "type changed", + existing: &corev1.Service{ + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeLoadBalancer, + Ports: []corev1.ServicePort{{Name: "http", Port: 80, TargetPort: intstr.FromInt(80), Protocol: corev1.ProtocolTCP}}, + }, + }, + desired: &corev1.Service{ + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeClusterIP, + Ports: []corev1.ServicePort{{Name: "http", Port: 80, TargetPort: intstr.FromInt(80), Protocol: corev1.ProtocolTCP}}, + }, + }, + expectUpdate: true, + }, + { + name: "loadBalancerClass changed", + existing: &corev1.Service{ + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeLoadBalancer, + LoadBalancerClass: ptrString("old"), + Ports: []corev1.ServicePort{{Name: "http", Port: 80, TargetPort: intstr.FromInt(80), Protocol: corev1.ProtocolTCP}}, + }, + }, + desired: &corev1.Service{ + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeLoadBalancer, + LoadBalancerClass: ptrString("new"), + Ports: []corev1.ServicePort{{Name: "http", Port: 80, TargetPort: intstr.FromInt(80), Protocol: corev1.ProtocolTCP}}, + }, + }, + expectUpdate: true, + }, + { + name: "loadBalancerSourceRanges changed", + existing: &corev1.Service{ + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeLoadBalancer, + LoadBalancerSourceRanges: []string{"10.0.0.0/8"}, + Ports: []corev1.ServicePort{{Name: "http", Port: 80, TargetPort: intstr.FromInt(80), Protocol: corev1.ProtocolTCP}}, + }, + }, + desired: &corev1.Service{ + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeLoadBalancer, + LoadBalancerSourceRanges: []string{"10.0.0.0/8", "192.168.0.0/16"}, + Ports: []corev1.ServicePort{{Name: "http", Port: 80, TargetPort: intstr.FromInt(80), Protocol: corev1.ProtocolTCP}}, + }, + }, + expectUpdate: true, + }, + { + name: "externalTrafficPolicy changed", + existing: &corev1.Service{ + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeLoadBalancer, + ExternalTrafficPolicy: corev1.ServiceExternalTrafficPolicyCluster, + Ports: []corev1.ServicePort{{Name: "http", Port: 80, TargetPort: intstr.FromInt(80), Protocol: corev1.ProtocolTCP}}, + }, + }, + desired: &corev1.Service{ + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeLoadBalancer, + ExternalTrafficPolicy: corev1.ServiceExternalTrafficPolicyLocal, + Ports: []corev1.ServicePort{{Name: "http", Port: 80, TargetPort: intstr.FromInt(80), Protocol: corev1.ProtocolTCP}}, + }, + }, + expectUpdate: true, + }, + { + name: "managed annotation added", + existing: &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{AnnotationManagedAnnotations: ""}, + }, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeLoadBalancer, + Ports: []corev1.ServicePort{{Name: "http", Port: 80, TargetPort: intstr.FromInt(80), Protocol: corev1.ProtocolTCP}}, + }, + }, + desired: &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + "metallb.universe.tf/loadBalancerIPs": "10.0.0.1", + AnnotationManagedAnnotations: "metallb.universe.tf/loadBalancerIPs", + }, + }, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeLoadBalancer, + Ports: []corev1.ServicePort{{Name: "http", Port: 80, TargetPort: intstr.FromInt(80), Protocol: corev1.ProtocolTCP}}, + }, + }, + expectUpdate: true, + }, + { + name: "non-managed annotation present on existing does NOT trigger update", + existing: &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + "cloud-controller-added": "yes", + AnnotationManagedAnnotations: "", + }, + }, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeLoadBalancer, + Ports: []corev1.ServicePort{{Name: "http", Port: 80, TargetPort: intstr.FromInt(80), Protocol: corev1.ProtocolTCP}}, + }, + }, + desired: &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{AnnotationManagedAnnotations: ""}, + }, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeLoadBalancer, + Ports: []corev1.ServicePort{{Name: "http", Port: 80, TargetPort: intstr.FromInt(80), Protocol: corev1.ProtocolTCP}}, + }, + }, + expectUpdate: false, + }, } for _, tc := range tests { @@ -3203,4 +3323,6 @@ func (q *fakeQueue) AddRateLimited(item reconcile.Request) {} func (q *fakeQueue) Forget(item reconcile.Request) {} func (q *fakeQueue) NumRequeues(item reconcile.Request) int { return 0 } +func ptrString(s string) *string { return &s } + From 1b562002eaae98cfe5ae8fe50694978c29c52b14 Mon Sep 17 00:00:00 2001 From: Oystein Skogvold Date: Mon, 25 May 2026 17:21:44 +0200 Subject: [PATCH 10/17] fix(controller): re-apply Spec.Selector on Service drift, dedupe ptr helper --- internal/controller/gateway_controller.go | 22 +++++ .../controller/gateway_controller_test.go | 85 +++++++++++++++++-- 2 files changed, 101 insertions(+), 6 deletions(-) diff --git a/internal/controller/gateway_controller.go b/internal/controller/gateway_controller.go index c0b6c78f..b7b43e2a 100644 --- a/internal/controller/gateway_controller.go +++ b/internal/controller/gateway_controller.go @@ -392,6 +392,7 @@ func (r *GatewayReconciler) reconcileResource(ctx context.Context, gateway *gate if needsServiceUpdate(existingSvc, desiredSvc) { existingSvc.Spec.Ports = desiredSvc.Spec.Ports existingSvc.Spec.Type = desiredSvc.Spec.Type + existingSvc.Spec.Selector = desiredSvc.Spec.Selector existingSvc.Spec.LoadBalancerClass = desiredSvc.Spec.LoadBalancerClass existingSvc.Spec.LoadBalancerSourceRanges = desiredSvc.Spec.LoadBalancerSourceRanges existingSvc.Spec.ExternalTrafficPolicy = desiredSvc.Spec.ExternalTrafficPolicy @@ -519,6 +520,13 @@ func needsServiceUpdate(existing, desired *corev1.Service) bool { return true } + // Selector drift would silently break pod selection. The selector is + // derived from buildLabels, so this only matters if the controller's + // label constants change between operator versions — defensive check. + if !stringMapEqual(existing.Spec.Selector, desired.Spec.Selector) { + return true + } + if len(existing.Spec.Ports) != len(desired.Spec.Ports) { return true } @@ -555,6 +563,20 @@ func stringPtrEqual(a, b *string) bool { return *a == *b } +// stringMapEqual reports whether two string maps have identical keys and values. +// Treats nil and empty maps as equal. +func stringMapEqual(a, b map[string]string) bool { + if len(a) != len(b) { + return false + } + for k, v := range a { + if bv, ok := b[k]; !ok || bv != v { + return false + } + } + return true +} + // stringSliceEqual reports whether two string slices have equal length and // element-wise equal contents (order-sensitive — Kubernetes preserves // LoadBalancerSourceRanges order). diff --git a/internal/controller/gateway_controller_test.go b/internal/controller/gateway_controller_test.go index f0e5cb7e..17355f81 100644 --- a/internal/controller/gateway_controller_test.go +++ b/internal/controller/gateway_controller_test.go @@ -1481,14 +1481,14 @@ func TestNeedsServiceUpdate(t *testing.T) { existing: &corev1.Service{ Spec: corev1.ServiceSpec{ Type: corev1.ServiceTypeLoadBalancer, - LoadBalancerClass: ptrString("old"), + LoadBalancerClass: ptr("old"), Ports: []corev1.ServicePort{{Name: "http", Port: 80, TargetPort: intstr.FromInt(80), Protocol: corev1.ProtocolTCP}}, }, }, desired: &corev1.Service{ Spec: corev1.ServiceSpec{ Type: corev1.ServiceTypeLoadBalancer, - LoadBalancerClass: ptrString("new"), + LoadBalancerClass: ptr("new"), Ports: []corev1.ServicePort{{Name: "http", Port: 80, TargetPort: intstr.FromInt(80), Protocol: corev1.ProtocolTCP}}, }, }, @@ -1580,6 +1580,83 @@ func TestNeedsServiceUpdate(t *testing.T) { }, expectUpdate: false, }, + { + name: "loadBalancerClass added (nil -> non-nil)", + existing: &corev1.Service{ + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeLoadBalancer, + Ports: []corev1.ServicePort{{Name: "http", Port: 80, TargetPort: intstr.FromInt(80), Protocol: corev1.ProtocolTCP}}, + }, + }, + desired: &corev1.Service{ + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeLoadBalancer, + LoadBalancerClass: ptr("new"), + Ports: []corev1.ServicePort{{Name: "http", Port: 80, TargetPort: intstr.FromInt(80), Protocol: corev1.ProtocolTCP}}, + }, + }, + expectUpdate: true, + }, + { + name: "loadBalancerClass removed (non-nil -> nil)", + existing: &corev1.Service{ + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeLoadBalancer, + LoadBalancerClass: ptr("old"), + Ports: []corev1.ServicePort{{Name: "http", Port: 80, TargetPort: intstr.FromInt(80), Protocol: corev1.ProtocolTCP}}, + }, + }, + desired: &corev1.Service{ + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeLoadBalancer, + Ports: []corev1.ServicePort{{Name: "http", Port: 80, TargetPort: intstr.FromInt(80), Protocol: corev1.ProtocolTCP}}, + }, + }, + expectUpdate: true, + }, + { + name: "managed annotation removed", + existing: &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + "metallb.universe.tf/loadBalancerIPs": "10.0.0.1", + AnnotationManagedAnnotations: "metallb.universe.tf/loadBalancerIPs", + }, + }, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeLoadBalancer, + Ports: []corev1.ServicePort{{Name: "http", Port: 80, TargetPort: intstr.FromInt(80), Protocol: corev1.ProtocolTCP}}, + }, + }, + desired: &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{AnnotationManagedAnnotations: ""}, + }, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeLoadBalancer, + Ports: []corev1.ServicePort{{Name: "http", Port: 80, TargetPort: intstr.FromInt(80), Protocol: corev1.ProtocolTCP}}, + }, + }, + expectUpdate: true, + }, + { + name: "selector changed", + existing: &corev1.Service{ + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeLoadBalancer, + Selector: map[string]string{LabelGatewayName: "old-gw"}, + Ports: []corev1.ServicePort{{Name: "http", Port: 80, TargetPort: intstr.FromInt(80), Protocol: corev1.ProtocolTCP}}, + }, + }, + desired: &corev1.Service{ + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeLoadBalancer, + Selector: map[string]string{LabelGatewayName: "new-gw"}, + Ports: []corev1.ServicePort{{Name: "http", Port: 80, TargetPort: intstr.FromInt(80), Protocol: corev1.ProtocolTCP}}, + }, + }, + expectUpdate: true, + }, } for _, tc := range tests { @@ -3322,7 +3399,3 @@ func (q *fakeQueue) AddAfter(item reconcile.Request, duration time.Duration) {} func (q *fakeQueue) AddRateLimited(item reconcile.Request) {} func (q *fakeQueue) Forget(item reconcile.Request) {} func (q *fakeQueue) NumRequeues(item reconcile.Request) int { return 0 } - -func ptrString(s string) *string { return &s } - - From bfbd3dadaf154d5f79f8c9185c896744e5bbc52e Mon Sep 17 00:00:00 2001 From: Oystein Skogvold Date: Mon, 25 May 2026 17:23:58 +0200 Subject: [PATCH 11/17] feat(controller): wire resolveServiceConfig into reconcileResources Hoist params out of the short-variable-declaration if-block so it is in scope at the buildService call site, then replace the hardcoded LoadBalancer default with resolveServiceConfig(gateway, params). The resolver is nil-safe, so gateways without GatewayClassParameters continue to produce an identical LoadBalancer Service. Add TestInfraHash_ServiceConfigDoesNotAffectHash to pin the design invariant that Service shape (Type, annotations, labels, LB knobs) is deliberately excluded from InfrastructureConfig and must never trigger pod restarts. Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/controller/gateway_controller.go | 9 ++---- internal/controller/infra_hash_test.go | 35 +++++++++++++++++++++++ 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/internal/controller/gateway_controller.go b/internal/controller/gateway_controller.go index b7b43e2a..0bfdcb99 100644 --- a/internal/controller/gateway_controller.go +++ b/internal/controller/gateway_controller.go @@ -193,7 +193,8 @@ func (r *GatewayReconciler) reconcileResources(ctx context.Context, gateway *gat var pdbSpec *gatewayparamsv1alpha1.PodDisruptionBudget var topologySpread []corev1.TopologySpreadConstraint var imageOverride string - if params := r.getGatewayClassParameters(ctx, gateway); params != nil { + params := r.getGatewayClassParameters(ctx, gateway) + if params != nil { imageOverride = params.Spec.Image varnishdExtraArgs = params.Spec.VarnishdExtraArgs logging = params.Spec.Logging @@ -258,11 +259,7 @@ func (r *GatewayReconciler) reconcileResources(ctx context.Context, gateway *gat r.buildServiceAccount(gateway), r.buildClusterRoleBinding(gateway), r.buildDeployment(gateway, effectiveImage, varnishdExtraArgs, logging, infraHash, extraVolumes, extraVolumeMounts, extraInitContainers, containerResources, topologySpread, hasBackendTLS), - // TODO(#70 Task 6): replace this hardcoded default with - // resolveServiceConfig(gateway, params) once params is hoisted into - // scope. Until then, the reconcile path produces the same LoadBalancer - // Service it always did — backwards compatible. - r.buildService(gateway, ResolvedServiceConfig{Type: corev1.ServiceTypeLoadBalancer, Annotations: map[string]string{}, Labels: map[string]string{}}), + r.buildService(gateway, resolveServiceConfig(gateway, params)), ) // PDB is opt-in. Create one only if the user asked for it. if pdbSpec != nil { diff --git a/internal/controller/infra_hash_test.go b/internal/controller/infra_hash_test.go index 588e28ad..d17ca12a 100644 --- a/internal/controller/infra_hash_test.go +++ b/internal/controller/infra_hash_test.go @@ -1,6 +1,7 @@ package controller import ( + "reflect" "testing" corev1 "k8s.io/api/core/v1" @@ -248,3 +249,37 @@ func TestInfrastructureConfig_SecretOrderDoesNotAffectHash(t *testing.T) { t.Errorf("ComputeHash() affected by secret order: %s != %s", hash1, hash2) } } + +// TestInfraHash_ServiceConfigDoesNotAffectHash is a regression check: changing +// Service shape (Type, annotations, labels, LB knobs) must NOT cause a pod +// restart, so the infra hash must be invariant under those changes. This +// matches the design decision documented in +// docs/superpowers/specs/2026-05-25-configurable-service-shape-design.md. +func TestInfraHash_ServiceConfigDoesNotAffectHash(t *testing.T) { + cfg := InfrastructureConfig{ + GatewayImage: "ghcr.io/varnish/gateway-chaperone:v0.21.4", + VarnishdExtraArgs: []string{"-p", "thread_pools=4"}, + ListenerSpecs: "http-80", + } + + hash1 := cfg.ComputeHash() + hash2 := cfg.ComputeHash() + if hash1 != hash2 { + t.Fatalf("hash not deterministic: %q vs %q", hash1, hash2) + } + + // Verify the InfrastructureConfig struct does not have any Service-shape + // field. This test will fail to compile if someone adds one, prompting + // them to revisit the design. + // + // The check is intentionally minimal — we don't want to assert the full + // field list, which would couple this test tightly to unrelated fields. + // The presence/absence assertion below covers the design invariant. + configType := reflect.TypeOf(cfg) + forbidden := []string{"Service", "ServiceType", "ServiceAnnotations", "ServiceLabels"} + for _, f := range forbidden { + if _, ok := configType.FieldByName(f); ok { + t.Errorf("InfrastructureConfig must not contain field %q — Service shape changes must not trigger pod restarts", f) + } + } +} From 355159699753af933bc5235b9dfd3509b426e45b Mon Sep 17 00:00:00 2001 From: Oystein Skogvold Date: Mon, 25 May 2026 17:26:52 +0200 Subject: [PATCH 12/17] test(controller): broaden infra-hash regression test forbidden-field list --- internal/controller/infra_hash_test.go | 39 +++++++++++++------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/internal/controller/infra_hash_test.go b/internal/controller/infra_hash_test.go index d17ca12a..3fd85f5d 100644 --- a/internal/controller/infra_hash_test.go +++ b/internal/controller/infra_hash_test.go @@ -252,31 +252,30 @@ func TestInfrastructureConfig_SecretOrderDoesNotAffectHash(t *testing.T) { // TestInfraHash_ServiceConfigDoesNotAffectHash is a regression check: changing // Service shape (Type, annotations, labels, LB knobs) must NOT cause a pod -// restart, so the infra hash must be invariant under those changes. This +// restart, so InfrastructureConfig must not gain Service-related fields. This // matches the design decision documented in // docs/superpowers/specs/2026-05-25-configurable-service-shape-design.md. +// +// The check is structural (via reflection) because behavioral verification +// would require enumerating every possible Service-shape field; structural +// absence of the fields is a stronger, simpler guard. func TestInfraHash_ServiceConfigDoesNotAffectHash(t *testing.T) { - cfg := InfrastructureConfig{ - GatewayImage: "ghcr.io/varnish/gateway-chaperone:v0.21.4", - VarnishdExtraArgs: []string{"-p", "thread_pools=4"}, - ListenerSpecs: "http-80", - } + cfg := InfrastructureConfig{} + configType := reflect.TypeOf(cfg) - hash1 := cfg.ComputeHash() - hash2 := cfg.ComputeHash() - if hash1 != hash2 { - t.Fatalf("hash not deterministic: %q vs %q", hash1, hash2) + // Forbid both generic Service-shape names and the concrete field names + // from ResolvedServiceConfig — these are the obvious places someone might + // accidentally copy a field across. + forbidden := []string{ + "Service", + "ServiceType", + "ServiceConfig", + "ServiceAnnotations", + "ServiceLabels", + "LoadBalancerClass", + "LoadBalancerSourceRanges", + "ExternalTrafficPolicy", } - - // Verify the InfrastructureConfig struct does not have any Service-shape - // field. This test will fail to compile if someone adds one, prompting - // them to revisit the design. - // - // The check is intentionally minimal — we don't want to assert the full - // field list, which would couple this test tightly to unrelated fields. - // The presence/absence assertion below covers the design invariant. - configType := reflect.TypeOf(cfg) - forbidden := []string{"Service", "ServiceType", "ServiceAnnotations", "ServiceLabels"} for _, f := range forbidden { if _, ok := configType.FieldByName(f); ok { t.Errorf("InfrastructureConfig must not contain field %q — Service shape changes must not trigger pod restarts", f) From fa4f7dac1d45fa5c63bea131f003f7ad84168d4e Mon Sep 17 00:00:00 2001 From: Oystein Skogvold Date: Mon, 25 May 2026 17:30:29 +0200 Subject: [PATCH 13/17] test(controller): envtest scenarios for configurable Service shape Co-Authored-By: Claude Sonnet 4.6 --- .../gateway_controller_envtest_test.go | 229 ++++++++++++++++++ 1 file changed, 229 insertions(+) diff --git a/internal/controller/gateway_controller_envtest_test.go b/internal/controller/gateway_controller_envtest_test.go index 2c54e96d..c75a4898 100644 --- a/internal/controller/gateway_controller_envtest_test.go +++ b/internal/controller/gateway_controller_envtest_test.go @@ -16,6 +16,8 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" gatewayv1 "sigs.k8s.io/gateway-api/apis/v1" + + gatewayparamsv1alpha1 "github.com/varnish/gateway/api/v1alpha1" ) var testEnv *EnvtestEnvironment @@ -314,3 +316,230 @@ func TestHTTPSListener_MissingSecret_ProgrammedFalse(t *testing.T) { _ = testEnv.Client.DeleteAllOf(ctx, &corev1.Secret{}, client.InNamespace("default")) _ = testEnv.Client.DeleteAllOf(ctx, &corev1.ServiceAccount{}, client.InNamespace("default")) } + +// TestServiceShape_DefaultsToLoadBalancer_Envtest verifies that a Gateway +// with no GatewayClassParameters.spec.service config still gets a +// Type: LoadBalancer Service (backwards compatibility). +func TestServiceShape_DefaultsToLoadBalancer_Envtest(t *testing.T) { + ctx := context.Background() + + gatewayClass := &gatewayv1.GatewayClass{ + ObjectMeta: metav1.ObjectMeta{Name: "varnish-svc-default"}, + Spec: gatewayv1.GatewayClassSpec{ + ControllerName: gatewayv1.GatewayController(ControllerName), + }, + } + if err := testEnv.Client.Create(ctx, gatewayClass); err != nil { + t.Fatalf("create gatewayclass: %v", err) + } + defer func() { _ = testEnv.Client.Delete(ctx, gatewayClass) }() + + gw := &gatewayv1.Gateway{ + ObjectMeta: metav1.ObjectMeta{Name: "svc-default-gw", Namespace: "default"}, + Spec: gatewayv1.GatewaySpec{ + GatewayClassName: "varnish-svc-default", + Listeners: []gatewayv1.Listener{{Name: "http", Port: 80, Protocol: gatewayv1.HTTPProtocolType}}, + }, + } + if err := testEnv.Client.Create(ctx, gw); err != nil { + t.Fatalf("create gateway: %v", err) + } + defer func() { _ = testEnv.Client.Delete(ctx, gw) }() + + r := NewEnvtestGatewayReconciler(testEnv) + if _, err := r.Reconcile(ctx, ctrl.Request{NamespacedName: types.NamespacedName{Name: gw.Name, Namespace: gw.Namespace}}); err != nil { + t.Fatalf("reconcile: %v", err) + } + time.Sleep(200 * time.Millisecond) + + var svc corev1.Service + if err := testEnv.Client.Get(ctx, types.NamespacedName{Name: gw.Name, Namespace: gw.Namespace}, &svc); err != nil { + t.Fatalf("get service: %v", err) + } + if svc.Spec.Type != corev1.ServiceTypeLoadBalancer { + t.Errorf("Type = %v, want LoadBalancer", svc.Spec.Type) + } +} + +// TestServiceShape_TypeTransition_Envtest verifies LoadBalancer -> ClusterIP +// transitions persist correctly and don't lose other Service fields. +func TestServiceShape_TypeTransition_Envtest(t *testing.T) { + ctx := context.Background() + + gatewayClass := &gatewayv1.GatewayClass{ + ObjectMeta: metav1.ObjectMeta{Name: "varnish-svc-transition"}, + Spec: gatewayv1.GatewayClassSpec{ + ControllerName: gatewayv1.GatewayController(ControllerName), + ParametersRef: &gatewayv1.ParametersReference{ + Group: gatewayv1.Group(gatewayparamsv1alpha1.GroupName), + Kind: "GatewayClassParameters", + Name: "svc-transition-params", + }, + }, + } + if err := testEnv.Client.Create(ctx, gatewayClass); err != nil { + t.Fatalf("create gatewayclass: %v", err) + } + defer func() { _ = testEnv.Client.Delete(ctx, gatewayClass) }() + + params := &gatewayparamsv1alpha1.GatewayClassParameters{ + ObjectMeta: metav1.ObjectMeta{Name: "svc-transition-params"}, + Spec: gatewayparamsv1alpha1.GatewayClassParametersSpec{ + Service: &gatewayparamsv1alpha1.ServiceConfig{Type: ptr(corev1.ServiceTypeLoadBalancer)}, + }, + } + if err := testEnv.Client.Create(ctx, params); err != nil { + t.Fatalf("create params: %v", err) + } + defer func() { _ = testEnv.Client.Delete(ctx, params) }() + + gw := &gatewayv1.Gateway{ + ObjectMeta: metav1.ObjectMeta{Name: "svc-transition-gw", Namespace: "default"}, + Spec: gatewayv1.GatewaySpec{ + GatewayClassName: "varnish-svc-transition", + Listeners: []gatewayv1.Listener{{Name: "http", Port: 80, Protocol: gatewayv1.HTTPProtocolType}}, + }, + } + if err := testEnv.Client.Create(ctx, gw); err != nil { + t.Fatalf("create gateway: %v", err) + } + defer func() { _ = testEnv.Client.Delete(ctx, gw) }() + + r := NewEnvtestGatewayReconciler(testEnv) + req := ctrl.Request{NamespacedName: types.NamespacedName{Name: gw.Name, Namespace: gw.Namespace}} + + if _, err := r.Reconcile(ctx, req); err != nil { + t.Fatalf("first reconcile: %v", err) + } + time.Sleep(200 * time.Millisecond) + + var svc corev1.Service + if err := testEnv.Client.Get(ctx, types.NamespacedName{Name: gw.Name, Namespace: gw.Namespace}, &svc); err != nil { + t.Fatalf("get service: %v", err) + } + if svc.Spec.Type != corev1.ServiceTypeLoadBalancer { + t.Fatalf("initial Type = %v, want LoadBalancer", svc.Spec.Type) + } + originalPorts := append([]corev1.ServicePort{}, svc.Spec.Ports...) + + // Flip to ClusterIP. + var p gatewayparamsv1alpha1.GatewayClassParameters + if err := testEnv.Client.Get(ctx, types.NamespacedName{Name: "svc-transition-params"}, &p); err != nil { + t.Fatalf("get params: %v", err) + } + p.Spec.Service.Type = ptr(corev1.ServiceTypeClusterIP) + if err := testEnv.Client.Update(ctx, &p); err != nil { + t.Fatalf("update params: %v", err) + } + + if _, err := r.Reconcile(ctx, req); err != nil { + t.Fatalf("second reconcile: %v", err) + } + time.Sleep(200 * time.Millisecond) + + if err := testEnv.Client.Get(ctx, types.NamespacedName{Name: gw.Name, Namespace: gw.Namespace}, &svc); err != nil { + t.Fatalf("get service after transition: %v", err) + } + if svc.Spec.Type != corev1.ServiceTypeClusterIP { + t.Errorf("transitioned Type = %v, want ClusterIP", svc.Spec.Type) + } + if len(svc.Spec.Ports) != len(originalPorts) { + t.Errorf("ports lost during transition: was %d, now %d", len(originalPorts), len(svc.Spec.Ports)) + } +} + +// TestServiceShape_CloudControllerAnnotationPreserved_Envtest verifies that +// annotations added directly to the Service (simulating a cloud controller) +// are not pruned by subsequent reconciles. +func TestServiceShape_CloudControllerAnnotationPreserved_Envtest(t *testing.T) { + ctx := context.Background() + + gatewayClass := &gatewayv1.GatewayClass{ + ObjectMeta: metav1.ObjectMeta{Name: "varnish-svc-cloud"}, + Spec: gatewayv1.GatewayClassSpec{ + ControllerName: gatewayv1.GatewayController(ControllerName), + ParametersRef: &gatewayv1.ParametersReference{ + Group: gatewayv1.Group(gatewayparamsv1alpha1.GroupName), + Kind: "GatewayClassParameters", + Name: "svc-cloud-params", + }, + }, + } + if err := testEnv.Client.Create(ctx, gatewayClass); err != nil { + t.Fatalf("create gatewayclass: %v", err) + } + defer func() { _ = testEnv.Client.Delete(ctx, gatewayClass) }() + + params := &gatewayparamsv1alpha1.GatewayClassParameters{ + ObjectMeta: metav1.ObjectMeta{Name: "svc-cloud-params"}, + Spec: gatewayparamsv1alpha1.GatewayClassParametersSpec{ + Service: &gatewayparamsv1alpha1.ServiceConfig{ + Type: ptr(corev1.ServiceTypeLoadBalancer), + Annotations: map[string]string{"operator-managed": "v1"}, + }, + }, + } + if err := testEnv.Client.Create(ctx, params); err != nil { + t.Fatalf("create params: %v", err) + } + defer func() { _ = testEnv.Client.Delete(ctx, params) }() + + gw := &gatewayv1.Gateway{ + ObjectMeta: metav1.ObjectMeta{Name: "svc-cloud-gw", Namespace: "default"}, + Spec: gatewayv1.GatewaySpec{ + GatewayClassName: "varnish-svc-cloud", + Listeners: []gatewayv1.Listener{{Name: "http", Port: 80, Protocol: gatewayv1.HTTPProtocolType}}, + }, + } + if err := testEnv.Client.Create(ctx, gw); err != nil { + t.Fatalf("create gateway: %v", err) + } + defer func() { _ = testEnv.Client.Delete(ctx, gw) }() + + r := NewEnvtestGatewayReconciler(testEnv) + req := ctrl.Request{NamespacedName: types.NamespacedName{Name: gw.Name, Namespace: gw.Namespace}} + + if _, err := r.Reconcile(ctx, req); err != nil { + t.Fatalf("first reconcile: %v", err) + } + time.Sleep(200 * time.Millisecond) + + // Simulate cloud controller adding an annotation directly. + var svc corev1.Service + if err := testEnv.Client.Get(ctx, types.NamespacedName{Name: gw.Name, Namespace: gw.Namespace}, &svc); err != nil { + t.Fatalf("get service: %v", err) + } + if svc.Annotations == nil { + svc.Annotations = map[string]string{} + } + svc.Annotations["cloud.example.com/lb-id"] = "lb-12345" + if err := testEnv.Client.Update(ctx, &svc); err != nil { + t.Fatalf("update svc with cloud annotation: %v", err) + } + + // Trigger another reconcile (e.g. by touching the params). + var p gatewayparamsv1alpha1.GatewayClassParameters + if err := testEnv.Client.Get(ctx, types.NamespacedName{Name: "svc-cloud-params"}, &p); err != nil { + t.Fatalf("get params: %v", err) + } + p.Spec.Service.Annotations["operator-managed"] = "v2" // force drift + if err := testEnv.Client.Update(ctx, &p); err != nil { + t.Fatalf("update params: %v", err) + } + + if _, err := r.Reconcile(ctx, req); err != nil { + t.Fatalf("second reconcile: %v", err) + } + time.Sleep(200 * time.Millisecond) + + if err := testEnv.Client.Get(ctx, types.NamespacedName{Name: gw.Name, Namespace: gw.Namespace}, &svc); err != nil { + t.Fatalf("get service after second reconcile: %v", err) + } + + if svc.Annotations["cloud.example.com/lb-id"] != "lb-12345" { + t.Errorf("cloud-controller annotation pruned: %v", svc.Annotations) + } + if svc.Annotations["operator-managed"] != "v2" { + t.Errorf("operator annotation not updated: %v", svc.Annotations) + } +} From 36ba3d35df086ed68a3f46ec4604c847a7a9138a Mon Sep 17 00:00:00 2001 From: Oystein Skogvold Date: Mon, 25 May 2026 17:35:40 +0200 Subject: [PATCH 14/17] test(controller): clean up derived resources in Service-shape envtests --- .../gateway_controller_envtest_test.go | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/internal/controller/gateway_controller_envtest_test.go b/internal/controller/gateway_controller_envtest_test.go index c75a4898..6870a99e 100644 --- a/internal/controller/gateway_controller_envtest_test.go +++ b/internal/controller/gateway_controller_envtest_test.go @@ -359,6 +359,15 @@ func TestServiceShape_DefaultsToLoadBalancer_Envtest(t *testing.T) { if svc.Spec.Type != corev1.ServiceTypeLoadBalancer { t.Errorf("Type = %v, want LoadBalancer", svc.Spec.Type) } + + // Clean up reconciler-created resources in default namespace so they + // don't bleed into subsequent tests in the suite. Matches the pattern + // in TestHTTPSListener_MissingSecret_ProgrammedFalse. + _ = testEnv.Client.DeleteAllOf(ctx, &appsv1.Deployment{}, client.InNamespace("default")) + _ = testEnv.Client.DeleteAllOf(ctx, &corev1.Service{}, client.InNamespace("default")) + _ = testEnv.Client.DeleteAllOf(ctx, &corev1.ConfigMap{}, client.InNamespace("default")) + _ = testEnv.Client.DeleteAllOf(ctx, &corev1.Secret{}, client.InNamespace("default")) + _ = testEnv.Client.DeleteAllOf(ctx, &corev1.ServiceAccount{}, client.InNamespace("default")) } // TestServiceShape_TypeTransition_Envtest verifies LoadBalancer -> ClusterIP @@ -446,6 +455,12 @@ func TestServiceShape_TypeTransition_Envtest(t *testing.T) { if len(svc.Spec.Ports) != len(originalPorts) { t.Errorf("ports lost during transition: was %d, now %d", len(originalPorts), len(svc.Spec.Ports)) } + + _ = testEnv.Client.DeleteAllOf(ctx, &appsv1.Deployment{}, client.InNamespace("default")) + _ = testEnv.Client.DeleteAllOf(ctx, &corev1.Service{}, client.InNamespace("default")) + _ = testEnv.Client.DeleteAllOf(ctx, &corev1.ConfigMap{}, client.InNamespace("default")) + _ = testEnv.Client.DeleteAllOf(ctx, &corev1.Secret{}, client.InNamespace("default")) + _ = testEnv.Client.DeleteAllOf(ctx, &corev1.ServiceAccount{}, client.InNamespace("default")) } // TestServiceShape_CloudControllerAnnotationPreserved_Envtest verifies that @@ -542,4 +557,10 @@ func TestServiceShape_CloudControllerAnnotationPreserved_Envtest(t *testing.T) { if svc.Annotations["operator-managed"] != "v2" { t.Errorf("operator annotation not updated: %v", svc.Annotations) } + + _ = testEnv.Client.DeleteAllOf(ctx, &appsv1.Deployment{}, client.InNamespace("default")) + _ = testEnv.Client.DeleteAllOf(ctx, &corev1.Service{}, client.InNamespace("default")) + _ = testEnv.Client.DeleteAllOf(ctx, &corev1.ConfigMap{}, client.InNamespace("default")) + _ = testEnv.Client.DeleteAllOf(ctx, &corev1.Secret{}, client.InNamespace("default")) + _ = testEnv.Client.DeleteAllOf(ctx, &corev1.ServiceAccount{}, client.InNamespace("default")) } From ebf2f10e25842e8582ea9b98d5f03da3ffd672e1 Mon Sep 17 00:00:00 2001 From: Oystein Skogvold Date: Mon, 25 May 2026 17:40:19 +0200 Subject: [PATCH 15/17] docs: configurable Service shape (#70) --- CHANGELOG.md | 17 +++++++ CLAUDE.md | 1 + docs/reference/gatewayclassparameters.md | 57 ++++++++++++++++++++++++ 3 files changed, 75 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 14a2a191..5b345690 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,23 @@ All notable changes to Varnish Gateway are documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Added + +- **GatewayClassParameters: configurable data-plane Service shape (#70).** + New `spec.service` field exposes Service `type`, `annotations`, `labels`, + `loadBalancerClass`, `loadBalancerSourceRanges`, and + `externalTrafficPolicy`. Defaults preserve the existing `Type: LoadBalancer` + behavior, so existing deployments need no changes. Per-Gateway overlay + for labels and annotations via the Gateway API v1.1+ + `Gateway.spec.infrastructure` field. Cross-field validation via CEL at + admission time. Annotations added by cloud controllers (e.g., AWS LB + Controller, MetalLB) are preserved across reconciles via a managed-keys + sentinel on the Service. Unblocks gateway-behind-gateway, bare-metal + MetalLB, and internal-cache deployment patterns. Service shape changes + do not trigger pod restarts. + ## [v0.21.4 - 2026-05-24] ### Security diff --git a/CLAUDE.md b/CLAUDE.md index 6b321680..adc80604 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -158,6 +158,7 @@ The Gateway and HTTPRoute controllers have clearly defined responsibilities: - Fetch and merge user VCL from GatewayClassParameters - Create and update ConfigMap with `main.vcl` - Manage Deployment, Service, RBAC resources + - Resolve Service shape (Type, annotations, labels, LB class, source ranges, traffic policy) from `GatewayClassParameters.spec.service`, overlaid with `Gateway.spec.infrastructure.{labels,annotations}` - Compute infrastructure hash for pod restart detection - Watches: - Gateway resources (primary) diff --git a/docs/reference/gatewayclassparameters.md b/docs/reference/gatewayclassparameters.md index d142b20b..1be889f1 100644 --- a/docs/reference/gatewayclassparameters.md +++ b/docs/reference/gatewayclassparameters.md @@ -64,6 +64,63 @@ Additional command-line arguments passed to varnishd. Each array element is a se **Note:** The ghost VMOD uses Rust regex which benefits from increased stack size (160k vs default 80k), especially in debug builds. This is safe and adds minimal memory overhead (~16MB for typical thread pool configurations). +### spec.service + +Configures the data-plane Service. When omitted, the operator preserves the historical default: `Type: LoadBalancer` with no extra annotations or labels. Service-shape changes do **not** trigger pod restarts. + +| Field | Type | Description | +| -------------------------- | --------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `type` | `string` | One of `ClusterIP`, `NodePort`, `LoadBalancer`. Defaults to `LoadBalancer`. Headless services (`clusterIP: None`) and `ExternalName` are intentionally unsupported. | +| `annotations` | `map[string]string` | Annotations applied to the Service. Tracked via the `varnish.io/managed-annotations` sentinel; cloud-controller-added annotations are preserved on update. | +| `labels` | `map[string]string` | Labels applied to the Service. Controller-managed labels (`app.kubernetes.io/managed-by`, `gateway.networking.k8s.io/gateway-name`, `gateway.networking.k8s.io/gateway-namespace`) cannot be overridden. | +| `loadBalancerClass` | `string` | Selects a specific LB implementation. Only valid when `type: LoadBalancer`. | +| `loadBalancerSourceRanges` | `[]string` | CIDR allowlist for the LoadBalancer. Only valid when `type: LoadBalancer`. CIDR syntax is validated by Kubernetes when the Service is reconciled. | +| `externalTrafficPolicy` | `Cluster` \| `Local` | Source-IP preservation. Only valid when `type` is `LoadBalancer` or `NodePort`. | + +**Layering with `Gateway.spec.infrastructure`:** the Gateway API v1.1+ `Gateway.spec.infrastructure.{labels,annotations}` field overlays on top of the GatewayClass-level config — per-Gateway keys win on collisions. The other four fields (Type, LoadBalancerClass, LoadBalancerSourceRanges, ExternalTrafficPolicy) are GatewayClass-level only. + +**Admission-time validation:** + +- `loadBalancerClass` requires `type: LoadBalancer` +- `loadBalancerSourceRanges` requires `type: LoadBalancer` +- `externalTrafficPolicy` requires `type` to be `LoadBalancer` or `NodePort` + +**First-time opt-in behavior:** Services that existed before this feature was introduced get the operator's managed-keys sentinel established on the first reconcile that materializes a non-empty `annotations` or `labels` in `spec.service` (or in `Gateway.spec.infrastructure`). Pre-existing annotations are treated as not-managed-by-the-operator — they will not be pruned even if a key with the same name later appears in spec and is then removed. To bring an existing annotation under operator management, delete it from the Service manually and re-add it via `spec.service.annotations`. + +#### Example: ClusterIP behind another L7 (gateway-behind-gateway) + +```yaml +apiVersion: gateway.varnish-software.com/v1alpha1 +kind: GatewayClassParameters +metadata: + name: internal-cache +spec: + service: + type: ClusterIP +``` + +#### Example: MetalLB annotation-driven LoadBalancer on bare metal + +```yaml +spec: + service: + type: LoadBalancer + annotations: + metallb.universe.tf/loadBalancerIPs: "10.0.0.42" + metallb.universe.tf/address-pool: production +``` + +#### Example: Preserve source IP via Local traffic policy + +```yaml +spec: + service: + type: LoadBalancer + externalTrafficPolicy: Local + loadBalancerSourceRanges: + - 198.51.100.0/24 +``` + ## Environment Variables Chaperone reads these environment variables. The operator sets most of them automatically when it builds the gateway pod, so the "Chaperone default" column documents what chaperone falls back to when run standalone — in operator-managed pods the operator-set value is what you'll see in practice. From b6e6709c3b2cfeef6bc1f474fa620cda5a52d962 Mon Sep 17 00:00:00 2001 From: Oystein Skogvold Date: Mon, 25 May 2026 17:45:36 +0200 Subject: [PATCH 16/17] fix(controller): treat unset ExternalTrafficPolicy as Cluster to avoid reconcile loop --- internal/controller/gateway_controller.go | 18 ++++++++- .../controller/gateway_controller_test.go | 38 +++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/internal/controller/gateway_controller.go b/internal/controller/gateway_controller.go index 0bfdcb99..d0dd193b 100644 --- a/internal/controller/gateway_controller.go +++ b/internal/controller/gateway_controller.go @@ -513,7 +513,12 @@ func needsServiceUpdate(existing, desired *corev1.Service) bool { if !stringSliceEqual(existing.Spec.LoadBalancerSourceRanges, desired.Spec.LoadBalancerSourceRanges) { return true } - if existing.Spec.ExternalTrafficPolicy != desired.Spec.ExternalTrafficPolicy { + // K8s API server defaults ExternalTrafficPolicy to "Cluster" server-side + // when type is LoadBalancer/NodePort. Treat "" and "Cluster" as equivalent + // so an unset desired value doesn't reconcile-loop against the API-defaulted + // existing value. + if effectiveExternalTrafficPolicy(existing.Spec.ExternalTrafficPolicy) != + effectiveExternalTrafficPolicy(desired.Spec.ExternalTrafficPolicy) { return true } @@ -574,6 +579,17 @@ func stringMapEqual(a, b map[string]string) bool { return true } +// effectiveExternalTrafficPolicy resolves the zero string to "Cluster", +// mirroring the K8s API server's server-side default. Used by +// needsServiceUpdate to avoid spurious reconciles when our desired Service +// leaves the field unset but the API server has filled it in. +func effectiveExternalTrafficPolicy(p corev1.ServiceExternalTrafficPolicy) corev1.ServiceExternalTrafficPolicy { + if p == "" { + return corev1.ServiceExternalTrafficPolicyCluster + } + return p +} + // stringSliceEqual reports whether two string slices have equal length and // element-wise equal contents (order-sensitive — Kubernetes preserves // LoadBalancerSourceRanges order). diff --git a/internal/controller/gateway_controller_test.go b/internal/controller/gateway_controller_test.go index 17355f81..0206932c 100644 --- a/internal/controller/gateway_controller_test.go +++ b/internal/controller/gateway_controller_test.go @@ -143,6 +143,26 @@ func TestBuildLabels(t *testing.T) { } } +// TestControllerManagedLabelKeys_MatchesBuildLabels pins the invariant that +// every key emitted by buildLabels is listed in controllerManagedLabelKeys. +// If buildLabels gains a key without the set being updated, users could +// override controller-managed labels via ServiceConfig.Labels. +func TestControllerManagedLabelKeys_MatchesBuildLabels(t *testing.T) { + r := &GatewayReconciler{Config: Config{}} + gw := &gatewayv1.Gateway{ + ObjectMeta: metav1.ObjectMeta{Name: "test", Namespace: "default"}, + } + + emitted := r.buildLabels(gw) + managed := controllerManagedLabelKeys() + + for k := range emitted { + if _, protected := managed[k]; !protected { + t.Errorf("buildLabels emits %q but controllerManagedLabelKeys does not list it — users could override it via ServiceConfig.Labels", k) + } + } +} + func TestBuildDeployment(t *testing.T) { r := &GatewayReconciler{ Config: Config{ @@ -1639,6 +1659,24 @@ func TestNeedsServiceUpdate(t *testing.T) { }, expectUpdate: true, }, + { + name: "API-defaulted ExternalTrafficPolicy=Cluster vs desired unset does NOT trigger update", + existing: &corev1.Service{ + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeLoadBalancer, + ExternalTrafficPolicy: corev1.ServiceExternalTrafficPolicyCluster, // API-defaulted + Ports: []corev1.ServicePort{{Name: "http", Port: 80, TargetPort: intstr.FromInt(80), Protocol: corev1.ProtocolTCP}}, + }, + }, + desired: &corev1.Service{ + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeLoadBalancer, + // ExternalTrafficPolicy not set — zero value "" + Ports: []corev1.ServicePort{{Name: "http", Port: 80, TargetPort: intstr.FromInt(80), Protocol: corev1.ProtocolTCP}}, + }, + }, + expectUpdate: false, + }, { name: "selector changed", existing: &corev1.Service{ From 9847f3bf469eb9e1f03d140bc756d3d10f50526a Mon Sep 17 00:00:00 2001 From: Per Buer Date: Mon, 25 May 2026 19:32:01 +0200 Subject: [PATCH 17/17] fix(controller): correct Service sentinel storage and preserve NodePort Three findings from review of the configurable Service shape feature. 1. Labels sentinel was written into .metadata.labels with a comma-separated key list. Commas and slashes are not valid in Kubernetes label values, so the API server rejected any Service produced by a config with >= 2 user labels or any DNS-prefixed label key (e.g. app.kubernetes.io/component). The Gateway reconcile failed every loop. Both sentinels now live in .metadata.annotations; the user-supplied label values still live in .Labels and are looked up via the annotation-borne key list. 2. The Service update path replaced Spec.Ports wholesale with the desired slice, which has NodePort: 0. On every reconcile for a NodePort/LoadBalancer Service the API server would reallocate node ports, disconnecting clients connected via the old allocation. Added mergeServicePorts to carry existing NodePort allocations over by port name. 3. Protected-label collisions were silently dropped despite the API doc claiming a log warning. buildService now logs a Warn entry per collision, naming the gateway and the dropped key. Tests: - TestBuildService_MultipleLabels_SentinelIsValidLabelValue - TestServiceShape_MultipleLabels_Accepted_Envtest (would fail with API-server "Invalid value" prior to the fix) - TestMergeServicePorts_* (preserve, desired-wins, renamed) Co-Authored-By: Claude --- internal/controller/gateway_controller.go | 98 ++++++++++++++++--- .../gateway_controller_envtest_test.go | 83 ++++++++++++++++ .../controller/gateway_controller_test.go | 62 ++++++++++++ internal/controller/resources.go | 15 ++- internal/controller/resources_test.go | 59 +++++++++-- internal/controller/service_resolver.go | 9 +- 6 files changed, 300 insertions(+), 26 deletions(-) diff --git a/internal/controller/gateway_controller.go b/internal/controller/gateway_controller.go index d0dd193b..2c641dde 100644 --- a/internal/controller/gateway_controller.go +++ b/internal/controller/gateway_controller.go @@ -381,13 +381,14 @@ func (r *GatewayReconciler) reconcileResource(ctx context.Context, gateway *gate } // For Services, re-apply the fields we own when drift is detected. - // Cloud-controller-managed fields (clusterIP, NodePort assignments, - // loadBalancer.ingress) and non-managed annotations/labels are deliberately - // left alone. K8s API server handles Type transitions natively. + // Cloud-controller-managed fields (clusterIP, loadBalancer.ingress) and + // non-managed annotations/labels are deliberately left alone. + // NodePort allocations are carried over per-port by mergeServicePorts. + // K8s API server handles Type transitions natively. if desiredSvc, ok := desired.(*corev1.Service); ok { existingSvc := existing.(*corev1.Service) if needsServiceUpdate(existingSvc, desiredSvc) { - existingSvc.Spec.Ports = desiredSvc.Spec.Ports + existingSvc.Spec.Ports = mergeServicePorts(existingSvc.Spec.Ports, desiredSvc.Spec.Ports) existingSvc.Spec.Type = desiredSvc.Spec.Type existingSvc.Spec.Selector = desiredSvc.Spec.Selector existingSvc.Spec.LoadBalancerClass = desiredSvc.Spec.LoadBalancerClass @@ -397,22 +398,20 @@ func (r *GatewayReconciler) reconcileResource(ctx context.Context, gateway *gate // Re-merge annotations/labels using the operator's sentinels. // Existing state may contain cloud-controller annotations we // must preserve; mergeWithManaged handles the bookkeeping. + // Both sentinels live in .metadata.annotations — the label + // sentinel's value (comma-separated key list) is not a valid + // label value. existingAnnoSentinel := "" + existingLabelSentinel := "" if existingSvc.Annotations != nil { existingAnnoSentinel = existingSvc.Annotations[AnnotationManagedAnnotations] + existingLabelSentinel = existingSvc.Annotations[AnnotationManagedLabels] } desiredAnnos := desiredAnnotationsFromSentinel(desiredSvc) mergedAnnos, newAnnoSentinel := mergeWithManaged(desiredAnnos, existingSvc.Annotations, existingAnnoSentinel, nil) - mergedAnnos[AnnotationManagedAnnotations] = newAnnoSentinel - existingSvc.Annotations = mergedAnnos - existingLabelSentinel := "" - if existingSvc.Labels != nil { - existingLabelSentinel = existingSvc.Labels[AnnotationManagedLabels] - } desiredLabels := desiredLabelsFromSentinel(desiredSvc) mergedLabels, newLabelSentinel := mergeWithManaged(desiredLabels, existingSvc.Labels, existingLabelSentinel, controllerManagedLabelKeys()) - mergedLabels[AnnotationManagedLabels] = newLabelSentinel // Re-apply controller-managed labels unconditionally (mergeWithManaged // drops them from desired; we put them back here to defend against // existing being missing them on a hand-edited Service). @@ -421,6 +420,11 @@ func (r *GatewayReconciler) reconcileResource(ctx context.Context, gateway *gate } existingSvc.Labels = mergedLabels + // Both sentinels go on annotations, not labels. + mergedAnnos[AnnotationManagedAnnotations] = newAnnoSentinel + mergedAnnos[AnnotationManagedLabels] = newLabelSentinel + existingSvc.Annotations = mergedAnnos + if err := r.Update(ctx, existingSvc); err != nil { return fmt.Errorf("r.Update(%s): %w", desired.GetName(), err) } @@ -549,13 +553,73 @@ func needsServiceUpdate(existing, desired *corev1.Service) bool { if !managedMapEqual(existing.Annotations, desired.Annotations, AnnotationManagedAnnotations) { return true } - if !managedMapEqual(existing.Labels, desired.Labels, AnnotationManagedLabels) { + // Labels sentinel lives in .Annotations (label values can't carry the + // comma-separated key list), but the user-supplied values it points at + // live in .Labels. Verify the sentinel agrees, then compare those keys + // in the label map. + if !managedLabelsEqual(existing, desired) { return true } return false } +// managedLabelsEqual mirrors managedMapEqual but reads the sentinel from +// .metadata.annotations while comparing values in .metadata.labels. +func managedLabelsEqual(existing, desired *corev1.Service) bool { + desiredSentinel := "" + if desired.Annotations != nil { + desiredSentinel = desired.Annotations[AnnotationManagedLabels] + } + existingSentinel := "" + if existing.Annotations != nil { + existingSentinel = existing.Annotations[AnnotationManagedLabels] + } + if existingSentinel != desiredSentinel { + return false + } + if desiredSentinel == "" { + return true + } + for _, k := range strings.Split(desiredSentinel, ",") { + if k == "" { + continue + } + if existing.Labels[k] != desired.Labels[k] { + return false + } + } + return true +} + +// mergeServicePorts returns the desired ServicePort list with each entry's +// NodePort carried over from the existing slice (matched by port name) when +// the desired value is unset. buildService never sets NodePort — it's +// allocated by the API server — and a plain `existing.Ports = desired.Ports` +// assignment would zero it out on every reconcile, causing the API server +// to reallocate the node port and break clients connecting to it. +// +// Name is the join key because buildService names every port (listener +// socket name), and NodePort allocation is per-port. +func mergeServicePorts(existing, desired []corev1.ServicePort) []corev1.ServicePort { + existingNodePort := make(map[string]int32, len(existing)) + for _, p := range existing { + if p.NodePort != 0 { + existingNodePort[p.Name] = p.NodePort + } + } + out := make([]corev1.ServicePort, len(desired)) + copy(out, desired) + for i := range out { + if out[i].NodePort == 0 { + if np, ok := existingNodePort[out[i].Name]; ok { + out[i].NodePort = np + } + } + } + return out +} + // stringPtrEqual returns true if a and b point to equal strings, treating // nil == nil as equal. func stringPtrEqual(a, b *string) bool { @@ -1903,13 +1967,17 @@ func desiredAnnotationsFromSentinel(svc *corev1.Service) map[string]string { return out } -// desiredLabelsFromSentinel is the label counterpart to desiredAnnotationsFromSentinel. +// desiredLabelsFromSentinel reconstructs the user-supplied label map. +// The sentinel itself lives in .metadata.annotations (label values can't +// contain commas or slashes), but the user-supplied label values live in +// .metadata.labels — so we read the sentinel from annotations and look up +// each named key in labels. func desiredLabelsFromSentinel(svc *corev1.Service) map[string]string { out := map[string]string{} - if svc.Labels == nil { + if svc.Annotations == nil { return out } - sentinel := svc.Labels[AnnotationManagedLabels] + sentinel := svc.Annotations[AnnotationManagedLabels] if sentinel == "" { return out } diff --git a/internal/controller/gateway_controller_envtest_test.go b/internal/controller/gateway_controller_envtest_test.go index 6870a99e..34239bd7 100644 --- a/internal/controller/gateway_controller_envtest_test.go +++ b/internal/controller/gateway_controller_envtest_test.go @@ -564,3 +564,86 @@ func TestServiceShape_CloudControllerAnnotationPreserved_Envtest(t *testing.T) { _ = testEnv.Client.DeleteAllOf(ctx, &corev1.Secret{}, client.InNamespace("default")) _ = testEnv.Client.DeleteAllOf(ctx, &corev1.ServiceAccount{}, client.InNamespace("default")) } + +// TestServiceShape_MultipleLabels_Accepted_Envtest verifies that supplying +// multiple service labels — and labels with DNS-prefixed keys — produces a +// Service the API server accepts. Storing the sentinel in .Labels with a +// comma-separated key list breaks K8s label-value validation; this test +// guards against regressing to that layout. +func TestServiceShape_MultipleLabels_Accepted_Envtest(t *testing.T) { + ctx := context.Background() + + gatewayClass := &gatewayv1.GatewayClass{ + ObjectMeta: metav1.ObjectMeta{Name: "varnish-svc-multilabel"}, + Spec: gatewayv1.GatewayClassSpec{ + ControllerName: gatewayv1.GatewayController(ControllerName), + ParametersRef: &gatewayv1.ParametersReference{ + Group: gatewayv1.Group(gatewayparamsv1alpha1.GroupName), + Kind: "GatewayClassParameters", + Name: "svc-multilabel-params", + }, + }, + } + if err := testEnv.Client.Create(ctx, gatewayClass); err != nil { + t.Fatalf("create gatewayclass: %v", err) + } + defer func() { _ = testEnv.Client.Delete(ctx, gatewayClass) }() + + params := &gatewayparamsv1alpha1.GatewayClassParameters{ + ObjectMeta: metav1.ObjectMeta{Name: "svc-multilabel-params"}, + Spec: gatewayparamsv1alpha1.GatewayClassParametersSpec{ + Service: &gatewayparamsv1alpha1.ServiceConfig{ + Type: ptr(corev1.ServiceTypeClusterIP), + Labels: map[string]string{ + "team": "edge", + "tier": "cache", + "app.kubernetes.io/component": "gateway", + }, + }, + }, + } + if err := testEnv.Client.Create(ctx, params); err != nil { + t.Fatalf("create params: %v", err) + } + defer func() { _ = testEnv.Client.Delete(ctx, params) }() + + gw := &gatewayv1.Gateway{ + ObjectMeta: metav1.ObjectMeta{Name: "svc-multilabel-gw", Namespace: "default"}, + Spec: gatewayv1.GatewaySpec{ + GatewayClassName: "varnish-svc-multilabel", + Listeners: []gatewayv1.Listener{{Name: "http", Port: 80, Protocol: gatewayv1.HTTPProtocolType}}, + }, + } + if err := testEnv.Client.Create(ctx, gw); err != nil { + t.Fatalf("create gateway: %v", err) + } + defer func() { _ = testEnv.Client.Delete(ctx, gw) }() + + r := NewEnvtestGatewayReconciler(testEnv) + if _, err := r.Reconcile(ctx, ctrl.Request{NamespacedName: types.NamespacedName{Name: gw.Name, Namespace: gw.Namespace}}); err != nil { + t.Fatalf("reconcile (API server rejected Service?): %v", err) + } + time.Sleep(200 * time.Millisecond) + + var svc corev1.Service + if err := testEnv.Client.Get(ctx, types.NamespacedName{Name: gw.Name, Namespace: gw.Namespace}, &svc); err != nil { + t.Fatalf("get service: %v", err) + } + for _, want := range []string{"team", "tier", "app.kubernetes.io/component"} { + if _, ok := svc.Labels[want]; !ok { + t.Errorf("label %q missing from Service: %v", want, svc.Labels) + } + } + if _, leaked := svc.Labels[AnnotationManagedLabels]; leaked { + t.Errorf("sentinel leaked into .Labels: %v", svc.Labels) + } + if svc.Annotations[AnnotationManagedLabels] == "" { + t.Errorf("sentinel missing from .Annotations: %v", svc.Annotations) + } + + _ = testEnv.Client.DeleteAllOf(ctx, &appsv1.Deployment{}, client.InNamespace("default")) + _ = testEnv.Client.DeleteAllOf(ctx, &corev1.Service{}, client.InNamespace("default")) + _ = testEnv.Client.DeleteAllOf(ctx, &corev1.ConfigMap{}, client.InNamespace("default")) + _ = testEnv.Client.DeleteAllOf(ctx, &corev1.Secret{}, client.InNamespace("default")) + _ = testEnv.Client.DeleteAllOf(ctx, &corev1.ServiceAccount{}, client.InNamespace("default")) +} diff --git a/internal/controller/gateway_controller_test.go b/internal/controller/gateway_controller_test.go index 0206932c..5670a563 100644 --- a/internal/controller/gateway_controller_test.go +++ b/internal/controller/gateway_controller_test.go @@ -1707,6 +1707,68 @@ func TestNeedsServiceUpdate(t *testing.T) { } } +func TestMergeServicePorts_PreservesNodePort(t *testing.T) { + // buildService produces ports with NodePort: 0 because the API server + // allocates them. The update path must carry the existing allocation + // across or every reconcile triggers a NodePort churn and disconnects + // clients. + existing := []corev1.ServicePort{ + {Name: "http-80", Port: 80, TargetPort: intstr.FromInt(80), Protocol: corev1.ProtocolTCP, NodePort: 31234}, + {Name: "https-443", Port: 443, TargetPort: intstr.FromInt(443), Protocol: corev1.ProtocolTCP, NodePort: 31235}, + } + desired := []corev1.ServicePort{ + {Name: "http-80", Port: 80, TargetPort: intstr.FromInt(80), Protocol: corev1.ProtocolTCP}, + {Name: "https-443", Port: 443, TargetPort: intstr.FromInt(443), Protocol: corev1.ProtocolTCP}, + } + + out := mergeServicePorts(existing, desired) + + if len(out) != 2 { + t.Fatalf("len(out) = %d, want 2", len(out)) + } + if out[0].NodePort != 31234 { + t.Errorf("http-80 NodePort = %d, want 31234 (carried from existing)", out[0].NodePort) + } + if out[1].NodePort != 31235 { + t.Errorf("https-443 NodePort = %d, want 31235 (carried from existing)", out[1].NodePort) + } +} + +func TestMergeServicePorts_DesiredNodePortWins(t *testing.T) { + // If a future caller explicitly sets a NodePort in the desired port + // (e.g. user pins one), it must win over the existing allocation. + existing := []corev1.ServicePort{ + {Name: "http-80", Port: 80, NodePort: 31234}, + } + desired := []corev1.ServicePort{ + {Name: "http-80", Port: 80, NodePort: 32000}, + } + + out := mergeServicePorts(existing, desired) + + if out[0].NodePort != 32000 { + t.Errorf("NodePort = %d, want 32000 (desired wins)", out[0].NodePort) + } +} + +func TestMergeServicePorts_RenamedPortGetsNewAllocation(t *testing.T) { + // If the port name changes (e.g. listener renamed), there's no + // existing entry to carry NodePort from — desired's zero stays, + // and the API server allocates a fresh one. + existing := []corev1.ServicePort{ + {Name: "http-80", Port: 80, NodePort: 31234}, + } + desired := []corev1.ServicePort{ + {Name: "http-8080", Port: 8080}, + } + + out := mergeServicePorts(existing, desired) + + if out[0].NodePort != 0 { + t.Errorf("renamed port should not inherit NodePort, got %d", out[0].NodePort) + } +} + func TestNeedsSecretUpdate(t *testing.T) { tests := []struct { name string diff --git a/internal/controller/resources.go b/internal/controller/resources.go index 6765356d..04145ee1 100644 --- a/internal/controller/resources.go +++ b/internal/controller/resources.go @@ -596,12 +596,23 @@ func (r *GatewayReconciler) buildService(gateway *gatewayv1.Gateway, resolved Re // Merge user labels onto controller labels via the sentinel. Existing // state is the controller-label baseline; protected keys can't be - // overridden by user input. + // overridden by user input. Log any collisions so users get feedback + // when their config tried to overwrite a controller-managed label. + for k := range resolved.Labels { + if _, isProtected := controllerManagedLabelKeys()[k]; isProtected { + r.Logger.Warn("ignoring service label that collides with a controller-managed key", + "gateway", gateway.Namespace+"/"+gateway.Name, + "label", k) + } + } mergedLabels, labelSentinel := mergeWithManaged(resolved.Labels, controllerLabels, "", controllerManagedLabelKeys()) - mergedLabels[AnnotationManagedLabels] = labelSentinel mergedAnnotations, annoSentinel := mergeWithManaged(resolved.Annotations, nil, "", nil) + // Both sentinels live in .metadata.annotations. The label sentinel's + // value is a comma-separated key list, which is not a valid label value + // (no commas, no slashes) — annotations have no such restriction. mergedAnnotations[AnnotationManagedAnnotations] = annoSentinel + mergedAnnotations[AnnotationManagedLabels] = labelSentinel svc := &corev1.Service{ TypeMeta: metav1.TypeMeta{APIVersion: "v1", Kind: "Service"}, diff --git a/internal/controller/resources_test.go b/internal/controller/resources_test.go index 2d6316a3..07bfc32a 100644 --- a/internal/controller/resources_test.go +++ b/internal/controller/resources_test.go @@ -393,8 +393,12 @@ func TestBuildService_AppliesLabelsWithSentinel_ProtectsControllerLabels(t *test if svc.Labels["team"] != "edge" { t.Errorf("user label = %q, want edge", svc.Labels["team"]) } - if svc.Labels[AnnotationManagedLabels] != "team" { - t.Errorf("sentinel = %q, want team", svc.Labels[AnnotationManagedLabels]) + // Sentinel lives on annotations (label values can't carry commas/slashes). + if svc.Annotations[AnnotationManagedLabels] != "team" { + t.Errorf("sentinel = %q, want team", svc.Annotations[AnnotationManagedLabels]) + } + if _, present := svc.Labels[AnnotationManagedLabels]; present { + t.Errorf("sentinel must not live in .Labels (invalid label value when multi-key): %v", svc.Labels) } // CRITICAL invariant: user labels MUST NOT leak into the selector. // If they did, the Service would fail to find any pods. @@ -406,6 +410,43 @@ func TestBuildService_AppliesLabelsWithSentinel_ProtectsControllerLabels(t *test } } +func TestBuildService_MultipleLabels_SentinelIsValidLabelValue(t *testing.T) { + // Regression: prior to moving the sentinel into .Annotations, a config + // with ≥2 service labels (or any DNS-prefixed key) produced a sentinel + // value containing ',' or '/', both forbidden in label values — the API + // server rejected the Service. The sentinel must now live in annotations + // and .Labels must not contain it at all. + r := testReconcilerSimple() + gw := testGateway("my-gw", "default") + + svc := r.buildService(gw, ResolvedServiceConfig{ + Type: corev1.ServiceTypeLoadBalancer, + Annotations: map[string]string{}, + Labels: map[string]string{ + "team": "edge", + "tier": "cache", + "app.kubernetes.io/component": "gateway", + }, + }) + + for k, v := range svc.Labels { + for _, bad := range []rune{',', '/'} { + for _, r := range v { + if r == bad { + t.Errorf("label %q has value %q containing %q (invalid in label values)", k, v, string(bad)) + } + } + } + } + if _, present := svc.Labels[AnnotationManagedLabels]; present { + t.Errorf("sentinel leaked into .Labels (will be rejected by API server): %v", svc.Labels) + } + sentinel := svc.Annotations[AnnotationManagedLabels] + if sentinel != "app.kubernetes.io/component,team,tier" { + t.Errorf("annotation sentinel = %q, want %q", sentinel, "app.kubernetes.io/component,team,tier") + } +} + func TestBuildService_EmptyUserMaps_WritesEmptySentinel(t *testing.T) { r := testReconcilerSimple() gw := testGateway("my-gw", "default") @@ -416,9 +457,10 @@ func TestBuildService_EmptyUserMaps_WritesEmptySentinel(t *testing.T) { Labels: map[string]string{}, }) - // When no user-supplied annotations/labels exist, the sentinel keys - // are still written with empty values, signalling "this Service was - // touched by this version of the operator, currently managing nothing." + // When no user-supplied annotations/labels exist, both sentinels are + // still written (in .Annotations) with empty values, signalling "this + // Service was touched by this version of the operator, currently + // managing nothing." v, ok := svc.Annotations[AnnotationManagedAnnotations] if !ok { t.Error("AnnotationManagedAnnotations sentinel missing") @@ -426,13 +468,16 @@ func TestBuildService_EmptyUserMaps_WritesEmptySentinel(t *testing.T) { if v != "" { t.Errorf("annotation sentinel = %q, want empty string", v) } - lv, lok := svc.Labels[AnnotationManagedLabels] + lv, lok := svc.Annotations[AnnotationManagedLabels] if !lok { - t.Error("AnnotationManagedLabels sentinel missing") + t.Error("AnnotationManagedLabels sentinel missing from .Annotations") } if lv != "" { t.Errorf("label sentinel = %q, want empty string", lv) } + if _, present := svc.Labels[AnnotationManagedLabels]; present { + t.Errorf("label sentinel must not live in .Labels: %v", svc.Labels) + } } // --- buildVolumes --- diff --git a/internal/controller/service_resolver.go b/internal/controller/service_resolver.go index c76de2e3..aaf764d8 100644 --- a/internal/controller/service_resolver.go +++ b/internal/controller/service_resolver.go @@ -10,10 +10,15 @@ import ( gatewayparamsv1alpha1 "github.com/varnish/gateway/api/v1alpha1" ) -// Sentinel annotation keys recording which annotation and label keys the -// operator manages on the Service. Pruning on update consults these to avoid +// Sentinel keys recording which annotation and label keys the operator +// manages on the Service. Pruning on update consults these to avoid // stomping on annotations added by cloud controllers, service-mesh sidecar // injectors, observability tooling, etc. +// +// Both sentinels live in .metadata.annotations. The label sentinel's value +// is a comma-separated list of label keys; commas and slashes are not +// valid in a Kubernetes label value, so it cannot be stored in +// .metadata.labels. (The label keys it names are looked up in .Labels.) const ( AnnotationManagedAnnotations = "varnish.io/managed-annotations" AnnotationManagedLabels = "varnish.io/managed-labels"