From 0288113b2039b5c5915de98cbf820553d1c87db8 Mon Sep 17 00:00:00 2001 From: Josiah England Date: Wed, 27 May 2026 14:34:43 -0600 Subject: [PATCH] fix(tracing): direct OTel SDK setup for chain-coherent sampling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Knative's config-observability ConfigMap only exposes a flat tracing-sampling-rate, so at fractional rates each service in the chain rolls independently — PaC can drop a trace while Tekton keeps it, leaving execution spans whose parent_spanID points at nothing. Switching to the OTel SDK opens up OTEL_TRACES_SAMPLER's parentbased_* family, which honors the root span's sample decision in the W3C traceparent flag so the whole chain is kept or dropped together. Controller and watcher call tracing.New() at startup. Tracing is opt-in: both OTEL_EXPORTER_OTLP_ENDPOINT and OTEL_TRACES_SAMPLER must be set, otherwise PaC falls back to noop (matching the prior tracing-sampling-rate "0" default). Resource service.name is pipelines-as-code. Propagator is W3C TraceContext only; Baggage is intentionally not honored per Konflux-CI ADR 0061. otlptracegrpc and otlptracehttp promoted from indirect to direct dependencies. Assisted-by: Claude Code Signed-off-by: Josiah England --- config/305-config-observability.yaml | 14 -- docs/content/docs/operations/tracing.md | 55 +++--- go.mod | 4 +- pkg/adapter/adapter.go | 9 + pkg/reconciler/controller.go | 13 ++ pkg/tracing/provider.go | 156 ++++++++++++++++ pkg/tracing/provider_test.go | 229 ++++++++++++++++++++++++ 7 files changed, 441 insertions(+), 39 deletions(-) create mode 100644 pkg/tracing/provider.go create mode 100644 pkg/tracing/provider_test.go diff --git a/config/305-config-observability.yaml b/config/305-config-observability.yaml index bfa697624e..78e2b009a4 100644 --- a/config/305-config-observability.yaml +++ b/config/305-config-observability.yaml @@ -52,20 +52,6 @@ data: # Only applicable for grpc and http/protobuf protocols. # metrics-export-interval: "30s" - # tracing-protocol specifies the trace export protocol. - # Supported values: "grpc", "http/protobuf", "none". - # Default is "none" (tracing disabled). - # tracing-protocol: "none" - - # tracing-endpoint specifies the OTLP collector endpoint. - # Required when tracing-protocol is "grpc" or "http/protobuf". - # The OTEL_EXPORTER_OTLP_ENDPOINT env var takes precedence if set. - # tracing-endpoint: "http://otel-collector.observability.svc.cluster.local:4317" - - # tracing-sampling-rate controls the fraction of traces sampled. - # 0.0 = none, 1.0 = all. Default is 0 (none). - # tracing-sampling-rate: "1.0" - # runtime-profiling enables/disables the pprof profiling server on port 8008. # Supported values: "enabled", "disabled" (default). # runtime-profiling: "disabled" diff --git a/docs/content/docs/operations/tracing.md b/docs/content/docs/operations/tracing.md index a3e3aa3e5c..ec57cbc2e9 100644 --- a/docs/content/docs/operations/tracing.md +++ b/docs/content/docs/operations/tracing.md @@ -3,39 +3,48 @@ title: Distributed Tracing weight: 5 --- -This page describes how to enable OpenTelemetry distributed tracing for Pipelines-as-Code. When enabled, PaC emits trace spans for webhook event processing and PipelineRun lifecycle timing. +Pipelines-as-Code emits trace spans for webhook event processing and PipelineRun lifecycle timing. ## Enabling tracing -The ConfigMap `pipelines-as-code-config-observability` controls tracing configuration. It must exist in the same namespace as the Pipelines-as-Code controller and watcher deployments. See [config/305-config-observability.yaml](https://github.com/tektoncd/pipelines-as-code/blob/main/config/305-config-observability.yaml) for the full example. +Two configuration paths can enable tracing. -It contains the following tracing fields: +### Via OpenTelemetry environment variables -* `tracing-protocol`: Export protocol. Supported values: `grpc`, `http/protobuf`, `none`. Default is `none` (tracing disabled). -* `tracing-endpoint`: OTLP collector endpoint. Required when protocol is not `none`. The `OTEL_EXPORTER_OTLP_ENDPOINT` environment variable takes precedence if set. -* `tracing-sampling-rate`: Fraction of traces to sample. `0.0` = none, `1.0` = all. Default is `0`. +Set on the controller and watcher pods: -### Example +* `OTEL_EXPORTER_OTLP_ENDPOINT` - OTLP collector endpoint URL. Required. +* `OTEL_TRACES_SAMPLER` - Sampler family. Required. Supported: `always_on`, `always_off`, `traceidratio`, `parentbased_always_on`, `parentbased_always_off`, `parentbased_traceidratio`. +* `OTEL_TRACES_SAMPLER_ARG` - Numeric argument for ratio samplers. Example: `0.1` with `parentbased_traceidratio` samples 10% of root traces while keeping the chain coherent. +* `OTEL_EXPORTER_OTLP_PROTOCOL` (or traces-specific `OTEL_EXPORTER_OTLP_TRACES_PROTOCOL`) - OTLP transport: `grpc` or `http/protobuf`. Default: `grpc`. -```yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: pipelines-as-code-config-observability - namespace: pipelines-as-code -data: - tracing-protocol: grpc - tracing-endpoint: "http://otel-collector.observability.svc.cluster.local:4317" - tracing-sampling-rate: "1.0" -``` +Both `OTEL_EXPORTER_OTLP_ENDPOINT` and `OTEL_TRACES_SAMPLER` must be set. Inbound `traceparent` headers on webhook requests are honored via the W3C TraceContext propagator. Changes take effect on the next pod restart. -Changes to `tracing-protocol`, `tracing-endpoint`, and `tracing-sampling-rate` require restarting the controller and watcher pods. The trace exporter is created once at startup from the ConfigMap values at that time. Set `tracing-protocol` to `none` or remove the tracing keys to disable tracing. +#### Sampler choice and chain coherency -The controller and watcher locate this ConfigMap by name via the `CONFIG_OBSERVABILITY_NAME` environment variable set in their deployment manifests. Operator-based installations may manage this differently; consult the operator documentation for details. +The `parentbased_*` sampler family honors the parent span's sample decision carried in the W3C `traceparent` flag bit. When every service in the delivery chain uses parent-based samplers, the root span's sampling decision propagates end to end: each service either keeps its spans or drops them based on what the root chose. Flat-rate samplers (`traceidratio` without parent-based) cause each service to roll independently, which at fractional sampling fragments the chain into orphaned spans whose `parent_spanID` references a span that was dropped. `parentbased_always_on` keeps everything; `parentbased_traceidratio` with a numeric argument samples a coherent fraction. + +### Via Knative observability ConfigMap + +Set in `pipelines-as-code-config-observability`: + +* `tracing-protocol` - `grpc`, `http/protobuf`, `stdout`, or `none`. Default: `none`. +* `tracing-endpoint` - Collector endpoint for `grpc` or `http/protobuf`. +* `tracing-sampling-rate` - Sample fraction. Per-component independent. + +Changes to Knative's tracing config require restarting the controller and watcher pods. The tracer is built once at startup. + +### When both are configured + +OpenTelemetry takes precedence: all spans flow through the OpenTelemetry exporter. The Knative tracer is initialized at startup but unused. + +To use only OpenTelemetry, set `tracing-protocol: none` in `pipelines-as-code-config-observability`. + +To use only Knative, unset `OTEL_EXPORTER_OTLP_ENDPOINT` on the controller and watcher pods. ## Emitted spans -The controller emits a `PipelinesAsCode:ProcessEvent` span for each webhook event. The watcher emits `waitDuration` and `executeDuration` spans for completed PipelineRuns. +The controller emits a `PipelinesAsCode:ProcessEvent` span for each webhook event. The watcher emits `waitDuration` and `executeDuration` spans for completed PipelineRuns. The OTel resource attribute `service.name` on all emitted spans is `pipelines-as-code`. ### Webhook event span (`PipelinesAsCode:ProcessEvent`) @@ -103,13 +112,13 @@ Unlike the observability ConfigMap above (which requires a pod restart), changes ## Trace context propagation -When Pipelines-as-Code creates a PipelineRun, it sets the `tekton.dev/pipelinerunSpanContext` annotation with a JSON-encoded OTel TextMapCarrier containing the W3C `traceparent`. PaC tracing works independently — you get PaC spans regardless of whether Tekton Pipelines has tracing enabled. +When Pipelines-as-Code creates a PipelineRun, it sets the `tekton.dev/pipelinerunSpanContext` annotation with a JSON-encoded OTel TextMapCarrier containing the W3C `traceparent`. PaC tracing works independently - you get PaC spans regardless of whether Tekton Pipelines has tracing enabled. If Tekton Pipelines is also configured with tracing pointing at the same collector, its reconciler spans appear as children of the PaC span, providing a single end-to-end trace from webhook receipt through task execution. See the [Tekton Pipelines tracing documentation](https://github.com/tektoncd/pipeline/blob/main/docs/developers/tracing.md) for Tekton's independent tracing setup. ## Deploying a trace collector -Pipelines-as-Code exports traces using the standard OpenTelemetry Protocol (OTLP). You need a running OTLP-compatible collector for the `tracing-endpoint` to point to. Common options include: +Pipelines-as-Code exports traces using the standard OpenTelemetry Protocol (OTLP). You need a running OTLP-compatible collector for `OTEL_EXPORTER_OTLP_ENDPOINT` to point to. Common options include: * [OpenTelemetry Collector](https://opentelemetry.io/docs/collector/) -- the vendor-neutral reference collector * [Jaeger](https://www.jaegertracing.io/docs/latest/getting-started/) -- supports OTLP ingestion natively since v1.35 diff --git a/go.mod b/go.mod index 7e47ac8df8..c35fd2ce33 100755 --- a/go.mod +++ b/go.mod @@ -30,6 +30,8 @@ require ( github.com/tektoncd/pipeline v1.13.1 gitlab.com/gitlab-org/api/client-go v1.46.0 go.opentelemetry.io/otel v1.44.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.43.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0 go.opentelemetry.io/otel/metric v1.44.0 go.opentelemetry.io/otel/sdk v1.43.0 go.opentelemetry.io/otel/sdk/metric v1.43.0 @@ -90,8 +92,6 @@ require ( go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.43.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.43.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.43.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.43.0 // indirect go.opentelemetry.io/otel/exporters/prometheus v0.65.0 // indirect go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.43.0 // indirect go.opentelemetry.io/proto/otlp v1.10.0 // indirect diff --git a/pkg/adapter/adapter.go b/pkg/adapter/adapter.go index a2cb2925d1..281fdd0dc3 100644 --- a/pkg/adapter/adapter.go +++ b/pkg/adapter/adapter.go @@ -79,6 +79,15 @@ func New(run *params.Run, k *kubeinteraction.Interaction) adapter.AdapterConstru } func (l *listener) Start(ctx context.Context) error { + tp := tracing.New(l.logger) + defer func() { + shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := tp.Shutdown(shutdownCtx); err != nil { + l.logger.Errorw("failed to shut down tracer provider", "error", err) + } + }() + adapterPort := globalAdapterPort envAdapterPort := os.Getenv("PAC_CONTROLLER_PORT") if envAdapterPort != "" { diff --git a/pkg/reconciler/controller.go b/pkg/reconciler/controller.go index 3221231e7a..1af01d6ce8 100644 --- a/pkg/reconciler/controller.go +++ b/pkg/reconciler/controller.go @@ -3,6 +3,7 @@ package reconciler import ( "context" "path" + "time" "github.com/openshift-pipelines/pipelines-as-code/pkg/apis/pipelinesascode" "github.com/openshift-pipelines/pipelines-as-code/pkg/apis/pipelinesascode/keys" @@ -14,6 +15,7 @@ import ( "github.com/openshift-pipelines/pipelines-as-code/pkg/params/info" prmetrics "github.com/openshift-pipelines/pipelines-as-code/pkg/pipelinerunmetrics" queuepkg "github.com/openshift-pipelines/pipelines-as-code/pkg/queue" + "github.com/openshift-pipelines/pipelines-as-code/pkg/tracing" tektonv1 "github.com/tektoncd/pipeline/pkg/apis/pipeline/v1" tektonPipelineRunInformerv1 "github.com/tektoncd/pipeline/pkg/client/injection/informers/pipeline/v1/pipelinerun" tektonPipelineRunReconcilerv1 "github.com/tektoncd/pipeline/pkg/client/injection/reconciler/pipeline/v1/pipelinerun" @@ -30,6 +32,17 @@ func NewController() func(context.Context, configmap.Watcher) *controller.Impl { ctx = info.StoreNS(ctx, system.Namespace()) log := logging.FromContext(ctx) + tp := tracing.New(log) + // linter false positive: fresh Background is required because outer ctx is cancelled past <-ctx.Done(). + go func() { //nolint:gosec + <-ctx.Done() + shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + if err := tp.Shutdown(shutdownCtx); err != nil { + log.Errorw("failed to shut down tracer provider", "error", err) + } + }() + run := params.New() err := run.Clients.NewClients(ctx, &run.Info) if err != nil { diff --git a/pkg/tracing/provider.go b/pkg/tracing/provider.go new file mode 100644 index 0000000000..aeb60e7aec --- /dev/null +++ b/pkg/tracing/provider.go @@ -0,0 +1,156 @@ +package tracing + +import ( + "context" + "os" + "strconv" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp" + "go.opentelemetry.io/otel/propagation" + "go.opentelemetry.io/otel/sdk/resource" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + semconv "go.opentelemetry.io/otel/semconv/v1.40.0" + "go.opentelemetry.io/otel/trace/noop" + "go.uber.org/zap" + knativetracing "knative.dev/pkg/observability/tracing" +) + +const ( + EnvOTLPEndpoint = "OTEL_EXPORTER_OTLP_ENDPOINT" + EnvOTLPProtocol = "OTEL_EXPORTER_OTLP_PROTOCOL" + EnvOTLPTracesProtocol = "OTEL_EXPORTER_OTLP_TRACES_PROTOCOL" + EnvTracesSampler = "OTEL_TRACES_SAMPLER" + EnvTracesSamplerArg = "OTEL_TRACES_SAMPLER_ARG" + + protocolGRPC = "grpc" + protocolHTTP = "http/protobuf" +) + +type TracerProvider struct { + shutdown func(context.Context) error +} + +func New(logger *zap.SugaredLogger) *TracerProvider { + otelConfigured := os.Getenv(EnvOTLPEndpoint) != "" && os.Getenv(EnvTracesSampler) != "" + if otelConfigured && !globalIsNoop() { + logger.Warn("OpenTelemetry and Knative tracing both configured; spans go through OpenTelemetry, Knative's tracer is unused. Set `tracing-protocol: none` in `pipelines-as-code-config-observability` to disable Knative, or unset `OTEL_EXPORTER_OTLP_ENDPOINT` to disable OpenTelemetry.") + } + + if os.Getenv(EnvOTLPEndpoint) == "" { + logger.Info("OpenTelemetry not configured (OTLP endpoint missing)") + return passthroughProvider() + } + if os.Getenv(EnvTracesSampler) == "" { + logger.Info("OpenTelemetry not configured (sampler missing)") + return passthroughProvider() + } + + proto := protocolFromEnv() + exporter, err := newExporter(context.Background(), logger, proto) + if err != nil { + logger.Errorw("failed to create OTLP exporter", "error", err) + return passthroughProvider() + } + + res, err := resource.Merge( + resource.Default(), + resource.NewWithAttributes( + semconv.SchemaURL, + semconv.ServiceName(TracerName), + ), + ) + if err != nil { + logger.Errorw("failed to create resource", "error", err) + res = resource.Default() + } + + tp := sdktrace.NewTracerProvider( + sdktrace.WithBatcher(exporter), + sdktrace.WithResource(res), + sdktrace.WithSampler(samplerFromEnv(logger)), + ) + + otel.SetTracerProvider(tp) + otel.SetTextMapPropagator(propagation.TraceContext{}) + + logger.Infow("tracing initialized", "endpoint", os.Getenv(EnvOTLPEndpoint), "protocol", proto) + + return &TracerProvider{shutdown: tp.Shutdown} +} + +func passthroughProvider() *TracerProvider { + return &TracerProvider{shutdown: func(context.Context) error { return nil }} +} + +func globalIsNoop() bool { + tp := otel.GetTracerProvider() + if _, ok := tp.(noop.TracerProvider); ok { + return true + } + // Knative wraps noop in its own TracerProvider when tracing-protocol is none/absent. + if knativeProvider, ok := tp.(*knativetracing.TracerProvider); ok { + _, isNoop := knativeProvider.TracerProvider.(noop.TracerProvider) + return isNoop + } + return false +} + +func protocolFromEnv() string { + if v := os.Getenv(EnvOTLPTracesProtocol); v != "" { + return v + } + if v := os.Getenv(EnvOTLPProtocol); v != "" { + return v + } + return protocolGRPC +} + +func newExporter(ctx context.Context, logger *zap.SugaredLogger, proto string) (sdktrace.SpanExporter, error) { + endpoint := os.Getenv(EnvOTLPEndpoint) + switch proto { + case protocolHTTP: + return otlptracehttp.New(ctx, otlptracehttp.WithEndpointURL(endpoint)) + case protocolGRPC: + return otlptracegrpc.New(ctx, otlptracegrpc.WithEndpointURL(endpoint)) + default: + logger.Errorw("unsupported OTLP protocol; falling back to grpc", "protocol", proto) + return otlptracegrpc.New(ctx, otlptracegrpc.WithEndpointURL(endpoint)) + } +} + +func (tp *TracerProvider) Shutdown(ctx context.Context) error { + if tp.shutdown != nil { + return tp.shutdown(ctx) + } + return nil +} + +func samplerFromEnv(logger *zap.SugaredLogger) sdktrace.Sampler { + name := os.Getenv(EnvTracesSampler) + argStr := os.Getenv(EnvTracesSamplerArg) + arg, err := strconv.ParseFloat(argStr, 64) + if err != nil && argStr != "" { + logger.Errorw("ignoring malformed sampler argument; defaulting to 0% sampling", "env", EnvTracesSamplerArg, "value", argStr) + } + if argStr == "" && (name == "traceidratio" || name == "parentbased_traceidratio") { + logger.Infow("ratio sampler selected without "+EnvTracesSamplerArg+"; defaulting to 0% sampling", "env", EnvTracesSampler, "value", name) + } + switch name { + case "always_on": + return sdktrace.AlwaysSample() + case "always_off": + return sdktrace.NeverSample() + case "traceidratio": + return sdktrace.TraceIDRatioBased(arg) + case "parentbased_always_on": + return sdktrace.ParentBased(sdktrace.AlwaysSample()) + case "parentbased_always_off": + return sdktrace.ParentBased(sdktrace.NeverSample()) + case "parentbased_traceidratio": + return sdktrace.ParentBased(sdktrace.TraceIDRatioBased(arg)) + } + logger.Warnw("unrecognized OTEL_TRACES_SAMPLER value; falling back to never sample", "value", name) + return sdktrace.NeverSample() +} diff --git a/pkg/tracing/provider_test.go b/pkg/tracing/provider_test.go new file mode 100644 index 0000000000..1f0670d5d5 --- /dev/null +++ b/pkg/tracing/provider_test.go @@ -0,0 +1,229 @@ +package tracing + +import ( + "context" + "strings" + "testing" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/propagation" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + "go.opentelemetry.io/otel/trace/noop" + "go.uber.org/zap" + "go.uber.org/zap/zapcore" + "go.uber.org/zap/zaptest/observer" + "gotest.tools/v3/assert" + knativetracing "knative.dev/pkg/observability/tracing" +) + +func nopLogger() *zap.SugaredLogger { + return zap.NewNop().Sugar() +} + +func saveGlobalProvider(t *testing.T) { + t.Helper() + prevTP := otel.GetTracerProvider() + prevProp := otel.GetTextMapPropagator() + t.Cleanup(func() { + otel.SetTracerProvider(prevTP) + otel.SetTextMapPropagator(prevProp) + }) +} + +func TestNewReturnsNoopWhenEndpointUnset(t *testing.T) { + saveGlobalProvider(t) + t.Setenv(EnvOTLPEndpoint, "") + t.Setenv(EnvTracesSampler, "always_on") + + tp := New(nopLogger()) + assert.Assert(t, tp != nil) + + _, isSDK := otel.GetTracerProvider().(*sdktrace.TracerProvider) + assert.Assert(t, !isSDK, "should not install SDK provider when endpoint unset") +} + +func TestNewReturnsNoopWhenSamplerUnset(t *testing.T) { + saveGlobalProvider(t) + t.Setenv(EnvOTLPEndpoint, "http://localhost:4317") + t.Setenv(EnvTracesSampler, "") + + tp := New(nopLogger()) + assert.Assert(t, tp != nil) + + _, isSDK := otel.GetTracerProvider().(*sdktrace.TracerProvider) + assert.Assert(t, !isSDK, "should not install SDK provider when sampler unset") +} + +func TestNewInstallsSDKAndW3CPropagatorOnGRPC(t *testing.T) { + saveGlobalProvider(t) + t.Setenv(EnvOTLPEndpoint, "http://localhost:4317") + t.Setenv(EnvTracesSampler, "parentbased_always_on") + t.Setenv(EnvOTLPProtocol, "grpc") + + tp := New(nopLogger()) + assert.Assert(t, tp != nil) + + _, isSDK := otel.GetTracerProvider().(*sdktrace.TracerProvider) + assert.Assert(t, isSDK, "should install SDK provider when endpoint and sampler are both set") + + _, isW3C := otel.GetTextMapPropagator().(propagation.TraceContext) + assert.Assert(t, isW3C, "should set W3C TraceContext as the global propagator") + + assert.NilError(t, tp.Shutdown(context.Background())) +} + +func TestNewInstallsSDKOnHTTPProtobuf(t *testing.T) { + saveGlobalProvider(t) + t.Setenv(EnvOTLPEndpoint, "http://localhost:4318") + t.Setenv(EnvTracesSampler, "parentbased_traceidratio") + t.Setenv(EnvTracesSamplerArg, "0.5") + t.Setenv(EnvOTLPProtocol, "http/protobuf") + + tp := New(nopLogger()) + + _, isSDK := otel.GetTracerProvider().(*sdktrace.TracerProvider) + assert.Assert(t, isSDK, "http/protobuf protocol should also install an SDK provider") + + assert.NilError(t, tp.Shutdown(context.Background())) +} + +func TestProtocolFromEnv(t *testing.T) { + tests := []struct { + name string + tracesProtocol string + otlpProtocol string + want string + }{ + {"defaults to grpc when neither is set", "", "", protocolGRPC}, + {"falls back to OTLPProtocol when the traces-specific var is unset", "", "http/protobuf", "http/protobuf"}, + {"traces-specific takes precedence over the generic var", "grpc", "http/protobuf", "grpc"}, + {"OTLPProtocol grpc applies", "", "grpc", "grpc"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Setenv(EnvOTLPTracesProtocol, tt.tracesProtocol) + t.Setenv(EnvOTLPProtocol, tt.otlpProtocol) + assert.Equal(t, protocolFromEnv(), tt.want) + }) + } +} + +func TestSamplerFromEnv(t *testing.T) { + tests := []struct { + name string + sampler string + arg string + wantDescPrefix string + }{ + {"always_on", "always_on", "", "AlwaysOnSampler"}, + {"always_off", "always_off", "", "AlwaysOffSampler"}, + {"traceidratio half", "traceidratio", "0.5", "TraceIDRatioBased{0.5}"}, + {"parentbased_always_on", "parentbased_always_on", "", "ParentBased{root:AlwaysOnSampler"}, + {"parentbased_always_off", "parentbased_always_off", "", "ParentBased{root:AlwaysOffSampler"}, + {"parentbased_traceidratio one tenth", "parentbased_traceidratio", "0.1", "ParentBased{root:TraceIDRatioBased{0.1}"}, + {"unrecognized falls back to never sample", "some-future-otel-keyword", "", "AlwaysOffSampler"}, + {"empty value falls back to never sample", "", "", "AlwaysOffSampler"}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Setenv(EnvTracesSampler, tt.sampler) + t.Setenv(EnvTracesSamplerArg, tt.arg) + s := samplerFromEnv(nopLogger()) + assert.Assert(t, strings.Contains(s.Description(), tt.wantDescPrefix), + "expected sampler description containing %q, got %q", tt.wantDescPrefix, s.Description()) + }) + } +} + +func TestShutdownReturnsNilForPassthroughProvider(t *testing.T) { + saveGlobalProvider(t) + t.Setenv(EnvOTLPEndpoint, "") + t.Setenv(EnvTracesSampler, "") + + tp := New(nopLogger()) + assert.NilError(t, tp.Shutdown(context.Background())) +} + +func TestShutdownIsIdempotent(t *testing.T) { + saveGlobalProvider(t) + t.Setenv(EnvOTLPEndpoint, "http://localhost:4317") + t.Setenv(EnvTracesSampler, "always_on") + + tp := New(nopLogger()) + assert.NilError(t, tp.Shutdown(context.Background())) + assert.NilError(t, tp.Shutdown(context.Background()), "second shutdown must not panic or error") +} + +func TestShutdownOnProviderWithoutHookReturnsNil(t *testing.T) { + tp := &TracerProvider{} + assert.NilError(t, tp.Shutdown(context.Background())) +} + +func TestNewPreservesExistingProviderWhenOTelUnset(t *testing.T) { + saveGlobalProvider(t) + t.Setenv(EnvOTLPEndpoint, "") + t.Setenv(EnvTracesSampler, "") + + existing := sdktrace.NewTracerProvider() + otel.SetTracerProvider(existing) + + _ = New(nopLogger()) + + got, ok := otel.GetTracerProvider().(*sdktrace.TracerProvider) + assert.Assert(t, ok, "global should remain the previously-set SDK provider") + assert.Equal(t, got, existing, "global should be the same instance as before") +} + +func TestGlobalIsNoop(t *testing.T) { + saveGlobalProvider(t) + + otel.SetTracerProvider(noop.NewTracerProvider()) + assert.Assert(t, globalIsNoop(), "bare noop provider should be detected") + + otel.SetTracerProvider(sdktrace.NewTracerProvider()) + assert.Assert(t, !globalIsNoop(), "bare SDK provider should not be detected as noop") + + knativeNoop, err := knativetracing.NewTracerProvider(context.Background(), knativetracing.Config{Protocol: knativetracing.ProtocolNone}) + assert.NilError(t, err) + otel.SetTracerProvider(knativeNoop) + assert.Assert(t, globalIsNoop(), "Knative wrapper around noop should be detected") + + knativeSDK := &knativetracing.TracerProvider{TracerProvider: sdktrace.NewTracerProvider()} + otel.SetTracerProvider(knativeSDK) + assert.Assert(t, !globalIsNoop(), "Knative wrapper around SDK provider should not be detected as noop") +} + +func TestNewWarnsWhenBothBackendsConfigured(t *testing.T) { + saveGlobalProvider(t) + t.Setenv(EnvOTLPEndpoint, "http://localhost:4317") + t.Setenv(EnvTracesSampler, "always_on") + + otel.SetTracerProvider(sdktrace.NewTracerProvider()) + + core, observed := observer.New(zapcore.WarnLevel) + logger := zap.New(core).Sugar() + + _ = New(logger) + + matches := observed.FilterMessageSnippet("OpenTelemetry and Knative tracing both configured").All() + assert.Assert(t, len(matches) == 1, "should emit exactly one dual-config warning") +} + +func TestNewDoesNotWarnWhenOnlyOTelConfigured(t *testing.T) { + saveGlobalProvider(t) + t.Setenv(EnvOTLPEndpoint, "http://localhost:4317") + t.Setenv(EnvTracesSampler, "always_on") + + // Default Knative deployment: tracing-protocol "none" returns a noop-wrapped provider. + knativeNoop, err := knativetracing.NewTracerProvider(context.Background(), knativetracing.Config{Protocol: knativetracing.ProtocolNone}) + assert.NilError(t, err) + otel.SetTracerProvider(knativeNoop) + + core, observed := observer.New(zapcore.WarnLevel) + logger := zap.New(core).Sugar() + + _ = New(logger) + + matches := observed.FilterMessageSnippet("both configured").All() + assert.Equal(t, len(matches), 0, "should not warn when only OTel is configured") +}