diff --git a/chains/cosmos/runner/runner.go b/chains/cosmos/runner/runner.go index 30110f4..e176a4d 100644 --- a/chains/cosmos/runner/runner.go +++ b/chains/cosmos/runner/runner.go @@ -189,7 +189,11 @@ func NewRunner(ctx context.Context, spec loadtesttypes.LoadTestSpec) (*Runner, e } if spec.Relay != nil { - relayerClient, err := iftrelayer.NewGRPCClient(*spec.Relay, spec.ChainID) + var relayMetrics *iftrelayer.Metrics + if spec.MetricsEnabled { + relayMetrics = iftrelayer.NewMetrics() + } + relayerClient, err := iftrelayer.NewGRPCClient(*spec.Relay, spec.ChainID, relayMetrics) if err != nil { return nil, fmt.Errorf("create relayer client: %w", err) } diff --git a/chains/ethereum/runner/runner.go b/chains/ethereum/runner/runner.go index 2d14cd0..ac30e2e 100644 --- a/chains/ethereum/runner/runner.go +++ b/chains/ethereum/runner/runner.go @@ -178,7 +178,11 @@ func NewRunner(ctx context.Context, logger *zap.Logger, spec loadtesttypes.LoadT } if spec.Relay != nil { - relayerClient, err := iftrelayer.NewGRPCClient(*spec.Relay, spec.ChainID) + var relayMetrics *iftrelayer.Metrics + if spec.MetricsEnabled { + relayMetrics = iftrelayer.NewMetrics() + } + relayerClient, err := iftrelayer.NewGRPCClient(*spec.Relay, spec.ChainID, relayMetrics) if err != nil { return nil, fmt.Errorf("create relayer client: %w", err) } diff --git a/ift/relayer/client.go b/ift/relayer/client.go index d5c37dd..2d7304b 100644 --- a/ift/relayer/client.go +++ b/ift/relayer/client.go @@ -26,9 +26,10 @@ type GRPCClient struct { client relayerapi.RelayerApiServiceClient chainID string timeout time.Duration + metrics *Metrics } -func NewGRPCClient(cfg loadtesttypes.RelayConfig, chainID string) (*GRPCClient, error) { +func NewGRPCClient(cfg loadtesttypes.RelayConfig, chainID string, metrics *Metrics) (*GRPCClient, error) { timeout := cfg.Timeout if timeout == 0 { timeout = 10 * time.Second @@ -47,6 +48,7 @@ func NewGRPCClient(cfg loadtesttypes.RelayConfig, chainID string) (*GRPCClient, client: 
relayerapi.NewRelayerApiServiceClient(conn), chainID: chainID, timeout: timeout, + metrics: metrics, }, nil } @@ -58,24 +60,37 @@ func (c *GRPCClient) SubmitTxHash(ctx context.Context, txHash string) error { select { case <-ctx.Done(): timer.Stop() + if c.metrics != nil { + c.metrics.Failure.WithLabelValues(c.chainID).Inc() + } return ctx.Err() case <-timer.C: } } callCtx, cancel := context.WithTimeout(ctx, c.timeout) + start := time.Now() _, err := c.client.Relay(callCtx, &relayerapi.RelayRequest{ TxHash: txHash, ChainId: c.chainID, }) cancel() + if c.metrics != nil { + c.metrics.Duration.WithLabelValues(c.chainID).Observe(time.Since(start).Seconds()) + } if err == nil { + if c.metrics != nil { + c.metrics.Success.WithLabelValues(c.chainID).Inc() + } return nil } lastErr = err } + if c.metrics != nil { + c.metrics.Failure.WithLabelValues(c.chainID).Inc() + } return fmt.Errorf("submit tx hash to relayer after %d attempts: %w", maxRelayRetries, lastErr) } diff --git a/ift/relayer/metrics.go b/ift/relayer/metrics.go new file mode 100644 index 0000000..2ec83c7 --- /dev/null +++ b/ift/relayer/metrics.go @@ -0,0 +1,42 @@ +package relayer + +import "github.com/prometheus/client_golang/prometheus" + +const ( + promNamespace = "catalyst" + promSubsystem = "relay" +) + +type Metrics struct { + Success *prometheus.CounterVec + Failure *prometheus.CounterVec + Duration *prometheus.HistogramVec +} + +// NewMetrics constructs and registers the relay metric vectors. Pass the +// result to NewGRPCClient; pass nil to disable instrumentation entirely. 
+func NewMetrics() *Metrics {
+	labels := []string{"chain_id"}
+
+	success := prometheus.NewCounterVec(prometheus.CounterOpts{
+		Namespace: promNamespace,
+		Subsystem: promSubsystem,
+		Name:      "success_total",
+		Help:      "Tx hashes successfully submitted to the relayer (terminal, per submission).",
+	}, labels)
+	failure := prometheus.NewCounterVec(prometheus.CounterOpts{
+		Namespace: promNamespace,
+		Subsystem: promSubsystem,
+		Name:      "failure_total",
+		Help:      "Tx hashes that failed to be submitted to the relayer after all retries.",
+	}, labels)
+	duration := prometheus.NewHistogramVec(prometheus.HistogramOpts{
+		Namespace: promNamespace,
+		Subsystem: promSubsystem,
+		Name:      "duration_seconds",
+		Help:      "Duration of a single gRPC relay request (per-attempt, not including retry backoff).",
+		Buckets:   []float64{0.01, 0.05, 0.1, 0.25, 0.5, 1, 2, 5, 10},
+	}, labels)
+
+	// NewMetrics may run more than once per process (each runner constructor
+	// calls it when metrics are enabled). The default registry rejects a second
+	// collector with the same descriptor, and MustRegister would turn that into
+	// a panic, so register idempotently and share the collectors that are
+	// already registered instead.
+	return &Metrics{
+		Success:  registerOrReuse(success).(*prometheus.CounterVec),
+		Failure:  registerOrReuse(failure).(*prometheus.CounterVec),
+		Duration: registerOrReuse(duration).(*prometheus.HistogramVec),
+	}
+}
+
+// registerOrReuse registers c with the default Prometheus registry and returns
+// the collector that callers should record into. If the registry reports that
+// an equivalent collector is already registered, that existing collector is
+// returned so every caller updates the same time series. Any other
+// registration error panics, which is what MustRegister did for all errors.
+func registerOrReuse(c prometheus.Collector) prometheus.Collector {
+	err := prometheus.Register(c)
+	if err == nil {
+		return c
+	}
+	if are, ok := err.(prometheus.AlreadyRegisteredError); ok {
+		return are.ExistingCollector
+	}
+	panic(err)
+}