Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion chains/cosmos/runner/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,11 @@ func NewRunner(ctx context.Context, spec loadtesttypes.LoadTestSpec) (*Runner, e
}

if spec.Relay != nil {
relayerClient, err := iftrelayer.NewGRPCClient(*spec.Relay, spec.ChainID)
var relayMetrics *iftrelayer.Metrics
if spec.MetricsEnabled {
relayMetrics = iftrelayer.NewMetrics()
}
relayerClient, err := iftrelayer.NewGRPCClient(*spec.Relay, spec.ChainID, relayMetrics)
if err != nil {
return nil, fmt.Errorf("create relayer client: %w", err)
}
Expand Down
6 changes: 5 additions & 1 deletion chains/ethereum/runner/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,11 @@ func NewRunner(ctx context.Context, logger *zap.Logger, spec loadtesttypes.LoadT
}

if spec.Relay != nil {
relayerClient, err := iftrelayer.NewGRPCClient(*spec.Relay, spec.ChainID)
var relayMetrics *iftrelayer.Metrics
if spec.MetricsEnabled {
relayMetrics = iftrelayer.NewMetrics()
}
relayerClient, err := iftrelayer.NewGRPCClient(*spec.Relay, spec.ChainID, relayMetrics)
if err != nil {
return nil, fmt.Errorf("create relayer client: %w", err)
}
Expand Down
17 changes: 16 additions & 1 deletion ift/relayer/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,10 @@ type GRPCClient struct {
client relayerapi.RelayerApiServiceClient
chainID string
timeout time.Duration
metrics *Metrics
}

func NewGRPCClient(cfg loadtesttypes.RelayConfig, chainID string) (*GRPCClient, error) {
func NewGRPCClient(cfg loadtesttypes.RelayConfig, chainID string, metrics *Metrics) (*GRPCClient, error) {
timeout := cfg.Timeout
if timeout == 0 {
timeout = 10 * time.Second
Expand All @@ -47,6 +48,7 @@ func NewGRPCClient(cfg loadtesttypes.RelayConfig, chainID string) (*GRPCClient,
client: relayerapi.NewRelayerApiServiceClient(conn),
chainID: chainID,
timeout: timeout,
metrics: metrics,
}, nil
}

Expand All @@ -58,24 +60,37 @@ func (c *GRPCClient) SubmitTxHash(ctx context.Context, txHash string) error {
select {
case <-ctx.Done():
timer.Stop()
if c.metrics != nil {
c.metrics.Failure.WithLabelValues(c.chainID).Inc()
}
return ctx.Err()
case <-timer.C:
}
}

callCtx, cancel := context.WithTimeout(ctx, c.timeout)
start := time.Now()
_, err := c.client.Relay(callCtx, &relayerapi.RelayRequest{
TxHash: txHash,
ChainId: c.chainID,
})
cancel()
if c.metrics != nil {
c.metrics.Duration.WithLabelValues(c.chainID).Observe(time.Since(start).Seconds())
}

if err == nil {
if c.metrics != nil {
c.metrics.Success.WithLabelValues(c.chainID).Inc()
}
return nil
}
lastErr = err
}

if c.metrics != nil {
c.metrics.Failure.WithLabelValues(c.chainID).Inc()
}
return fmt.Errorf("submit tx hash to relayer after %d attempts: %w", maxRelayRetries, lastErr)
}

Expand Down
42 changes: 42 additions & 0 deletions ift/relayer/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package relayer

import "github.com/prometheus/client_golang/prometheus"

// Name components shared by every relay metric declared in this file; they
// are passed as the Namespace and Subsystem options of each collector built
// in NewMetrics.
const (
// promNamespace is the Namespace option used for all relay metrics.
promNamespace = "catalyst"
// promSubsystem is the Subsystem option used for all relay metrics.
promSubsystem = "relay"
)

// Metrics bundles the Prometheus collectors used to instrument relay
// submissions. NewMetrics creates each vector with a single "chain_id" label.
type Metrics struct {
// Success counts tx hashes successfully submitted to the relayer.
Success *prometheus.CounterVec
// Failure counts tx hashes whose submission to the relayer failed.
Failure *prometheus.CounterVec
// Duration records, in seconds, how long a single gRPC relay request
// took (one observation per attempt).
Duration *prometheus.HistogramVec
}

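// NewMetrics constructs the relay metric vectors and registers them via
// prometheus.MustRegister, which panics if a collector with the same name is
// already registered; call it at most once per process. Pass the result to
// NewGRPCClient; pass nil to disable instrumentation entirely.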
func NewMetrics() *Metrics {
m := &Metrics{
Success: prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: promNamespace,
Subsystem: promSubsystem,
Name: "success_total",
Help: "Tx hashes successfully submitted to the relayer (terminal, per submission).",
}, []string{"chain_id"}),
Failure: prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: promNamespace,
Subsystem: promSubsystem,
Name: "failure_total",
Help: "Tx hashes that failed to be submitted to the relayer after all retries.",
}, []string{"chain_id"}),
Duration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: promNamespace,
Subsystem: promSubsystem,
Name: "duration_seconds",
Help: "Duration of a single gRPC relay request (per-attempt, not including retry backoff).",
Buckets: []float64{0.01, 0.05, 0.1, 0.25, 0.5, 1, 2, 5, 10},
}, []string{"chain_id"}),
}
prometheus.MustRegister(m.Success, m.Failure, m.Duration)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 MustRegister panics on duplicate registration

prometheus.MustRegister panics if the same metric name is registered twice in the default global registry. If NewMetrics() is called more than once in the same process — for example, in integration tests that instantiate a runner for each test case — the second call will panic. Consider using prometheus.Register with an already-registered check, or accepting a prometheus.Registerer as a parameter so callers can use isolated registries for tests.

return m
}
Loading