diff --git a/pkg/cli/clickhouse_cmd.go b/pkg/cli/clickhouse_cmd.go index 00539dcc..1f2b080b 100644 --- a/pkg/cli/clickhouse_cmd.go +++ b/pkg/cli/clickhouse_cmd.go @@ -3,7 +3,9 @@ package cli import ( "bytes" "encoding/csv" + "errors" "fmt" + "strings" "github.com/spf13/cobra" ) @@ -76,7 +78,7 @@ func runClickHouseOperation(cmd *cobra.Command, operationID, datasource, sql str "sql": sql, }) if err != nil { - return err + return wrapClickHouseError(err) } if raw { @@ -125,6 +127,35 @@ func printClickHouseJSON(data []byte, raw bool) error { }) } +// clickHouseUnknownTableHint sits next to the datasource (rather than in the +// generic error helper) and replaces the misleading generic 404 hint that a +// ClickHouse unknown-table/database error would otherwise surface. +const clickHouseUnknownTableHint = "ClickHouse could not find that table. List real tables with 'panda schema '. " + + "OTel log tables must be fully qualified: use 'external.otel_logs' for hosted devnets/testnets, " + + "or 'otel.otel_logs' for local Kurtosis devnets." + +// wrapClickHouseError swaps the generic status hint for a table-specific hint +// when ClickHouse reports an unknown table/database. ClickHouse returns these +// as HTTP 404, whose generic hint would wrongly suggest the datasource itself +// is missing. +func wrapClickHouseError(err error) error { + var apiErr *apiError + if !errors.As(err, &apiErr) || !isUnknownClickHouseTableError(apiErr.Message) { + return err + } + + return fmt.Errorf("HTTP %d: %s\n\n hint: %s", apiErr.Status, apiErr.Message, clickHouseUnknownTableHint) +} + +func isUnknownClickHouseTableError(message string) bool { + lower := strings.ToLower(message) + + return strings.Contains(lower, "unknown table") || + strings.Contains(lower, "unknown_table") || + strings.Contains(lower, "unknown database") || + strings.Contains(lower, "unknown_database") +} + func parseClickHouseTSV(data []byte) ([]string, [][]string, error) { trimmed := bytes.TrimSpace(data) if len(trimmed) == 0 { diff --git a/pkg/cli/clickhouse_cmd_test.go b/pkg/cli/clickhouse_cmd_test.go new file mode 100644 index 00000000..8355008d --- /dev/null +++ b/pkg/cli/clickhouse_cmd_test.go @@ -0,0 +1,69 @@ +package cli + +import ( + "errors" + "strings" + "testing" +) + +func TestWrapClickHouseError(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + err error + wantHint bool + wantGeneric bool + }{ + { + name: "unknown table replaces generic 404 hint", + err: &apiError{ + Status: 404, + Message: "Code: 60. DB::Exception: Unknown table expression identifier 'otel_logs'. (UNKNOWN_TABLE)", + }, + wantHint: true, + wantGeneric: false, + }, + { + name: "unknown database gets the clickhouse hint", + err: &apiError{ + Status: 404, + Message: "Code: 81. DB::Exception: Database bogusdb does not exist. (UNKNOWN_DATABASE)", + }, + wantHint: true, + wantGeneric: false, + }, + { + name: "unknown datasource keeps the generic hint", + err: &apiError{ + Status: 404, + Message: `clickhouse datasource "nonexistent" not found`, + }, + wantHint: false, + wantGeneric: true, + }, + { + name: "non-apiError is returned untouched", + err: errors.New("connection refused"), + wantHint: false, + wantGeneric: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + got := wrapClickHouseError(tt.err).Error() + + if hasHint := strings.Contains(got, clickHouseUnknownTableHint); hasHint != tt.wantHint { + t.Errorf("clickhouse hint present = %v, want %v\nerror: %s", hasHint, tt.wantHint, got) + } + + genericHint := serverErrorHint(404, "") + if hasGeneric := strings.Contains(got, genericHint); hasGeneric != tt.wantGeneric { + t.Errorf("generic hint present = %v, want %v\nerror: %s", hasGeneric, tt.wantGeneric, got) + } + }) + } +} diff --git a/pkg/cli/dora.go b/pkg/cli/dora.go index 2f4f9c80..d1234c21 100644 --- a/pkg/cli/dora.go +++ b/pkg/cli/dora.go @@ -136,7 +136,19 @@ var doraSlotCmd = &cobra.Command{ var doraEpochCmd = &cobra.Command{ Use: "epoch ", Short: "Get epoch summary (always JSON)", - Args: cobra.ExactArgs(2), + Long: `Get the Dora summary for a single epoch (always JSON). + +The summary fields are nested under the top-level "data" key and use lowercase +names (e.g. data.finalized, data.globalparticipationrate, data.attestationscount). + +Query a COMPLETED epoch. A future epoch returns no data, and the head +(in-progress) epoch reports partial, artificially low participation because it +is still being filled (and can be null right at the epoch boundary). For a +reliable snapshot use head_epoch - 1. + +Examples: + panda dora epoch hoodi 100`, + Args: cobra.ExactArgs(2), RunE: func(cmd *cobra.Command, args []string) error { response, err := runServerOperationRaw(cmd, "dora.get_epoch", map[string]any{ "network": args[0], diff --git a/pkg/cli/ethnode.go b/pkg/cli/ethnode.go index 25a18a1d..bc63382a 100644 --- a/pkg/cli/ethnode.go +++ b/pkg/cli/ethnode.go @@ -16,8 +16,21 @@ var ethnodeCmd = &cobra.Command{ Long: `Direct access to Ethereum beacon and execution node APIs. Nodes are identified by network and instance name (e.g., "lighthouse-geth-1"). +Discovering instance names: + The proxy relays to any / host on demand and holds NO + enumerable instance list — there is no "list nodes" command, and the + -- convention is NOT guessable (a plausible name like + "teku-geth-1" may simply not exist, which surfaces as an HTTP 502). Get the + real names from a source that knows what nodes exist: + • Dora: panda dora ... (the /v1/clients/consensus endpoint lists CL nodes) + • OTel logs: panda clickhouse query clickhouse-raw \ + "SELECT DISTINCT ResourceAttributes['host.name'] FROM external.otel_logs \ + WHERE ResourceAttributes['network'] = '' \ + AND Timestamp >= now() - INTERVAL 1 HOUR" + Examples: panda ethnode list-datasources + panda ethnode list-networks panda ethnode syncing dencun-devnet-12 lighthouse-geth-1 panda ethnode peers dencun-devnet-12 lighthouse-geth-1 panda ethnode finality dencun-devnet-12 lighthouse-geth-1 @@ -29,6 +42,7 @@ func init() { ethnodeCmd.AddCommand( ethNodeListDatasourcesCmd, + ethNodeListNetworksCmd, ethNodeSyncingCmd, ethNodeVersionCmd, ethNodeHealthCmd, @@ -65,6 +79,24 @@ var ethNodeListDatasourcesCmd = &cobra.Command{ }, } +var ethNodeListNetworksCmd = &cobra.Command{ + Use: "list-networks", + Short: "List networks reachable for direct node access", + Long: `List the networks that support direct ethnode (beacon/execution) access. + +This lists NETWORKS, not node instances. Instance names cannot be enumerated — +see 'panda ethnode --help' for how to discover the node names within a network.`, + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, _ []string) error { + response, err := runServerOperation(cmd, "ethnode.list_networks", map[string]any{}) + if err != nil { + return err + } + + return printListing(response, "networks", "No networks reachable for direct node access.") + }, +} + var ethNodeSyncingCmd = &cobra.Command{ Use: "syncing ", Short: "Get beacon node sync status", @@ -309,6 +341,10 @@ var ethNodeBeaconGetCmd = &cobra.Command{ Long: `Make a GET request to any beacon API endpoint. The path should start with / (e.g., /eth/v1/node/identity). + must be a real node name — instances cannot be enumerated and the +client-pair convention is not guessable. An HTTP 502 usually means the instance +does not exist; see 'panda ethnode --help' for how to discover valid names. + Examples: panda ethnode beacon-get my-devnet lighthouse-geth-1 /eth/v1/node/identity panda ethnode beacon-get my-devnet lighthouse-geth-1 /eth/v1/config/deposit_contract`, diff --git a/pkg/cli/loki_cmd.go b/pkg/cli/loki_cmd.go index 9f2fbd1c..cbf6d6f3 100644 --- a/pkg/cli/loki_cmd.go +++ b/pkg/cli/loki_cmd.go @@ -2,13 +2,60 @@ package cli import ( "encoding/json" + "errors" "fmt" + "net/http" "strconv" + "strings" "time" "github.com/spf13/cobra" ) +// lokiRedirectSynopsis points users at the ClickHouse OTel log tables, which +// have replaced Loki for container logs. +const lokiRedirectSynopsis = `Loki is not available here. Container logs now live in ClickHouse — query them with 'panda clickhouse query': + + • Hosted (multi-VM) devnets/testnets → external.otel_logs (filter ResourceAttributes['network']) + • Local Kurtosis devnets → otel.otel_logs (filter EnclaveName) + +Qualify the table with its database — the example below targets a hosted cluster. +Severity lives in Body for these Docker logs (SeverityText is usually empty), so +match on Body. Example: + + panda clickhouse query clickhouse-raw "SELECT Timestamp, ResourceAttributes['host.name'] AS host, Body \ + FROM external.otel_logs \ + WHERE ResourceAttributes['network'] = '' \ + AND Timestamp >= now() - INTERVAL 1 HOUR \ + AND match(Body, '(?i)(crit|err|error|fatal)') \ + ORDER BY Timestamp DESC LIMIT 100"` + +// redirectLokiError swaps the generic status hint for the ClickHouse redirect +// synopsis when a Loki command failed because the Loki module is unavailable on +// this server. The module being disabled returns HTTP 404, whose generic hint +// ("check 'panda datasources'") is redundant once we point straight at the +// replacement datasource. +func redirectLokiError(err error) error { + var apiErr *apiError + if !errors.As(err, &apiErr) || !lokiUnavailable(apiErr) { + return err + } + + return fmt.Errorf("HTTP %d: %s\n\n%s", apiErr.Status, apiErr.Message, lokiRedirectSynopsis) +} + +// lokiUnavailable reports whether an API error means the Loki module/datasource +// is not served here (rather than a genuine query failure against a live Loki). +func lokiUnavailable(e *apiError) bool { + if e.Status == http.StatusNotFound { + return true + } + + lower := strings.ToLower(e.Message) + + return strings.Contains(lower, "not enabled") || strings.Contains(lower, "not available") +} + var ( lokiLimit int lokiStart string @@ -27,6 +74,8 @@ DEPRECATED: Loki is being retired for container logs. Query the ClickHouse OTel log tables instead via 'panda clickhouse query': • Hosted (multi-VM) devnets/testnets → external.otel_logs (filter ResourceAttributes['network']) • Local Kurtosis devnets → otel.otel_logs (filter EnclaveName) +If a command reports Loki is unavailable, the error output prints a ready-to-run +ClickHouse example. Examples: panda loki list-datasources @@ -77,7 +126,7 @@ var lokiListDatasourcesCmd = &cobra.Command{ RunE: func(cmd *cobra.Command, _ []string) error { response, err := runServerOperation(cmd, "loki.list_datasources", map[string]any{}) if err != nil { - return err + return redirectLokiError(err) } return printDatasourceList(response) @@ -98,7 +147,7 @@ var lokiQueryCmd = &cobra.Command{ "direction": lokiDirection, }) if err != nil { - return err + return redirectLokiError(err) } if isJSON() { @@ -122,7 +171,7 @@ var lokiQueryInstantCmd = &cobra.Command{ "direction": lokiDirection, }) if err != nil { - return err + return redirectLokiError(err) } if isJSON() { @@ -144,7 +193,7 @@ var lokiLabelsCmd = &cobra.Command{ "end": lokiEnd, }) if err != nil { - return err + return redirectLokiError(err) } if isJSON() { @@ -167,7 +216,7 @@ var lokiLabelValuesCmd = &cobra.Command{ "end": lokiEnd, }) if err != nil { - return err + return redirectLokiError(err) } if isJSON() { diff --git a/pkg/cli/loki_cmd_test.go b/pkg/cli/loki_cmd_test.go new file mode 100644 index 00000000..3b009b89 --- /dev/null +++ b/pkg/cli/loki_cmd_test.go @@ -0,0 +1,61 @@ +package cli + +import ( + "errors" + "strings" + "testing" +) + +func TestRedirectLokiError(t *testing.T) { + t.Parallel() + + genericHint := serverErrorHint(404, "") + + tests := []struct { + name string + err error + wantSynopsis bool + wantGeneric bool + }{ + { + name: "module not enabled (404) redirects and drops generic hint", + err: &apiError{Status: 404, Message: `module "loki" is not enabled`}, + wantSynopsis: true, + wantGeneric: false, + }, + { + name: "not available message redirects", + err: &apiError{Status: 503, Message: "loki datasource not available"}, + wantSynopsis: true, + wantGeneric: false, + }, + { + name: "genuine query failure against live loki is not redirected", + err: &apiError{Status: 400, Message: "parse error: unexpected token"}, + wantSynopsis: false, + wantGeneric: false, + }, + { + name: "non-apiError is returned untouched", + err: errors.New("connection refused"), + wantSynopsis: false, + wantGeneric: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + got := redirectLokiError(tt.err).Error() + + if hasSynopsis := strings.Contains(got, lokiRedirectSynopsis); hasSynopsis != tt.wantSynopsis { + t.Errorf("synopsis present = %v, want %v\nerror: %s", hasSynopsis, tt.wantSynopsis, got) + } + + if hasGeneric := strings.Contains(got, genericHint); hasGeneric != tt.wantGeneric { + t.Errorf("generic hint present = %v, want %v\nerror: %s", hasGeneric, tt.wantGeneric, got) + } + }) + } +} diff --git a/pkg/cli/server_helpers.go b/pkg/cli/server_helpers.go index cb1de2d2..a6c60cf9 100644 --- a/pkg/cli/server_helpers.go +++ b/pkg/cli/server_helpers.go @@ -432,6 +432,23 @@ func getBuildStatus(ctx context.Context, runID int64) (*serverapi.BuildStatusRes return &response, nil } +// apiError is a non-2xx response from the server. It exposes the status and +// message so a command can attach a datasource-specific hint in place of the +// generic status hint — e.g. a ClickHouse unknown-table error arrives as a 404 +// whose generic hint would wrongly point at a missing datasource. +type apiError struct { + Status int + Message string +} + +func (e *apiError) Error() string { + if hint := serverErrorHint(e.Status, e.Message); hint != "" { + return fmt.Sprintf("HTTP %d: %s\n\n hint: %s", e.Status, e.Message, hint) + } + + return fmt.Sprintf("HTTP %d: %s", e.Status, e.Message) +} + func decodeAPIError(status int, data []byte) error { var message string @@ -446,12 +463,7 @@ func decodeAPIError(status int, data []byte) error { message = strings.TrimSpace(string(data)) } - hint := serverErrorHint(status, message) - if hint != "" { - return fmt.Errorf("HTTP %d: %s\n\n hint: %s", status, message, hint) - } - - return fmt.Errorf("HTTP %d: %s", status, message) + return &apiError{Status: status, Message: message} } func serverErrorHint(status int, _ string) string {