diff --git a/docs/partials/metrics.md b/docs/partials/metrics.md index 0111c106..0b0e95ee 100644 --- a/docs/partials/metrics.md +++ b/docs/partials/metrics.md @@ -241,6 +241,36 @@ github_runner_repo_busy{owner, id, name, os, status} github_runner_repo_online{owner, id, name, os, status} : Static metrics of runner is online or not +github_status_actions_up{} +: Current health status of Actions on githubstatus.com + +github_status_api_requests_up{} +: Current health status of API Requests on githubstatus.com + +github_status_codespaces_up{} +: Current health status of Codespaces on githubstatus.com + +github_status_copilot_up{} +: Current health status of Copilot on githubstatus.com + +github_status_git_operations_up{} +: Current health status of Git Operations on githubstatus.com + +github_status_issues_up{} +: Current health status of Issues on githubstatus.com + +github_status_packages_up{} +: Current health status of Packages on githubstatus.com + +github_status_pages_up{} +: Current health status of Pages on githubstatus.com + +github_status_pull_requests_up{} +: Current health status of Pull Requests on githubstatus.com + +github_status_webhooks_up{} +: Current health status of Webhooks on githubstatus.com + github_workflow_job_created_timestamp{owner, repo, name, title, branch, sha, identifier, run_id, run_attempt, labels, runner_id, runner_name, runner_group_id, runner_group_name, workflow_name, conclusion} : Timestamp when the workflow job have been created diff --git a/hack/generate-metrics-docs.go b/hack/generate-metrics-docs.go index 168bdb06..a119efb1 100644 --- a/hack/generate-metrics-docs.go +++ b/hack/generate-metrics-docs.go @@ -65,6 +65,11 @@ func main() { exporter.NewWorkflowJobCollector(slog.Default(), nil, nil, nil, nil, cfg).Metrics()..., ) + collectors = append( + collectors, + exporter.NewStatusCollector(slog.Default(), nil, nil, nil, nil, cfg).Metrics()..., + ) + metrics := make([]metric, 0) metrics = append(metrics, metric{ diff --git 
a/pkg/action/server.go b/pkg/action/server.go index 63a314b6..d8ca7693 100644 --- a/pkg/action/server.go +++ b/pkg/action/server.go @@ -201,6 +201,19 @@ func handler(cfg *config.Config, db store.Store, logger *slog.Logger, client *gi )) } + if cfg.Collector.Status { + logger.Debug("Status collector registered") + + registry.MustRegister(exporter.NewStatusCollector( + logger, + client, + db, + requestFailures, + requestDuration, + cfg.Target, + )) + } + reg := promhttp.HandlerFor( registry, promhttp.HandlerOpts{ diff --git a/pkg/command/command.go b/pkg/command/command.go index d0cd021b..7bba2dd1 100644 --- a/pkg/command/command.go +++ b/pkg/command/command.go @@ -363,6 +363,13 @@ func RootFlags(cfg *config.Config) []cli.Flag { Sources: cli.EnvVars("GITHUB_EXPORTER_COLLECTOR_RUNNERS"), Destination: &cfg.Collector.Runners, }, + &cli.BoolFlag{ + Name: "collector.status", + Value: false, + Usage: "Enable collector for github.com service status", + Sources: cli.EnvVars("GITHUB_EXPORTER_COLLECTOR_STATUS"), + Destination: &cfg.Collector.Status, + }, &cli.StringSliceFlag{ Name: "collector.runners.labels", Value: config.RunnerLabels(), diff --git a/pkg/config/config.go b/pkg/config/config.go index 18f47abe..fd3826ed 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -75,6 +75,7 @@ type Collector struct { WorkflowRuns bool WorkflowJobs bool Runners bool + Status bool } // Database defines the database specific configuration. 
diff --git a/pkg/exporter/status.go b/pkg/exporter/status.go
new file mode 100644
index 00000000..a92580f4
--- /dev/null
+++ b/pkg/exporter/status.go
@@ -0,0 +1,334 @@
+package exporter
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"log/slog"
+	"net/http"
+	"slices"
+	"strings"
+	"time"
+
+	"github.com/google/go-github/v74/github"
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/promhippie/github_exporter/pkg/config"
+	"github.com/promhippie/github_exporter/pkg/store"
+)
+
+const (
+	// statusSummaryURL is the githubstatus.com endpoint listing all components.
+	statusSummaryURL = "https://www.githubstatus.com/api/v2/summary.json"
+
+	// statusMaxBodyBytes bounds how much of the summary response is buffered.
+	statusMaxBodyBytes = 1 << 20
+)
+
+// statusComponent represents a GitHub.com service shown on githubstatus.com.
+type statusComponent string
+
+const (
+	compGitOperations statusComponent = "Git Operations"
+	compWebhooks      statusComponent = "Webhooks"
+	compAPIRequests   statusComponent = "API Requests"
+	compIssues        statusComponent = "Issues"
+	compPullRequests  statusComponent = "Pull Requests"
+	compActions       statusComponent = "Actions"
+	compPackages      statusComponent = "Packages"
+	compPages         statusComponent = "Pages"
+	compCodespaces    statusComponent = "Codespaces"
+	compCopilot       statusComponent = "Copilot"
+)
+
+// statusComponents defines the ordered list of services we expose as gauges.
+var statusComponents = []statusComponent{
+	compGitOperations,
+	compWebhooks,
+	compAPIRequests,
+	compIssues,
+	compPullRequests,
+	compActions,
+	compPackages,
+	compPages,
+	compCodespaces,
+	compCopilot,
+}
+
+// isStatusComponent reports whether name, ignoring surrounding whitespace, is
+// one of the entries of statusComponents.
+func isStatusComponent(name string) bool {
+	return slices.Contains(statusComponents, statusComponent(strings.TrimSpace(name)))
+}
+
+// StatusCollector exposes gauges for GitHub component status.
+type StatusCollector struct {
+	client     *github.Client
+	httpClient *http.Client
+	logger     *slog.Logger
+	db         store.Store
+	failures   *prometheus.CounterVec
+	duration   *prometheus.HistogramVec
+	config     config.Target
+
+	GitOperationsUp *prometheus.Desc
+	WebhooksUp      *prometheus.Desc
+	APIRequestsUp   *prometheus.Desc
+	IssuesUp        *prometheus.Desc
+	PullRequestsUp  *prometheus.Desc
+	ActionsUp       *prometheus.Desc
+	PackagesUp      *prometheus.Desc
+	PagesUp         *prometheus.Desc
+	CodespacesUp    *prometheus.Desc
+	CopilotUp       *prometheus.Desc
+}
+
+// statusDesc pairs a tracked component with the descriptor of its gauge.
+type statusDesc struct {
+	name statusComponent
+	desc *prometheus.Desc
+}
+
+// NewStatusCollector returns a new StatusCollector with metric descriptors only.
+// It performs no network I/O; githubstatus.com is queried on every Collect.
+func NewStatusCollector(logger *slog.Logger, client *github.Client, db store.Store, failures *prometheus.CounterVec, duration *prometheus.HistogramVec, cfg config.Target) *StatusCollector {
+	if failures != nil {
+		failures.WithLabelValues("status").Add(0)
+	}
+
+	// The gauges carry no variable labels.
+	var labels []string
+
+	// One HTTP client is kept for the collector's lifetime so that scrapes can
+	// reuse connections; its timeout covers the request and the body read.
+	httpClient := &http.Client{Timeout: cfg.Timeout}
+
+	return &StatusCollector{
+		client:     client,
+		httpClient: httpClient,
+		logger:     logger.With("collector", "status"),
+		db:         db,
+		failures:   failures,
+		duration:   duration,
+		config:     cfg,
+
+		GitOperationsUp: prometheus.NewDesc(
+			"github_status_git_operations_up",
+			"Current health status of Git Operations on githubstatus.com",
+			labels,
+			nil,
+		),
+		WebhooksUp: prometheus.NewDesc(
+			"github_status_webhooks_up",
+			"Current health status of Webhooks on githubstatus.com",
+			labels,
+			nil,
+		),
+		APIRequestsUp: prometheus.NewDesc(
+			"github_status_api_requests_up",
+			"Current health status of API Requests on githubstatus.com",
+			labels,
+			nil,
+		),
+		IssuesUp: prometheus.NewDesc(
+			"github_status_issues_up",
+			"Current health status of Issues on githubstatus.com",
+			labels,
+			nil,
+		),
+		PullRequestsUp: prometheus.NewDesc(
+			"github_status_pull_requests_up",
+			"Current health status of Pull Requests on githubstatus.com",
+			labels,
+			nil,
+		),
+		ActionsUp: prometheus.NewDesc(
+			"github_status_actions_up",
+			"Current health status of Actions on githubstatus.com",
+			labels,
+			nil,
+		),
+		PackagesUp: prometheus.NewDesc(
+			"github_status_packages_up",
+			"Current health status of Packages on githubstatus.com",
+			labels,
+			nil,
+		),
+		PagesUp: prometheus.NewDesc(
+			"github_status_pages_up",
+			"Current health status of Pages on githubstatus.com",
+			labels,
+			nil,
+		),
+		CodespacesUp: prometheus.NewDesc(
+			"github_status_codespaces_up",
+			"Current health status of Codespaces on githubstatus.com",
+			labels,
+			nil,
+		),
+		CopilotUp: prometheus.NewDesc(
+			"github_status_copilot_up",
+			"Current health status of Copilot on githubstatus.com",
+			labels,
+			nil,
+		),
+	}
+}
+
+// components returns every tracked component with its descriptor, in the
+// order of statusComponents. Metrics, Describe and Collect all iterate this
+// single table so they cannot drift apart.
+func (c *StatusCollector) components() []statusDesc {
+	return []statusDesc{
+		{compGitOperations, c.GitOperationsUp},
+		{compWebhooks, c.WebhooksUp},
+		{compAPIRequests, c.APIRequestsUp},
+		{compIssues, c.IssuesUp},
+		{compPullRequests, c.PullRequestsUp},
+		{compActions, c.ActionsUp},
+		{compPackages, c.PackagesUp},
+		{compPages, c.PagesUp},
+		{compCodespaces, c.CodespacesUp},
+		{compCopilot, c.CopilotUp},
+	}
+}
+
+// Metrics returns descriptors for documentation generation.
+func (c *StatusCollector) Metrics() []*prometheus.Desc {
+	components := c.components()
+	descs := make([]*prometheus.Desc, 0, len(components))
+	for _, comp := range components {
+		descs = append(descs, comp.desc)
+	}
+
+	return descs
+}
+
+// Describe sends all possible descriptors.
+func (c *StatusCollector) Describe(ch chan<- *prometheus.Desc) {
+	for _, comp := range c.components() {
+		ch <- comp.desc
+	}
+}
+
+// Collect fetches the component summary from githubstatus.com once and emits
+// one gauge per tracked component: 1 when its status is "operational" and 0
+// for any other status. When the summary cannot be fetched or decoded the
+// failure counter is incremented and no gauges are emitted, so an outage of
+// the status page is not reported as an outage of every GitHub service.
+func (c *StatusCollector) Collect(ch chan<- prometheus.Metric) {
+	now := time.Now()
+	statuses, err := c.fetchStatuses()
+
+	// Observe failed attempts too, so slow or timed out requests stay visible.
+	if c.duration != nil {
+		c.duration.WithLabelValues("status").Observe(time.Since(now).Seconds())
+	}
+
+	if err != nil {
+		c.logger.Error("Failed to fetch status summary", "err", err)
+		if c.failures != nil {
+			c.failures.WithLabelValues("status").Inc()
+		}
+
+		return
+	}
+
+	for _, comp := range c.components() {
+		name := string(comp.name)
+		operational, found := statuses[name]
+
+		up := 0.0
+		switch {
+		case !found:
+			// A component missing from a valid summary is reported as down
+			// and logged for visibility.
+			c.logger.Warn("Component status not found in status summary", "component", name)
+		case operational:
+			up = 1.0
+		}
+
+		c.logger.Debug("Component status scraped", "component", name, "up", up)
+
+		ch <- prometheus.MustNewConstMetric(
+			comp.desc,
+			prometheus.GaugeValue,
+			up,
+		)
+	}
+}
+
+// fetchStatuses downloads the summary document and returns a map of tracked
+// component name -> up (true if operational).
+func (c *StatusCollector) fetchStatuses() (map[string]bool, error) {
+	client := c.httpClient
+	if client == nil {
+		// Collector built without NewStatusCollector; keep the timeout anyway.
+		client = &http.Client{Timeout: c.config.Timeout}
+	}
+
+	req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, statusSummaryURL, nil)
+	if err != nil {
+		return nil, fmt.Errorf("building request: %w", err)
+	}
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return nil, fmt.Errorf("sending request: %w", err)
+	}
+	defer func() { _ = resp.Body.Close() }()
+
+	// Error responses are not summary documents and must not be read as
+	// component states.
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("unexpected response status %q", resp.Status)
+	}
+
+	body, err := io.ReadAll(io.LimitReader(resp.Body, statusMaxBodyBytes))
+	if err != nil {
+		return nil, fmt.Errorf("reading response body: %w", err)
+	}
+
+	return parseStatusSummary(body)
+}
+
+// parseStatusSummary decodes the GitHub Status summary JSON and returns a map
+// of component name -> up (true if operational). Names and statuses are
+// trimmed, the status comparison is case-insensitive, and components outside
+// statusComponents are ignored.
+func parseStatusSummary(data []byte) (map[string]bool, error) {
+	var summary struct {
+		Components []struct {
+			Name   string `json:"name"`
+			Status string `json:"status"`
+		} `json:"components"`
+	}
+
+	if err := json.Unmarshal(data, &summary); err != nil {
+		return nil, fmt.Errorf("decoding status summary: %w", err)
+	}
+
+	result := make(map[string]bool, len(statusComponents))
+	for _, comp := range summary.Components {
+		name := strings.TrimSpace(comp.Name)
+		if !isStatusComponent(name) {
+			continue
+		}
+
+		result[name] = strings.EqualFold(strings.TrimSpace(comp.Status), "operational")
+	}
+
+	return result, nil
+}
+
+// extractStatusFromJSON parses the GitHub Status summary JSON and returns
+// a map of component name -> up (true if operational). Malformed input yields
+// an empty map; use parseStatusSummary when the error matters.
+func extractStatusFromJSON(data []byte) map[string]bool {
+	statuses, err := parseStatusSummary(data)
+	if err != nil {
+		return map[string]bool{}
+	}
+
+	return statuses
+}
diff --git a/pkg/exporter/status_test.go b/pkg/exporter/status_test.go
new file mode 100644
index 00000000..8f9b0a45
--- /dev/null
+++ b/pkg/exporter/status_test.go
@@ -0,0 +1,98 @@
+package exporter
+
+import (
+	"testing"
+)
+
+func TestExtractStatusFromJSON_Basic(t *testing.T) {
+	data := []byte(`{
+		"components": [
+			{"name": "Git Operations", "status": "operational"},
+			{"name": "Webhooks", "status": "partial_outage"}
+		]
+	}`)
+
+	got := extractStatusFromJSON(data)
+
+	if up, ok := got["Git Operations"]; !ok || !up {
+		t.Fatalf("expected Git Operations to be up, got ok=%v up=%v", ok, up)
+	}
+	if up, ok := got["Webhooks"]; !ok || up {
+		t.Fatalf("expected Webhooks to be down, got ok=%v up=%v", ok, up)
+	}
+}
+
+func TestExtractStatusFromJSON_NonOperationalVariants(t *testing.T) {
+	data := []byte(`{
+		"components": [
+			{"name": "Issues", "status": "degraded_performance"},
+			{"name": "Pages", "status": "partial_outage"},
+			{"name": "Copilot", "status": "major_outage"},
+			{"name": "Actions", "status": "maintenance"}
+		]
+	}`)
+
+	got := extractStatusFromJSON(data)
+
+	for _, name := range []string{"Issues", "Pages", "Copilot", "Actions"} {
+		up, ok := got[name]
+		if !ok {
+			t.Fatalf("expected known component %q to be present", name)
+		}
+		if up {
+			t.Fatalf("expected %s to be down for non-operational status", name)
+		}
+	}
+}
+
+func TestExtractStatusFromJSON_UnknownComponentsIgnored(t *testing.T) {
+	data := []byte(`{
+		"components": [
+			{"name": "Not A Known Component", "status": "operational"},
+			{"name": "API Requests", "status": "operational"}
+		]
+	}`)
+
+	got := extractStatusFromJSON(data)
+
+	if _, ok := got["Not A Known Component"]; ok {
+		t.Fatalf("expected unknown component to be ignored (not present)")
+	}
+	if up, ok := got["API Requests"]; !ok || !up {
+		t.Fatalf("expected known component 'API Requests' to be up and present, got ok=%v up=%v", ok, up)
+	}
+}
+
+func TestExtractStatusFromJSON_TrimsNameAndStatus(t *testing.T) {
+	data := []byte(`{
+		"components": [
+			{"name": " Git Operations ", "status": " operational "}
+		]
+	}`)
+
+	got := extractStatusFromJSON(data)
+	if up, ok := got["Git Operations"]; !ok || !up {
+		t.Fatalf("expected trimmed name/status to be recognized as up, got ok=%v up=%v", ok, up)
+	}
+}
+
+func TestExtractStatusFromJSON_Empty(t *testing.T) {
+	data := []byte(`{"components": []}`)
+	got := extractStatusFromJSON(data)
+	if len(got) != 0 {
+		t.Fatalf("expected empty result when components list is empty, got len=%d", len(got))
+	}
+}
+
+func TestExtractStatusFromJSON_InvalidJSON(t *testing.T) {
+	got := extractStatusFromJSON([]byte(`<html>Service Unavailable</html>`))
+	if len(got) != 0 {
+		t.Fatalf("expected empty result for malformed input, got len=%d", len(got))
+	}
+}
+
+func TestParseStatusSummary_InvalidJSON(t *testing.T) {
+	if _, err := parseStatusSummary([]byte(`<html>Service Unavailable</html>`)); err == nil {
+		t.Fatalf("expected an error for malformed input")
+	}
+}