From 915759e077d143341ff29bbcb855443c6662b995 Mon Sep 17 00:00:00 2001 From: YiqinZhang Date: Wed, 11 Mar 2026 17:03:23 -0400 Subject: [PATCH 1/2] prioritize saas slack channel --- pkg/common/config/config.go | 4 ++-- pkg/e2e/e2e.go | 26 +++++++++++++++++--------- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/pkg/common/config/config.go b/pkg/common/config/config.go index 5ddd35ebae..c3063ab587 100644 --- a/pkg/common/config/config.go +++ b/pkg/common/config/config.go @@ -14,7 +14,7 @@ import ( ) // channel id for #hcm-cicd-notifications -const defaultNotificationsChannel = "C06HQR8HN0L" +const DefaultNotificationsChannel = "C06HQR8HN0L" type Secret struct { FileLocation string @@ -949,7 +949,7 @@ func InitOSDe2eViper() { _ = viper.BindEnv(LogAnalysis.SlackWebhook, "LOG_ANALYSIS_SLACK_WEBHOOK") RegisterSecret(LogAnalysis.SlackWebhook, "notifier_slack_webhook") - viper.SetDefault(LogAnalysis.SlackChannel, defaultNotificationsChannel) + viper.SetDefault(LogAnalysis.SlackChannel, DefaultNotificationsChannel) _ = viper.BindEnv(LogAnalysis.SlackChannel, "LOG_ANALYSIS_SLACK_CHANNEL") // ----- KrknAI Configuration ----- diff --git a/pkg/e2e/e2e.go b/pkg/e2e/e2e.go index 2e2593971d..56039ecf53 100644 --- a/pkg/e2e/e2e.go +++ b/pkg/e2e/e2e.go @@ -276,14 +276,13 @@ func (o *E2EOrchestrator) Report(ctx context.Context) error { } } - // Send failure notification (with or without LLM analysis) - if o.result.ExitCode != config.Success && viper.GetBool(config.Tests.EnableSlackNotify) { + // Send deferred per-suite notifications first; fall back to global notification + // when no per-suite notifications were sent + deferredSent := o.sendDeferredNotifications(ctx) + if !deferredSent && o.result.ExitCode != config.Success && viper.GetBool(config.Tests.EnableSlackNotify) { o.sendFailureNotification(ctx) } - // Send deferred ad-hoc test suite notifications (with S3 URLs) - o.sendDeferredNotifications(ctx) - runner.ReportClusterInstallLogs(o.provider) return nil } @@ -294,9 +293,13 @@ func (o *E2EOrchestrator) Report(ctx context.Context) error { // presigned URLs are available. func (o *E2EOrchestrator) sendFailureNotification(ctx context.Context) { reportDir := viper.GetString(config.ReportDir) + channel := viper.GetString(config.Tests.SlackChannel) + if ch := viper.GetString(config.LogAnalysis.SlackChannel); ch != "" && ch != config.DefaultNotificationsChannel { + channel = ch + } notificationConfig := slack.BuildNotificationConfig( viper.GetString(config.LogAnalysis.SlackWebhook), - viper.GetString(config.LogAnalysis.SlackChannel), + channel, &slack.ClusterInfo{ ID: viper.GetString(config.Cluster.ID), Name: viper.GetString(config.Cluster.Name), @@ -345,17 +348,19 @@ func (o *E2EOrchestrator) sendFailureNotification(ctx context.Context) { // sendDeferredNotifications delivers Slack notifications that were queued by // adhoctestimages during test execution. Called by Report after S3 upload so // that presigned URLs are available for inclusion in the message. -func (o *E2EOrchestrator) sendDeferredNotifications(ctx context.Context) { +// Returns true if at least one notification was sent. +func (o *E2EOrchestrator) sendDeferredNotifications(ctx context.Context) bool { pending := adhoctestimages.DrainPendingNotifications() if len(pending) == 0 { - return + return false } webhook := viper.GetString(config.LogAnalysis.SlackWebhook) if webhook == "" || !viper.GetBool(config.Tests.EnableSlackNotify) { - return + return false } + sent := false artifactLinks := s3ResultsToArtifactLinks(o.s3Results) slackReporter := slack.NewSlackReporter() @@ -385,8 +390,11 @@ func (o *E2EOrchestrator) sendDeferredNotifications(ctx context.Context) { if err := slackReporter.Report(ctx, result, &cfg); err != nil { log.Printf("Failed to send deferred notification for %s: %v", p.TestSuite.Image, err) + } else { + sent = true } } + return sent } // uploadToS3 uploads the report directory contents to S3 and caches results. From c1963c5ac364d5e71e2d1776cec2406eec1fd600 Mon Sep 17 00:00:00 2001 From: YiqinZhang Date: Thu, 12 Mar 2026 12:39:59 -0400 Subject: [PATCH 2/2] decouple queueNotification from EnableAnalysis --- pkg/common/config/config.go | 4 +- pkg/e2e/adhoctestimages/adhoctestimages.go | 51 ++++++++++++++-------- pkg/e2e/e2e.go | 36 +++++---------- 3 files changed, 48 insertions(+), 43 deletions(-) diff --git a/pkg/common/config/config.go b/pkg/common/config/config.go index c3063ab587..5ddd35ebae 100644 --- a/pkg/common/config/config.go +++ b/pkg/common/config/config.go @@ -14,7 +14,7 @@ import ( ) // channel id for #hcm-cicd-notifications -const DefaultNotificationsChannel = "C06HQR8HN0L" +const defaultNotificationsChannel = "C06HQR8HN0L" type Secret struct { FileLocation string @@ -949,7 +949,7 @@ func InitOSDe2eViper() { _ = viper.BindEnv(LogAnalysis.SlackWebhook, "LOG_ANALYSIS_SLACK_WEBHOOK") RegisterSecret(LogAnalysis.SlackWebhook, "notifier_slack_webhook") - viper.SetDefault(LogAnalysis.SlackChannel, DefaultNotificationsChannel) + viper.SetDefault(LogAnalysis.SlackChannel, defaultNotificationsChannel) _ = viper.BindEnv(LogAnalysis.SlackChannel, "LOG_ANALYSIS_SLACK_CHANNEL") // ----- KrknAI Configuration ----- diff --git a/pkg/e2e/adhoctestimages/adhoctestimages.go b/pkg/e2e/adhoctestimages/adhoctestimages.go index 0d181f3e9c..01b0679a30 100644 --- a/pkg/e2e/adhoctestimages/adhoctestimages.go +++ b/pkg/e2e/adhoctestimages/adhoctestimages.go @@ -129,19 +129,43 @@ var _ = ginkgo.Describe("Ad Hoc Test Images", ginkgo.Ordered, ginkgo.ContinueOnF } } - if len(allFailures) > 0 && viper.GetBool(config.LogAnalysis.EnableAnalysis) { + if len(allFailures) > 0 { combinedErr := fmt.Errorf("failures in %s: %s", testImage, strings.Join(allFailures, "; ")) - runLogAnalysisForAdHocTestImage(ctx, logger, testSuite, combinedErr, exeConfig.OutputDir) + var analysisContent string + if viper.GetBool(config.LogAnalysis.EnableAnalysis) { + analysisContent = runLogAnalysisForAdHocTestImage(ctx, logger, testSuite, combinedErr, exeConfig.OutputDir) + } + queueNotification(testSuite, analysisContent) } }, testImageEntries) }) -// runLogAnalysisForAdHocTestImage runs AI analysis and queues the result for -// deferred Slack delivery. The engine runs without sending notifications because -// S3 artifacts have not been uploaded yet at this point. The queued result is -// later sent by Report after S3 upload populates presigned URLs. -func runLogAnalysisForAdHocTestImage(ctx context.Context, logger logr.Logger, testSuite config.TestSuite, err error, artifactsDir string) { +// queueNotification adds a PendingNotification for deferred Slack delivery. +// Called directly when log analysis is disabled so notifications are still sent. +func queueNotification(testSuite config.TestSuite, analysisContent string) { + clusterInfo := &analysisengine.ClusterInfo{ + ID: viper.GetString(config.Cluster.ID), + Name: viper.GetString(config.Cluster.Name), + Provider: viper.GetString(config.Provider), + Region: viper.GetString(config.CloudProvider.Region), + CloudProvider: viper.GetString(config.CloudProvider.CloudProviderID), + Version: viper.GetString(config.Cluster.Version), + } + pendingMu.Lock() + pendingNotifications = append(pendingNotifications, PendingNotification{ + AnalysisContent: analysisContent, + TestSuite: testSuite, + ClusterInfo: clusterInfo, + Env: viper.GetString(ocmprovider.Env), + }) + pendingMu.Unlock() +} + +// runLogAnalysisForAdHocTestImage runs AI analysis and returns the result content. +// Returns an empty string if analysis fails. The caller is responsible for +// queuing the notification via queueNotification. +func runLogAnalysisForAdHocTestImage(ctx context.Context, logger logr.Logger, testSuite config.TestSuite, err error, artifactsDir string) string { logger.Info("Running Log analysis for test image", "image", testSuite.Image, "slackChannel", testSuite.SlackChannel) clusterInfo := &analysisengine.ClusterInfo{ @@ -166,24 +190,17 @@ func runLogAnalysisForAdHocTestImage(ctx context.Context, logger logr.Logger, te engine, err := analysisengine.New(ctx, engineConfig) if err != nil { logger.Error(err, "Unable to create analysis engine for image", "image", testSuite.Image) - return + return "" } result, runErr := engine.Run(ctx) if runErr != nil { logger.Error(runErr, "Log analysis failed for image", "image", testSuite.Image) - return + return "" } logger.Info("Log analysis completed successfully", "image", testSuite.Image, "resultsDir", fmt.Sprintf("%s/%s/", artifactsDir, analysisengine.AnalysisDirName)) log.Printf("=== Log Analysis Result for %s ===\n%s", testSuite.Image, result.Content) - pendingMu.Lock() - pendingNotifications = append(pendingNotifications, PendingNotification{ - AnalysisContent: result.Content, - TestSuite: testSuite, - ClusterInfo: clusterInfo, - Env: viper.GetString(ocmprovider.Env), - }) - pendingMu.Unlock() + return result.Content } diff --git a/pkg/e2e/e2e.go b/pkg/e2e/e2e.go index 56039ecf53..9693c3262d 100644 --- a/pkg/e2e/e2e.go +++ b/pkg/e2e/e2e.go @@ -276,10 +276,12 @@ func (o *E2EOrchestrator) Report(ctx context.Context) error { } } - // Send deferred per-suite notifications first; fall back to global notification - // when no per-suite notifications were sent - deferredSent := o.sendDeferredNotifications(ctx) - if !deferredSent && o.result.ExitCode != config.Success && viper.GetBool(config.Tests.EnableSlackNotify) { + // Drain per-suite pending notifications. If any were queued (suites ran), + // send them; otherwise fall back to global failure notification. + pending := adhoctestimages.DrainPendingNotifications() + if len(pending) > 0 { + o.sendDeferredNotifications(ctx, pending) + } else if o.result.ExitCode != config.Success && viper.GetBool(config.Tests.EnableSlackNotify) { o.sendFailureNotification(ctx) } @@ -293,13 +295,9 @@ func (o *E2EOrchestrator) Report(ctx context.Context) error { // presigned URLs are available. func (o *E2EOrchestrator) sendFailureNotification(ctx context.Context) { reportDir := viper.GetString(config.ReportDir) - channel := viper.GetString(config.Tests.SlackChannel) - if ch := viper.GetString(config.LogAnalysis.SlackChannel); ch != "" && ch != config.DefaultNotificationsChannel { - channel = ch - } notificationConfig := slack.BuildNotificationConfig( viper.GetString(config.LogAnalysis.SlackWebhook), - channel, + viper.GetString(config.Tests.SlackChannel), &slack.ClusterInfo{ ID: viper.GetString(config.Cluster.ID), Name: viper.GetString(config.Cluster.Name), @@ -345,22 +343,15 @@ func (o *E2EOrchestrator) sendFailureNotification(ctx context.Context) { } } -// sendDeferredNotifications delivers Slack notifications that were queued by -// adhoctestimages during test execution. Called by Report after S3 upload so -// that presigned URLs are available for inclusion in the message. -// Returns true if at least one notification was sent. -func (o *E2EOrchestrator) sendDeferredNotifications(ctx context.Context) bool { - pending := adhoctestimages.DrainPendingNotifications() - if len(pending) == 0 { - return false - } - +// sendDeferredNotifications delivers the given Slack notifications that were +// queued by adhoctestimages during test execution. Called by Report after S3 +// upload so that presigned URLs are available for inclusion in the message. +func (o *E2EOrchestrator) sendDeferredNotifications(ctx context.Context, pending []adhoctestimages.PendingNotification) { webhook := viper.GetString(config.LogAnalysis.SlackWebhook) if webhook == "" || !viper.GetBool(config.Tests.EnableSlackNotify) { - return false + return } - sent := false artifactLinks := s3ResultsToArtifactLinks(o.s3Results) slackReporter := slack.NewSlackReporter() @@ -390,11 +381,8 @@ func (o *E2EOrchestrator) sendDeferredNotifications(ctx context.Context) bool { if err := slackReporter.Report(ctx, result, &cfg); err != nil { log.Printf("Failed to send deferred notification for %s: %v", p.TestSuite.Image, err) - } else { - sent = true } } - return sent } // uploadToS3 uploads the report directory contents to S3 and caches results.