From f12430d922d5307366fde0def79153aaa3d4591f Mon Sep 17 00:00:00 2001 From: Nicolas Grauss Date: Fri, 2 Jan 2026 19:07:58 +0100 Subject: [PATCH] SREP-2079: Making sure that alerts are processed in an atomic way The RHOBS or classic webhook may process an alert twice, either sequentially or in parallel, due to Prometheus or AlertManager redundancy. This change makes sure that the custom resources used to track notifications are tested and set (TAS) in an atomic way, which avoids sending notifications for duplicate alerts. --- pkg/handlers/webhookreceiver.go | 439 +++++---- pkg/handlers/webhookreceiver_test.go | 798 +++++++--------- pkg/handlers/webhookrhobsreceiver.go | 512 ++++++----- pkg/handlers/webhookrhobsreceiver_test.go | 1017 +++++++++------------ 4 files changed, 1291 insertions(+), 1475 deletions(-) diff --git a/pkg/handlers/webhookreceiver.go b/pkg/handlers/webhookreceiver.go index 07448e0..9f7e249 100644 --- a/pkg/handlers/webhookreceiver.go +++ b/pkg/handlers/webhookreceiver.go @@ -8,7 +8,6 @@ import ( "time" "github.com/openshift/ocm-agent/pkg/config" - "github.com/openshift/ocm-agent/pkg/httpchecker" "github.com/openshift/ocm-agent/pkg/ocm" "github.com/spf13/viper" @@ -66,23 +65,97 @@ func (h *WebhookReceiverHandler) ServeHTTP(w http.ResponseWriter, r *http.Reques metrics.ResetMetric(metrics.MetricRequestFailure) } -func (h *WebhookReceiverHandler) processAMReceiver(d AMReceiverData, ctx context.Context) *AMReceiverResponse { - log.WithField("AMReceiverData", fmt.Sprintf("%+v", d)).Info("Process alert data") +type notificationRetriever struct { + ctx context.Context + kubeCli client.Client + notificationNameToManagedNotificationName map[string]string +} - // Let's get all ManagedNotifications - mnl := &oav1alpha1.ManagedNotificationList{} +func newNotificationRetriever(kubeCli client.Client, ctx context.Context) (*notificationRetriever, error) { + managedNotificationList := &oav1alpha1.ManagedNotificationList{} listOptions := []client.ListOption{ - client.InNamespace("openshift-ocm-agent-operator"), + client.InNamespace(OCMAgentNamespaceName), } - err := h.c.List(ctx, mnl, listOptions...) + + err := kubeCli.List(ctx, managedNotificationList, listOptions...)
if err != nil { log.WithError(err).Error("unable to list managed notifications") - return &AMReceiverResponse{Error: err, Status: "unable to list managed notifications", Code: http.StatusInternalServerError} + return nil, err + } + + result := &notificationRetriever{ctx, kubeCli, make(map[string]string)} + + for _, managedNotification := range managedNotificationList.Items { + for _, notification := range managedNotification.Spec.Notifications { + result.notificationNameToManagedNotificationName[notification.Name] = managedNotification.Name + } + } + + return result, nil +} + +// retrieveNotificationContext returns the notification context for the named notification if one exists, or an error if one does not +func (r *notificationRetriever) retrieveNotificationContext(notificationName string) (*notificationContext, error) { + managedNotificationName, ok := r.notificationNameToManagedNotificationName[notificationName] + if !ok { + return nil, fmt.Errorf("no managed notification found for notification %s", notificationName) + } + + managedNotification := &oav1alpha1.ManagedNotification{} + + err := r.kubeCli.Get(r.ctx, client.ObjectKey{ + Namespace: OCMAgentNamespaceName, + Name: managedNotificationName, + }, managedNotification) + if err != nil { + return nil, err + } + + notification, err := managedNotification.GetNotificationForName(notificationName) + if err != nil { + return nil, err + } + if notification == nil { + return nil, fmt.Errorf("managed notification %s does not contain notification %s anymore", managedNotificationName, notificationName) + } + + var notificationRecord *oav1alpha1.NotificationRecord + if managedNotification.Status.HasNotificationRecord(notificationName) { + notificationRecord, err = managedNotification.Status.GetNotificationRecord(notificationName) + if err != nil { + return nil, err + } + if notificationRecord == nil { + return nil, fmt.Errorf("status of managed notification %s does not contain notification record %s", managedNotificationName, notificationName) + } + } else { + notificationRecord = &oav1alpha1.NotificationRecord{ + Name: notificationName, + ServiceLogSentCount: 0, + } + } + + firingCondition := notificationRecord.Conditions.GetCondition(oav1alpha1.ConditionAlertFiring) + return &notificationContext{ + retriever: r, + notification: notification, + managedNotification: managedNotification, + notificationRecord: notificationRecord, + wasFiring: firingCondition != nil && firingCondition.Status == corev1.ConditionTrue, + }, nil +} + +func (h *WebhookReceiverHandler) processAMReceiver(d AMReceiverData, ctx context.Context) *AMReceiverResponse { + log.WithField("AMReceiverData", fmt.Sprintf("%+v", d)).Info("Process alert data") + + notificationRetriever, err := newNotificationRetriever(h.c, ctx) + if err != nil { + return &AMReceiverResponse{Error: err, Status: "unable to retrieve managed notifications", Code: http.StatusInternalServerError} } // Handle each firing alert for _, alert := range d.Alerts.Firing() { - err = h.processAlert(alert, mnl, true) + err := h.processAlert(alert, notificationRetriever, true) if err != nil { log.WithError(err).Error("a firing alert could not be successfully processed") } @@ -90,7 +163,7 @@ func (h *WebhookReceiverHandler) processAMReceiver(d AMReceiverData, ctx context // Handle resolved alerts for _, alert := range d.Alerts.Resolved() { - err := h.processAlert(alert, mnl, false) + err := h.processAlert(alert, notificationRetriever, false) if err != nil { log.WithError(err).Error("a resolved alert could not be
successfully processed") } @@ -98,205 +171,227 @@ func (h *WebhookReceiverHandler) processAMReceiver(d AMReceiverData, ctx context return &AMReceiverResponse{Error: nil, Status: "ok", Code: http.StatusOK} } -// processAlert handles the pre-check verification and sending of a notification for a particular alert -// and returns an error if that process completed successfully or false otherwise -func (h *WebhookReceiverHandler) processAlert(alert template.Alert, mnl *oav1alpha1.ManagedNotificationList, firing bool) error { - // Should this alert be handled? - if !isValidAlert(alert, false) { - log.WithField(LogFieldAlert, fmt.Sprintf("%+v", alert)).Info("alert does not meet valid criteria") - return fmt.Errorf("alert does not meet valid criteria") - } +type notificationContext struct { + retriever *notificationRetriever + notification *oav1alpha1.Notification + managedNotification *oav1alpha1.ManagedNotification // The custom resource wrapping the notification among others + notificationRecord *oav1alpha1.NotificationRecord // The notification status + wasFiring bool +} - // Can the alert be mapped to an existing notification definition? - notification, managedNotifications, err := getNotification(alert.Labels[AMLabelTemplateName], mnl) - if err != nil { - log.WithError(err).WithField(LogFieldAlert, fmt.Sprintf("%+v", alert)).Warning("an alert fired with no associated notification template definition") - return err - } +func (c *notificationContext) canSendServiceLog(isCurrentlyFiring bool) bool { + nowTime := time.Now() + slSentCondition := c.notificationRecord.Conditions.GetCondition(oav1alpha1.ConditionServiceLogSent) - // Has a servicelog already been sent and we are within the notification's "do-not-resend" window? - canBeSent, err := managedNotifications.CanBeSent(notification.Name, firing) - if err != nil { - log.WithError(err).WithField(LogFieldNotificationName, notification.Name).Error("unable to validate if notification can be sent") - return err - } - if !canBeSent { - if firing { - log.WithFields(log.Fields{"notification": notification.Name, - LogFieldResendInterval: notification.ResendWait, - }).Info("not sending a notification as one was already sent recently") - // Reset the metric for correct service log response from OCM - metrics.ResetResponseMetricFailure(config.ServiceLogService, notification.Name, alert.Labels["alertname"]) - } else { - log.WithFields(log.Fields{"notification": notification.Name}).Info("not sending a resolve notification if it was not firing or resolved body is empty") - s, err := managedNotifications.Status.GetNotificationRecord(notification.Name) - // If a status history exists but can't be fetched, this is an irregular situation - if err != nil { - return err - } - firingStatus := s.Conditions.GetCondition(oav1alpha1.ConditionAlertFiring).Status - if firingStatus == corev1.ConditionTrue { - // Update the notification status for the resolved alert without sending resolved SL - _, err := h.updateNotificationStatus(notification, managedNotifications, firing, corev1.ConditionTrue) - if err != nil { - log.WithFields(log.Fields{LogFieldNotificationName: notification.Name, LogFieldManagedNotification: managedNotifications.Name}).WithError(err).Error("unable to update notification status") - return err - } + // If alert is firing + if isCurrentlyFiring { + // No SL sent yet -> send a SL + if slSentCondition == nil { + return true + } + + // Check if we are within the "don't resend" time window; if so -> nothing to do + nextAllowedSendTime := 
slSentCondition.LastTransitionTime.Add(time.Duration(c.notification.ResendWait) * time.Hour) + if slSentCondition.Status == corev1.ConditionTrue && nowTime.Before(nextAllowedSendTime) { + return false + } + + // Change of state -> send a SL + if !c.wasFiring { + return true + } + + // We use the AlertResolved condition as, unlike the AlertFiring condition, the timestamp for this + // condition is updated when the webhook is called and the alert is already firing. + // The timestamp for the AlertFiring condition is only updated when the condition status toggles. + resolvedCondition := c.notificationRecord.Conditions.GetCondition(oav1alpha1.ConditionAlertResolved) + + // While the AlertResolved condition is tested and set atomically, the ServiceLogSent condition is not. + // ServiceLogSent may be updated up to 2 minutes after the AlertResolved condition is updated (the maximum time allowed to send a SL). + // If we are still within this time window, another call is most likely already sending the SL -> nothing to do + if resolvedCondition != nil { + lastWebhookCallTime := resolvedCondition.LastTransitionTime + + if nowTime.Before(lastWebhookCallTime.Add(3 * time.Minute)) { + return false } } - return nil - } + } else { + // No resolved body -> nothing to do + if len(c.notification.ResolvedDesc) == 0 { + return false + } - var attempts int = 3 - var sleep time.Duration = 30 * time.Second - ocmURL := viper.GetString(config.OcmURL) - if ocmURL == "" { - return fmt.Errorf("OCM URL is missing or empty in the configuration") - } - err = checkURLWithRetries(ocmURL, attempts, sleep) - if err != nil { - return err - } + // No change of state -> nothing to do + if !c.wasFiring { + return false + } - // Send the servicelog for the alert - log.WithFields(log.Fields{LogFieldNotificationName: notification.Name}).Info("will send servicelog for notification") - slerr := ocm.BuildAndSendServiceLog( - ocm.NewServiceLogBuilder(notification.Summary, notification.ActiveDesc, notification.ResolvedDesc, viper.GetString(config.ExternalClusterID), notification.Severity, notification.LogType, notification.References), - firing, &alert, h.ocm) - if slerr != nil { - log.WithError(err).WithFields(log.Fields{LogFieldNotificationName: notification.Name, LogFieldIsFiring: true}).Error("unable to send a notification") - _, err := h.updateNotificationStatus(notification, managedNotifications, firing, corev1.ConditionFalse) - if err != nil { - log.WithFields(log.Fields{LogFieldNotificationName: notification.Name, LogFieldManagedNotification: managedNotifications.Name}).WithError(err).Error("unable to update notification status") + // We use the AlertFiring condition as, unlike the AlertResolved condition, its timestamp is only updated + // when the condition status toggles.
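+ // A resolved SL only makes sense if a firing SL actually went out: below we check that the last + // successful ServiceLogSent is more recent than the start of the current firing period.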
+ firingCondition := c.notificationRecord.Conditions.GetCondition(oav1alpha1.ConditionAlertFiring) + + // No SL sent when the alert was firing -> nothing to do + if slSentCondition == nil || firingCondition == nil || slSentCondition.Status != corev1.ConditionTrue || slSentCondition.LastTransitionTime.Before(firingCondition.LastTransitionTime) { + return false } - // Set the metric for failed service log response from OCM - metrics.SetResponseMetricFailure(config.ServiceLogService, notification.Name, alert.Labels["alertname"]) - metrics.CountFailedServiceLogs(notification.Name) - return slerr } - // Reset the metric for correct service log response from OCM - metrics.ResetResponseMetricFailure(config.ServiceLogService, notification.Name, alert.Labels["alertname"]) + return true +} - // Count the service log sent by the template name - if firing { - metrics.CountServiceLogSent(notification.Name, "firing") +func (c *notificationContext) setCondition(condType oav1alpha1.NotificationConditionType, reason string, boolStatus bool, updateTime bool, nowTime *v1.Time) { + var status corev1.ConditionStatus + if boolStatus { + status = corev1.ConditionTrue } else { - metrics.CountServiceLogSent(notification.Name, "resolved") + status = corev1.ConditionFalse } - // Update the notification status to indicate a servicelog has been sent - m, err := h.updateNotificationStatus(notification, managedNotifications, firing, corev1.ConditionTrue) - if err != nil { - log.WithFields(log.Fields{LogFieldNotificationName: notification.Name, LogFieldManagedNotification: managedNotifications.Name}).WithError(err).Error("unable to update notification status") - return err + + condTime := nowTime + if !updateTime { + currentCondition := c.notificationRecord.Conditions.GetCondition(condType) + if currentCondition != nil { + condTime = currentCondition.LastTransitionTime + } } - status, err := m.Status.GetNotificationRecord(notification.Name) - if err != nil { - return err + + _ = c.notificationRecord.SetStatus(condType, reason, status, condTime) +} + +func (c *notificationContext) updateFiringAndResolvedConditions(isCurrentlyFiring bool) error { + nowTime := &v1.Time{Time: time.Now()} + var firingReason, resolvedReason string + + if isCurrentlyFiring { + firingReason = "Alert is firing" + resolvedReason = "Alert has not resolved" + } else { + firingReason = "Alert is not firing" + resolvedReason = "Alert has resolved" } - metrics.SetTotalServiceLogCount(notification.Name, status.ServiceLogSentCount) + hasStateChanged := isCurrentlyFiring != c.wasFiring - return nil + c.setCondition(oav1alpha1.ConditionAlertFiring, firingReason, isCurrentlyFiring, hasStateChanged, nowTime) + c.setCondition(oav1alpha1.ConditionAlertResolved, resolvedReason, !isCurrentlyFiring, hasStateChanged || isCurrentlyFiring, nowTime) + + c.managedNotification.Status.NotificationRecords.SetNotificationRecord(*c.notificationRecord) + + return c.retriever.kubeCli.Status().Update(c.retriever.ctx, c.managedNotification) } -// getNotification returns the notification from the ManagedNotification bundle if one exists, or error if one does not -func getNotification(name string, m *oav1alpha1.ManagedNotificationList) (*oav1alpha1.Notification, *oav1alpha1.ManagedNotification, error) { - for _, mn := range m.Items { - notification, err := mn.GetNotificationForName(name) - if notification != nil && err == nil { - return notification, &mn, nil +func (c *notificationContext) updateServiceLogSentCondition(isCurrentlyFiring, hasSLBeenSent bool) error { + var 
slSentReason string + + if isCurrentlyFiring { + if c.wasFiring { + if !hasSLBeenSent { + return nil + } + slSentReason = "Service log sent again after the resend window passed" + } else { + slSentReason = "Service log sent for alert firing" } + } else { + slSentReason = "Service log sent for alert resolved" } - return nil, nil, fmt.Errorf("matching managed notification not found for %s", name) + + c.setCondition(oav1alpha1.ConditionServiceLogSent, slSentReason, hasSLBeenSent, true, &v1.Time{Time: time.Now()}) + + c.managedNotification.Status.NotificationRecords.SetNotificationRecord(*c.notificationRecord) + + return c.retriever.kubeCli.Status().Update(c.retriever.ctx, c.managedNotification) } -// checkURLWithRetries returns err for response code outside >=200 and <300 -func checkURLWithRetries(url string, attempts int, sleep time.Duration) error { - // Use the default HTTP client - urlchecker := httpchecker.NewHTTPChecker(nil) - err := httpchecker.Reattempt(attempts, sleep, func() error { - return urlchecker.UrlAvailabilityCheck(url) - }) +func (c *notificationContext) sendServiceLog(ocmCli ocm.OCMClient, alert template.Alert, isCurrentlyFiring bool) error { + // Send the servicelog for the alert + log.WithFields(log.Fields{LogFieldNotificationName: c.notification.Name}).Info("will send service log") + + slErr := ocm.BuildAndSendServiceLog( + ocm.NewServiceLogBuilder(c.notification.Summary, c.notification.ActiveDesc, c.notification.ResolvedDesc, viper.GetString(config.ExternalClusterID), c.notification.Severity, c.notification.LogType, c.notification.References), + isCurrentlyFiring, &alert, ocmCli) + + err := c.updateServiceLogSentCondition(isCurrentlyFiring, slErr == nil) if err != nil { - return err + log.WithFields(log.Fields{LogFieldNotificationName: c.notification.Name, LogFieldManagedNotification: c.managedNotification.Name}).WithError(err).Error("unable to update ServiceLogSent condition") } - return nil + + return slErr } -func (h *WebhookReceiverHandler) updateNotificationStatus(n *oav1alpha1.Notification, mn *oav1alpha1.ManagedNotification, firing bool, slsentstatus corev1.ConditionStatus) (*oav1alpha1.ManagedNotification, error) { - var m *oav1alpha1.ManagedNotification +// processAlert handles the pre-check verification and sending of a notification for a particular alert +// and returns an error if that process completed successfully or false otherwise +func (h *WebhookReceiverHandler) processAlert(alert template.Alert, notificationRetriever *notificationRetriever, isCurrentlyFiring bool) error { + // Should this alert be handled? + if !isValidAlert(alert, false) { + log.WithField(LogFieldAlert, fmt.Sprintf("%+v", alert)).Info("alert does not meet valid criteria") + return fmt.Errorf("alert does not meet valid criteria") + } + + // Can the alert be mapped to an existing notification definition? 
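+ // The name-to-ManagedNotification map was built once per webhook payload by newNotificationRetriever; + // a miss here means no ManagedNotification in the agent namespace declares this template.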
+ notificationName := alert.Labels[AMLabelTemplateName] + if _, ok := notificationRetriever.notificationNameToManagedNotificationName[notificationName]; !ok { + log.WithField(LogFieldAlert, fmt.Sprintf("%+v", alert)).Warning("an alert fired with no associated notification") + return fmt.Errorf("an alert fired with no associated notification") + } + + var c *notificationContext + var canSend bool - // Update lastSent timestamp + // Critical section: AlertFiring and AlertResolved conditions are read and set/updated in an atomic way err := retry.RetryOnConflict(retry.DefaultRetry, func() error { - m = &oav1alpha1.ManagedNotification{} + var err error - err := h.c.Get(context.TODO(), client.ObjectKey{ - Namespace: mn.Namespace, - Name: mn.Name, - }, m) + c, err = notificationRetriever.retrieveNotificationContext(notificationName) if err != nil { return err } - timeNow := &v1.Time{Time: time.Now()} - status, err := m.Status.GetNotificationRecord(n.Name) - if err != nil { - // Status does not exist, create it - // This will happen only when the alert is firing first time - status = &oav1alpha1.NotificationRecord{ - Name: n.Name, - ServiceLogSentCount: 0, - } - _ = status.SetStatus(oav1alpha1.ConditionAlertFiring, "Alert starts firing", corev1.ConditionTrue, timeNow) - _ = status.SetStatus(oav1alpha1.ConditionAlertResolved, "Alert has not resolved", corev1.ConditionFalse, timeNow) - _ = status.SetStatus(oav1alpha1.ConditionServiceLogSent, "Service log sent for firing alert", slsentstatus, timeNow) + // Has a servicelog already been sent and we are within the notification's "do-not-resend" window? + canSend = c.canSendServiceLog(isCurrentlyFiring) + + return c.updateFiringAndResolvedConditions(isCurrentlyFiring) + }) + if err != nil { + return err + } + + if !canSend { + if isCurrentlyFiring { + log.WithFields(log.Fields{"notification": notificationName, + LogFieldResendInterval: c.notification.ResendWait, + }).Info("not sending a notification as one was already sent recently") + // Reset the metric for correct service log response from OCM + metrics.ResetResponseMetricFailure(config.ServiceLogService, notificationName, alert.Labels["alertname"]) } else { - // Status exists, update it - // When the alert is already firing - firingCondition := status.Conditions.GetCondition(oav1alpha1.ConditionAlertFiring).Status - if firingCondition == corev1.ConditionTrue { - firedConditionTime := status.Conditions.GetCondition(oav1alpha1.ConditionAlertFiring).LastTransitionTime - resolvedConditionTime := status.Conditions.GetCondition(oav1alpha1.ConditionAlertResolved).LastTransitionTime - if firing { - // Status transition is Firing to Firing - // Do not update the condition for AlertFiring and AlertResolved - // Only update the timestamp for the ServiceLogSent - _ = status.SetStatus(oav1alpha1.ConditionAlertFiring, "Alert is still firing", corev1.ConditionTrue, firedConditionTime) - _ = status.SetStatus(oav1alpha1.ConditionAlertResolved, "Alert has not resolved", corev1.ConditionFalse, resolvedConditionTime) - _ = status.SetStatus(oav1alpha1.ConditionServiceLogSent, "Service log sent again after the resend window passed", slsentstatus, timeNow) - } else { - // Status transition is Firing to Resolved - // Update the condition status and timestamp for AlertFiring - // Update the condition status and timestamp for AlertResolved - // Update the timestamp for the ServiceLogSent - _ = status.SetStatus(oav1alpha1.ConditionAlertFiring, "Alert is not firing", corev1.ConditionFalse, timeNow) - _ = 
status.SetStatus(oav1alpha1.ConditionAlertResolved, "Alert resolved", corev1.ConditionTrue, timeNow) - if len(n.ResolvedDesc) > 0 { - _ = status.SetStatus(oav1alpha1.ConditionServiceLogSent, "Service log sent for alert resolved", slsentstatus, timeNow) - } else { - // This is for the total serviceLogSentCount while should not be increased by SetNotificationRecord if resolved SL is not sent - status.ServiceLogSentCount-- - } - } - } else { - // Status transition is Resolved to Firing - // Update the condition status and timestamp for AlertFiring - // Update the condition status and timestamp for AlertResolved - // Update the timestamp for the ServiceLogSent - _ = status.SetStatus(oav1alpha1.ConditionAlertFiring, "Alert fired again", corev1.ConditionTrue, timeNow) - _ = status.SetStatus(oav1alpha1.ConditionAlertResolved, "Alert has not resolved", corev1.ConditionFalse, timeNow) - _ = status.SetStatus(oav1alpha1.ConditionServiceLogSent, "Service log sent for alert firing", slsentstatus, timeNow) - } + log.WithFields(log.Fields{"notification": notificationName}).Info("not sending a resolve notification if it was not firing or resolved body is empty") } + // This is not an error state + return nil + } - m.Status.NotificationRecords.SetNotificationRecord(*status) - - err = h.c.Status().Update(context.TODO(), m) + err = c.sendServiceLog(h.ocm, alert, isCurrentlyFiring) + if err != nil { + log.WithError(err).WithFields(log.Fields{LogFieldNotificationName: notificationName, LogFieldIsFiring: isCurrentlyFiring}).Error("unable to send a service log") + // Set the metric for failed service log response from OCM + metrics.SetResponseMetricFailure(config.ServiceLogService, notificationName, alert.Labels["alertname"]) + metrics.CountFailedServiceLogs(notificationName) return err - }) + } + + // Reset the metric for correct service log response from OCM + metrics.ResetResponseMetricFailure(config.ServiceLogService, notificationName, alert.Labels["alertname"]) - return m, err + // Count the service log sent by the template name + if isCurrentlyFiring { + metrics.CountServiceLogSent(notificationName, "firing") + } else { + metrics.CountServiceLogSent(notificationName, "resolved") + } + + metrics.SetTotalServiceLogCount(notificationName, c.notificationRecord.ServiceLogSentCount) + + return nil } diff --git a/pkg/handlers/webhookreceiver_test.go b/pkg/handlers/webhookreceiver_test.go index 0bd0a5d..adb9da6 100644 --- a/pkg/handlers/webhookreceiver_test.go +++ b/pkg/handlers/webhookreceiver_test.go @@ -4,26 +4,21 @@ import ( "bytes" "context" "encoding/json" - "time" - "fmt" "io" "net/http" - "reflect" + "time" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" "github.com/onsi/gomega/ghttp" + "github.com/prometheus/alertmanager/template" "github.com/spf13/viper" "go.uber.org/mock/gomock" - - "sigs.k8s.io/controller-runtime/pkg/client" - - "github.com/prometheus/alertmanager/template" - corev1 "k8s.io/api/core/v1" k8serrs "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" ocmagentv1alpha1 "github.com/openshift/ocm-agent-operator/api/v1alpha1" @@ -40,21 +35,109 @@ func (fn RoundTripperFunc) RoundTrip(r *http.Request) (*http.Response, error) { return fn(r) } +func getConditions(isFiring, slSent, firingMinutesAgo, resolvedMinutesAgo, slSentMinutesAgo int) ocmagentv1alpha1.Conditions { + conditions := ocmagentv1alpha1.Conditions{} + nowTime := time.Now() + + if isFiring >= 0 { + var firingStatus, resolvedStatus corev1.ConditionStatus + if isFiring > 0 { + firingStatus = corev1.ConditionTrue + resolvedStatus = corev1.ConditionFalse + } else { + firingStatus = corev1.ConditionFalse + resolvedStatus = corev1.ConditionTrue + } + conditions = append(conditions, ocmagentv1alpha1.NotificationCondition{ + Type: ocmagentv1alpha1.ConditionAlertFiring, + Status: firingStatus, + LastTransitionTime: &metav1.Time{Time: nowTime.Add(time.Duration(-firingMinutesAgo) * time.Minute)}, + }) + conditions = append(conditions, ocmagentv1alpha1.NotificationCondition{ + Type: ocmagentv1alpha1.ConditionAlertResolved, + Status: resolvedStatus, + LastTransitionTime: &metav1.Time{Time: nowTime.Add(time.Duration(-resolvedMinutesAgo) * time.Minute)}, + }) + } + + if slSent >= 0 { + var slSentStatus corev1.ConditionStatus + if slSent > 0 { + slSentStatus = corev1.ConditionTrue + } else { + slSentStatus = corev1.ConditionFalse + } + conditions = append(conditions, ocmagentv1alpha1.NotificationCondition{ + Type: ocmagentv1alpha1.ConditionServiceLogSent, + Status: slSentStatus, + LastTransitionTime: &metav1.Time{Time: nowTime.Add(time.Duration(-slSentMinutesAgo) * time.Minute)}, + }) + } + + return conditions +} + +func assertConditions(conditions ocmagentv1alpha1.Conditions, isFiring, slSent, firingMinutesAgo, resolvedMinutesAgo, slSentMinutesAgo int) { + isResolved := -1 + if isFiring >= 0 { + isResolved = 1 - isFiring + } + assertStatus := func(status corev1.ConditionStatus, expectedValue bool) { + if expectedValue { + Expect(status).To(Equal(corev1.ConditionTrue)) + } else { + Expect(status).To(Equal(corev1.ConditionFalse)) + } + } + nowTime := time.Now() + assertMinutesAgo := func(conditionTime *metav1.Time, expectedMinutesAgo int) { + if expectedMinutesAgo < 0 { + Expect(conditionTime).To(BeNil()) + } else { + actualMinutesAgo := int(nowTime.Sub(conditionTime.Time).Minutes()) + Expect(actualMinutesAgo).To(Equal(expectedMinutesAgo)) + } + } + + for _, condition := range conditions { + switch condition.Type { + case ocmagentv1alpha1.ConditionAlertFiring: + Expect(isFiring >= 0).To(BeTrue()) + assertStatus(condition.Status, isFiring == 1) + assertMinutesAgo(condition.LastTransitionTime, firingMinutesAgo) + isFiring = -1 // to mark as found + case ocmagentv1alpha1.ConditionAlertResolved: + Expect(isResolved >= 0).To(BeTrue()) + assertStatus(condition.Status, isResolved == 1) + assertMinutesAgo(condition.LastTransitionTime, resolvedMinutesAgo) + isResolved = -1 // to mark as found + case ocmagentv1alpha1.ConditionServiceLogSent: + Expect(slSent >= 0).To(BeTrue()) + assertStatus(condition.Status, slSent == 1) + assertMinutesAgo(condition.LastTransitionTime, slSentMinutesAgo) + slSent 
= -1 // to mark as found + default: + Fail("Unknown condition type: " + string(condition.Type)) + } + } + Expect(isFiring).To(Equal(-1)) + Expect(isResolved).To(Equal(-1)) + Expect(slSent).To(Equal(-1)) +} + var _ = Describe("Webhook Handlers", func() { var ( - mockCtrl *gomock.Controller - mockClient *clientmocks.MockClient - mockStatusWriter *clientmocks.MockStatusWriter - //mockHTTPChecker *httpcheckermock.MockHTTPChecker - mockOCMClient *webhookreceivermock.MockOCMClient - webhookReceiverHandler *WebhookReceiverHandler - server *ghttp.Server - testAlert template.Alert - testAlertResolved template.Alert - testManagedNotificationList *ocmagentv1alpha1.ManagedNotificationList - activeServiceLog *ocm.ServiceLog - resolvedServiceLog *ocm.ServiceLog + mockCtrl *gomock.Controller + mockClient *clientmocks.MockClient + mockStatusWriter *clientmocks.MockStatusWriter + mockOCMClient *webhookreceivermock.MockOCMClient + webhookReceiverHandler *WebhookReceiverHandler + server *ghttp.Server + testAlert template.Alert + testAlertResolved template.Alert + activeServiceLog *ocm.ServiceLog + resolvedServiceLog *ocm.ServiceLog ) BeforeEach(func() { @@ -72,7 +155,7 @@ var _ = Describe("Webhook Handlers", func() { mockClient = clientmocks.NewMockClient(mockCtrl) mockStatusWriter = clientmocks.NewMockStatusWriter(mockCtrl) server = ghttp.NewServer() - //mockHTTPChecker = httpcheckermock.NewMockHTTPChecker(mockCtrl) + // //mockHTTPChecker = httpcheckermock.NewMockHTTPChecker(mockCtrl) mockOCMClient = webhookreceivermock.NewMockOCMClient(mockCtrl) webhookReceiverHandler = &WebhookReceiverHandler{ c: mockClient, @@ -110,7 +193,7 @@ var _ = Describe("Webhook Handlers", func() { Context("AMReceiver processAMReceiver", func() { var r http.Request BeforeEach(func() { - mockClient.EXPECT().List(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(1) + mockClient.EXPECT().List(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) }) It("Returns the correct AMReceiverResponse", func() { alert := AMReceiverData{ @@ -132,7 +215,7 @@ var _ = Describe("Webhook Handlers", func() { // add handler to the server server.AppendHandlers(webhookReceiverHandler.ServeHTTP) // Expect call *client.List(arg1, arg2, arg3) on mocked WebhookReceiverHandler - mockClient.EXPECT().List(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(1) + mockClient.EXPECT().List(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) // Set data to post postData := AMReceiverData{ Status: "foo", @@ -214,64 +297,83 @@ var _ = Describe("Webhook Handlers", func() { }) }) - Context("When looking for a matching notification for an alert", func() { - It("will return one if one exists", func() { - n, mn, err := getNotification(testconst.TestNotificationName, testconst.TestManagedNotificationList) - Expect(err).To(BeNil()) - Expect(reflect.DeepEqual(*n, testconst.TestNotification)).To(BeTrue()) - Expect(reflect.DeepEqual(*mn, testconst.TestManagedNotification)).To(BeTrue()) + Context("notificationRetriever", func() { + var notificationRetriever *notificationRetriever + var err error + BeforeEach(func() { + mockClient.EXPECT().List(gomock.Any(), gomock.Any(), gomock.Any()).SetArg(1, *testconst.TestManagedNotificationList).Return(nil) + notificationRetriever, err = newNotificationRetriever(mockClient, context.TODO()) + }) + It("Can be created", func() { + Expect(err).ShouldNot(HaveOccurred()) + Expect(notificationRetriever).ToNot(BeNil()) }) - It("will return nil if one does not exist", func() { - _, _, err := 
getNotification("dummy-nonexistent-test", testconst.TestManagedNotificationList) - Expect(err).ToNot(BeNil()) + It("Should return a notificationContext if one exists", func() { + mockClient.EXPECT().Get(gomock.Any(), client.ObjectKey{ + Namespace: OCMAgentNamespaceName, + Name: testconst.TestManagedNotification.ObjectMeta.Name, + }, gomock.Any()).Return(nil).SetArg(2, testconst.TestManagedNotificationList.Items[0]) + + notificationContext, err := notificationRetriever.retrieveNotificationContext(testconst.TestNotificationName) + Expect(err).ShouldNot(HaveOccurred()) + Expect(notificationContext).ToNot(BeNil()) + Expect(notificationContext.retriever).ToNot(BeNil()) + Expect(notificationContext.notification).To(Equal(&testconst.TestNotification)) + Expect(notificationContext.managedNotification).To(Equal(&testconst.TestManagedNotification)) + Expect(notificationContext.notificationRecord).To(Equal(&testconst.TestNotificationRecord)) + Expect(notificationContext.wasFiring).To(BeTrue()) + }) + It("Should return nil if there is no notification for the given name", func() { + notificationContext, err := notificationRetriever.retrieveNotificationContext("dummy-nonexistent-test") + Expect(err).Should(HaveOccurred()) + Expect(notificationContext).To(BeNil()) }) }) - Context("When processing an alert", func() { - Context("Check if an alert is valid or not", func() { + Context("WebhookReceiverHandler.processAlert", func() { + var testNotifRetriever *notificationRetriever + BeforeEach(func() { + testNotifRetriever = ¬ificationRetriever{context.TODO(), mockClient, map[string]string{testconst.TestNotificationName: testconst.TestManagedNotification.ObjectMeta.Name}} + }) + Context("Alert is invalid", func() { It("Reports error if alert does not have alertname label", func() { delete(testAlert.Labels, "alertname") - err := webhookReceiverHandler.processAlert(testAlert, testconst.TestManagedNotificationList, true) + err := webhookReceiverHandler.processAlert(testAlert, testNotifRetriever, true) Expect(err).Should(HaveOccurred()) }) It("Reports error if alert does not have managed_notification_template label", func() { delete(testAlert.Labels, "managed_notification_template") - err := webhookReceiverHandler.processAlert(testAlert, testconst.TestManagedNotificationList, true) + err := webhookReceiverHandler.processAlert(testAlert, testNotifRetriever, true) Expect(err).Should(HaveOccurred()) }) It("Reports error if alert does not have send_managed_notification label", func() { delete(testAlert.Labels, "send_managed_notification") - err := webhookReceiverHandler.processAlert(testAlert, testconst.TestManagedNotificationList, true) + err := webhookReceiverHandler.processAlert(testAlert, testNotifRetriever, true) Expect(err).Should(HaveOccurred()) }) - }) - Context("Check if a valid alert can be mapped to existing notification template definition or not", func() { - BeforeEach(func() { - testAlert = template.Alert{ - Status: "firing", - Labels: map[string]string{ - "managed_notification_template": testconst.TestNotificationName, - "send_managed_notification": "true", - "alertname": "TestAlertName", - }, - StartsAt: time.Now(), - EndsAt: time.Time{}, - } - }) - It("Reports failure if cannot fetch notification for a valid alert", func() { - testManagedNotificationList = &ocmagentv1alpha1.ManagedNotificationList{} - err := webhookReceiverHandler.processAlert(testAlert, testManagedNotificationList, true) - Expect(err).ToNot(BeNil()) + It("Reports error if alert send_managed_notification label does not name a valid 
notification", func() { + testAlert.Labels["managed_notification_template"] = "dummy-nonexistent-test" + err := webhookReceiverHandler.processAlert(testAlert, testNotifRetriever, true) + Expect(err).Should(HaveOccurred()) }) }) - Context("Check if servicelog can be sent for an alert or not", func() { - It("Should not send service log for a firing alert if one is already sent within resend time", func() { - testManagedNotificationList = &ocmagentv1alpha1.ManagedNotificationList{ - Items: []ocmagentv1alpha1.ManagedNotification{ - { + Context("Alert is valid", func() { + var notification ocmagentv1alpha1.Notification + var conditions ocmagentv1alpha1.Conditions + var updatedConditions []ocmagentv1alpha1.Conditions + var updatedConditionsError error + BeforeEach(func() { + notification = testconst.TestNotification + updatedConditions = nil + mockClient.EXPECT().Get(gomock.Any(), client.ObjectKey{ + Namespace: OCMAgentNamespaceName, + Name: testconst.TestManagedNotification.ObjectMeta.Name, + }, gomock.Any()).DoAndReturn( + func(ctx context.Context, key client.ObjectKey, res *ocmagentv1alpha1.ManagedNotification, opts ...client.GetOption) error { + *res = ocmagentv1alpha1.ManagedNotification{ Spec: ocmagentv1alpha1.ManagedNotificationSpec{ Notifications: []ocmagentv1alpha1.Notification{ - testconst.TestNotification, + notification, }, }, Status: ocmagentv1alpha1.ManagedNotificationStatus{ @@ -279,441 +381,187 @@ var _ = Describe("Webhook Handlers", func() { ocmagentv1alpha1.NotificationRecord{ Name: testconst.TestNotificationName, ServiceLogSentCount: 0, - Conditions: []ocmagentv1alpha1.NotificationCondition{ - { - Type: ocmagentv1alpha1.ConditionServiceLogSent, - Status: corev1.ConditionTrue, - LastTransitionTime: &metav1.Time{Time: time.Now()}, - }, - }, + Conditions: conditions, }, }, }, - }, + } + return nil + }).MinTimes(1) + mockClient.EXPECT().Status().Return(mockStatusWriter).MinTimes(1) + mockStatusWriter.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).DoAndReturn( + func(ctx context.Context, obj client.Object, opts ...client.SubResourceUpdateOption) error { + updatedManagedNotification, _ := obj.(*ocmagentv1alpha1.ManagedNotification) + Expect(updatedManagedNotification).ToNot(BeNil()) + Expect(updatedManagedNotification.Spec.Notifications).To(Equal([]ocmagentv1alpha1.Notification{notification})) + updatedNotificationRecords := updatedManagedNotification.Status.NotificationRecords + Expect(len(updatedNotificationRecords)).To(Equal(1)) + Expect(updatedNotificationRecords[0].Name).To(Equal(testconst.TestNotificationName)) + updatedConditions = append(updatedConditions, updatedNotificationRecords[0].Conditions.DeepCopy()) + + return updatedConditionsError }, - } - err := webhookReceiverHandler.processAlert(testAlert, testManagedNotificationList, true) + ).MinTimes(1) + }) + It("Should send a service log when receiving a firing alert and the alert never fired before", func() { + conditions = getConditions(-1, -1, 0, 0, 0) + mockOCMClient.EXPECT().SendServiceLog(activeServiceLog).Return(nil) + err := webhookReceiverHandler.processAlert(testAlert, testNotifRetriever, true) Expect(err).ShouldNot(HaveOccurred()) + Expect(len(updatedConditions)).To(Equal(2)) + assertConditions(updatedConditions[0], 1, -1, 0, 0, 0) + assertConditions(updatedConditions[1], 1, 1, 0, 0, 0) }) - It("Should send service log for a firing alert if one hasn't already sent after resend time and update notification", func() { - alerttest := template.Alert{ - Labels: map[string]string{ - 
"managed_notification_template": testconst.TestNotificationName, - "send_managed_notification": "true", - "alertname": "TestAlertName", - "alertstate": "firing", - "namespace": "openshift-monitoring", - "openshift_io_alert_source": "platform", - "prometheus": "openshift-monitoring/k8s", - "severity": "info", - "overriden-key": "label-value", - }, - Annotations: map[string]string{ - "description": "alert-desc", - "overriden-key": "annotation-value", - "recursive-key": "_${recursive-key}_", - }, - StartsAt: time.Now().Add(time.Duration(-27) * time.Hour), - } - testManagedNotificationList = &ocmagentv1alpha1.ManagedNotificationList{ - Items: []ocmagentv1alpha1.ManagedNotification{ - { - Spec: ocmagentv1alpha1.ManagedNotificationSpec{ - Notifications: []ocmagentv1alpha1.Notification{ - testconst.TestNotification, - }, - }, - Status: ocmagentv1alpha1.ManagedNotificationStatus{ - NotificationRecords: ocmagentv1alpha1.NotificationRecords{ - ocmagentv1alpha1.NotificationRecord{ - Name: testconst.TestNotificationName, - ServiceLogSentCount: 0, - Conditions: []ocmagentv1alpha1.NotificationCondition{ - { - Type: ocmagentv1alpha1.ConditionAlertFiring, - Status: corev1.ConditionTrue, - LastTransitionTime: &metav1.Time{Time: time.Now()}, - }, - { - Type: ocmagentv1alpha1.ConditionAlertResolved, - Status: corev1.ConditionFalse, - LastTransitionTime: &metav1.Time{Time: time.Now()}, - }, - { - Type: ocmagentv1alpha1.ConditionServiceLogSent, - Status: corev1.ConditionTrue, - LastTransitionTime: &metav1.Time{Time: time.Now().Add(time.Duration(-25) * time.Hour)}, - }, - }, - }, - }, - }, - }, - }, - } - gomock.InOrder( - //mockHTTPChecker.EXPECT().UrlAvailabilityCheck(gomock.Any().String()).Return(nil).Times(5), - mockOCMClient.EXPECT().SendServiceLog(activeServiceLog).Return(nil), - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, testManagedNotificationList.Items[0]), - mockClient.EXPECT().Status().Return(mockStatusWriter), - mockStatusWriter.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil), - ) - err := webhookReceiverHandler.processAlert(alerttest, testManagedNotificationList, true) + It("Should send a service log when receiving a firing alert and the alert was marked as resolved", func() { + conditions = getConditions(0, 1, 90, 90, 90) + mockOCMClient.EXPECT().SendServiceLog(activeServiceLog).Return(nil) + err := webhookReceiverHandler.processAlert(testAlert, testNotifRetriever, true) Expect(err).ShouldNot(HaveOccurred()) + Expect(len(updatedConditions)).To(Equal(2)) + assertConditions(updatedConditions[0], 1, 1, 0, 0, 90) + assertConditions(updatedConditions[1], 1, 1, 0, 0, 0) }) - It("Should not send service log for a firing alert if some place holder cannot be resolved with an alert label or annotation", func() { - testAlert.Labels = nil - testManagedNotificationList = &ocmagentv1alpha1.ManagedNotificationList{ - Items: []ocmagentv1alpha1.ManagedNotification{ - { - Spec: ocmagentv1alpha1.ManagedNotificationSpec{ - Notifications: []ocmagentv1alpha1.Notification{ - testconst.TestNotification, - }, - }, - Status: ocmagentv1alpha1.ManagedNotificationStatus{ - NotificationRecords: ocmagentv1alpha1.NotificationRecords{ - ocmagentv1alpha1.NotificationRecord{ - Name: testconst.TestNotificationName, - ServiceLogSentCount: 0, - Conditions: []ocmagentv1alpha1.NotificationCondition{ - { - Type: ocmagentv1alpha1.ConditionAlertFiring, - Status: corev1.ConditionTrue, - LastTransitionTime: &metav1.Time{Time: time.Now()}, - }, - { - Type: 
ocmagentv1alpha1.ConditionAlertResolved, - Status: corev1.ConditionFalse, - LastTransitionTime: &metav1.Time{Time: time.Now()}, - }, - { - Type: ocmagentv1alpha1.ConditionServiceLogSent, - Status: corev1.ConditionTrue, - LastTransitionTime: &metav1.Time{Time: time.Now().Add(time.Duration(-90) * time.Minute)}, - }, - }, - }, - }, - }, - }, - }, - } - err := webhookReceiverHandler.processAlert(testAlert, testManagedNotificationList, true) - Expect(err).Should(HaveOccurred()) + It("Should send a service log only once even if 2 firing alerts are received", func() { // SREP-2079 + conditions = getConditions(0, 1, 90, 90, 90) + mockOCMClient.EXPECT().SendServiceLog(activeServiceLog).Return(nil) + err := webhookReceiverHandler.processAlert(testAlert, testNotifRetriever, true) + Expect(err).ShouldNot(HaveOccurred()) + err = webhookReceiverHandler.processAlert(testAlert, testNotifRetriever, true) + Expect(err).ShouldNot(HaveOccurred()) + Expect(len(updatedConditions)).To(Equal(3)) + assertConditions(updatedConditions[0], 1, 1, 0, 0, 90) + assertConditions(updatedConditions[1], 1, 1, 0, 0, 0) + assertConditions(updatedConditions[2], 1, 1, 0, 0, 0) }) - It("Should not send servicelog if the alert was not in firing state and is resolved", func() { - testManagedNotificationList = &ocmagentv1alpha1.ManagedNotificationList{ - Items: []ocmagentv1alpha1.ManagedNotification{ - { - Spec: ocmagentv1alpha1.ManagedNotificationSpec{ - Notifications: []ocmagentv1alpha1.Notification{ - testconst.TestNotification, - }, - }, - Status: ocmagentv1alpha1.ManagedNotificationStatus{ - NotificationRecords: ocmagentv1alpha1.NotificationRecords{ - ocmagentv1alpha1.NotificationRecord{ - Name: testconst.TestNotificationName, - ServiceLogSentCount: 0, - Conditions: []ocmagentv1alpha1.NotificationCondition{ - { - Type: ocmagentv1alpha1.ConditionAlertFiring, - Status: corev1.ConditionFalse, - LastTransitionTime: &metav1.Time{Time: time.Now()}, - }, - { - Type: ocmagentv1alpha1.ConditionAlertResolved, - Status: corev1.ConditionTrue, - LastTransitionTime: &metav1.Time{Time: time.Now()}, - }, - { - Type: ocmagentv1alpha1.ConditionServiceLogSent, - Status: corev1.ConditionTrue, - LastTransitionTime: &metav1.Time{Time: time.Now().Add(time.Duration(-90) * time.Minute)}, - }, - }, - }, - }, - }, - }, - }, - } - err := webhookReceiverHandler.processAlert(testAlert, testManagedNotificationList, false) + It("Should not resend a service log when receiving a firing alert within the resend time window", func() { + conditions = getConditions(1, 1, 30, 30, 30) + err := webhookReceiverHandler.processAlert(testAlert, testNotifRetriever, true) Expect(err).ShouldNot(HaveOccurred()) + Expect(len(updatedConditions)).To(Equal(1)) + assertConditions(updatedConditions[0], 1, 1, 30, 0, 30) }) - It("Should send servicelog if the alert was in firing state and is resolved", func() { - testManagedNotificationList = &ocmagentv1alpha1.ManagedNotificationList{ - Items: []ocmagentv1alpha1.ManagedNotification{ - { - Spec: ocmagentv1alpha1.ManagedNotificationSpec{ - Notifications: []ocmagentv1alpha1.Notification{ - testconst.TestNotification, - }, - }, - Status: ocmagentv1alpha1.ManagedNotificationStatus{ - NotificationRecords: ocmagentv1alpha1.NotificationRecords{ - ocmagentv1alpha1.NotificationRecord{ - Name: testconst.TestNotificationName, - ServiceLogSentCount: 0, - Conditions: []ocmagentv1alpha1.NotificationCondition{ - { - Type: ocmagentv1alpha1.ConditionAlertFiring, - Status: corev1.ConditionTrue, - LastTransitionTime: &metav1.Time{Time: time.Now()}, - }, 
- { - Type: ocmagentv1alpha1.ConditionAlertResolved, - Status: corev1.ConditionFalse, - LastTransitionTime: &metav1.Time{Time: time.Now()}, - }, - { - Type: ocmagentv1alpha1.ConditionServiceLogSent, - Status: corev1.ConditionTrue, - LastTransitionTime: &metav1.Time{Time: time.Now().Add(time.Duration(-90) * time.Minute)}, - }, - }, - }, - }, - }, - }, - }, - } - gomock.InOrder( - mockOCMClient.EXPECT().SendServiceLog(resolvedServiceLog).Return(nil), - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, testManagedNotificationList.Items[0]), - mockClient.EXPECT().Status().Return(mockStatusWriter), - mockStatusWriter.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil), - ) - err := webhookReceiverHandler.processAlert(testAlertResolved, testManagedNotificationList, false) + It("Should not send a service log when receiving a firing alert within the resend time window even if the alert was marked as resolved", func() { + conditions = getConditions(0, 1, 30, 30, 30) + err := webhookReceiverHandler.processAlert(testAlert, testNotifRetriever, true) Expect(err).ShouldNot(HaveOccurred()) + Expect(len(updatedConditions)).To(Equal(1)) + assertConditions(updatedConditions[0], 1, 1, 0, 0, 30) }) - It("Should not send resolved servicelog if the resolved body is empty", func() { - testManagedNotificationList = &ocmagentv1alpha1.ManagedNotificationList{ - Items: []ocmagentv1alpha1.ManagedNotification{ - { - Spec: ocmagentv1alpha1.ManagedNotificationSpec{ - Notifications: []ocmagentv1alpha1.Notification{ - testconst.NotificationWithoutResolvedBody, - }, - }, - Status: ocmagentv1alpha1.ManagedNotificationStatus{ - NotificationRecords: ocmagentv1alpha1.NotificationRecords{ - ocmagentv1alpha1.NotificationRecord{ - Name: testconst.TestNotificationName, - ServiceLogSentCount: 1, - Conditions: []ocmagentv1alpha1.NotificationCondition{ - { - Type: ocmagentv1alpha1.ConditionAlertFiring, - Status: corev1.ConditionTrue, - LastTransitionTime: &metav1.Time{Time: time.Now()}, - }, - { - Type: ocmagentv1alpha1.ConditionAlertResolved, - Status: corev1.ConditionFalse, - LastTransitionTime: &metav1.Time{Time: time.Now()}, - }, - { - Type: ocmagentv1alpha1.ConditionServiceLogSent, - Status: corev1.ConditionTrue, - LastTransitionTime: &metav1.Time{Time: time.Now()}, - }, - }, - }, - }, - }, - }, - }, - } - gomock.InOrder( - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, testManagedNotificationList.Items[0]), - mockClient.EXPECT().Status().Return(mockStatusWriter), - mockStatusWriter.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil), - ) - err := webhookReceiverHandler.processAlert(testAlertResolved, testManagedNotificationList, false) + It("Should not resend a service log when receiving a firing alert if the AlertResolved condition updated recently", func() { // SREP-2079 + conditions = getConditions(1, 1, 90, 0, 90) + err := webhookReceiverHandler.processAlert(testAlert, testNotifRetriever, true) Expect(err).ShouldNot(HaveOccurred()) + Expect(len(updatedConditions)).To(Equal(1)) + assertConditions(updatedConditions[0], 1, 1, 90, 0, 90) }) - It("Should report error if not able to send service log", func() { - alerttest := template.Alert{ - Labels: map[string]string{ - "managed_notification_template": testconst.TestNotificationName, - "send_managed_notification": "true", - "alertname": "TestAlertName", - "alertstate": "firing", - "namespace": "openshift-monitoring", - "openshift_io_alert_source": "platform", - 
"prometheus": "openshift-monitoring/k8s", - "severity": "info", - "overriden-key": "label-value", - }, - Annotations: map[string]string{ - "description": "alert-desc", - "overriden-key": "annotation-value", - "recursive-key": "_${recursive-key}_", - }, - StartsAt: time.Now(), - } - testManagedNotificationList = &ocmagentv1alpha1.ManagedNotificationList{ - Items: []ocmagentv1alpha1.ManagedNotification{ - { - Spec: ocmagentv1alpha1.ManagedNotificationSpec{ - Notifications: []ocmagentv1alpha1.Notification{ - testconst.TestNotification, - }, - }, - Status: ocmagentv1alpha1.ManagedNotificationStatus{ - NotificationRecords: ocmagentv1alpha1.NotificationRecords{ - ocmagentv1alpha1.NotificationRecord{ - Name: testconst.TestNotificationName, - ServiceLogSentCount: 0, - Conditions: []ocmagentv1alpha1.NotificationCondition{ - { - Type: ocmagentv1alpha1.ConditionAlertFiring, - Status: corev1.ConditionTrue, - LastTransitionTime: &metav1.Time{Time: time.Now()}, - }, - { - Type: ocmagentv1alpha1.ConditionAlertResolved, - Status: corev1.ConditionFalse, - LastTransitionTime: &metav1.Time{Time: time.Now()}, - }, - { - Type: ocmagentv1alpha1.ConditionServiceLogSent, - Status: corev1.ConditionTrue, - LastTransitionTime: &metav1.Time{Time: time.Now().Add(time.Duration(-25) * time.Hour)}, - }, - }, - }, - }, - }, - }, - }, - } - gomock.InOrder( - mockOCMClient.EXPECT().SendServiceLog(activeServiceLog).Return(k8serrs.NewInternalError(fmt.Errorf("a fake error"))), - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, testManagedNotificationList.Items[0]), - mockClient.EXPECT().Status().Return(mockStatusWriter), - mockStatusWriter.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil), - ) - err := webhookReceiverHandler.processAlert(alerttest, testManagedNotificationList, true) - Expect(err).Should(HaveOccurred()) + It("Should resend a service log when receiving a firing alert if out of the resend time window and the AlertResolved condition did not update recently", func() { + conditions = getConditions(1, 1, 90, 5, 90) + mockOCMClient.EXPECT().SendServiceLog(activeServiceLog).Return(nil) + err := webhookReceiverHandler.processAlert(testAlert, testNotifRetriever, true) + Expect(err).ShouldNot(HaveOccurred()) + Expect(len(updatedConditions)).To(Equal(2)) + assertConditions(updatedConditions[0], 1, 1, 90, 0, 90) + assertConditions(updatedConditions[1], 1, 1, 90, 0, 0) }) - It("Should report error if not able to update NotificationStatus", func() { - testManagedNotificationList = &ocmagentv1alpha1.ManagedNotificationList{ - Items: []ocmagentv1alpha1.ManagedNotification{ - { - Spec: ocmagentv1alpha1.ManagedNotificationSpec{ - Notifications: []ocmagentv1alpha1.Notification{ - testconst.TestNotification, - }, - }, - Status: ocmagentv1alpha1.ManagedNotificationStatus{ - NotificationRecords: ocmagentv1alpha1.NotificationRecords{ - ocmagentv1alpha1.NotificationRecord{ - Name: testconst.TestNotificationName, - ServiceLogSentCount: 0, - Conditions: []ocmagentv1alpha1.NotificationCondition{ - { - Type: ocmagentv1alpha1.ConditionAlertFiring, - Status: corev1.ConditionTrue, - LastTransitionTime: &metav1.Time{Time: time.Now()}, - }, - { - Type: ocmagentv1alpha1.ConditionAlertResolved, - Status: corev1.ConditionFalse, - LastTransitionTime: &metav1.Time{Time: time.Now()}, - }, - { - Type: ocmagentv1alpha1.ConditionServiceLogSent, - Status: corev1.ConditionTrue, - LastTransitionTime: &metav1.Time{Time: time.Now().Add(time.Duration(-90) * time.Minute)}, - }, - }, - }, - }, - }, - 
}, - }, - } - gomock.InOrder( - mockOCMClient.EXPECT().SendServiceLog(activeServiceLog).Return(nil), - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, testManagedNotificationList.Items[0]), - mockClient.EXPECT().Status().Return(mockStatusWriter), - mockStatusWriter.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).Return(k8serrs.NewInternalError(fmt.Errorf("a fake error"))), - ) - err := webhookReceiverHandler.processAlert(testAlert, testManagedNotificationList, true) - Expect(err).Should(HaveOccurred()) + It("Should resend a service log only once even if 2 firing alerts are received", func() { // SREP-2079 + conditions = getConditions(1, 1, 90, 5, 90) + mockOCMClient.EXPECT().SendServiceLog(activeServiceLog).Return(nil) + err := webhookReceiverHandler.processAlert(testAlert, testNotifRetriever, true) + Expect(err).ShouldNot(HaveOccurred()) + err = webhookReceiverHandler.processAlert(testAlert, testNotifRetriever, true) + Expect(err).ShouldNot(HaveOccurred()) + Expect(len(updatedConditions)).To(Equal(3)) + assertConditions(updatedConditions[0], 1, 1, 90, 0, 90) + assertConditions(updatedConditions[1], 1, 1, 90, 0, 0) + assertConditions(updatedConditions[2], 1, 1, 90, 0, 0) }) - }) - }) - Context("When updating Notification status", func() { - It("Report error if not able to get ManagedNotification", func() { - fakeError := k8serrs.NewInternalError(fmt.Errorf("a fake error")) - gomock.InOrder( - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(fakeError), - ) - _, err := webhookReceiverHandler.updateNotificationStatus(&testconst.TestNotification, &testconst.TestManagedNotification, true, corev1.ConditionTrue) - Expect(err).ShouldNot(BeNil()) - }) - When("Getting NotificationRecord for which status does not exist", func() { - It("should create status if NotificationRecord not found", func() { - gomock.InOrder( - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, testconst.TestManagedNotificationWithoutStatus), - mockClient.EXPECT().Status().Return(mockStatusWriter), - mockStatusWriter.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).DoAndReturn( - func(ctx context.Context, mn *ocmagentv1alpha1.ManagedNotification, client ...client.UpdateOptions) error { - Expect(mn.Status.NotificationRecords[0].Conditions.GetCondition(ocmagentv1alpha1.ConditionAlertFiring).Status).To(Equal(corev1.ConditionTrue)) - Expect(mn.Status.NotificationRecords[0].Conditions.GetCondition(ocmagentv1alpha1.ConditionAlertResolved).Status).To(Equal(corev1.ConditionFalse)) - Expect(mn.Status.NotificationRecords[0].Conditions.GetCondition(ocmagentv1alpha1.ConditionServiceLogSent).Status).To(Equal(corev1.ConditionTrue)) - return nil - }), - ) - _, err := webhookReceiverHandler.updateNotificationStatus(&ocmagentv1alpha1.Notification{Name: "randomnotification"}, &testconst.TestManagedNotificationWithoutStatus, true, corev1.ConditionTrue) - Expect(err).Should(BeNil()) - Expect(&testconst.TestManagedNotificationWithoutStatus).ToNot(BeNil()) + It("Should send a service log when receiving an alert resolution and the alert was marked as firing", func() { + conditions = getConditions(1, 1, 90, 30, 90) + mockOCMClient.EXPECT().SendServiceLog(resolvedServiceLog).Return(nil) + err := webhookReceiverHandler.processAlert(testAlertResolved, testNotifRetriever, false) + Expect(err).ShouldNot(HaveOccurred()) + Expect(len(updatedConditions)).To(Equal(2)) + assertConditions(updatedConditions[0], 0, 1, 0, 0, 90) + 
assertConditions(updatedConditions[1], 0, 1, 0, 0, 0) }) - }) - When("Getting NotificationRecord for which status already exists", func() { - It("should send service log again after resend window passed when alert is firing", func() { - gomock.InOrder( - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, testconst.TestManagedNotification), - mockClient.EXPECT().Status().Return(mockStatusWriter), - mockStatusWriter.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).DoAndReturn( - func(ctx context.Context, mn *ocmagentv1alpha1.ManagedNotification, client ...client.UpdateOptions) error { - Expect(mn.Status.NotificationRecords[0].Conditions.GetCondition(ocmagentv1alpha1.ConditionAlertFiring).Status).To(Equal(corev1.ConditionTrue)) - Expect(mn.Status.NotificationRecords[0].Conditions.GetCondition(ocmagentv1alpha1.ConditionAlertResolved).Status).To(Equal(corev1.ConditionFalse)) - Expect(mn.Status.NotificationRecords[0].Conditions.GetCondition(ocmagentv1alpha1.ConditionServiceLogSent).Status).To(Equal(corev1.ConditionTrue)) - return nil - }), - ) - _, err := webhookReceiverHandler.updateNotificationStatus(&testconst.TestNotification, &testconst.TestManagedNotification, true, corev1.ConditionTrue) - Expect(err).Should(BeNil()) + It("Should send a service log when receiving an alert resolution and even if the alert was marked as firing very recently", func() { + conditions = getConditions(1, 1, 1, 1, 1) + mockOCMClient.EXPECT().SendServiceLog(resolvedServiceLog).Return(nil) + err := webhookReceiverHandler.processAlert(testAlertResolved, testNotifRetriever, false) + Expect(err).ShouldNot(HaveOccurred()) + Expect(len(updatedConditions)).To(Equal(2)) + assertConditions(updatedConditions[0], 0, 1, 0, 0, 1) + assertConditions(updatedConditions[1], 0, 1, 0, 0, 0) }) - It("should send service log for alert resolved when no longer firing", func() { - gomock.InOrder( - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, testconst.TestManagedNotification), - mockClient.EXPECT().Status().Return(mockStatusWriter), - mockStatusWriter.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).DoAndReturn( - func(ctx context.Context, mn *ocmagentv1alpha1.ManagedNotification, client ...client.UpdateOptions) error { - Expect(mn.Status.NotificationRecords[0].Conditions.GetCondition(ocmagentv1alpha1.ConditionAlertFiring).Status).To(Equal(corev1.ConditionFalse)) - Expect(mn.Status.NotificationRecords[0].Conditions.GetCondition(ocmagentv1alpha1.ConditionAlertResolved).Status).To(Equal(corev1.ConditionTrue)) - Expect(mn.Status.NotificationRecords[0].Conditions.GetCondition(ocmagentv1alpha1.ConditionServiceLogSent).Status).To(Equal(corev1.ConditionTrue)) - return nil - }), - ) - _, err := webhookReceiverHandler.updateNotificationStatus(&testconst.TestNotification, &testconst.TestManagedNotification, false, corev1.ConditionTrue) - Expect(err).Should(BeNil()) + It("Should send a service log only once even if 2 alert resolutions are received", func() { // SREP-2079 + conditions = getConditions(1, 1, 90, 30, 90) + mockOCMClient.EXPECT().SendServiceLog(resolvedServiceLog).Return(nil) + err := webhookReceiverHandler.processAlert(testAlertResolved, testNotifRetriever, false) + Expect(err).ShouldNot(HaveOccurred()) + err = webhookReceiverHandler.processAlert(testAlertResolved, testNotifRetriever, false) + Expect(err).ShouldNot(HaveOccurred()) + Expect(len(updatedConditions)).To(Equal(3)) + assertConditions(updatedConditions[0], 0, 1, 0, 0, 90) + 
assertConditions(updatedConditions[1], 0, 1, 0, 0, 0)
+			assertConditions(updatedConditions[2], 0, 1, 0, 0, 0)
+		})
+		It("Should not send a service log when receiving an alert resolution and the alert was not marked as firing", func() {
+			conditions = getConditions(0, 1, 90, 90, 30)
+			err := webhookReceiverHandler.processAlert(testAlertResolved, testNotifRetriever, false)
+			Expect(err).ShouldNot(HaveOccurred())
+			Expect(len(updatedConditions)).To(Equal(1))
+			assertConditions(updatedConditions[0], 0, 1, 90, 90, 30)
+		})
+		It("Should not send a service log when receiving an alert resolution but the service log failed to be sent when the alert was firing", func() {
+			conditions = getConditions(1, 0, 30, 30, 30)
+			err := webhookReceiverHandler.processAlert(testAlertResolved, testNotifRetriever, false)
+			Expect(err).ShouldNot(HaveOccurred())
+			Expect(len(updatedConditions)).To(Equal(1))
+			assertConditions(updatedConditions[0], 0, 0, 0, 0, 30)
+		})
+		It("Should not send a service log when receiving an alert resolution but the last service log was sent before the alert was firing", func() {
+			conditions = getConditions(1, 1, 30, 30, 50)
+			err := webhookReceiverHandler.processAlert(testAlertResolved, testNotifRetriever, false)
+			Expect(err).ShouldNot(HaveOccurred())
+			Expect(len(updatedConditions)).To(Equal(1))
+			assertConditions(updatedConditions[0], 0, 1, 0, 0, 50)
+		})
+
+		It("Should not send a service log when receiving a firing alert and some placeholder cannot be resolved with an alert label or annotation", func() {
+			conditions = getConditions(-1, -1, 0, 0, 0)
+			testAlert.Annotations = nil
+			err := webhookReceiverHandler.processAlert(testAlert, testNotifRetriever, true)
+			Expect(err).Should(HaveOccurred())
+			Expect(len(updatedConditions)).To(Equal(2))
+			assertConditions(updatedConditions[0], 1, -1, 0, 0, 0)
+			assertConditions(updatedConditions[1], 1, 0, 0, 0, 0)
+		})
+		It("Should not send a service log when receiving an alert resolution and the resolved body is empty", func() {
+			notification = testconst.NotificationWithoutResolvedBody
+			conditions = getConditions(1, 1, 5, 5, 5)
+			err := webhookReceiverHandler.processAlert(testAlertResolved, testNotifRetriever, false)
+			Expect(err).ShouldNot(HaveOccurred())
+			Expect(len(updatedConditions)).To(Equal(1))
+			assertConditions(updatedConditions[0], 0, 1, 0, 0, 5)
+		})
+		It("Should report an error if not able to send service log", func() {
+			conditions = getConditions(0, 1, 90, 90, 90)
+			mockOCMClient.EXPECT().SendServiceLog(activeServiceLog).Return(k8serrs.NewInternalError(fmt.Errorf("a fake error")))
+			err := webhookReceiverHandler.processAlert(testAlert, testNotifRetriever, true)
+			Expect(err).Should(HaveOccurred())
+			Expect(len(updatedConditions)).To(Equal(2))
+			assertConditions(updatedConditions[0], 1, 1, 0, 0, 90)
+			assertConditions(updatedConditions[1], 1, 0, 0, 0, 0)
+		})
+		It("Should report an error if not able to update NotificationStatus", func() {
+			updatedConditionsError = k8serrs.NewInternalError(fmt.Errorf("a fake error"))
+			conditions = getConditions(0, 1, 90, 90, 90)
+			err := webhookReceiverHandler.processAlert(testAlert, testNotifRetriever, true)
+			Expect(err).Should(HaveOccurred())
+			Expect(len(updatedConditions)).To(Equal(1))
+			assertConditions(updatedConditions[0], 1, 1, 0, 0, 90)
 		})
-	})
-	It("Update ManagedNotificationStatus without any error", func() {
-		gomock.InOrder(
-			mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, testconst.TestManagedNotification),
-			
mockClient.EXPECT().Status().Return(mockStatusWriter), - mockStatusWriter.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil), - ) - _, err := webhookReceiverHandler.updateNotificationStatus(&testconst.TestNotification, &testconst.TestManagedNotification, true, corev1.ConditionTrue) - Expect(err).Should(BeNil()) }) }) diff --git a/pkg/handlers/webhookrhobsreceiver.go b/pkg/handlers/webhookrhobsreceiver.go index 9e148f8..7874b47 100644 --- a/pkg/handlers/webhookrhobsreceiver.go +++ b/pkg/handlers/webhookrhobsreceiver.go @@ -9,7 +9,6 @@ import ( "time" "github.com/prometheus/alertmanager/template" - "github.com/prometheus/common/model" log "github.com/sirupsen/logrus" cmv1 "github.com/openshift-online/ocm-sdk-go/clustersmgmt/v1" @@ -92,302 +91,369 @@ func (h *WebhookRHOBSReceiverHandler) ServeHTTP(w http.ResponseWriter, r *http.R func (h *WebhookRHOBSReceiverHandler) processAMReceiver(d AMReceiverData, ctx context.Context) *AMReceiverResponse { log.WithField("AMReceiverData", fmt.Sprintf("%+v", d)).Info("Process alert data") - for _, alert := range d.Alerts { - // Can we find a notification template for this alert? - templateName := alert.Labels[AMLabelTemplateName] - mfn := &oav1alpha1.ManagedFleetNotification{} - err := h.c.Get(ctx, client.ObjectKey{ - Namespace: OCMAgentNamespaceName, - Name: templateName, - }, mfn) + // Handle each firing alert + for _, alert := range d.Alerts.Firing() { + err := h.processAlert(alert, true) if err != nil { - log.WithError(err).Error("unable to locate corresponding notification template") - return &AMReceiverResponse{Error: err, - Status: fmt.Sprintf("unable to find ManagedFleetNotification %s", templateName), - Code: http.StatusInternalServerError} - } - - // Filter actionable alert based on Label - if !isValidAlert(alert, true) { - log.WithField(LogFieldAlert, fmt.Sprintf("%+v", alert)).Info("alert does not meet valid criteria") - continue + log.WithError(err).Error("a firing alert could not be successfully processed") } + } - err = h.processAlert(alert, mfn) + // Handle resolved alerts + for _, alert := range d.Alerts.Resolved() { + err := h.processAlert(alert, false) if err != nil { - log.WithError(err).Error("failed processing alert") + log.WithError(err).Error("a resolved alert could not be successfully processed") } } return &AMReceiverResponse{Error: nil, Status: "ok", Code: http.StatusOK} } -func (h *WebhookRHOBSReceiverHandler) processAlert(alert template.Alert, mfn *oav1alpha1.ManagedFleetNotification) error { - // Handle firing alerts - if alert.Status == string(model.AlertFiring) { - err := h.processFiringAlert(alert, mfn) +type fleetNotificationRetriever struct { + ctx context.Context + kubeCli client.Client + fleetNotification *oav1alpha1.FleetNotification + managementClusterID string + hostedClusterID string +} + +func newFleetNotificationRetriever(kubeCli client.Client, ctx context.Context, alert template.Alert) (*fleetNotificationRetriever, error) { + managedFleetNotificationName := alert.Labels[AMLabelTemplateName] + managedFleetNotification := &oav1alpha1.ManagedFleetNotification{} + err := kubeCli.Get(ctx, client.ObjectKey{ + Namespace: OCMAgentNamespaceName, + Name: managedFleetNotificationName, + }, managedFleetNotification) + if err != nil { + log.WithError(err).Error("unable to locate corresponding notification template") + return nil, err + } + + return &fleetNotificationRetriever{ + ctx: ctx, + kubeCli: kubeCli, + fleetNotification: &managedFleetNotification.Spec.FleetNotification, + managementClusterID: 
alert.Labels[AMLabelAlertMCID],
+		hostedClusterID:     alert.Labels[AMLabelAlertHCID],
+	}, nil
+}
+
+func (r *fleetNotificationRetriever) retrieveFleetNotificationContext() (*fleetNotificationContext, error) {
+	managedFleetNotificationRecord := &oav1alpha1.ManagedFleetNotificationRecord{}
+	err := r.kubeCli.Get(r.ctx, client.ObjectKey{
+		Namespace: OCMAgentNamespaceName,
+		Name:      r.managementClusterID,
+	}, managedFleetNotificationRecord)
+
+	if err != nil {
+		if errors.IsNotFound(err) {
+			// Record does not exist, attempt to create it
+			managedFleetNotificationRecord = &oav1alpha1.ManagedFleetNotificationRecord{
+				ObjectMeta: v1.ObjectMeta{
+					Name:      r.managementClusterID,
+					Namespace: OCMAgentNamespaceName,
+				},
+			}
+			err = r.kubeCli.Create(r.ctx, managedFleetNotificationRecord)
+		}
 		if err != nil {
-			return fmt.Errorf("a firing alert could not be successfully processed %w", err)
+			return nil, err
 		}
-		return nil
 	}

-	// Handle resolving alerts
-	if alert.Status == string(model.AlertResolved) {
-		err := h.processResolvedAlert(alert, mfn)
+	// Ideally, this field should probably have been part of the ManagedFleetNotificationRecord
+	// definition, not the status.
+	if managedFleetNotificationRecord.Status.ManagementCluster == "" {
+		managedFleetNotificationRecord.Status.ManagementCluster = r.managementClusterID
+	}
+
+	notificationRecordItem, err := managedFleetNotificationRecord.GetNotificationRecordItem(r.managementClusterID, r.fleetNotification.Name, r.hostedClusterID)
+	if err != nil {
+		// Add the item if it doesn't exist
+
+		notificationRecordByName, err := managedFleetNotificationRecord.GetNotificationRecordByName(r.managementClusterID, r.fleetNotification.Name)
 		if err != nil {
-			return fmt.Errorf("a resolving alert could not be successfully processed %w", err)
+			notificationRecordByName = &oav1alpha1.NotificationRecordByName{
+				NotificationName:        r.fleetNotification.Name,
+				ResendWait:              r.fleetNotification.ResendWait,
+				NotificationRecordItems: nil,
+			}
+			managedFleetNotificationRecord.Status.NotificationRecordByName = append(managedFleetNotificationRecord.Status.NotificationRecordByName, *notificationRecordByName)
+		}
+
+		notificationRecordItem, err = managedFleetNotificationRecord.AddNotificationRecordItem(r.hostedClusterID, notificationRecordByName)
+		if err != nil {
+			return nil, err
 		}
 	}

-	return fmt.Errorf("unable to process alert: unexpected status %s", alert.Status)
+	wasClusterInLimitedSupport :=
+		r.fleetNotification.LimitedSupport && // Sending limited support notifications in place of service logs
+			notificationRecordItem.FiringNotificationSentCount > notificationRecordItem.ResolvedNotificationSentCount
+	// Counters are identical when no limited support is active
+	// Sent counter is higher than resolved counter by 1 when limited support is active
+	// TODO(ngrauss): record the limited support reason ID in the NotificationRecordItem object to be able to
+	// precisely track if limited support is active or not. 
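+	// For example, FiringNotificationSentCount=3 with ResolvedNotificationSentCount=2 means a limited
+	// support reason previously set for this notification has not been resolved yet, so the cluster is
+	// treated as still being in limited support.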
+ + return &fleetNotificationContext{ + retriever: r, + managedFleetNotificationRecord: managedFleetNotificationRecord, + notificationRecordItem: notificationRecordItem, + notificationRecordInitialValue: *notificationRecordItem, + wasClusterInLimitedSupport: wasClusterInLimitedSupport, + }, nil } -// processResolvedAlert handles resolve notifications for a particular alert -// currently only handles removing limited support -func (h *WebhookRHOBSReceiverHandler) processResolvedAlert(alert template.Alert, mfn *oav1alpha1.ManagedFleetNotification) error { - // MFN is not for limited support, thus we don't have an implementation for the alert resolving state yet - if !mfn.Spec.FleetNotification.LimitedSupport { - return nil +type fleetNotificationContext struct { + retriever *fleetNotificationRetriever + managedFleetNotificationRecord *oav1alpha1.ManagedFleetNotificationRecord + notificationRecordItem *oav1alpha1.NotificationRecordItem + notificationRecordInitialValue oav1alpha1.NotificationRecordItem + notificationRecordValueAfterUpdate oav1alpha1.NotificationRecordItem + wasClusterInLimitedSupport bool +} + +func (c *fleetNotificationContext) canSendNotification() bool { + nowTime := time.Now() + + // Cluster already in limited support -> nothing to do + if c.wasClusterInLimitedSupport { + log.WithFields(log.Fields{"notification": c.retriever.fleetNotification.Name}).Info("not sending a limited support notification as the previous one didn't resolve yet") + return false } - hcID := alert.Labels[AMLabelAlertHCID] - fn := mfn.Spec.FleetNotification - fnLimitedSupportReason := fn.NotificationMessage + // No last transition time -> send a notification + if c.notificationRecordItem.LastTransitionTime == nil { + return true + } - activeLSReasons, err := h.ocm.GetLimitedSupportReasons(hcID) - if err != nil { - return fmt.Errorf("unable to get limited support reasons for cluster %s:, %w", hcID, err) + var dontResendDuration time.Duration + if c.retriever.fleetNotification.ResendWait > 0 { + dontResendDuration = time.Duration(c.retriever.fleetNotification.ResendWait) * time.Hour + } else { + dontResendDuration = time.Duration(3) * time.Minute } - for _, reason := range activeLSReasons { - // If the reason matches the fleet notification LS reason, remove it - // TODO(Claudio): Find a way to make sure the removed LS was also posted by OA - if strings.Contains(reason.Details(), fnLimitedSupportReason) { - log.WithFields(log.Fields{LogFieldNotificationName: fn.Name}).Infof("will remove limited support reason '%s' for notification", reason.ID()) - err := h.ocm.RemoveLimitedSupport(hcID, reason.ID()) - if err != nil { - metrics.IncrementFailedLimitedSupportRemoved(fn.Name) - // Set the metric for failed limited support response from OCM - metrics.SetResponseMetricFailure(config.ClustersService, fn.Name, alert.Labels["alertname"]) - return fmt.Errorf("limited support reason with ID '%s' couldn't be removed for cluster %s, err: %w", reason.ID(), hcID, err) + // Check if we are within the "don't resend" time window; if so -> nothing to do + nextAllowedSendTime := c.notificationRecordItem.LastTransitionTime.Add(dontResendDuration) + return nowTime.After(nextAllowedSendTime) +} + +func (c *fleetNotificationContext) inPlaceStatusUpdate() error { + // c.notificationRecordItem is a pointer but it is not part of the managedFleetNotificationRecord object + // Below code makes sure to update the oav1alpha1.NotificationRecordItem inside the managedFleetNotificationRecord object with the latest values. 
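+	// A conflict (409) returned by the Status().Update call below is propagated to the caller; when
+	// reached via updateNotificationStatus in processAlert, retryOnConflictOrAlreadyExists re-runs the
+	// whole read-and-update cycle on a fresh copy of the record.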
+	// TODO(ngrauss): refactor GetNotificationRecordItem method to return a reference to the object inside the managedFleetNotificationRecord
+	for i, notificationRecordByName := range c.managedFleetNotificationRecord.Status.NotificationRecordByName {
+		if notificationRecordByName.NotificationName != c.retriever.fleetNotification.Name {
+			continue
+		}
+		for j, notificationRecordItem := range notificationRecordByName.NotificationRecordItems {
+			if notificationRecordItem.HostedClusterID == c.retriever.hostedClusterID {
+				c.managedFleetNotificationRecord.Status.NotificationRecordByName[i].NotificationRecordItems[j] = *c.notificationRecordItem
 			}
-			metrics.IncrementLimitedSupportRemovedCount(fn.Name)
 		}
 	}
-	// Reset the metric for correct limited support response from OCM
-	metrics.ResetResponseMetricFailure(config.ClustersService, fn.Name, alert.Labels["alertname"])
 
-	return h.updateManagedFleetNotificationRecord(alert, mfn)
+	err := c.retriever.kubeCli.Status().Update(c.retriever.ctx, c.managedFleetNotificationRecord)
+	if err != nil {
+		log.WithFields(log.Fields{LogFieldNotificationRecordName: c.managedFleetNotificationRecord.Name}).Infof("update of managedfleetnotificationrecord failed: %s", err.Error())
+		return err
+	}
+	return nil
 }
 
-// processFiringAlert handles the pre-check verification and sending of a notification for a particular alert
-// and returns an error if that process completed successfully or false otherwise
-func (h *WebhookRHOBSReceiverHandler) processFiringAlert(alert template.Alert, mfn *oav1alpha1.ManagedFleetNotification) error {
-	fn := mfn.Spec.FleetNotification
-	hcID := alert.Labels[AMLabelAlertHCID]
-
-	canBeSent := h.firingCanBeSent(alert, mfn)
-	// There's no need to send a notification so just return
-	if !canBeSent {
-		log.WithFields(log.Fields{"notification": fn.Name,
-			LogFieldResendInterval: fn.ResendWait,
-		}).Info("not sending a notification as one was already sent recently")
-		// Reset the metric for correct service log response from OCM
-		metrics.ResetResponseMetricFailure(config.ServiceLogService, fn.Name, alert.Labels["alertname"])
-		return nil
+func (c *fleetNotificationContext) updateNotificationStatus(isCurrentlyFiring, canSendNotification bool) error {
+	if isCurrentlyFiring {
+		if canSendNotification {
+			c.notificationRecordItem.FiringNotificationSentCount++
+			c.notificationRecordItem.LastTransitionTime = &v1.Time{Time: time.Now()}
+		}
+	} else if c.retriever.fleetNotification.LimitedSupport {
+		c.notificationRecordItem.ResolvedNotificationSentCount = c.notificationRecordItem.FiringNotificationSentCount
+	}
+
+	c.notificationRecordValueAfterUpdate = *c.notificationRecordItem
+
+	return c.inPlaceStatusUpdate()
+}
+
+// TODO(ngrauss): to be removed
+// ManagedFleetNotificationRecord record item counters are currently updated before the notification is sent.
+// This is because:
+// - Those counters are also used to determine if the cluster is already in limited support or not.
+// - Determining this state needs to be done in an atomic way to avoid race conditions
+// A new field about whether the alert is firing or not will be added to the NotificationRecordItem object
+// to avoid this workaround. 
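+//
+// restoreNotificationStatus is a best-effort rollback used when sending the notification (or removing
+// limited support) fails after the counters have already been bumped: it re-reads the record and writes
+// the item's pre-update value back so that a later alert can trigger the notification again.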
+func (c *fleetNotificationContext) restoreNotificationStatus() error { + // Critical section: notification record item is read and set/updated in an atomic way + + fleetNotificationContext, err := c.retriever.retrieveFleetNotificationContext() + if err != nil { + return err } - if mfn.Spec.FleetNotification.LimitedSupport { - // Send the limited support for the alert - log.WithFields(log.Fields{LogFieldNotificationName: fn.Name}).Info("will send limited support for notification") + if *c.notificationRecordItem == c.notificationRecordValueAfterUpdate { + *fleetNotificationContext.notificationRecordItem = c.notificationRecordInitialValue + + return fleetNotificationContext.inPlaceStatusUpdate() + } + + return nil +} + +func (c *fleetNotificationContext) sendNotification(ocmCli ocm.OCMClient, alert template.Alert) error { + fleetNotification := c.retriever.fleetNotification + hostedClusterID := c.retriever.hostedClusterID + + if fleetNotification.LimitedSupport { // Limited support case + log.WithFields(log.Fields{LogFieldNotificationName: fleetNotification.Name}).Info("will send limited support for notification") builder := &cmv1.LimitedSupportReasonBuilder{} - builder.Summary(fn.Summary) - builder.Details(fn.NotificationMessage) + builder.Summary(fleetNotification.Summary) + builder.Details(fleetNotification.NotificationMessage) builder.DetectionType(cmv1.DetectionTypeManual) reason, err := builder.Build() if err != nil { - return fmt.Errorf("unable to build limited support for fleetnotification '%s' reason: %w", fn.Name, err) + return fmt.Errorf("unable to build limited support for fleetnotification '%s' reason: %w", fleetNotification.Name, err) } - err = h.ocm.SendLimitedSupport(hcID, reason) + err = ocmCli.SendLimitedSupport(hostedClusterID, reason) if err != nil { // Set the metric for failed limited support response from OCM - metrics.SetResponseMetricFailure("clusters_mgmt", fn.Name, alert.Labels["alertname"]) - metrics.IncrementFailedLimitedSupportSend(fn.Name) - return fmt.Errorf("limited support reason for fleetnotification '%s' could not be set for cluster %s, err: %w", fn.Name, hcID, err) + return fmt.Errorf("limited support reason for fleetnotification '%s' could not be set for cluster %s, err: %w", fleetNotification.Name, hostedClusterID, err) } - metrics.IncrementLimitedSupportSentCount(fn.Name) - // Reset the metric for correct limited support response from OCM - metrics.ResetResponseMetricFailure(config.ClustersService, fn.Name, alert.Labels["alertname"]) - } else { // Notification is for a service log - log.WithFields(log.Fields{LogFieldNotificationName: fn.Name}).Info("will send servicelog for notification") + } else { // Service log case + log.WithFields(log.Fields{LogFieldNotificationName: fleetNotification.Name}).Info("will send servicelog for notification") err := ocm.BuildAndSendServiceLog( - ocm.NewServiceLogBuilder(fn.Summary, fn.NotificationMessage, "", hcID, fn.Severity, fn.LogType, fn.References), - true, &alert, h.ocm) + ocm.NewServiceLogBuilder(fleetNotification.Summary, fleetNotification.NotificationMessage, "", hostedClusterID, fleetNotification.Severity, fleetNotification.LogType, fleetNotification.References), + true, &alert, ocmCli) if err != nil { - log.WithError(err).WithFields(log.Fields{LogFieldNotificationName: fn.Name, LogFieldIsFiring: true}).Error("unable to send service log for notification") - // Set the metric for failed service log response from OCM - metrics.SetResponseMetricFailure(config.ServiceLogService, fn.Name, 
alert.Labels["alertname"]) - metrics.CountFailedServiceLogs(fn.Name) + log.WithError(err).WithFields(log.Fields{LogFieldNotificationName: fleetNotification.Name, LogFieldIsFiring: true}).Error("unable to send service log for notification") + return err } - // Count the service log sent by the template name - metrics.CountServiceLogSent(fn.Name, "firing") - // Reset the metric for correct service log response from OCM - metrics.ResetResponseMetricFailure(config.ServiceLogService, fn.Name, alert.Labels["alertname"]) } - return h.updateManagedFleetNotificationRecord(alert, mfn) + return nil } -// Get or create ManagedFleetNotificationRecord -func (h *WebhookRHOBSReceiverHandler) getOrCreateManagedFleetNotificationRecord(mcID string, hcID string, mfn *oav1alpha1.ManagedFleetNotification) (*oav1alpha1.ManagedFleetNotificationRecord, error) { - mfnr := &oav1alpha1.ManagedFleetNotificationRecord{} - - err := h.c.Get(context.Background(), client.ObjectKey{ - Namespace: OCMAgentNamespaceName, - Name: mcID, - }, mfnr) +func (c *fleetNotificationContext) removeLimitedSupport(ocmCli ocm.OCMClient) error { + hostedClusterID := c.retriever.hostedClusterID + limitedSupportReasons, err := ocmCli.GetLimitedSupportReasons(hostedClusterID) if err != nil { - if errors.IsNotFound(err) { - // Record does not exist, attempt to create it - mfnr = &oav1alpha1.ManagedFleetNotificationRecord{ - ObjectMeta: v1.ObjectMeta{ - Name: mcID, - Namespace: OCMAgentNamespaceName, - }, - } - if err := h.c.Create(context.Background(), mfnr); err != nil { - return nil, err - } - } else { - return nil, err - } + return fmt.Errorf("unable to get limited support reasons for cluster %s:, %w", hostedClusterID, err) } - return mfnr, nil -} - -// The upstream implementation of `RetryOnConflict` -// calls `IsConflict` which doesn't handle `AlreadyExists` as a conflict error, -// even though it is meant to be a subcategory of conflict. -// This function implements a retry for errors of type Conflict or AlreadyExists (both status code 409): -// - conflict errors are triggered when failing to Update() a resource -// - alreadyexists errors are triggered when failing to Create() a resource -func retryOnConflictOrAlreadyExists(backoff wait.Backoff, fn func() error) error { - return retry.OnError(backoff, customIs409, fn) -} + fleetNotification := c.retriever.fleetNotification -// Updates the managedfleetnotificationrecord with the alert's data -// This function creates the notificationrecordbyname as well as the notificationrecorditem in case they don't exist yet -// Increments the sent/resolved notification state based on the alert -func (h *WebhookRHOBSReceiverHandler) updateManagedFleetNotificationRecord(alert template.Alert, mfn *oav1alpha1.ManagedFleetNotification) error { - fn := mfn.Spec.FleetNotification - mcID := alert.Labels[AMLabelAlertMCID] - hcID := alert.Labels[AMLabelAlertHCID] - firing := alert.Status == string(model.AlertFiring) - - err := retryOnConflictOrAlreadyExists(retryConfig, func() error { - // Fetch the ManagedFleetNotificationRecord, or create it if it does not already exist - mfnr, err := h.getOrCreateManagedFleetNotificationRecord(mcID, hcID, mfn) - if err != nil { - log.WithFields(log.Fields{LogFieldNotificationRecordName: mcID}).Infof("getOrCreate of managedfleetnotificationrecord failed: %s. 
Retrying in case of conflict error", err.Error()) - return err + for _, limitedSupportReason := range limitedSupportReasons { + // If the reason matches the fleet notification LS reason, remove it + // TODO(ngrauss): The limitedSupportReason.ID() should be stored in the ManagedFleetNotificationRecord record item object + // and be given to this method; this would avoid: + // 1. removing limited support reasons potentially not created by OCM Agent. + // 2. do some kind of string matching which is prone to errors if the message format changes. + if strings.Contains(limitedSupportReason.Details(), fleetNotification.NotificationMessage) { + log.WithFields(log.Fields{LogFieldNotificationName: fleetNotification.Name}).Infof("will remove limited support reason '%s' for notification", limitedSupportReason.ID()) + err := ocmCli.RemoveLimitedSupport(hostedClusterID, limitedSupportReason.ID()) + if err != nil { + return fmt.Errorf("limited support reason with ID '%s' couldn't be removed for cluster %s, err: %w", limitedSupportReason.ID(), hostedClusterID, err) + } } + } - // Update the status for the notification record item + return nil +} - // Ideally, this field should have probably been part of the ManagedFleetNotificationRecord - // definition, not the status. - if mfnr.Status.ManagementCluster == "" { - mfnr.Status.ManagementCluster = mcID - } +func (h *WebhookRHOBSReceiverHandler) processAlert(alert template.Alert, isCurrentlyFiring bool) error { + // Filter actionable alert based on Label + if !isValidAlert(alert, true) { + log.WithField(LogFieldAlert, fmt.Sprintf("%+v", alert)).Info("alert does not meet valid criteria") + return fmt.Errorf("alert does not meet valid criteria") + } - // Fetch notificationRecordByName - recordByName, err := mfnr.GetNotificationRecordByName(mcID, fn.Name) - if err != nil { - // add it to our patch if it doesn't exist - recordByName = &oav1alpha1.NotificationRecordByName{ - NotificationName: fn.Name, - ResendWait: fn.ResendWait, - NotificationRecordItems: nil, - } - mfnr.Status.NotificationRecordByName = append(mfnr.Status.NotificationRecordByName, *recordByName) - } + fleetNotificationRetriever, err := newFleetNotificationRetriever(h.c, context.Background(), alert) + if err != nil { + return fmt.Errorf("unable to find ManagedFleetNotification %s", alert.Labels[AMLabelTemplateName]) + } - // Fetch notificationRecordItem - _, err = mfnr.GetNotificationRecordItem(mcID, fn.Name, hcID) - if err != nil { - // add it to our patch if it doesn't exist - _, err = mfnr.AddNotificationRecordItem(hcID, recordByName) - if err != nil { - return err - } - } + if !fleetNotificationRetriever.fleetNotification.LimitedSupport && !isCurrentlyFiring { + return nil + } - _, err = mfnr.UpdateNotificationRecordItem(fn.Name, hcID, firing) + var c *fleetNotificationContext + canSend := false + err = retryOnConflictOrAlreadyExists(retryConfig, func() error { + c, err = fleetNotificationRetriever.retrieveFleetNotificationContext() if err != nil { return err } - err = h.c.Status().Update(context.TODO(), mfnr) - if err != nil { - log.WithFields(log.Fields{LogFieldNotificationRecordName: mfnr.Name}).Infof("update of managedfleetnotificationrecord failed: %s. 
Retrying in case of conflict error", err.Error()) - return err + if isCurrentlyFiring { + canSend = c.canSendNotification() } - return nil + + return c.updateNotificationStatus(isCurrentlyFiring, canSend) }) + if err != nil { + return err + } - return err -} + fleetNotification := c.retriever.fleetNotification + alertName := alert.Labels[AMLabelAlertName] -// Firing can be sent if: -// - there's no fleetnotificationrecord for the MC -// - there's no fleetnotificationrecorditem for the hosted cluster -// - for limited support type notification specifically, we only resent if the previous one resolved -// - if the recorditem exists and we don't run in the above limited support case, firingCanBeSent is true if we exceeded the resendWait interval -func (h *WebhookRHOBSReceiverHandler) firingCanBeSent(alert template.Alert, mfn *oav1alpha1.ManagedFleetNotification) bool { - fn := mfn.Spec.FleetNotification - mcID := alert.Labels[AMLabelAlertMCID] - hcID := alert.Labels[AMLabelAlertHCID] - - mfnr := &oav1alpha1.ManagedFleetNotificationRecord{} - err := h.c.Get(context.Background(), client.ObjectKey{ - Namespace: OCMAgentNamespaceName, - Name: mcID, - }, mfnr) + if isCurrentlyFiring { + if canSend { + err := c.sendNotification(h.ocm, alert) - if err != nil { - // there's no fleetnotificationrecord for the MC - return true - } + var logService string + if fleetNotification.LimitedSupport { // Limited support case + logService = config.ClustersService + } else { // Service log case + logService = config.ServiceLogService + } - recordItem, err := mfnr.GetNotificationRecordItem(mcID, fn.Name, hcID) - if err != nil { - // there's no fleetnotificationrecorditem for the hosted cluster - return true - } + if err != nil { + if fleetNotification.LimitedSupport { // Limited support case + metrics.IncrementFailedLimitedSupportSend(fleetNotification.Name) + } else { // Service log case + metrics.CountFailedServiceLogs(fleetNotification.Name) + } + metrics.SetResponseMetricFailure(logService, fleetNotification.Name, alertName) + _ = c.restoreNotificationStatus() + return err + } - if recordItem.LastTransitionTime == nil { - // We have no last transition time - return true - } + if fleetNotification.LimitedSupport { // Limited support case + metrics.IncrementLimitedSupportSentCount(fleetNotification.Name) + } else { // Service log case + metrics.CountServiceLogSent(fleetNotification.Name, "firing") + } + metrics.ResetResponseMetricFailure(logService, fleetNotification.Name, alertName) + } + } else { + if c.wasClusterInLimitedSupport { + err := c.removeLimitedSupport(h.ocm) - // Check if a limited support notification can be sent: - // if it was already sent but never resolved, we don't want to resent it. - // This happens when e.g. alertmanager restarts and loses its state. - if mfn.Spec.FleetNotification.LimitedSupport { - // Make sure we didn't already send limited support - this happens in cases - // where alertmanager restarts. 
- if recordItem.FiringNotificationSentCount > recordItem.ResolvedNotificationSentCount { - log.WithFields(log.Fields{"notification": fn.Name}).Info("not sending a limited support notification as the previous one didn't resolve yet") - return false + if err != nil { + metrics.IncrementFailedLimitedSupportRemoved(fleetNotification.Name) + metrics.SetResponseMetricFailure(config.ClustersService, fleetNotification.Name, alertName) + _ = c.restoreNotificationStatus() + return err + } + metrics.IncrementLimitedSupportRemovedCount(fleetNotification.Name) + metrics.ResetResponseMetricFailure(config.ClustersService, fleetNotification.Name, alertName) } } - nextSend := recordItem.LastTransitionTime.Time.Add(time.Duration(fn.ResendWait) * time.Hour) + return nil +} - return time.Now().After(nextSend) +// The upstream implementation of `RetryOnConflict` +// calls `IsConflict` which doesn't handle `AlreadyExists` as a conflict error, +// even though it is meant to be a subcategory of conflict. +// This function implements a retry for errors of type Conflict or AlreadyExists (both status code 409): +// - conflict errors are triggered when failing to Update() a resource +// - alreadyexists errors are triggered when failing to Create() a resource +func retryOnConflictOrAlreadyExists(backoff wait.Backoff, fn func() error) error { + return retry.OnError(backoff, customIs409, fn) } diff --git a/pkg/handlers/webhookrhobsreceiver_test.go b/pkg/handlers/webhookrhobsreceiver_test.go index 1d7d16e..3493274 100644 --- a/pkg/handlers/webhookrhobsreceiver_test.go +++ b/pkg/handlers/webhookrhobsreceiver_test.go @@ -12,12 +12,13 @@ import ( "time" "go.uber.org/mock/gomock" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" cmv1 "github.com/openshift-online/ocm-sdk-go/clustersmgmt/v1" oav1alpha1 "github.com/openshift/ocm-agent-operator/api/v1alpha1" + ocmagentv1alpha1 "github.com/openshift/ocm-agent-operator/api/v1alpha1" "github.com/prometheus/alertmanager/template" kerrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime/schema" . 
"github.com/onsi/ginkgo/v2" @@ -32,23 +33,53 @@ import ( clientmocks "github.com/openshift/ocm-agent/pkg/util/test/generated/mocks/client" ) -var _ = Describe("WebhookRHOBSReceiverHandler HTTP Tests", func() { +func initItemInRecord(managedFleetNotificationRecord *ocmagentv1alpha1.ManagedFleetNotificationRecord, firingSentCount, resolvedSentCount, lastUpdateMinutesAgo int) { + notificationRecordItem := &managedFleetNotificationRecord.Status.NotificationRecordByName[0].NotificationRecordItems[0] + + notificationRecordItem.FiringNotificationSentCount = firingSentCount + notificationRecordItem.ResolvedNotificationSentCount = resolvedSentCount + notificationRecordItem.LastTransitionTime = &metav1.Time{Time: time.Now().Add(time.Duration(-lastUpdateMinutesAgo) * time.Minute)} +} + +func assertRecordMetadata(managedFleetNotificationRecord *ocmagentv1alpha1.ManagedFleetNotificationRecord) { + Expect(managedFleetNotificationRecord).ToNot(BeNil()) + Expect(managedFleetNotificationRecord.ObjectMeta.Name).To(Equal(testconst.TestManagedClusterID)) + Expect(managedFleetNotificationRecord.ObjectMeta.Namespace).To(Equal(OCMAgentNamespaceName)) +} + +func assertRecordStatus(managedFleetNotificationRecord *ocmagentv1alpha1.ManagedFleetNotificationRecord) { + Expect(managedFleetNotificationRecord.Status.ManagementCluster).To(Equal(testconst.TestManagedClusterID)) + Expect(len(managedFleetNotificationRecord.Status.NotificationRecordByName)).To(Equal(1)) + Expect(managedFleetNotificationRecord.Status.NotificationRecordByName[0].NotificationName).To(Equal(testconst.TestNotificationName)) + Expect(len(managedFleetNotificationRecord.Status.NotificationRecordByName[0].NotificationRecordItems)).To(Equal(1)) +} + +func assertRecordItem(notificationRecordItem *ocmagentv1alpha1.NotificationRecordItem, firingSentCount, resolvedSentCount, lastUpdateMinutesAgo int) { + Expect(notificationRecordItem.HostedClusterID).To(Equal(testconst.TestHostedClusterID)) + Expect(notificationRecordItem.FiringNotificationSentCount).To(Equal(firingSentCount)) + Expect(notificationRecordItem.ResolvedNotificationSentCount).To(Equal(resolvedSentCount)) + + if lastUpdateMinutesAgo < 0 { + Expect(notificationRecordItem.LastTransitionTime).To(BeNil()) + } else { + actualMinutesAgo := int(time.Since(notificationRecordItem.LastTransitionTime.Time).Minutes()) + Expect(actualMinutesAgo).To(Equal(lastUpdateMinutesAgo)) + } +} + +var _ = Describe("Webhook Handlers", func() { var ( - mockCtrl *gomock.Controller - mockClient *clientmocks.MockClient - mockOCMClient *webhookreceivermock.MockOCMClient - testHandler *WebhookRHOBSReceiverHandler - server *ghttp.Server - testAlertFiring template.Alert - testAlertResolved template.Alert - testMFN oav1alpha1.ManagedFleetNotification - testLimitedSupportMFN oav1alpha1.ManagedFleetNotification - testMFNR oav1alpha1.ManagedFleetNotificationRecord - testMFNRWithStatus oav1alpha1.ManagedFleetNotificationRecord - mockStatusWriter *clientmocks.MockStatusWriter - serviceLog *ocm.ServiceLog - limitedSupportReason *cmv1.LimitedSupportReason + mockCtrl *gomock.Controller + mockClient *clientmocks.MockClient + mockOCMClient *webhookreceivermock.MockOCMClient + testHandler *WebhookRHOBSReceiverHandler + server *ghttp.Server + testAlertFiring template.Alert + testAlertResolved template.Alert + mockStatusWriter *clientmocks.MockStatusWriter + serviceLog *ocm.ServiceLog + limitedSupportReason *cmv1.LimitedSupportReason ) BeforeEach(func() { @@ -63,10 +94,6 @@ var _ = Describe("WebhookRHOBSReceiverHandler HTTP Tests", func() { 
} testAlertFiring = testconst.NewTestAlert(false, true) testAlertResolved = testconst.NewTestAlert(true, true) - testMFN = testconst.NewManagedFleetNotification(false) - testLimitedSupportMFN = testconst.NewManagedFleetNotification(true) - testMFNR = testconst.NewManagedFleetNotificationRecord() - testMFNRWithStatus = testconst.NewManagedFleetNotificationRecordWithStatus() serviceLog = testconst.NewTestServiceLog( ocm.ServiceLogActivePrefix+": "+testconst.ServiceLogSummary, testconst.ServiceLogFleetDesc, @@ -74,287 +101,400 @@ var _ = Describe("WebhookRHOBSReceiverHandler HTTP Tests", func() { testconst.TestNotification.Severity, "", testconst.TestNotification.References) - limitedSupportReason, _ = cmv1.NewLimitedSupportReason().Summary(testLimitedSupportMFN.Spec.FleetNotification.Summary).Details(testLimitedSupportMFN.Spec.FleetNotification.NotificationMessage).DetectionType(cmv1.DetectionTypeManual).Build() + limitedSupportManagedFleetNotification := testconst.NewManagedFleetNotification(true) + limitedSupportReason, _ = cmv1.NewLimitedSupportReason().Summary(limitedSupportManagedFleetNotification.Spec.FleetNotification.Summary).Details(limitedSupportManagedFleetNotification.Spec.FleetNotification.NotificationMessage).DetectionType(cmv1.DetectionTypeManual).Build() }) AfterEach(func() { server.Close() }) - Context("When calling updateManagedFleetNotificationRecord", func() { - Context("When the fleet record and record and status don't exist yet", func() { - It("Creates it and sets the status", func() { - gomock.InOrder( - // Fetch the MFNR - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(kerrors.NewNotFound(schema.GroupResource{ - Group: oav1alpha1.GroupVersion.Group, Resource: "ManagedFleetNotificationRecord"}, testconst.TestManagedClusterID), - ), - // Create the MFNR - mockClient.EXPECT().Create(gomock.Any(), gomock.Any()).DoAndReturn( - func(ctx context.Context, mfnr *oav1alpha1.ManagedFleetNotificationRecord, co ...client.CreateOption) error { - Expect(mfnr.Name).To(Equal(testconst.TestManagedClusterID)) - return nil - }), - // Update status for the handled alert - mockClient.EXPECT().Status().Return(mockStatusWriter), - mockStatusWriter.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil), - ) - - err := testHandler.updateManagedFleetNotificationRecord(testAlertFiring, &testMFN) - Expect(err).ShouldNot(HaveOccurred()) + Context("NewWebhookRHOBSReceiverHandler.processAlert", func() { + Context("Alert is invalid", func() { + It("Reports error if alert does not have alertname label", func() { + delete(testAlertFiring.Labels, "alertname") + err := testHandler.processAlert(testAlertFiring, true) + Expect(err).Should(HaveOccurred()) }) - }) - Context("When the record and status already exists", func() { - It("Updates the existing record and updates the status for the notification type", func() { - gomock.InOrder( - // Fetch the MFNR - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, testMFNRWithStatus), - - // Update status for the handled alert - mockClient.EXPECT().Status().Return(mockStatusWriter), - mockStatusWriter.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil), - ) - - err := testHandler.updateManagedFleetNotificationRecord(testAlertFiring, &testMFN) - Expect(err).ShouldNot(HaveOccurred()) + It("Reports error if alert does not have managed_notification_template label", func() { + delete(testAlertFiring.Labels, "managed_notification_template") + err := 
testHandler.processAlert(testAlertFiring, true) + Expect(err).Should(HaveOccurred()) + }) + It("Reports error if alert does not have send_managed_notification label", func() { + delete(testAlertResolved.Labels, "send_managed_notification") + err := testHandler.processAlert(testAlertResolved, false) + Expect(err).Should(HaveOccurred()) }) }) - }) - - Context("When processing a firing alert", func() { - Context("When the MFN of type limited support for a firing alert", func() { - It("Sends limited support", func() { - gomock.InOrder( - // Fetch the MFNR - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, testMFNRWithStatus), - // Send limited support - mockOCMClient.EXPECT().SendLimitedSupport(testconst.TestHostedClusterID, limitedSupportReason).Return(nil), + Context("Alert is valid", func() { + var managedFleetNotification *ocmagentv1alpha1.ManagedFleetNotification - // Fetch the MFNR and update it's status - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, testMFNRWithStatus), + BeforeEach(func() { + defaultManagedFleetNotification := testconst.NewManagedFleetNotification(false) + defaultManagedFleetNotification.Spec.FleetNotification.ResendWait = 1 + managedFleetNotification = &defaultManagedFleetNotification - // Update status for the handled alert - mockClient.EXPECT().Status().Return(mockStatusWriter), - mockStatusWriter.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil), - ) - - err := testHandler.processFiringAlert(testAlertFiring, &testLimitedSupportMFN) - Expect(err).ShouldNot(HaveOccurred()) - }) - Context("When the MFN of type limited support for a firing alert and a previous firing notification hasn't resolved yet", func() { - It("Doesn't re-send limited support", func() { - // Increment firing status (firing = 1 and resolved = 0) - _, err := testMFNRWithStatus.UpdateNotificationRecordItem(testconst.TestNotificationName, testconst.TestHostedClusterID, true) - Expect(err).ShouldNot(HaveOccurred()) - - // Fetch the MFNR - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, testMFNRWithStatus) - // Return right after as there was already a LS sent that didn't resolve yet - - err = testHandler.processFiringAlert(testAlertFiring, &testLimitedSupportMFN) - Expect(err).ShouldNot(HaveOccurred()) - }) + mockClient.EXPECT().Get(gomock.Any(), client.ObjectKey{ + Namespace: OCMAgentNamespaceName, + Name: managedFleetNotification.ObjectMeta.Name, + }, gomock.Any()).DoAndReturn( + func(ctx context.Context, key client.ObjectKey, res *ocmagentv1alpha1.ManagedFleetNotification, opts ...client.GetOption) error { + if managedFleetNotification == nil { + return kerrors.NewNotFound(schema.GroupResource{}, key.Name) + } else { + *res = *managedFleetNotification + return nil + } + }).MinTimes(1) }) - }) - Context("When the MFN of type limited support for a firing alert", func() { - It("Removes no limited support if none exist", func() { - gomock.InOrder( - // Get limited support reasons, returns empty so no limited supports will be removed - mockOCMClient.EXPECT().GetLimitedSupportReasons(testconst.TestHostedClusterID).Return([]*cmv1.LimitedSupportReason{}, nil), + It("Should report an error if there is no ManagedFleetNotification", func() { + managedFleetNotification = nil - // Fetch the MFNR - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, testMFNRWithStatus), - - // Update status for the handled alert - 
mockClient.EXPECT().Status().Return(mockStatusWriter), - mockStatusWriter.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil), - ) - - err := testHandler.processAlert(testAlertResolved, &testLimitedSupportMFN) - Expect(err).ShouldNot(HaveOccurred()) + err := testHandler.processAlert(testAlertFiring, true) + Expect(err).Should(HaveOccurred()) }) - It("Removes limited support if it was previously set", func() { - // This reason has an ID which is used to test deleting it - limitedSupportReason, _ = cmv1.NewLimitedSupportReason().Summary(testMFN.Spec.FleetNotification.Summary).Details(testMFN.Spec.FleetNotification.NotificationMessage).ID("1234").DetectionType(cmv1.DetectionTypeManual).Build() - gomock.InOrder( - // LS reasons are fetched - mockOCMClient.EXPECT().GetLimitedSupportReasons(testconst.TestHostedClusterID).Return([]*cmv1.LimitedSupportReason{limitedSupportReason}, nil), - // LS reason matching for the MFN is removed - mockOCMClient.EXPECT().RemoveLimitedSupport(testconst.TestHostedClusterID, limitedSupportReason.ID()).Return(nil), + Context("There is a ManagedFleetNotification", func() { + var managedFleetNotificationRecord *ocmagentv1alpha1.ManagedFleetNotificationRecord + var updatedNotificationRecordItems []ocmagentv1alpha1.NotificationRecordItem + var updateNotificationRecordError error - // Fetch the MFNR - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, testMFNRWithStatus), + BeforeEach(func() { + defaultManagedFleetNotificationRecord := testconst.NewManagedFleetNotificationRecordWithStatus() + managedFleetNotificationRecord = &defaultManagedFleetNotificationRecord + updatedNotificationRecordItems = nil + updateNotificationRecordError = nil + + // Fetch the ManagedFleetNotificationRecord + mockClient.EXPECT().Get(gomock.Any(), client.ObjectKey{ + Namespace: OCMAgentNamespaceName, + Name: managedFleetNotificationRecord.ObjectMeta.Name, + }, gomock.Any()).DoAndReturn( + func(ctx context.Context, key client.ObjectKey, res *ocmagentv1alpha1.ManagedFleetNotificationRecord, opts ...client.GetOption) error { + if managedFleetNotificationRecord == nil { + return kerrors.NewNotFound(schema.GroupResource{}, key.Name) + } else { + *res = *managedFleetNotificationRecord + return nil + } + }).AnyTimes() // Update status for the handled alert - mockClient.EXPECT().Status().Return(mockStatusWriter), - mockStatusWriter.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil), - ) - - err := testHandler.processAlert(testAlertResolved, &testLimitedSupportMFN) - Expect(err).ShouldNot(HaveOccurred()) - }) - }) - - Context("When a managed fleet notification record does exist", func() { - Context("And doesn't have a Management Cluster in the status", func() { - BeforeEach(func() { - testMFNR.Status.ManagementCluster = "" - testMFNR.Status.NotificationRecordByName = []oav1alpha1.NotificationRecordByName{} - }) - It("Updates the status", func() { - gomock.InOrder( - // Fetch the MFNR - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, testMFNR), - // Send the SL - mockOCMClient.EXPECT().SendServiceLog(serviceLog).Return(nil), - - // Update SL sent status - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, testMFNRWithStatus), - mockClient.EXPECT().Status().Return(mockStatusWriter), - mockStatusWriter.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil), - ) - - err := testHandler.processAlert(testAlertFiring, &testMFN) - 
Expect(err).ShouldNot(HaveOccurred()) - }) - }) - Context("When a notification record doesn't exist", func() { - It("Creates one", func() { - // Let's add a notification record, but named differently to the one we want, - // so we can verify the handler doesn't try and use it - testMFNR.Status.NotificationRecordByName = []oav1alpha1.NotificationRecordByName{ - { - NotificationName: "dummy-name", - ResendWait: 24, - NotificationRecordItems: []oav1alpha1.NotificationRecordItem{}, + mockClient.EXPECT().Status().Return(mockStatusWriter).AnyTimes() + mockStatusWriter.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).DoAndReturn( + func(ctx context.Context, obj client.Object, opts ...client.SubResourceUpdateOption) error { + updatedManagedFleetNotificationRecord, _ := obj.(*ocmagentv1alpha1.ManagedFleetNotificationRecord) + assertRecordMetadata(updatedManagedFleetNotificationRecord) + assertRecordStatus(updatedManagedFleetNotificationRecord) + updatedNotificationRecordItems = append(updatedNotificationRecordItems, updatedManagedFleetNotificationRecord.Status.NotificationRecordByName[0].NotificationRecordItems[0]) + + return updateNotificationRecordError }, - } - testMFNR.Status.ManagementCluster = testconst.TestManagedClusterID - - gomock.InOrder( - // Fetch the MFNR - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, testMFNR), + ).AnyTimes() + }) - // Send the SL - mockOCMClient.EXPECT().SendServiceLog(serviceLog).Return(nil), + Context("There is no ManagedFleetNotificationRecord", func() { + BeforeEach(func() { + managedFleetNotificationRecord = nil - // Update status (create the record item) - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, testMFNR), + mockClient.EXPECT().Create(gomock.Any(), gomock.Any()).DoAndReturn( + func(ctx context.Context, obj client.Object, opts ...client.CreateOption) error { + createdManagedFleetNotificationRecord, _ := obj.(*ocmagentv1alpha1.ManagedFleetNotificationRecord) + assertRecordMetadata(createdManagedFleetNotificationRecord) + Expect(len(createdManagedFleetNotificationRecord.Status.NotificationRecordByName)).To(Equal(0)) - mockClient.EXPECT().Status().Return(mockStatusWriter), - mockStatusWriter.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).DoAndReturn( - func(ctx context.Context, mfnr *oav1alpha1.ManagedFleetNotificationRecord, co ...client.UpdateOptions) error { - Expect(len(mfnr.Status.NotificationRecordByName)).To(Equal(2)) - Expect(mfnr.Status.NotificationRecordByName[1].NotificationRecordItems[0].FiringNotificationSentCount).To(Equal(1)) - Expect(mfnr.Status.NotificationRecordByName[1].NotificationRecordItems[0].HostedClusterID).To(Equal(testconst.TestHostedClusterID)) return nil - }), - ) - err := testHandler.processAlert(testAlertFiring, &testMFN) - Expect(err).ShouldNot(HaveOccurred()) - }) - }) - Context("When a notification record item for the hosted cluster already exists", func() { - var testTime = &metav1.Time{Time: time.Now().Add(time.Duration(-48) * time.Hour)} - It("Updates the existing one", func() { - testMFNR.Status.NotificationRecordByName = []oav1alpha1.NotificationRecordByName{ - { - NotificationName: testconst.TestNotificationName, - ResendWait: 24, - NotificationRecordItems: []oav1alpha1.NotificationRecordItem{ - { - HostedClusterID: testconst.TestHostedClusterID, - FiringNotificationSentCount: 1, - LastTransitionTime: testTime, - }, }, - }, - } - gomock.InOrder( - // Fetch the MFNR - mockClient.EXPECT().Get(gomock.Any(), 
gomock.Any(), gomock.Any()).Return(nil).SetArg(2, testMFNR), - // Send the SL - mockOCMClient.EXPECT().SendServiceLog(serviceLog).Return(nil), - // Update existing MFNR item - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, testMFNR), - mockClient.EXPECT().Status().Return(mockStatusWriter), - mockStatusWriter.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).DoAndReturn( - func(ctx context.Context, mfnr *oav1alpha1.ManagedFleetNotificationRecord, co ...client.UpdateOptions) error { - Expect(mfnr.Status.NotificationRecordByName[0].NotificationRecordItems[0].FiringNotificationSentCount).To(Equal(2)) - Expect(mfnr.Status.NotificationRecordByName[0].NotificationRecordItems[0].HostedClusterID).To(Equal(testconst.TestHostedClusterID)) - Expect(mfnr.Status.NotificationRecordByName[0].NotificationRecordItems[0].LastTransitionTime.After(testTime.Time)).To(BeTrue()) - return nil - }), - ) - err := testHandler.processAlert(testAlertFiring, &testMFN) - Expect(err).ShouldNot(HaveOccurred()) - }) - }) - Context("When a notification record item for a hosted cluster does not exist", func() { - It("Creates one", func() { - testMFNR.Status.NotificationRecordByName = []oav1alpha1.NotificationRecordByName{ - { - NotificationName: testconst.TestNotificationName, - ResendWait: 24, - NotificationRecordItems: []oav1alpha1.NotificationRecordItem{}, - }, - } - testMFNR.Status.ManagementCluster = testconst.TestManagedClusterID - - gomock.InOrder( - // Fetch the MFNR - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, testMFNR), - // Send the SL - mockOCMClient.EXPECT().SendServiceLog(serviceLog).Return(nil), - - // Re-fetch the MFNR for the status update - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, testMFNR), - mockClient.EXPECT().Status().Return(mockStatusWriter), - mockStatusWriter.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).DoAndReturn( - func(ctx context.Context, mfnr *oav1alpha1.ManagedFleetNotificationRecord, co ...client.UpdateOptions) error { - Expect(mfnr.Status.NotificationRecordByName[0].NotificationRecordItems[0].FiringNotificationSentCount).To(Equal(1)) - Expect(mfnr.Status.NotificationRecordByName[0].NotificationRecordItems[0].HostedClusterID).To(Equal(testconst.TestHostedClusterID)) - return nil - }), - ) - err := testHandler.processAlert(testAlertFiring, &testMFN) - Expect(err).ShouldNot(HaveOccurred()) + ) + }) + Context("When notifications are of 'limited support' type", func() { + BeforeEach(func() { + managedFleetNotification.Spec.FleetNotification.LimitedSupport = true + }) + It("Sends limited support when processing a firing alert", func() { + // Send limited support + mockOCMClient.EXPECT().SendLimitedSupport(testconst.TestHostedClusterID, limitedSupportReason).Return(nil) + + err := testHandler.processAlert(testAlertFiring, true) + Expect(err).ShouldNot(HaveOccurred()) + Expect(len(updatedNotificationRecordItems)).To(Equal(1)) + assertRecordItem(&updatedNotificationRecordItems[0], 1, 0, 0) + }) + It("Does nothing when processing a resolving alert", func() { + err := testHandler.processAlert(testAlertResolved, false) + Expect(err).ShouldNot(HaveOccurred()) + Expect(len(updatedNotificationRecordItems)).To(Equal(1)) + assertRecordItem(&updatedNotificationRecordItems[0], 0, 0, -1) + }) + }) + Context("When notifications are of 'service log' type", func() { + It("Sends service log when processing a firing alert", func() { + // Send service log + 
mockOCMClient.EXPECT().SendServiceLog(serviceLog).Return(nil) + + err := testHandler.processAlert(testAlertFiring, true) + Expect(err).ShouldNot(HaveOccurred()) + Expect(len(updatedNotificationRecordItems)).To(Equal(1)) + assertRecordItem(&updatedNotificationRecordItems[0], 1, 0, 0) + }) + }) }) - }) - Context("When a SL has been sent for the alert recently", func() { - It("Does not re-send", func() { - // Set a notification record item with a last sent time within the 24 hour window of the notification record - testMFNR.Status.NotificationRecordByName = []oav1alpha1.NotificationRecordByName{ - { - NotificationName: testconst.TestNotificationName, - ResendWait: 24, - NotificationRecordItems: []oav1alpha1.NotificationRecordItem{ - { - HostedClusterID: testconst.TestHostedClusterID, - FiringNotificationSentCount: 1, - LastTransitionTime: &metav1.Time{Time: time.Now().Add(time.Duration(-1) * time.Hour)}, - }, - }, - }, - } - testMFN.Spec.FleetNotification.ResendWait = 24 - gomock.InOrder( - // Fetch the MFNR - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, testMFNR), - ) - - err := testHandler.processAlert(testAlertFiring, &testMFN) - Expect(err).ShouldNot(HaveOccurred()) + + Context("There is a ManagedFleetNotificationRecord", func() { + Context("When notifications are of 'limited support' type", func() { + BeforeEach(func() { + managedFleetNotification.Spec.FleetNotification.LimitedSupport = true + }) + Context("When processing a firing alert", func() { + It("Sends limited support when there is a ManagedFleetNotificationRecord without status", func() { + managedFleetNotificationRecord.Status.NotificationRecordByName = nil + + // Send limited support + mockOCMClient.EXPECT().SendLimitedSupport(testconst.TestHostedClusterID, limitedSupportReason).Return(nil) + + err := testHandler.processAlert(testAlertFiring, true) + Expect(err).ShouldNot(HaveOccurred()) + Expect(len(updatedNotificationRecordItems)).To(Equal(1)) + assertRecordItem(&updatedNotificationRecordItems[0], 1, 0, 0) + }) + It("Sends limited support when there is an empty ManagedFleetNotificationRecord status", func() { + // Send limited support + mockOCMClient.EXPECT().SendLimitedSupport(testconst.TestHostedClusterID, limitedSupportReason).Return(nil) + + err := testHandler.processAlert(testAlertFiring, true) + Expect(err).ShouldNot(HaveOccurred()) + Expect(len(updatedNotificationRecordItems)).To(Equal(1)) + assertRecordItem(&updatedNotificationRecordItems[0], 1, 0, 0) + }) + It("Sends limited support when status ManagedFleetNotificationRecord counters are equal and out of the no-resend time window", func() { + initItemInRecord(managedFleetNotificationRecord, 42, 42, 90) + + // Send limited support + mockOCMClient.EXPECT().SendLimitedSupport(testconst.TestHostedClusterID, limitedSupportReason).Return(nil) + + err := testHandler.processAlert(testAlertFiring, true) + Expect(err).ShouldNot(HaveOccurred()) + Expect(len(updatedNotificationRecordItems)).To(Equal(1)) + assertRecordItem(&updatedNotificationRecordItems[0], 43, 42, 0) + }) + It("Does nothing if the limited support cannot be sent", func() { + initItemInRecord(managedFleetNotificationRecord, 42, 42, 90) + + // Send limited support + mockOCMClient.EXPECT().SendLimitedSupport(testconst.TestHostedClusterID, limitedSupportReason).Return(errors.New("cannot be put in LS")) + + err := testHandler.processAlert(testAlertFiring, true) + Expect(err).Should(HaveOccurred()) + Expect(len(updatedNotificationRecordItems)).To(Equal(2)) + 
assertRecordItem(&updatedNotificationRecordItems[0], 43, 42, 0) + assertRecordItem(&updatedNotificationRecordItems[1], 42, 42, 90) + }) + It("Does nothing if updating ManagedFleetNotificationRecord status is in error", func() { + updateNotificationRecordError = kerrors.NewInternalError(fmt.Errorf("a fake error")) + + initItemInRecord(managedFleetNotificationRecord, 42, 42, 90) + + err := testHandler.processAlert(testAlertFiring, true) + Expect(err).Should(HaveOccurred()) + Expect(len(updatedNotificationRecordItems)).To(Equal(1)) + assertRecordItem(&updatedNotificationRecordItems[0], 43, 42, 0) + }) + It("Does nothing if updating ManagedFleetNotificationRecord status is in conflict", func() { + updateNotificationRecordError = kerrors.NewConflict(schema.GroupResource{}, managedFleetNotificationRecord.Name, fmt.Errorf("a fake error")) + + initItemInRecord(managedFleetNotificationRecord, 42, 42, 90) + + err := testHandler.processAlert(testAlertFiring, true) + Expect(err).Should(HaveOccurred()) + Expect(len(updatedNotificationRecordItems)).To(Equal(5)) + for k := 0; k < 5; k++ { + assertRecordItem(&updatedNotificationRecordItems[k], 43, 42, 0) + } + }) + It("Does nothing when status ManagedFleetNotificationRecord counters are equal and inside the no-resend time window", func() { + initItemInRecord(managedFleetNotificationRecord, 42, 42, 30) + + err := testHandler.processAlert(testAlertFiring, true) + Expect(err).ShouldNot(HaveOccurred()) + Expect(len(updatedNotificationRecordItems)).To(Equal(1)) + assertRecordItem(&updatedNotificationRecordItems[0], 42, 42, 30) + }) + It("Does nothing when the status ManagedFleetNotificationRecord firing counter is already bigger than the resolved counter", func() { + initItemInRecord(managedFleetNotificationRecord, 43, 42, 90) + + err := testHandler.processAlert(testAlertFiring, true) + Expect(err).ShouldNot(HaveOccurred()) + Expect(len(updatedNotificationRecordItems)).To(Equal(1)) + assertRecordItem(&updatedNotificationRecordItems[0], 43, 42, 90) + }) + Context("2 alerts are received", func() { + It("Sends limited support only once even if the time window is null", func() { + managedFleetNotification.Spec.FleetNotification.ResendWait = 0 + initItemInRecord(managedFleetNotificationRecord, 42, 42, 90) + + // Send limited support + mockOCMClient.EXPECT().SendLimitedSupport(testconst.TestHostedClusterID, limitedSupportReason).DoAndReturn( + func(hcid string, lsr *cmv1.LimitedSupportReason) error { + initItemInRecord(managedFleetNotificationRecord, 43, 42, 0) + return nil + }, + ) + + err := testHandler.processAlert(testAlertFiring, true) + Expect(err).ShouldNot(HaveOccurred()) + err = testHandler.processAlert(testAlertFiring, true) + Expect(err).ShouldNot(HaveOccurred()) + Expect(len(updatedNotificationRecordItems)).To(Equal(2)) + assertRecordItem(&updatedNotificationRecordItems[0], 43, 42, 0) + assertRecordItem(&updatedNotificationRecordItems[1], 43, 42, 0) + }) + }) + }) + Context("When processing a resolving alert", func() { + It("Does nothing when status ManagedFleetNotificationRecord counters are already equal", func() { + initItemInRecord(managedFleetNotificationRecord, 42, 42, 90) + + err := testHandler.processAlert(testAlertResolved, false) + Expect(err).ShouldNot(HaveOccurred()) + Expect(len(updatedNotificationRecordItems)).To(Equal(1)) + assertRecordItem(&updatedNotificationRecordItems[0], 42, 42, 90) + }) + It("Removes limited support when the status ManagedFleetNotificationRecord firing counter is bigger than the resolved counter", func() { + 
initItemInRecord(managedFleetNotificationRecord, 43, 42, 0) + + gomock.InOrder( + // LS reasons are fetched + mockOCMClient.EXPECT().GetLimitedSupportReasons(testconst.TestHostedClusterID).Return([]*cmv1.LimitedSupportReason{limitedSupportReason}, nil), + // LS reason matching for the MFN is removed + mockOCMClient.EXPECT().RemoveLimitedSupport(testconst.TestHostedClusterID, limitedSupportReason.ID()).Return(nil), + ) + + err := testHandler.processAlert(testAlertResolved, false) + Expect(err).ShouldNot(HaveOccurred()) + Expect(len(updatedNotificationRecordItems)).To(Equal(1)) + assertRecordItem(&updatedNotificationRecordItems[0], 43, 43, 0) + }) + It("Does nothing if the limited support cannot be removed", func() { + initItemInRecord(managedFleetNotificationRecord, 43, 42, 90) + + gomock.InOrder( + // LS reasons are fetched + mockOCMClient.EXPECT().GetLimitedSupportReasons(testconst.TestHostedClusterID).Return([]*cmv1.LimitedSupportReason{limitedSupportReason}, nil), + // LS reason matching for the MFN cannot be removed + mockOCMClient.EXPECT().RemoveLimitedSupport(testconst.TestHostedClusterID, limitedSupportReason.ID()).Return(errors.New("cannot be removed from LS")), + ) + + err := testHandler.processAlert(testAlertResolved, false) + Expect(err).Should(HaveOccurred()) + Expect(len(updatedNotificationRecordItems)).To(Equal(2)) + assertRecordItem(&updatedNotificationRecordItems[0], 43, 43, 90) + assertRecordItem(&updatedNotificationRecordItems[1], 43, 42, 90) + }) + It("Removes limited support when the status ManagedFleetNotificationRecord firing counter is way bigger than the resolved counter", func() { + initItemInRecord(managedFleetNotificationRecord, 43, 0, 10) + + gomock.InOrder( + // LS reasons are fetched + mockOCMClient.EXPECT().GetLimitedSupportReasons(testconst.TestHostedClusterID).Return([]*cmv1.LimitedSupportReason{limitedSupportReason}, nil), + // LS reason matching for the MFN is removed + mockOCMClient.EXPECT().RemoveLimitedSupport(testconst.TestHostedClusterID, limitedSupportReason.ID()).Return(nil), + ) + + err := testHandler.processAlert(testAlertResolved, false) + Expect(err).ShouldNot(HaveOccurred()) + Expect(len(updatedNotificationRecordItems)).To(Equal(1)) + assertRecordItem(&updatedNotificationRecordItems[0], 43, 43, 10) + }) + }) + }) + + Context("When notifications are of 'service log' type", func() { + Context("When processing a firing alert", func() { + It("Sends service log when there is a ManagedFleetNotificationRecord without status", func() { + managedFleetNotificationRecord.Status.NotificationRecordByName = nil + + // Send service log + mockOCMClient.EXPECT().SendServiceLog(serviceLog).Return(nil) + + err := testHandler.processAlert(testAlertFiring, true) + Expect(err).ShouldNot(HaveOccurred()) + Expect(len(updatedNotificationRecordItems)).To(Equal(1)) + assertRecordItem(&updatedNotificationRecordItems[0], 1, 0, 0) + }) + It("Sends service log when status ManagedFleetNotificationRecord counters are equal and out of the no-resend time window", func() { + initItemInRecord(managedFleetNotificationRecord, 42, 0, 90) + + // Send service log + mockOCMClient.EXPECT().SendServiceLog(serviceLog).Return(nil) + + err := testHandler.processAlert(testAlertFiring, true) + Expect(err).ShouldNot(HaveOccurred()) + Expect(len(updatedNotificationRecordItems)).To(Equal(1)) + assertRecordItem(&updatedNotificationRecordItems[0], 43, 0, 0) + }) + It("Does nothing if the service log cannot be sent", func() { + initItemInRecord(managedFleetNotificationRecord, 42, 0, 90) + + // Send 
service log + mockOCMClient.EXPECT().SendServiceLog(serviceLog).Return(errors.New("cannot send SL")) + + err := testHandler.processAlert(testAlertFiring, true) + Expect(err).Should(HaveOccurred()) + Expect(len(updatedNotificationRecordItems)).To(Equal(2)) + assertRecordItem(&updatedNotificationRecordItems[0], 43, 0, 0) + assertRecordItem(&updatedNotificationRecordItems[1], 42, 0, 90) + }) + It("Does nothing when inside the no-resend time window", func() { + initItemInRecord(managedFleetNotificationRecord, 42, 0, 30) + + err := testHandler.processAlert(testAlertFiring, true) + Expect(err).ShouldNot(HaveOccurred()) + Expect(len(updatedNotificationRecordItems)).To(Equal(1)) + assertRecordItem(&updatedNotificationRecordItems[0], 42, 0, 30) + }) + Context("2 alerts are received", func() { + It("Sends service log once even if the time window is null", func() { + managedFleetNotification.Spec.FleetNotification.ResendWait = 0 + initItemInRecord(managedFleetNotificationRecord, 42, 0, 90) + + // Send service log once + mockOCMClient.EXPECT().SendServiceLog(serviceLog).DoAndReturn( + func(sl *ocm.ServiceLog) error { + initItemInRecord(managedFleetNotificationRecord, 43, 0, 0) + return nil + }, + ) + + err := testHandler.processAlert(testAlertFiring, true) + Expect(err).ShouldNot(HaveOccurred()) + err = testHandler.processAlert(testAlertFiring, true) + Expect(err).ShouldNot(HaveOccurred()) + Expect(len(updatedNotificationRecordItems)).To(Equal(2)) + assertRecordItem(&updatedNotificationRecordItems[0], 43, 0, 0) + assertRecordItem(&updatedNotificationRecordItems[1], 43, 0, 0) + }) + }) + }) + Context("When processing a resolving alert", func() { + It("Does nothing", func() { + initItemInRecord(managedFleetNotificationRecord, 42, 0, 90) + + err := testHandler.processAlert(testAlertResolved, false) + Expect(err).ShouldNot(HaveOccurred()) + Expect(len(updatedNotificationRecordItems)).To(Equal(0)) + }) + }) + }) }) }) }) }) }) -var _ = Describe("WebhookRHOBSReceiverHandler Additional Tests", func() { +var _ = Describe("Webhook Handlers Additional Tests", func() { var ( mockCtrl *gomock.Controller mockClient *clientmocks.MockClient @@ -375,29 +515,6 @@ var _ = Describe("WebhookRHOBSReceiverHandler Additional Tests", func() { mockCtrl.Finish() }) - Context("Constructor Tests", func() { - It("should create a new WebhookRHOBSReceiverHandler with valid parameters", func() { - handler := NewWebhookRHOBSReceiverHandler(mockClient, mockOCMClient) - Expect(handler).ToNot(BeNil()) - Expect(handler.c).To(Equal(mockClient)) - Expect(handler.ocm).To(Equal(mockOCMClient)) - }) - - It("should create a new WebhookRHOBSReceiverHandler with nil client", func() { - handler := NewWebhookRHOBSReceiverHandler(nil, mockOCMClient) - Expect(handler).ToNot(BeNil()) - Expect(handler.c).To(BeNil()) - Expect(handler.ocm).To(Equal(mockOCMClient)) - }) - - It("should create a new WebhookRHOBSReceiverHandler with nil OCM client", func() { - handler := NewWebhookRHOBSReceiverHandler(mockClient, nil) - Expect(handler).ToNot(BeNil()) - Expect(handler.c).To(Equal(mockClient)) - Expect(handler.ocm).To(BeNil()) - }) - }) - Context("HTTP Handler Tests", func() { var ( responseRecorder *httptest.ResponseRecorder @@ -475,7 +592,7 @@ var _ = Describe("WebhookRHOBSReceiverHandler Additional Tests", func() { mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, validMFN) mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, testconst.NewManagedFleetNotificationRecordWithStatus()) 
mockOCMClient.EXPECT().SendServiceLog(gomock.Any()).Return(nil) - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, testconst.NewManagedFleetNotificationRecordWithStatus()) + //mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, testconst.NewManagedFleetNotificationRecordWithStatus()) mockClient.EXPECT().Status().Return(mockStatusWriter) mockStatusWriter.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) @@ -486,192 +603,6 @@ var _ = Describe("WebhookRHOBSReceiverHandler Additional Tests", func() { }) }) - Context("processAMReceiver Tests", func() { - It("should handle empty alerts slice", func() { - alertData := AMReceiverData{Alerts: []template.Alert{}} - response := testHandler.processAMReceiver(alertData, context.Background()) - - Expect(response.Status).To(Equal("ok")) - Expect(response.Code).To(Equal(http.StatusOK)) - Expect(response.Error).To(BeNil()) - }) - - It("should return error when ManagedFleetNotification is not found", func() { - alert := testconst.NewTestAlert(false, true) - alertData := AMReceiverData{Alerts: []template.Alert{alert}} - - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(kerrors.NewNotFound(schema.GroupResource{}, "not-found")) - - response := testHandler.processAMReceiver(alertData, context.Background()) - - Expect(response.Status).To(ContainSubstring("unable to find ManagedFleetNotification")) - Expect(response.Code).To(Equal(http.StatusInternalServerError)) - Expect(response.Error).ToNot(BeNil()) - }) - - It("should skip invalid alerts", func() { - invalidAlert := template.Alert{ - Labels: map[string]string{ - "alertname": "TestAlert", - // Missing send_managed_notification and managed_notification_template labels - // which are required for valid alerts - }, - Status: "firing", - } - alertData := AMReceiverData{Alerts: []template.Alert{invalidAlert}} - - // Even invalid alerts trigger client.Get() with empty templateName before validation - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(kerrors.NewNotFound(schema.GroupResource{}, "not-found")) - - response := testHandler.processAMReceiver(alertData, context.Background()) - - Expect(response.Status).To(ContainSubstring("unable to find ManagedFleetNotification")) - Expect(response.Code).To(Equal(http.StatusInternalServerError)) - Expect(response.Error).ToNot(BeNil()) - }) - }) - - Context("processAlert Tests", func() { - var ( - validMFN oav1alpha1.ManagedFleetNotification - firingAlert template.Alert - ) - - BeforeEach(func() { - validMFN = testconst.NewManagedFleetNotification(false) - firingAlert = testconst.NewTestAlert(false, true) - }) - - It("should return error for alert with unknown status", func() { - unknownAlert := firingAlert - unknownAlert.Status = "unknown" - - err := testHandler.processAlert(unknownAlert, &validMFN) - - Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(ContainSubstring("unexpected status unknown")) - }) - - It("should return error for alert with empty status", func() { - emptyAlert := firingAlert - emptyAlert.Status = "" - - err := testHandler.processAlert(emptyAlert, &validMFN) - - Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(ContainSubstring("unexpected status")) - }) - }) - - Context("Error Handling Tests", func() { - It("should handle OCM client errors in processFiringAlert", func() { - alert := testconst.NewTestAlert(false, true) - limitedSupportMFN := testconst.NewManagedFleetNotification(true) - - 
mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, testconst.NewManagedFleetNotificationRecordWithStatus()) - mockOCMClient.EXPECT().SendLimitedSupport(gomock.Any(), gomock.Any()).Return(errors.New("OCM API error")) - - err := testHandler.processFiringAlert(alert, &limitedSupportMFN) - - Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(ContainSubstring("OCM API error")) - }) - - It("should handle Kubernetes client errors in updateManagedFleetNotificationRecord", func() { - alert := testconst.NewTestAlert(false, true) - mfn := testconst.NewManagedFleetNotification(false) - - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(errors.New("k8s client error")) - - err := testHandler.updateManagedFleetNotificationRecord(alert, &mfn) - - Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(ContainSubstring("k8s client error")) - }) - - It("should handle status update errors", func() { - alert := testconst.NewTestAlert(false, true) - mfn := testconst.NewManagedFleetNotification(false) - mfnr := testconst.NewManagedFleetNotificationRecordWithStatus() - - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, mfnr) - mockClient.EXPECT().Status().Return(mockStatusWriter) - mockStatusWriter.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).Return(errors.New("status update error")) - - err := testHandler.updateManagedFleetNotificationRecord(alert, &mfn) - - Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(ContainSubstring("status update error")) - }) - }) - - Context("Edge Cases Tests", func() { - It("should handle alerts with missing required labels", func() { - alert := template.Alert{ - Labels: map[string]string{ - // Missing alertname, managed_notification_template, send_managed_notification - "some_other_label": "value", - }, - Status: "firing", - } - - // Even invalid alerts trigger client.Get() with empty templateName before validation - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(kerrors.NewNotFound(schema.GroupResource{}, "not-found")) - - response := testHandler.processAMReceiver(AMReceiverData{Alerts: []template.Alert{alert}}, context.Background()) - - Expect(response.Status).To(ContainSubstring("unable to find ManagedFleetNotification")) - Expect(response.Code).To(Equal(http.StatusInternalServerError)) - }) - - It("should handle nil ManagedFleetNotification", func() { - alert := testconst.NewTestAlert(false, true) - - // Nil ManagedFleetNotification will cause a panic when accessing its fields, which is expected - defer func() { - r := recover() - Expect(r).ToNot(BeNil()) - Expect(fmt.Sprintf("%v", r)).To(ContainSubstring("runtime error: invalid memory address or nil pointer dereference")) - }() - - _ = testHandler.processAlert(alert, nil) - - // This line should not be reached due to panic - Fail("Expected panic for nil ManagedFleetNotification") - }) - - It("should handle context cancellation", func() { - ctx, cancel := context.WithCancel(context.Background()) - cancel() // Cancel immediately - - alert := testconst.NewTestAlert(false, true) - alertData := AMReceiverData{Alerts: []template.Alert{alert}} - - // Even with cancelled context, the alert is valid so it will try to process it - validMFN := testconst.NewManagedFleetNotification(false) - mfnr := testconst.NewManagedFleetNotificationRecordWithStatus() - - gomock.InOrder( - // First client call to get ManagedFleetNotification - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), 
gomock.Any()).Return(nil).SetArg(2, validMFN),
-				// Then check if firing can be sent
-				mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, mfnr),
-				// Send service log
-				mockOCMClient.EXPECT().SendServiceLog(gomock.Any()).Return(nil),
-				// Update status
-				mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, mfnr),
-				mockClient.EXPECT().Status().Return(mockStatusWriter),
-				mockStatusWriter.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil),
-			)
-
-			response := testHandler.processAMReceiver(alertData, ctx)
-
-			// Should still process but with cancelled context
-			Expect(response).ToNot(BeNil())
-			Expect(response.Status).To(Equal("ok"))
-		})
-	})
-
 	Context("JSON Error Handling Tests", func() {
 		It("should handle JSON encoding errors in response", func() {
 			// This test simulates a scenario where JSON encoding fails
@@ -693,14 +624,11 @@ var _ = Describe("WebhookRHOBSReceiverHandler Additional Tests", func() {
 			req := httptest.NewRequest("POST", "/webhook", strings.NewReader(malformedJSON))
 			responseRecorder := httptest.NewRecorder()
 
-			// The JSON parses successfully but creates an invalid alert (missing required labels)
-			// Even invalid alerts trigger client.Get() with empty templateName before validation
-			mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(kerrors.NewNotFound(schema.GroupResource{}, "not-found"))
-
 			testHandler.ServeHTTP(responseRecorder, req)
 
-			// The alert fails to find ManagedFleetNotification due to missing template name
-			Expect(responseRecorder.Code).To(Equal(http.StatusInternalServerError))
+			// Status code 200 is returned because errors from WebhookRHOBSReceiverHandler.processAlert are not propagated to the HTTP response
+			// (same behavior as the WebhookReceiverHandler)
+			Expect(responseRecorder.Code).To(Equal(http.StatusOK))
 		})
 
 		It("should handle JSON with unexpected structure", func() {
@@ -714,127 +642,6 @@ var _ = Describe("WebhookRHOBSReceiverHandler Additional Tests", func() {
 
 			Expect(responseRecorder.Code).To(Equal(http.StatusOK))
 		})
 	})
-
-	Context("Retry Logic Tests", func() {
-		It("should retry on conflict errors", func() {
-			alert := testconst.NewTestAlert(false, true)
-			mfn := testconst.NewManagedFleetNotification(false)
-			mfnr := testconst.NewManagedFleetNotificationRecordWithStatus()
-
-			gomock.InOrder(
-				// First attempt fails with conflict
-				mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, mfnr),
-				mockClient.EXPECT().Status().Return(mockStatusWriter),
-				mockStatusWriter.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).Return(kerrors.NewConflict(schema.GroupResource{}, "conflict", nil)),
-
-				// Second attempt succeeds
-				mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, mfnr),
-				mockClient.EXPECT().Status().Return(mockStatusWriter),
-				mockStatusWriter.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil),
-			)
-
-			err := testHandler.updateManagedFleetNotificationRecord(alert, &mfn)
-
-			Expect(err).ToNot(HaveOccurred())
-		})
-
-		It("should retry on AlreadyExists errors", func() {
-			alert := testconst.NewTestAlert(false, true)
-			mfn := testconst.NewManagedFleetNotification(false)
-
-			gomock.InOrder(
-				// First attempt fails with AlreadyExists
-				mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(kerrors.NewNotFound(schema.GroupResource{}, "not-found")),
-				mockClient.EXPECT().Create(gomock.Any(),
gomock.Any()).Return(kerrors.NewAlreadyExists(schema.GroupResource{}, "already-exists")), - - // Second attempt succeeds - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, testconst.NewManagedFleetNotificationRecordWithStatus()), - mockClient.EXPECT().Status().Return(mockStatusWriter), - mockStatusWriter.EXPECT().Update(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil), - ) - - err := testHandler.updateManagedFleetNotificationRecord(alert, &mfn) - - Expect(err).ToNot(HaveOccurred()) - }) - }) - - Context("firingCanBeSent Tests", func() { - var ( - alert template.Alert - mfn oav1alpha1.ManagedFleetNotification - ) - - BeforeEach(func() { - alert = testconst.NewTestAlert(false, true) - mfn = testconst.NewManagedFleetNotification(false) - }) - - It("should return true when no ManagedFleetNotificationRecord exists", func() { - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(kerrors.NewNotFound(schema.GroupResource{}, "not-found")) - - result := testHandler.firingCanBeSent(alert, &mfn) - - Expect(result).To(BeTrue()) - }) - - It("should return true when no NotificationRecordItem exists", func() { - mfnr := testconst.NewManagedFleetNotificationRecord() - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, mfnr) - - result := testHandler.firingCanBeSent(alert, &mfn) - - Expect(result).To(BeTrue()) - }) - - It("should return true when LastTransitionTime is nil", func() { - mfnr := testconst.NewManagedFleetNotificationRecordWithStatus() - mfnr.Status.NotificationRecordByName[0].NotificationRecordItems[0].LastTransitionTime = nil - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, mfnr) - - result := testHandler.firingCanBeSent(alert, &mfn) - - Expect(result).To(BeTrue()) - }) - - It("should return false for limited support when previous firing didn't resolve", func() { - limitedSupportMFN := testconst.NewManagedFleetNotification(true) - mfnr := testconst.NewManagedFleetNotificationRecordWithStatus() - mfnr.Status.NotificationRecordByName[0].NotificationRecordItems[0].FiringNotificationSentCount = 2 - mfnr.Status.NotificationRecordByName[0].NotificationRecordItems[0].ResolvedNotificationSentCount = 1 - mfnr.Status.NotificationRecordByName[0].NotificationRecordItems[0].LastTransitionTime = &metav1.Time{Time: time.Now()} - - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, mfnr) - - result := testHandler.firingCanBeSent(alert, &limitedSupportMFN) - - Expect(result).To(BeFalse()) - }) - - It("should return false when within resend wait interval", func() { - mfnr := testconst.NewManagedFleetNotificationRecordWithStatus() - mfnr.Status.NotificationRecordByName[0].NotificationRecordItems[0].LastTransitionTime = &metav1.Time{Time: time.Now()} - mfnr.Status.NotificationRecordByName[0].ResendWait = 24 // 24 hours - - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, mfnr) - - result := testHandler.firingCanBeSent(alert, &mfn) - - Expect(result).To(BeFalse()) - }) - - It("should return true when past resend wait interval", func() { - mfnr := testconst.NewManagedFleetNotificationRecordWithStatus() - mfnr.Status.NotificationRecordByName[0].NotificationRecordItems[0].LastTransitionTime = &metav1.Time{Time: time.Now().Add(-25 * time.Hour)} - mfnr.Status.NotificationRecordByName[0].ResendWait = 24 // 24 hours - - mockClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).SetArg(2, 
mfnr) - - result := testHandler.firingCanBeSent(alert, &mfn) - - Expect(result).To(BeTrue()) - }) - }) }) // Mock Response Writer for testing JSON encoding errors