From d7bd2222c0a842dce8f07eb30b73173c11d7a8d5 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
Date: Thu, 19 Dec 2024 10:15:19 +0000
Subject: [PATCH] Update upstream alerts

---
 .../master/collector_prometheus_alerts.yaml  | 65 +------------
 .../master/lokistack_prometheus_alerts.yaml  | 18 ++++
 .../lokistack_prometheus_alerts.yaml         | 18 ++++
 .../lokistack_prometheus_alerts.yaml         | 18 ++++
 .../lokistack_prometheus_alerts.yaml         | 18 ++++
 .../60_lokistack_alerts.yaml                 | 21 +++++
 .../60_lokistack_alerts.yaml                 | 21 +++++
 .../60_collector_alerts.yaml                 | 93 +------------------
 .../60_lokistack_alerts.yaml                 | 21 +++++
 .../60_lokistack_alerts.yaml                 | 21 +++++
 10 files changed, 161 insertions(+), 153 deletions(-)

diff --git a/component/extracted_alerts/master/collector_prometheus_alerts.yaml b/component/extracted_alerts/master/collector_prometheus_alerts.yaml
index 1942d35..2d5cdf8 100644
--- a/component/extracted_alerts/master/collector_prometheus_alerts.yaml
+++ b/component/extracted_alerts/master/collector_prometheus_alerts.yaml
@@ -9,7 +9,7 @@ spec:
     rules:
     - alert: CollectorNodeDown
       annotations:
-        message: "Prometheus could not scrape {{ $labels.namespace }}/{{ $labels.pod }} collector component for more than 10m."
+        description: "Prometheus could not scrape {{ $labels.namespace }}/{{ $labels.pod }} collector component for more than 10m."
         summary: "Collector cannot be scraped"
       expr: |
         up{app_kubernetes_io_component = "collector", app_kubernetes_io_part_of = "cluster-logging"} == 0
@@ -17,70 +17,9 @@ spec:
       labels:
         service: collector
         severity: critical
-    - alert: CollectorHighErrorRate
-      annotations:
-        message: "{{ $value }}% of records have resulted in an error by {{ $labels.namespace }}/{{ $labels.pod }} collector component."
-        summary: "{{ $labels.namespace }}/{{ $labels.pod }} collector component errors are high"
-      expr: |
-        100 * (
-          collector:log_num_errors:sum_rate{app_kubernetes_io_part_of = "cluster-logging"}
-          /
-          collector:received_events:sum_rate{app_kubernetes_io_part_of = "cluster-logging"}
-        ) > 0.001
-      for: 15m
-      labels:
-        service: collector
-        severity: critical
-    - alert: CollectorVeryHighErrorRate
-      annotations:
-        message: "{{ $value }}% of records have resulted in an error by {{ $labels.namespace }}/{{ $labels.pod }} collector component."
-        summary: "{{ $labels.namespace }}/{{ $labels.pod }} collector component errors are very high"
-      expr: |
-        100 * (
-          collector:log_num_errors:sum_rate{app_kubernetes_io_part_of = "cluster-logging"}
-          /
-          collector:received_events:sum_rate{app_kubernetes_io_part_of = "cluster-logging"}
-        ) > 0.05
-      for: 15m
-      labels:
-        service: collector
-        severity: critical
-    - alert: ElasticsearchDeprecation
-      annotations:
-        message: "In Red Hat OpenShift Logging Operator 6.0, support for the Red Hat Elasticsearch Operator has been removed. Bug fixes and support are provided only through the end of the 5.9 lifecycle. As an alternative to the Elasticsearch Operator, you can use the Loki Operator instead."
-        summary: "Detected Elasticsearch as the in-cluster storage, which has been removed in 6.0 release"
-      expr: |
-        sum(kube_pod_labels{namespace="openshift-logging",label_component='elasticsearch'}) > 0
-      for: 5m
-      labels:
-        service: storage
-        severity: Warning
-        namespace: openshift-logging
-    - alert: FluentdDeprecation
-      annotations:
-        message: "In Red Hat OpenShift Logging Operator 6.0, support for Fluentd as a collector has been removed. Bug fixes and support are provided only through the end of the 5.9 lifecycle. As an alternative to Fluentd, you can use the Vector collector instead."
-        summary: "Detected Fluentd as the collector, which has been removed in a 6.0 release"
-      expr: |
-        sum(kube_pod_labels{namespace="openshift-logging", label_implementation='fluentd', label_app_kubernetes_io_managed_by="cluster-logging-operator"}) > 0
-      for: 5m
-      labels:
-        service: collector
-        severity: Warning
-        namespace: openshift-logging
-    - alert: KibanaDeprecation
-      annotations:
-        message: "In Red Hat OpenShift Logging Operator 6.0, support for Kibana as a data visualization dashboard has been removed. Bug fixes and support are provided only through the end of the 5.9 lifecycle. As an alternative to Kibana, you can use the Grafana Dashboard instead."
-        summary: "Detected Kibana as the log data visualization, which has been removed in the 6.0 release"
-      expr: |
-        sum(kube_pod_labels{namespace="openshift-logging",label_component='kibana'}) > 0
-      for: 5m
-      labels:
-        service: visualization
-        severity: Warning
-        namespace: openshift-logging
     - alert: DiskBufferUsage
       annotations:
-        message: "Collectors potentially consuming too much node disk, {{ $value }}% "
+        description: "Collectors potentially consuming too much node disk, {{ $value }}% "
         summary: "Detected consuming too much node disk on $labels.hostname host"
       expr: |
         (label_replace(sum by(hostname) (vector_buffer_byte_size{component_kind='sink', buffer_type='disk'}), 'instance', '$1', 'hostname', '(.*)')
diff --git a/component/extracted_alerts/master/lokistack_prometheus_alerts.yaml b/component/extracted_alerts/master/lokistack_prometheus_alerts.yaml
index 15cc424..799c280 100644
--- a/component/extracted_alerts/master/lokistack_prometheus_alerts.yaml
+++ b/component/extracted_alerts/master/lokistack_prometheus_alerts.yaml
@@ -175,6 +175,24 @@ groups:
     for: 15m
     labels:
       severity: warning
+  - alert: LokiDiscardedSamplesWarning
+    annotations:
+      message: |-
+        Loki in namespace {{ $labels.namespace }} is discarding samples in the "{{ $labels.tenant }}" tenant during ingestion.
+        Samples are discarded because of "{{ $labels.reason }}" at a rate of {{ .Value | humanize }} samples per second.
+      summary: Loki is discarding samples during ingestion because they fail validation.
+      runbook_url: "[[ .RunbookURL]]#Loki-Discarded-Samples-Warning"
+    expr: |
+      sum by(namespace, tenant, reason) (
+        irate(loki_discarded_samples_total{
+          reason!="rate_limited",
+          reason!="per_stream_rate_limit",
+          reason!="stream_limit"}[2m])
+      )
+      > 0
+    for: 15m
+    labels:
+      severity: warning
   - alert: LokistackSchemaUpgradesRequired
     annotations:
       message: |-
diff --git a/component/extracted_alerts/release-5.6/lokistack_prometheus_alerts.yaml b/component/extracted_alerts/release-5.6/lokistack_prometheus_alerts.yaml
index f378c49..e0c49d6 100644
--- a/component/extracted_alerts/release-5.6/lokistack_prometheus_alerts.yaml
+++ b/component/extracted_alerts/release-5.6/lokistack_prometheus_alerts.yaml
@@ -175,3 +175,21 @@ groups:
     for: 15m
     labels:
       severity: warning
+  - alert: LokiDiscardedSamplesWarning
+    annotations:
+      message: |-
+        Loki in namespace {{ $labels.namespace }} is discarding samples in the "{{ $labels.tenant }}" tenant during ingestion.
+        Samples are discarded because of "{{ $labels.reason }}" at a rate of {{ .Value | humanize }} samples per second.
+      summary: Loki is discarding samples during ingestion because they fail validation.
+      runbook_url: "[[ .RunbookURL]]#Loki-Discarded-Samples-Warning"
+    expr: |
+      sum by(namespace, tenant, reason) (
+        irate(loki_discarded_samples_total{
+          reason!="rate_limited",
+          reason!="per_stream_rate_limit",
+          reason!="stream_limit"}[2m])
+      )
+      > 0
+    for: 15m
+    labels:
+      severity: warning
diff --git a/component/extracted_alerts/release-5.8/lokistack_prometheus_alerts.yaml b/component/extracted_alerts/release-5.8/lokistack_prometheus_alerts.yaml
index f378c49..e0c49d6 100644
--- a/component/extracted_alerts/release-5.8/lokistack_prometheus_alerts.yaml
+++ b/component/extracted_alerts/release-5.8/lokistack_prometheus_alerts.yaml
@@ -175,3 +175,21 @@ groups:
     for: 15m
     labels:
       severity: warning
+  - alert: LokiDiscardedSamplesWarning
+    annotations:
+      message: |-
+        Loki in namespace {{ $labels.namespace }} is discarding samples in the "{{ $labels.tenant }}" tenant during ingestion.
+        Samples are discarded because of "{{ $labels.reason }}" at a rate of {{ .Value | humanize }} samples per second.
+      summary: Loki is discarding samples during ingestion because they fail validation.
+      runbook_url: "[[ .RunbookURL]]#Loki-Discarded-Samples-Warning"
+    expr: |
+      sum by(namespace, tenant, reason) (
+        irate(loki_discarded_samples_total{
+          reason!="rate_limited",
+          reason!="per_stream_rate_limit",
+          reason!="stream_limit"}[2m])
+      )
+      > 0
+    for: 15m
+    labels:
+      severity: warning
diff --git a/component/extracted_alerts/release-5.9/lokistack_prometheus_alerts.yaml b/component/extracted_alerts/release-5.9/lokistack_prometheus_alerts.yaml
index 15cc424..799c280 100644
--- a/component/extracted_alerts/release-5.9/lokistack_prometheus_alerts.yaml
+++ b/component/extracted_alerts/release-5.9/lokistack_prometheus_alerts.yaml
@@ -175,6 +175,24 @@ groups:
     for: 15m
     labels:
       severity: warning
+  - alert: LokiDiscardedSamplesWarning
+    annotations:
+      message: |-
+        Loki in namespace {{ $labels.namespace }} is discarding samples in the "{{ $labels.tenant }}" tenant during ingestion.
+        Samples are discarded because of "{{ $labels.reason }}" at a rate of {{ .Value | humanize }} samples per second.
+      summary: Loki is discarding samples during ingestion because they fail validation.
+      runbook_url: "[[ .RunbookURL]]#Loki-Discarded-Samples-Warning"
+    expr: |
+      sum by(namespace, tenant, reason) (
+        irate(loki_discarded_samples_total{
+          reason!="rate_limited",
+          reason!="per_stream_rate_limit",
+          reason!="stream_limit"}[2m])
+      )
+      > 0
+    for: 15m
+    labels:
+      severity: warning
   - alert: LokistackSchemaUpgradesRequired
     annotations:
       message: |-
diff --git a/tests/golden/defaults/openshift4-logging/openshift4-logging/60_lokistack_alerts.yaml b/tests/golden/defaults/openshift4-logging/openshift4-logging/60_lokistack_alerts.yaml
index 65a573e..4f6c7da 100644
--- a/tests/golden/defaults/openshift4-logging/openshift4-logging/60_lokistack_alerts.yaml
+++ b/tests/golden/defaults/openshift4-logging/openshift4-logging/60_lokistack_alerts.yaml
@@ -204,6 +204,27 @@ spec:
             severity: warning
             syn: 'true'
             syn_component: openshift4-logging
+        - alert: SYN_LokiDiscardedSamplesWarning
+          annotations:
+            message: |-
+              Loki in namespace {{ $labels.namespace }} is discarding samples in the "{{ $labels.tenant }}" tenant during ingestion.
+              Samples are discarded because of "{{ $labels.reason }}" at a rate of {{ .Value | humanize }} samples per second.
+            runbook_url: '[[ .RunbookURL]]#Loki-Discarded-Samples-Warning'
+            summary: Loki is discarding samples during ingestion because they fail
+              validation.
+          expr: |
+            sum by(namespace, tenant, reason) (
+              irate(loki_discarded_samples_total{
+                reason!="rate_limited",
+                reason!="per_stream_rate_limit",
+                reason!="stream_limit"}[2m])
+            )
+            > 0
+          for: 15m
+          labels:
+            severity: warning
+            syn: 'true'
+            syn_component: openshift4-logging
         - alert: SYN_LokistackSchemaUpgradesRequired
           annotations:
             message: |-
diff --git a/tests/golden/legacy/openshift4-logging/openshift4-logging/60_lokistack_alerts.yaml b/tests/golden/legacy/openshift4-logging/openshift4-logging/60_lokistack_alerts.yaml
index 65a573e..4f6c7da 100644
--- a/tests/golden/legacy/openshift4-logging/openshift4-logging/60_lokistack_alerts.yaml
+++ b/tests/golden/legacy/openshift4-logging/openshift4-logging/60_lokistack_alerts.yaml
@@ -204,6 +204,27 @@ spec:
             severity: warning
             syn: 'true'
             syn_component: openshift4-logging
+        - alert: SYN_LokiDiscardedSamplesWarning
+          annotations:
+            message: |-
+              Loki in namespace {{ $labels.namespace }} is discarding samples in the "{{ $labels.tenant }}" tenant during ingestion.
+              Samples are discarded because of "{{ $labels.reason }}" at a rate of {{ .Value | humanize }} samples per second.
+            runbook_url: '[[ .RunbookURL]]#Loki-Discarded-Samples-Warning'
+            summary: Loki is discarding samples during ingestion because they fail
+              validation.
+          expr: |
+            sum by(namespace, tenant, reason) (
+              irate(loki_discarded_samples_total{
+                reason!="rate_limited",
+                reason!="per_stream_rate_limit",
+                reason!="stream_limit"}[2m])
+            )
+            > 0
+          for: 15m
+          labels:
+            severity: warning
+            syn: 'true'
+            syn_component: openshift4-logging
         - alert: SYN_LokistackSchemaUpgradesRequired
           annotations:
             message: |-
diff --git a/tests/golden/master/openshift4-logging/openshift4-logging/60_collector_alerts.yaml b/tests/golden/master/openshift4-logging/openshift4-logging/60_collector_alerts.yaml
index 19adca5..2c6ddb1 100644
--- a/tests/golden/master/openshift4-logging/openshift4-logging/60_collector_alerts.yaml
+++ b/tests/golden/master/openshift4-logging/openshift4-logging/60_collector_alerts.yaml
@@ -12,7 +12,7 @@ spec:
       rules:
         - alert: SYN_CollectorNodeDown
           annotations:
-            message: Prometheus could not scrape {{ $labels.namespace }}/{{ $labels.pod
+            description: Prometheus could not scrape {{ $labels.namespace }}/{{ $labels.pod
              }} collector component for more than 10m.
             summary: Collector cannot be scraped
           expr: |
@@ -23,97 +23,10 @@ spec:
           severity: critical
           syn: 'true'
           syn_component: openshift4-logging
-        - alert: SYN_CollectorHighErrorRate
-          annotations:
-            message: '{{ $value }}% of records have resulted in an error by {{ $labels.namespace
-              }}/{{ $labels.pod }} collector component.'
-            summary: '{{ $labels.namespace }}/{{ $labels.pod }} collector component
-              errors are high'
-          expr: |
-            100 * (
-              collector:log_num_errors:sum_rate{app_kubernetes_io_part_of = "cluster-logging"}
-              /
-              collector:received_events:sum_rate{app_kubernetes_io_part_of = "cluster-logging"}
-            ) > 0.001
-          for: 15m
-          labels:
-            service: collector
-            severity: critical
-            syn: 'true'
-            syn_component: openshift4-logging
-        - alert: SYN_CollectorVeryHighErrorRate
-          annotations:
-            message: '{{ $value }}% of records have resulted in an error by {{ $labels.namespace
-              }}/{{ $labels.pod }} collector component.'
-            summary: '{{ $labels.namespace }}/{{ $labels.pod }} collector component
-              errors are very high'
-          expr: |
-            100 * (
-              collector:log_num_errors:sum_rate{app_kubernetes_io_part_of = "cluster-logging"}
-              /
-              collector:received_events:sum_rate{app_kubernetes_io_part_of = "cluster-logging"}
-            ) > 0.05
-          for: 15m
-          labels:
-            service: collector
-            severity: critical
-            syn: 'true'
-            syn_component: openshift4-logging
-        - alert: SYN_ElasticsearchDeprecation
-          annotations:
-            message: In Red Hat OpenShift Logging Operator 6.0, support for the Red
-              Hat Elasticsearch Operator has been removed. Bug fixes and support are
-              provided only through the end of the 5.9 lifecycle. As an alternative
-              to the Elasticsearch Operator, you can use the Loki Operator instead.
-            summary: Detected Elasticsearch as the in-cluster storage, which has been
-              removed in 6.0 release
-          expr: |
-            sum(kube_pod_labels{namespace="openshift-logging",label_component='elasticsearch'}) > 0
-          for: 5m
-          labels:
-            namespace: openshift-logging
-            service: storage
-            severity: Warning
-            syn: 'true'
-            syn_component: openshift4-logging
-        - alert: SYN_FluentdDeprecation
-          annotations:
-            message: In Red Hat OpenShift Logging Operator 6.0, support for Fluentd
-              as a collector has been removed. Bug fixes and support are provided
-              only through the end of the 5.9 lifecycle. As an alternative to Fluentd,
-              you can use the Vector collector instead.
-            summary: Detected Fluentd as the collector, which has been removed in
-              a 6.0 release
-          expr: |
-            sum(kube_pod_labels{namespace="openshift-logging", label_implementation='fluentd', label_app_kubernetes_io_managed_by="cluster-logging-operator"}) > 0
-          for: 5m
-          labels:
-            namespace: openshift-logging
-            service: collector
-            severity: Warning
-            syn: 'true'
-            syn_component: openshift4-logging
-        - alert: SYN_KibanaDeprecation
-          annotations:
-            message: In Red Hat OpenShift Logging Operator 6.0, support for Kibana
-              as a data visualization dashboard has been removed. Bug fixes and support
-              are provided only through the end of the 5.9 lifecycle. As an alternative
-              to Kibana, you can use the Grafana Dashboard instead.
- summary: Detected Kibana as the log data visualization, which has been - removed in the 6.0 release - expr: | - sum(kube_pod_labels{namespace="openshift-logging",label_component='kibana'}) > 0 - for: 5m - labels: - namespace: openshift-logging - service: visualization - severity: Warning - syn: 'true' - syn_component: openshift4-logging - alert: SYN_DiskBufferUsage annotations: - message: 'Collectors potentially consuming too much node disk, {{ $value - }}% ' + description: 'Collectors potentially consuming too much node disk, {{ + $value }}% ' summary: Detected consuming too much node disk on $labels.hostname host expr: "(label_replace(sum by(hostname) (vector_buffer_byte_size{component_kind='sink',\ \ buffer_type='disk'}), 'instance', '$1', 'hostname', '(.*)') \n/ on(instance)\ diff --git a/tests/golden/master/openshift4-logging/openshift4-logging/60_lokistack_alerts.yaml b/tests/golden/master/openshift4-logging/openshift4-logging/60_lokistack_alerts.yaml index 65a573e..4f6c7da 100644 --- a/tests/golden/master/openshift4-logging/openshift4-logging/60_lokistack_alerts.yaml +++ b/tests/golden/master/openshift4-logging/openshift4-logging/60_lokistack_alerts.yaml @@ -204,6 +204,27 @@ spec: severity: warning syn: 'true' syn_component: openshift4-logging + - alert: SYN_LokiDiscardedSamplesWarning + annotations: + message: |- + Loki in namespace {{ $labels.namespace }} is discarding samples in the "{{ $labels.tenant }}" tenant during ingestion. + Samples are discarded because of "{{ $labels.reason }}" at a rate of {{ .Value | humanize }} samples per second. + runbook_url: '[[ .RunbookURL]]#Loki-Discarded-Samples-Warning' + summary: Loki is discarding samples during ingestion because they fail + validation. + expr: | + sum by(namespace, tenant, reason) ( + irate(loki_discarded_samples_total{ + reason!="rate_limited", + reason!="per_stream_rate_limit", + reason!="stream_limit"}[2m]) + ) + > 0 + for: 15m + labels: + severity: warning + syn: 'true' + syn_component: openshift4-logging - alert: SYN_LokistackSchemaUpgradesRequired annotations: message: |- diff --git a/tests/golden/multilineerr/openshift4-logging/openshift4-logging/60_lokistack_alerts.yaml b/tests/golden/multilineerr/openshift4-logging/openshift4-logging/60_lokistack_alerts.yaml index 65a573e..4f6c7da 100644 --- a/tests/golden/multilineerr/openshift4-logging/openshift4-logging/60_lokistack_alerts.yaml +++ b/tests/golden/multilineerr/openshift4-logging/openshift4-logging/60_lokistack_alerts.yaml @@ -204,6 +204,27 @@ spec: severity: warning syn: 'true' syn_component: openshift4-logging + - alert: SYN_LokiDiscardedSamplesWarning + annotations: + message: |- + Loki in namespace {{ $labels.namespace }} is discarding samples in the "{{ $labels.tenant }}" tenant during ingestion. + Samples are discarded because of "{{ $labels.reason }}" at a rate of {{ .Value | humanize }} samples per second. + runbook_url: '[[ .RunbookURL]]#Loki-Discarded-Samples-Warning' + summary: Loki is discarding samples during ingestion because they fail + validation. + expr: | + sum by(namespace, tenant, reason) ( + irate(loki_discarded_samples_total{ + reason!="rate_limited", + reason!="per_stream_rate_limit", + reason!="stream_limit"}[2m]) + ) + > 0 + for: 15m + labels: + severity: warning + syn: 'true' + syn_component: openshift4-logging - alert: SYN_LokistackSchemaUpgradesRequired annotations: message: |-