Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Disambiguate alert names #23

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions resources/prometheus/prometheus-rules.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ spec:
summary: "Central container `{{ $labels.pod }}/{{ $labels.container }}` in namespace `{{ $labels.namespace }}` restarted more than 3 times."
description: "Central container `{{ $labels.pod }}/{{ $labels.container }}` in namespace `{{ $labels.namespace }}` has restarted more than 3 times during the last 10 minutes."
sop_url: "" # TODO: Add SOP
- alert: RHACSCentralDatabasePersistentVolumeFillingUp
- alert: RHACSCentralDatabasePersistentVolumeFillingUp (< 10% left)
expr: kubelet_volume_stats_available_bytes{persistentvolumeclaim="stackrox-db"} / kubelet_volume_stats_capacity_bytes{persistentvolumeclaim="stackrox-db"} < 0.1
for: 5m
labels:
Expand All @@ -47,7 +47,7 @@ spec:
summary: "Central database storage in namespace `{{ $labels.namespace }}` is filling up."
description: "Central database storage in namespace `{{ $labels.namespace }}` is filling up for PVC `{{ $labels.persistentvolumeclaim }}`. Available storage quota is `{{ $value | humanizePercentage }}`."
sop_url: "" # TODO: Add SOP
- alert: RHACSCentralDatabasePersistentVolumeFillingUp
- alert: RHACSCentralDatabasePersistentVolumeFillingUp (~4 days left)
expr: kubelet_volume_stats_available_bytes{persistentvolumeclaim="stackrox-db"} / kubelet_volume_stats_capacity_bytes{persistentvolumeclaim="stackrox-db"} < 0.25 and predict_linear(kubelet_volume_stats_available_bytes{persistentvolumeclaim="stackrox-db"}[6h], 4 * 24 * 3600) < 0
for: 5m
labels:
Expand Down Expand Up @@ -190,7 +190,7 @@ spec:

- name: observability-operator
rules:
- alert: ObservabilityOperatorPrometheusPersistentVolumeFillingUp
- alert: ObservabilityOperatorPrometheusPersistentVolumeFillingUp (< 10% left)
expr: kubelet_volume_stats_available_bytes{persistentvolumeclaim=~"managed-services-prometheus-kafka-prometheus-[0-9]"} / kubelet_volume_stats_capacity_bytes{persistentvolumeclaim=~"managed-services-prometheus-kafka-prometheus-[0-9]"} < 0.1
for: 5m
labels:
Expand All @@ -200,7 +200,7 @@ spec:
description: "The Observability Operator's Prometheus storage in namespace `{{ $labels.namespace }}` is filling up for PVC `{{ $labels.persistentvolumeclaim }}`. Available storage quota is `{{ $value | humanizePercentage }}`."
sop_url: "" # TODO: Add SOP

- alert: ObservabilityOperatorPrometheusPersistentVolumeFillingUp
- alert: ObservabilityOperatorPrometheusPersistentVolumeFillingUp (~4 days left)
expr: kubelet_volume_stats_available_bytes{persistentvolumeclaim=~"managed-services-prometheus-kafka-prometheus-[0-9]"} / kubelet_volume_stats_capacity_bytes{persistentvolumeclaim=~"managed-services-prometheus-kafka-prometheus-[0-9]"} < 0.25 and predict_linear(kubelet_volume_stats_available_bytes{persistentvolumeclaim=~"managed-services-prometheus-kafka-prometheus-[0-9]"}[6h], 4 * 24 * 3600) < 0
for: 5m
labels:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ tests:
alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp
exp_alerts: []
- eval_time: 100m
alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp
alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp (~4 days left)
exp_alerts:
- exp_labels:
alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp
alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp (~4 days left)
severity: warning
persistentvolumeclaim: managed-services-prometheus-kafka-prometheus-0
namespace: rhacs-observability
Expand All @@ -28,10 +28,10 @@ tests:
description: "The Observability Operator's Prometheus storage in namespace `rhacs-observability` is filling up for PVC `managed-services-prometheus-kafka-prometheus-0`. Available storage quota is `13.09%`. The volume is expected to fill up within 4 days based on linear extrapolation over the last 6 hours."
sop_url: ""
- eval_time: 110m
alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp
alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp (< 10% left)
exp_alerts:
- exp_labels:
alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp
alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp (< 10% left)
severity: critical
persistentvolumeclaim: managed-services-prometheus-kafka-prometheus-0
namespace: rhacs-observability
Expand All @@ -40,7 +40,7 @@ tests:
description: "The Observability Operator's Prometheus storage in namespace `rhacs-observability` is filling up for PVC `managed-services-prometheus-kafka-prometheus-0`. Available storage quota is `3.32%`."
sop_url: ""
- exp_labels:
alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp
alertname: ObservabilityOperatorPrometheusPersistentVolumeFillingUp (~4 days left)
severity: warning
persistentvolumeclaim: managed-services-prometheus-kafka-prometheus-0
namespace: rhacs-observability
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ tests:
alertname: RHACSCentralDatabasePersistentVolumeFillingUp
exp_alerts:
- exp_labels:
alertname: RHACSCentralDatabasePersistentVolumeFillingUp
alertname: RHACSCentralDatabasePersistentVolumeFillingUp (~4 days left)
severity: warning
persistentvolumeclaim: stackrox-db
namespace: rhacs-1234
Expand All @@ -31,7 +31,7 @@ tests:
alertname: RHACSCentralDatabasePersistentVolumeFillingUp
exp_alerts:
- exp_labels:
alertname: RHACSCentralDatabasePersistentVolumeFillingUp
alertname: RHACSCentralDatabasePersistentVolumeFillingUp (< 10% left)
severity: critical
persistentvolumeclaim: stackrox-db
namespace: rhacs-1234
Expand All @@ -40,7 +40,7 @@ tests:
description: "Central database storage in namespace `rhacs-1234` is filling up for PVC `stackrox-db`. Available storage quota is `3.32%`."
sop_url: ""
- exp_labels:
alertname: RHACSCentralDatabasePersistentVolumeFillingUp
alertname: RHACSCentralDatabasePersistentVolumeFillingUp (~4 days left)
severity: warning
persistentvolumeclaim: stackrox-db
namespace: rhacs-1234
Expand Down