From 746c0f2c924bcc328dca68582b98eb49e4d2ceb3 Mon Sep 17 00:00:00 2001
From: Jonathan Buch
Date: Sun, 1 Sep 2024 14:11:40 +0200
Subject: [PATCH] Revert "grafana, use alertmanager v2 alerts api"

* the prometheus v1 API rules at least provides the current state, as opposed to:
  * /api/ruler/grafana/api/v1/rules/
  * /api/alertmanager/grafana/api/v2/alerts
  * /api/prometheus/grafana/api/v1/alerts
  * /api/v1/provisioning/alert-rules
  * /api/alerts
* This reverts commit a2214be2e1fb66d17d5a0ebddbec1b4eb6fb2e31.

On top of the plain revert:
* Collect walks all rules and alert instances instead of indexing [0],
  which panicked on empty slices and dropped all but the first alert
* unreadable and non-200 responses are reported as errors
* a state reason suffix ("Alerting (NoData)") no longer hides an alert
* activeAt is parsed as RFC 3339, so UTC ("Z") timestamps work
---
 pkg/connectors/grafana/api.go       |  76 +++++++++++++++
 pkg/connectors/grafana/connector.go | 153 ++++++++++++++++++++++------
 2 files changed, 201 insertions(+), 28 deletions(-)
 create mode 100644 pkg/connectors/grafana/api.go

diff --git a/pkg/connectors/grafana/api.go b/pkg/connectors/grafana/api.go
new file mode 100644
index 0000000..85b63f2
--- /dev/null
+++ b/pkg/connectors/grafana/api.go
@@ -0,0 +1,76 @@
+package grafana
+
+import "time"
+
+// https://raw.githubusercontent.com/grafana/grafana/main/pkg/services/ngalert/api/tooling/post.json
+
+type ruleResponse struct {
+	Status string        `json:"status"`
+	Data   ruleDiscovery `json:"data,omitempty"`
+}
+
+type ruleDiscovery struct {
+	Groups []ruleGroup `json:"groups"`
+}
+
+type ruleGroup struct {
+	Name  string         `json:"name"`
+	File  string         `json:"file"`
+	Rules []alertingRule `json:"rules"`
+}
+
+type alertingRule struct {
+	State       alertingState     `json:"state"`
+	Name        string            `json:"name"`
+	ActiveAt    string            `json:"activeAt"`
+	Health      string            `json:"health"`
+	Annotations map[string]string `json:"annotations"`
+	Labels      map[string]string `json:"labels,omitempty"`
+	Alerts      []alert           `json:"alerts,omitempty"`
+	Type        string            `json:"type"`
+}
+
+type alert struct {
+	Labels      map[string]string `json:"labels"`
+	Annotations map[string]string `json:"annotations"`
+	State       string            `json:"state"`
+	ActiveAt    string            `json:"activeAt"`
+	Value       string            `json:"value"`
+}
+
+type alertingState = string
+
+const (
+	alertingStatePending  = "pending"
+	alertingStateFiring   = "firing"
+	alertingStateInactive = "inactive"
+)
+
+const (
+	alertingStateAlerting = "alerting"
+	alertingStateNoData   = "nodata"
+	alertingStateNormal   = "normal"
+	alertingStateError    = "error"
+)
+
+// https://grafana.com/docs/grafana/latest/developers/http_api/alerting_provisioning/#provisioned-alert-rules
+
+type provisionedAlertRule struct {
+	Annotations  map[string]string `json:"annotations"`
+	Condition    string            `json:"condition"`
+	ExecErrState string            `json:"execErrState"`
+	Uid          int64             `json:"id"`
+	IsPaused     bool              `json:"isPaused"`
+	Labels       map[string]string `json:"labels"`
+	NoDataState  string            `json:"noDataState"`
+	For          time.Duration     `json:"for"`
+	Title        string            `json:"title"`
+	RuleGroup    string            `json:"ruleGroup"`
+}
+
+const (
+	noDataStateNoData    = "NoData"
+	noDataStateOk        = "OK"
+	execErrStateAlerting = "Alerting"
+	execErrStateError    = "Error"
+)
diff --git a/pkg/connectors/grafana/connector.go b/pkg/connectors/grafana/connector.go
index 412f2f2..35a61d6 100644
--- a/pkg/connectors/grafana/connector.go
+++ b/pkg/connectors/grafana/connector.go
@@ -1,18 +1,23 @@
 package grafana
 
 import (
 	"context"
+	"encoding/json"
 	"fmt"
 	html "html/template"
+	"io"
+	"log/slog"
+	"net/http"
+	"strings"
+	"time"
 
 	"github.com/synyx/tuwat/pkg/connectors"
-	"github.com/synyx/tuwat/pkg/connectors/alertmanager"
 	"github.com/synyx/tuwat/pkg/connectors/common"
 )
 
 type Connector struct {
 	config Config
-	ac     *alertmanager.Connector
+	client *http.Client
 }
 
 type Config struct {
@@ -22,14 +27,7 @@ type Config struct {
 }
 
 func NewConnector(cfg *Config) *Connector {
-	alertmanagerConfig := &alertmanager.Config{
-		Tag:        cfg.Tag,
-		Cluster:    cfg.Cluster,
-		HTTPConfig: cfg.HTTPConfig,
-	}
-	alertmanagerConfig.URL += "/api/alertmanager/grafana"
-
-	c := &Connector{config: *cfg, ac: alertmanager.NewConnector(alertmanagerConfig)}
+	c := &Connector{config: *cfg, client: cfg.HTTPConfig.Client()}
 
 	return c
 }
@@ -39,35 +37,134 @@ func (c *Connector) Tag() string {
 }
 
+// Collect fetches the rule groups from Grafana and turns every alert instance
+// that is not OK into a connectors.Alert.
 func (c *Connector) Collect(ctx context.Context) ([]connectors.Alert, error) {
-	sourceAlerts, err := c.ac.Collect(ctx)
+	sourceAlertGroups, err := c.collectAlerts(ctx)
 	if err != nil {
 		return nil, err
 	}
 
 	var alerts []connectors.Alert
 
-	for _, alert := range sourceAlerts {
-		alert.Description = alert.Labels["rulename"]
-		alert.Details = alert.Labels["message"]
-		labels := map[string]string{
-			"Hostname": alert.Labels["grafana_folder"],
-			"Contacts": alert.Labels["__contacts__"],
-		}
-		for k, v := range labels {
-			alert.Labels[k] = v
-		}
-
-		alert.Links = []html.HTML{
-			html.HTML("🏠"),
-			html.HTML("🏠"),
-		}
-
-		alerts = append(alerts, alert)
+	for _, sourceAlertGroup := range sourceAlertGroups {
+		// A group can hold several rules and a rule any number of alert
+		// instances (none while it is inactive). Walk all of them: indexing
+		// element 0 panics on empty slices and hides every further alert.
+		for _, rule := range sourceAlertGroup.Rules {
+			for _, sourceAlert := range rule.Alerts {
+				state := grafanaStateToState(sourceAlert.State)
+				if state == connectors.OK {
+					continue
+				}
+
+				labels := map[string]string{
+					"Hostname":  sourceAlert.Labels["grafana_folder"],
+					"Folder":    sourceAlert.Labels["grafana_folder"],
+					"Alertname": sourceAlert.Labels["alertname"],
+					"Contacts":  sourceAlert.Labels["__contacts__"],
+				}
+
+				alert := connectors.Alert{
+					Labels:      labels,
+					Start:       parseTime(sourceAlert.ActiveAt),
+					State:       state,
+					Description: rule.Name,
+					Details:     rule.Annotations["message"],
+					Links: []html.HTML{
+						html.HTML("🏠"),
+						html.HTML("🏠"),
+					},
+				}
+
+				alerts = append(alerts, alert)
+			}
+		}
 	}
 
 	return alerts, nil
 }
 
+// grafanaStateToState maps the state of a Grafana alert instance onto the
+// tuwat state. Everything but alerting and nodata is treated as OK.
+func grafanaStateToState(state string) connectors.State {
+	// Grafana may append the reason in parentheses, e.g. "Alerting (NoData)";
+	// only the part before it names the state.
+	name, _, _ := strings.Cut(state, " (")
+	switch strings.ToLower(strings.TrimSpace(name)) {
+	case alertingStateAlerting:
+		return connectors.Critical
+	case alertingStateNoData:
+		return connectors.Warning
+	default:
+		return connectors.OK
+	}
+}
+
 func (c *Connector) String() string {
 	return fmt.Sprintf("Grafana (%s)", c.config.URL)
 }
+
+// collectAlerts queries the Prometheus compatible rules API of Grafana and
+// returns the decoded rule groups.
+func (c *Connector) collectAlerts(ctx context.Context) ([]ruleGroup, error) {
+	res, err := c.get(ctx, "/api/prometheus/grafana/api/v1/rules")
+	if err != nil {
+		return nil, err
+	}
+	defer res.Body.Close()
+
+	b, err := io.ReadAll(res.Body)
+	if err != nil {
+		return nil, fmt.Errorf("reading response from %s: %w", c.config.URL, err)
+	}
+
+	// An error reply (e.g. 401) is valid JSON too and would otherwise decode
+	// into an empty rule list, which looks like "no alerts".
+	if res.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("unexpected status %d from %s", res.StatusCode, c.config.URL)
+	}
+
+	var response ruleResponse
+	if err := json.Unmarshal(b, &response); err != nil {
+		slog.ErrorContext(ctx, "Cannot parse",
+			slog.String("url", c.config.URL),
+			slog.String("data", string(b)),
+			slog.Any("status", res.StatusCode),
+			slog.Any("error", err))
+		return nil, err
+	}
+
+	return response.Data.Groups, nil
+}
+
+// get issues a GET request for the endpoint relative to the configured URL.
+// The caller has to close the body of the returned response.
+func (c *Connector) get(ctx context.Context, endpoint string) (*http.Response, error) {
+	slog.DebugContext(ctx, "getting alerts", slog.String("url", c.config.URL+endpoint))
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, c.config.URL+endpoint, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	req.Header.Set("Accept", "application/json")
+
+	res, err := c.client.Do(req)
+	if err != nil {
+		return nil, err
+	}
+
+	return res, nil
+}
+
+// parseTime parses an RFC 3339 timestamp as sent by Grafana. The zero time
+// is returned if the value cannot be parsed.
+func parseTime(timeField string) time.Time {
+	// time.RFC3339 accepts "Z" as well as numeric offsets; fractional seconds
+	// are accepted by time.Parse even though the layout does not list them.
+	t, err := time.Parse(time.RFC3339, timeField)
+	if err != nil {
+		return time.Time{}
+	}
+	return t
+}