From 04afbc135d3392ddab2c6fab530b44d1669f3d4a Mon Sep 17 00:00:00 2001 From: Aline Abler Date: Wed, 30 Oct 2024 15:56:48 +0100 Subject: [PATCH 1/3] Make clusterlogforwarding filters mergeable --- component/config_forwarding.libsonnet | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/component/config_forwarding.libsonnet b/component/config_forwarding.libsonnet index dcaa4aa..85e3c66 100644 --- a/component/config_forwarding.libsonnet +++ b/component/config_forwarding.libsonnet @@ -215,6 +215,10 @@ local unfoldSpecs(specs) = { { name: name } + specs.pipelines[name] for name in std.objectFields(specs.pipelines) ], + [if std.objectHas(specs, 'filters') && std.isObject(specs.filters) && std.length(specs.filters) > 0 then 'filters']: [ + { name: name } + specs.filters[name] + for name in std.objectFields(specs.filters) + ], } + { // Import remaining specs as is. [key]: specs[key] From e66db6b9accc416d1c0052fbea040cc55f30cec0 Mon Sep 17 00:00:00 2001 From: Aline Abler Date: Wed, 30 Oct 2024 15:57:22 +0100 Subject: [PATCH 2/3] Update from template Template version: main (0aba6c3) --- .cruft.json | 4 ++-- .github/workflows/test.yaml | 2 ++ Makefile.vars.mk | 2 +- .../openshift4-logging/apps/openshift4-logging.yaml | 0 tests/log-forwarding.yml | 3 +++ 5 files changed, 8 insertions(+), 3 deletions(-) create mode 100644 tests/golden/log-forwarding/openshift4-logging/apps/openshift4-logging.yaml create mode 100644 tests/log-forwarding.yml diff --git a/.cruft.json b/.cruft.json index 12d154e..5102268 100644 --- a/.cruft.json +++ b/.cruft.json @@ -1,13 +1,13 @@ { "template": "https://github.com/projectsyn/commodore-component-template.git", - "commit": "8840f87d25d97ce0d4bfed75d40173caaf4100fc", + "commit": "0aba6c3dc0f4bcbe81aab1cdcc977f32b00f1cb2", "checkout": "main", "context": { "cookiecutter": { "name": "OpenShift4 Logging", "slug": "openshift4-logging", "parameter_key": "openshift4_logging", - "test_cases": "defaults master elasticsearch multilineerr forwardingonly legacy", + "test_cases": "defaults master elasticsearch multilineerr forwardingonly legacy log-forwarding", "add_lib": "n", "add_pp": "n", "add_golden": "y", diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 647a541..5a3656d 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -39,6 +39,7 @@ jobs: - multilineerr - forwardingonly - legacy + - log-forwarding defaults: run: working-directory: ${{ env.COMPONENT_NAME }} @@ -59,6 +60,7 @@ jobs: - multilineerr - forwardingonly - legacy + - log-forwarding defaults: run: working-directory: ${{ env.COMPONENT_NAME }} diff --git a/Makefile.vars.mk b/Makefile.vars.mk index 538e4d0..2cac468 100644 --- a/Makefile.vars.mk +++ b/Makefile.vars.mk @@ -57,4 +57,4 @@ KUBENT_IMAGE ?= ghcr.io/doitintl/kube-no-trouble:latest KUBENT_DOCKER ?= $(DOCKER_CMD) $(DOCKER_ARGS) $(root_volume) --entrypoint=/app/kubent $(KUBENT_IMAGE) instance ?= defaults -test_instances = tests/defaults.yml tests/master.yml tests/elasticsearch.yml tests/multilineerr.yml tests/forwardingonly.yml tests/legacy.yml +test_instances = tests/defaults.yml tests/master.yml tests/elasticsearch.yml tests/multilineerr.yml tests/forwardingonly.yml tests/legacy.yml tests/log-forwarding.yml diff --git a/tests/golden/log-forwarding/openshift4-logging/apps/openshift4-logging.yaml b/tests/golden/log-forwarding/openshift4-logging/apps/openshift4-logging.yaml new file mode 100644 index 0000000..e69de29 diff --git a/tests/log-forwarding.yml b/tests/log-forwarding.yml new file mode 100644 index 0000000..a4da5b7 --- /dev/null +++ b/tests/log-forwarding.yml @@ -0,0 +1,3 @@ +# Overwrite parameters here + +# parameters: {...} From 3f6d14f7c93cea5355899217fd1d30b047219dc9 Mon Sep 17 00:00:00 2001 From: Aline Abler Date: Wed, 30 Oct 2024 16:31:45 +0100 Subject: [PATCH 3/3] Update test cases --- component/config_forwarding.libsonnet | 48 ++-- .../31_cluster_logforwarding.yaml | 7 + .../openshift4_console_params.yaml | 3 + .../openshift4-logging/00_namespace.yaml | 9 + .../openshift4-logging/10_operator_group.yaml | 11 + .../openshift4-logging/20_subscriptions.yaml | 43 ++++ .../30_cluster_logging.yaml | 17 ++ .../31_cluster_logforwarding.yaml | 46 ++++ .../50_loki_ingester_fix.yaml | 153 ++++++++++++ .../50_loki_logreader_fix.yaml | 17 ++ .../openshift4-logging/50_loki_logstore.yaml | 14 ++ .../openshift4-logging/50_loki_netpol.yaml | 54 +++++ .../50_loki_operator_metrics_token.yaml | 11 + .../openshift4-logging/50_loki_rbac.yaml | 18 ++ .../openshift4-logging/50_loki_stack.yaml | 60 +++++ .../60_collector_alerts.yaml | 127 ++++++++++ .../60_lokistack_alerts.yaml | 225 ++++++++++++++++++ tests/legacy.yml | 7 + tests/log-forwarding.yml | 54 ++++- 19 files changed, 899 insertions(+), 25 deletions(-) create mode 100644 tests/golden/log-forwarding/openshift4-logging/console-patching/openshift4_console_params.yaml create mode 100644 tests/golden/log-forwarding/openshift4-logging/openshift4-logging/00_namespace.yaml create mode 100644 tests/golden/log-forwarding/openshift4-logging/openshift4-logging/10_operator_group.yaml create mode 100644 tests/golden/log-forwarding/openshift4-logging/openshift4-logging/20_subscriptions.yaml create mode 100644 tests/golden/log-forwarding/openshift4-logging/openshift4-logging/30_cluster_logging.yaml create mode 100644 tests/golden/log-forwarding/openshift4-logging/openshift4-logging/31_cluster_logforwarding.yaml create mode 100644 tests/golden/log-forwarding/openshift4-logging/openshift4-logging/50_loki_ingester_fix.yaml create mode 100644 tests/golden/log-forwarding/openshift4-logging/openshift4-logging/50_loki_logreader_fix.yaml create mode 100644 tests/golden/log-forwarding/openshift4-logging/openshift4-logging/50_loki_logstore.yaml create mode 100644 tests/golden/log-forwarding/openshift4-logging/openshift4-logging/50_loki_netpol.yaml create mode 100644 tests/golden/log-forwarding/openshift4-logging/openshift4-logging/50_loki_operator_metrics_token.yaml create mode 100644 tests/golden/log-forwarding/openshift4-logging/openshift4-logging/50_loki_rbac.yaml create mode 100644 tests/golden/log-forwarding/openshift4-logging/openshift4-logging/50_loki_stack.yaml create mode 100644 tests/golden/log-forwarding/openshift4-logging/openshift4-logging/60_collector_alerts.yaml create mode 100644 tests/golden/log-forwarding/openshift4-logging/openshift4-logging/60_lokistack_alerts.yaml diff --git a/component/config_forwarding.libsonnet b/component/config_forwarding.libsonnet index 85e3c66..cb3a5f9 100644 --- a/component/config_forwarding.libsonnet +++ b/component/config_forwarding.libsonnet @@ -202,29 +202,31 @@ local clusterLogForwarderSpec = std.foldl( // Unfold objects into array for ClusterLogForwarder resource. local unfoldSpecs(specs) = { - // Unfold objects into array. - [if std.length(specs.inputs) > 0 then 'inputs']: [ - { name: name } + specs.inputs[name] - for name in std.objectFields(specs.inputs) - ], - [if std.length(specs.outputs) > 0 then 'outputs']: [ - { name: name } + specs.outputs[name] - for name in std.objectFields(specs.outputs) - ], - [if std.length(specs.pipelines) > 0 then 'pipelines']: [ - { name: name } + specs.pipelines[name] - for name in std.objectFields(specs.pipelines) - ], - [if std.objectHas(specs, 'filters') && std.isObject(specs.filters) && std.length(specs.filters) > 0 then 'filters']: [ - { name: name } + specs.filters[name] - for name in std.objectFields(specs.filters) - ], -} + { - // Import remaining specs as is. - [key]: specs[key] - for key in std.objectFields(specs) - if !std.member([ 'inputs', 'outputs', 'pipelines' ], key) -}; + // Unfold objects into array. + [if std.length(specs.inputs) > 0 then 'inputs']: [ + { name: name } + specs.inputs[name] + for name in std.objectFields(specs.inputs) + ], + [if std.length(specs.outputs) > 0 then 'outputs']: [ + { name: name } + specs.outputs[name] + for name in std.objectFields(specs.outputs) + ], + [if std.length(specs.pipelines) > 0 then 'pipelines']: [ + { name: name } + specs.pipelines[name] + for name in std.objectFields(specs.pipelines) + ], + [if std.objectHas(specs, 'filters') && std.length(specs.filters) > 0 then 'filters']: ( + if std.isObject(specs.filters) then + [ { name: name } + specs.filters[name] for name in std.objectFields(specs.filters) ] + else specs.filters + ), + } + + { + // Import remaining specs as is. + [key]: specs[key] + for key in std.objectFields(specs) + if !std.member([ 'inputs', 'outputs', 'pipelines', 'filters' ], key) + }; // ClusterLogForwarder: // Create definitive ClusterLogForwarder resource from specs. diff --git a/tests/golden/legacy/openshift4-logging/openshift4-logging/31_cluster_logforwarding.yaml b/tests/golden/legacy/openshift4-logging/openshift4-logging/31_cluster_logforwarding.yaml index b6f6990..7dd9c47 100644 --- a/tests/golden/legacy/openshift4-logging/openshift4-logging/31_cluster_logforwarding.yaml +++ b/tests/golden/legacy/openshift4-logging/openshift4-logging/31_cluster_logforwarding.yaml @@ -7,6 +7,13 @@ metadata: name: instance namespace: openshift-logging spec: + filters: + - drop: + - test: + - field: .message + matches: .*DEBUG.* + name: legacy-filter + type: drop inputs: - application: namespaces: diff --git a/tests/golden/log-forwarding/openshift4-logging/console-patching/openshift4_console_params.yaml b/tests/golden/log-forwarding/openshift4-logging/console-patching/openshift4_console_params.yaml new file mode 100644 index 0000000..f71555a --- /dev/null +++ b/tests/golden/log-forwarding/openshift4-logging/console-patching/openshift4_console_params.yaml @@ -0,0 +1,3 @@ +config: + plugins: + - logging-view-plugin diff --git a/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/00_namespace.yaml b/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/00_namespace.yaml new file mode 100644 index 0000000..1b27cf9 --- /dev/null +++ b/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/00_namespace.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: Namespace +metadata: + annotations: + openshift.io/node-selector: '' + labels: + name: openshift-logging + openshift.io/cluster-monitoring: 'true' + name: openshift-logging diff --git a/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/10_operator_group.yaml b/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/10_operator_group.yaml new file mode 100644 index 0000000..ff11675 --- /dev/null +++ b/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/10_operator_group.yaml @@ -0,0 +1,11 @@ +apiVersion: operators.coreos.com/v1 +kind: OperatorGroup +metadata: + annotations: {} + labels: + name: cluster-logging + name: cluster-logging + namespace: openshift-logging +spec: + targetNamespaces: + - openshift-logging diff --git a/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/20_subscriptions.yaml b/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/20_subscriptions.yaml new file mode 100644 index 0000000..1f0b7ad --- /dev/null +++ b/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/20_subscriptions.yaml @@ -0,0 +1,43 @@ +apiVersion: operators.coreos.com/v1alpha1 +kind: Subscription +metadata: + annotations: {} + labels: + name: cluster-logging + name: cluster-logging + namespace: openshift-logging +spec: + channel: stable-5.9 + config: + resources: + limits: + memory: 256Mi + requests: + cpu: 10m + memory: 128Mi + installPlanApproval: Automatic + name: cluster-logging + source: redhat-operators + sourceNamespace: openshift-operators-redhat +--- +apiVersion: operators.coreos.com/v1alpha1 +kind: Subscription +metadata: + annotations: {} + labels: + name: loki-operator + name: loki-operator + namespace: openshift-operators-redhat +spec: + channel: stable-5.9 + config: + resources: + limits: + memory: 512Mi + requests: + cpu: 50m + memory: 381Mi + installPlanApproval: Automatic + name: loki-operator + source: openshift-operators-redhat + sourceNamespace: openshift-operators-redhat diff --git a/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/30_cluster_logging.yaml b/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/30_cluster_logging.yaml new file mode 100644 index 0000000..307f0ca --- /dev/null +++ b/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/30_cluster_logging.yaml @@ -0,0 +1,17 @@ +apiVersion: logging.openshift.io/v1 +kind: ClusterLogging +metadata: + annotations: + argocd.argoproj.io/sync-options: SkipDryRunOnMissingResource=true + labels: + name: instance + name: instance + namespace: openshift-logging +spec: + collection: + type: vector + logStore: + lokistack: + name: loki + type: lokistack + managementState: Managed diff --git a/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/31_cluster_logforwarding.yaml b/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/31_cluster_logforwarding.yaml new file mode 100644 index 0000000..686f9ad --- /dev/null +++ b/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/31_cluster_logforwarding.yaml @@ -0,0 +1,46 @@ +apiVersion: logging.openshift.io/v1 +kind: ClusterLogForwarder +metadata: + annotations: {} + labels: + name: instance + name: instance + namespace: openshift-logging +spec: + filters: + - drop: + - test: + - field: .message + notMatches: .*org\.keycloak\.events.* + name: keycloak-filter + type: drop + inputs: + - application: + namespaces: + - vshn-keycloak-prod + name: keycloak-logs + outputs: + - loki: + tenantKey: keycloak + name: appuio-loki-keycloak + type: loki + url: https://my-loki-url.com + pipelines: + - inputRefs: + - application + name: application-logs + outputRefs: + - default + - inputRefs: + - infrastructure + name: infrastructure-logs + outputRefs: + - default + - filterRefs: + - keycloak-filter + inputRefs: + - keycloak-logs + name: keycloak-logs + outputRefs: + - appuio-loki-keycloak + - default diff --git a/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/50_loki_ingester_fix.yaml b/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/50_loki_ingester_fix.yaml new file mode 100644 index 0000000..dcca6fb --- /dev/null +++ b/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/50_loki_ingester_fix.yaml @@ -0,0 +1,153 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + annotations: {} + labels: + name: loki-ingester-check + name: loki-ingester-check + namespace: openshift-logging +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + annotations: {} + labels: + name: loki-ingester-check + name: loki-ingester-check + namespace: openshift-logging +rules: + - apiGroups: + - '' + resources: + - pods + - pods/exec + verbs: + - get + - list + - watch + - create + - delete + - patch + - update +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + annotations: {} + labels: + name: loki-ingester-check + name: loki-ingester-check + namespace: openshift-logging +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: loki-ingester-check +subjects: + - kind: ServiceAccount + name: loki-ingester-check +--- +apiVersion: v1 +data: + wal-check.sh: | + #!/bin/bash + + set -e -o pipefail + + # Check if pod is in stuck state. + function check_pod() { + POD_NAME="loki-ingester-${1}" + echo "checking POD ${POD_NAME}" + PHASE=$(kubectl -n openshift-logging get po ${POD_NAME} -oyaml | yq '.status.phase') + if [ ${PHASE} != "Running" ]; then + return 0 + fi + READY=$(kubectl -n openshift-logging get po ${POD_NAME} -oyaml | yq '.status.conditions[] | select(.type == "ContainersReady") | .status') + if [ ${READY} == "True" ]; then + return 0 + fi + return 1 + } + + # Check directories of pod and remove non-existing checkpoint if present. + function check_dir() { + shopt -s extglob + POD_NAME="loki-ingester-${1}" + echo "checking DIR ${POD_NAME}" + DIR_CHP=$(kubectl -n openshift-logging exec -i ${POD_NAME} -- ls /tmp/wal | grep -o "^checkpoint\.[0-9]*$") + PATTERN=$(echo ${DIR_CHP} | sed 's/[^0-9]*//g') + DIR_WAL=$(kubectl -n openshift-logging exec -i ${POD_NAME} -- ls /tmp/wal | grep -o "^0*${PATTERN}$" || exit 0) + if [ -z $DIR_WAL ]; then + kubectl -n openshift-logging exec -i ${POD_NAME} -- rm -rf /tmp/wal/${DIR_CHP} + kubectl -n openshift-logging delete po ${POD_NAME} + fi + } + + # Check if pods are in stuck state for longer than ${SLEEP_TIME}. + # Only fix 1 pod at a time and immediatly exit if it is fixed. + function fix_pod() { + if ! check_pod $1; then + echo "stuck POD, waiting ${SLEEP_TIME}" + sleep ${SLEEP_TIME} + if ! check_pod $1; then + check_dir $1 + exit 0 + fi + fi + } + + fix_pod 0 + fix_pod 1 + + exit 0 +kind: ConfigMap +metadata: + annotations: {} + labels: + name: loki-ingester-check + name: loki-ingester-check + namespace: openshift-logging +--- +apiVersion: batch/v1 +kind: CronJob +metadata: + annotations: {} + labels: + name: loki-ingester-check + name: loki-ingester-check + namespace: openshift-logging +spec: + concurrencyPolicy: Forbid + failedJobsHistoryLimit: 0 + jobTemplate: + spec: + activeDeadlineSeconds: 360 + backoffLimit: 1 + template: + spec: + containers: + - command: + - /usr/local/bin/wal-check.sh + env: + - name: SLEEP_TIME + value: 2m + image: quay.io/appuio/oc:v4.14 + imagePullPolicy: IfNotPresent + name: check-pod + ports: [] + stdin: false + tty: false + volumeMounts: + - mountPath: /usr/local/bin/wal-check.sh + name: wal-check + readOnly: true + subPath: wal-check.sh + nodeSelector: + node-role.kubernetes.io/infra: '' + restartPolicy: Never + serviceAccountName: loki-ingester-check + volumes: + - configMap: + defaultMode: 364 + name: loki-ingester-check + name: wal-check + schedule: '*/10 * * * *' diff --git a/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/50_loki_logreader_fix.yaml b/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/50_loki_logreader_fix.yaml new file mode 100644 index 0000000..5e7989d --- /dev/null +++ b/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/50_loki_logreader_fix.yaml @@ -0,0 +1,17 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + annotations: {} + labels: + name: logging-application-logs-reader-aggregate + rbac.authorization.k8s.io/aggregate-to-admin: 'true' + name: logging-application-logs-reader-aggregate +rules: + - apiGroups: + - loki.grafana.com + resourceNames: + - logs + resources: + - application + verbs: + - get diff --git a/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/50_loki_logstore.yaml b/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/50_loki_logstore.yaml new file mode 100644 index 0000000..77d8c18 --- /dev/null +++ b/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/50_loki_logstore.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +data: {} +kind: Secret +metadata: + annotations: {} + labels: + name: loki-logstore + name: loki-logstore +stringData: + access_key_id: '' + access_key_secret: '' + bucketnames: c-green-test-1234-logstore + endpoint: '' +type: Opaque diff --git a/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/50_loki_netpol.yaml b/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/50_loki_netpol.yaml new file mode 100644 index 0000000..f2cd3bb --- /dev/null +++ b/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/50_loki_netpol.yaml @@ -0,0 +1,54 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + annotations: {} + labels: + name: allow-console-logging-view-plugin + name: allow-console-logging-view-plugin +spec: + ingress: + - from: + - podSelector: + matchLabels: + app: console + component: ui + - namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: openshift-console + ports: + - port: 9443 + protocol: TCP + podSelector: + matchLabels: + app.kubernetes.io/created-by: openshift-logging_instance + app.kubernetes.io/name: logging-view-plugin + policyTypes: + - Ingress +--- +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + annotations: {} + labels: + name: allow-console-logging-lokistack-gateway + name: allow-console-logging-lokistack-gateway +spec: + ingress: + - from: + - podSelector: + matchLabels: + app: console + component: ui + - namespaceSelector: + matchLabels: + kubernetes.io/metadata.name: openshift-console + ports: + - port: 8080 + protocol: TCP + podSelector: + matchLabels: + app.kubernetes.io/component: lokistack-gateway + app.kubernetes.io/instance: loki + app.kubernetes.io/name: lokistack + policyTypes: + - Ingress diff --git a/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/50_loki_operator_metrics_token.yaml b/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/50_loki_operator_metrics_token.yaml new file mode 100644 index 0000000..0b86fe6 --- /dev/null +++ b/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/50_loki_operator_metrics_token.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: Secret +metadata: + annotations: + argocd.argoproj.io/sync-options: Prune=false,Delete=false + kubernetes.io/service-account.name: loki-operator-controller-manager-metrics-reader + labels: + name: loki-operator-controller-manager-metrics-token + name: loki-operator-controller-manager-metrics-token + namespace: openshift-operators-redhat +type: kubernetes.io/service-account-token diff --git a/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/50_loki_rbac.yaml b/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/50_loki_rbac.yaml new file mode 100644 index 0000000..d5dde59 --- /dev/null +++ b/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/50_loki_rbac.yaml @@ -0,0 +1,18 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + annotations: {} + labels: + name: syn-loki-cluster-reader + rbac.authorization.k8s.io/aggregate-to-cluster-reader: 'true' + name: syn:loki:cluster-reader +rules: + - apiGroups: + - loki.grafana.com + resourceNames: + - logs + resources: + - application + - infrastructure + verbs: + - get diff --git a/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/50_loki_stack.yaml b/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/50_loki_stack.yaml new file mode 100644 index 0000000..259068c --- /dev/null +++ b/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/50_loki_stack.yaml @@ -0,0 +1,60 @@ +apiVersion: loki.grafana.com/v1 +kind: LokiStack +metadata: + annotations: + argocd.argoproj.io/sync-options: SkipDryRunOnMissingResource=true + labels: + name: loki + name: loki +spec: + limits: + global: + ingestion: + ingestionBurstSize: 9 + ingestionRate: 5 + size: 1x.demo + storage: + schemas: + - effectiveDate: '2022-06-01' + version: v12 + - effectiveDate: '2024-09-01' + version: v13 + secret: + name: loki-logstore + type: s3 + storageClassName: '' + template: + compactor: + nodeSelector: + node-role.kubernetes.io/infra: '' + replicas: 1 + distributor: + nodeSelector: + node-role.kubernetes.io/infra: '' + replicas: 2 + gateway: + nodeSelector: + node-role.kubernetes.io/infra: '' + replicas: 2 + indexGateway: + nodeSelector: + node-role.kubernetes.io/infra: '' + replicas: 2 + ingester: + nodeSelector: + node-role.kubernetes.io/infra: '' + replicas: 2 + querier: + nodeSelector: + node-role.kubernetes.io/infra: '' + replicas: 2 + queryFrontend: + nodeSelector: + node-role.kubernetes.io/infra: '' + replicas: 2 + ruler: + nodeSelector: + node-role.kubernetes.io/infra: '' + replicas: 1 + tenants: + mode: openshift-logging diff --git a/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/60_collector_alerts.yaml b/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/60_collector_alerts.yaml new file mode 100644 index 0000000..268663f --- /dev/null +++ b/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/60_collector_alerts.yaml @@ -0,0 +1,127 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + annotations: {} + labels: + name: syn-collector-rules + name: syn-collector-rules + namespace: openshift-logging +spec: + groups: + - name: logging_collector.alerts + rules: + - alert: SYN_CollectorNodeDown + annotations: + message: Prometheus could not scrape {{ $labels.namespace }}/{{ $labels.pod + }} collector component for more than 10m. + summary: Collector cannot be scraped + expr: | + up{app_kubernetes_io_component = "collector", app_kubernetes_io_part_of = "cluster-logging"} == 0 + for: 10m + labels: + service: collector + severity: critical + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_CollectorHighErrorRate + annotations: + message: '{{ $value }}% of records have resulted in an error by {{ $labels.namespace + }}/{{ $labels.pod }} collector component.' + summary: '{{ $labels.namespace }}/{{ $labels.pod }} collector component + errors are high' + expr: | + 100 * ( + collector:log_num_errors:sum_rate{app_kubernetes_io_part_of = "cluster-logging"} + / + collector:received_events:sum_rate{app_kubernetes_io_part_of = "cluster-logging"} + ) > 0.001 + for: 15m + labels: + service: collector + severity: critical + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_CollectorVeryHighErrorRate + annotations: + message: '{{ $value }}% of records have resulted in an error by {{ $labels.namespace + }}/{{ $labels.pod }} collector component.' + summary: '{{ $labels.namespace }}/{{ $labels.pod }} collector component + errors are very high' + expr: | + 100 * ( + collector:log_num_errors:sum_rate{app_kubernetes_io_part_of = "cluster-logging"} + / + collector:received_events:sum_rate{app_kubernetes_io_part_of = "cluster-logging"} + ) > 0.05 + for: 15m + labels: + service: collector + severity: critical + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_ElasticsearchDeprecation + annotations: + message: The OpenShift Elasticsearch Operator is deprecated and is planned + to be removed in a future release. Red Hat provides bug fixes and support + for this feature during the current release lifecycle, but this feature + no longer receives enhancements. As an alternative to using the OpenShift + Elasticsearch Operator to manage the default log storage, you can use + the Loki Operator. + summary: Detected Elasticsearch as the in-cluster storage which is deprecated + and will be removed in a future release. + expr: | + sum(kube_pod_labels{namespace="openshift-logging",label_component='elasticsearch'}) > 0 + for: 5m + labels: + namespace: openshift-logging + service: storage + severity: Warning + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_FluentdDeprecation + annotations: + message: Fluentd is deprecated and is planned to be removed in a future + release. Red Hat provides bug fixes and support for this feature during + the current release lifecycle, but this feature no longer receives enhancements. + As an alternative to Fluentd, you can use Vector instead. + summary: Detected Fluentd as the collector which is deprecated and will + be removed in a future release. + expr: | + sum(kube_pod_labels{namespace="openshift-logging", label_implementation='fluentd', label_app_kubernetes_io_managed_by="cluster-logging-operator"}) > 0 + for: 5m + labels: + namespace: openshift-logging + service: collector + severity: Warning + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_KibanaDeprecation + annotations: + message: The Kibana web console is now deprecated and is planned to be + removed in a future logging release. + summary: Detected Kibana as the visualization which is deprecated and + will be removed in a future release. + expr: | + sum(kube_pod_labels{namespace="openshift-logging",label_component='kibana'}) > 0 + for: 5m + labels: + namespace: openshift-logging + service: visualization + severity: Warning + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_DiskBufferUsage + annotations: + message: 'Collectors potentially consuming too much node disk, {{ $value + }}% ' + summary: Detected consuming too much node disk on $labels.hostname host + expr: "(label_replace(sum by(hostname) (vector_buffer_byte_size{component_kind='sink',\ + \ buffer_type='disk'}), 'instance', '$1', 'hostname', '(.*)') \n/ on(instance)\ + \ group_left() sum by(instance) (node_filesystem_size_bytes{mountpoint='/var'}))\ + \ * 100 > 15\n" + for: 5m + labels: + service: collector + severity: Warning + syn: 'true' + syn_component: openshift4-logging diff --git a/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/60_lokistack_alerts.yaml b/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/60_lokistack_alerts.yaml new file mode 100644 index 0000000..65a573e --- /dev/null +++ b/tests/golden/log-forwarding/openshift4-logging/openshift4-logging/60_lokistack_alerts.yaml @@ -0,0 +1,225 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + annotations: {} + labels: + name: syn-loki-logging-rules + name: syn-loki-logging-rules + namespace: openshift-logging +spec: + groups: + - name: logging_loki.alerts + rules: + - alert: SYN_LokiRequestErrors + annotations: + message: '{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf + "%.2f" $value }}% errors.' + runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#Loki-Request-Errors + summary: At least 10% of requests are responded by 5xx server errors. + expr: | + sum( + job_namespace_route_statuscode:loki_request_duration_seconds_count:irate1m{status_code=~"5.."} + ) by (job, namespace, route) + / + sum( + job_namespace_route_statuscode:loki_request_duration_seconds_count:irate1m + ) by (job, namespace, route) + * 100 + > 10 + for: 15m + labels: + severity: critical + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_LokiStackWriteRequestErrors + annotations: + message: '{{ printf "%.2f" $value }}% of write requests from {{ $labels.job + }} in {{ $labels.namespace }} are returned with server errors.' + runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#LokiStack-Write-Request-Errors + summary: At least 10% of write requests to the lokistack-gateway are responded + with 5xx server errors. + expr: | + sum( + code_handler_job_namespace:lokistack_gateway_http_requests:irate1m{code=~"5..", handler="push"} + ) by (job, namespace) + / + sum( + code_handler_job_namespace:lokistack_gateway_http_requests:irate1m{handler="push"} + ) by (job, namespace) + * 100 + > 10 + for: 15m + labels: + severity: critical + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_LokiStackReadRequestErrors + annotations: + message: '{{ printf "%.2f" $value }}% of query requests from {{ $labels.job + }} in {{ $labels.namespace }} are returned with server errors.' + runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#LokiStack-Read-Request-Errors + summary: At least 10% of query requests to the lokistack-gateway are responded + with 5xx server errors. + expr: | + sum( + code_handler_job_namespace:lokistack_gateway_http_requests:irate1m{code=~"5..", handler=~"query|query_range|label|labels|label_values"} + ) by (job, namespace) + / + sum( + code_handler_job_namespace:lokistack_gateway_http_requests:irate1m{handler=~"query|query_range|label|labels|label_values"} + ) by (job, namespace) + * 100 + > 10 + for: 15m + labels: + severity: critical + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_LokiRequestPanics + annotations: + message: '{{ $labels.job }} is experiencing an increase of {{ $value }} + panics.' + runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#Loki-Request-Panics + summary: A panic was triggered. + expr: | + sum( + increase( + loki_panic_total[10m] + ) + ) by (job, namespace) + > 0 + labels: + severity: critical + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_LokiRequestLatency + annotations: + message: '{{ $labels.job }} {{ $labels.route }} is experiencing {{ printf + "%.2f" $value }}s 99th percentile latency.' + runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#Loki-Request-Latency + summary: The 99th percentile is experiencing high latency (higher than + 1 second). + expr: | + histogram_quantile(0.99, + sum( + irate( + loki_request_duration_seconds_bucket{route!~"(?i).*tail.*"}[1m] + ) + ) by (job, le, namespace, route) + ) + > 1 + for: 15m + labels: + severity: critical + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_LokiTenantRateLimit + annotations: + message: '{{ $labels.job }} {{ $labels.route }} is experiencing 429 errors.' + runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#Loki-Tenant-Rate-Limit + summary: At least 10% of requests are responded with the rate limit error + code. + expr: | + sum( + job_namespace_route_statuscode:loki_request_duration_seconds_count:irate1m{status_code="429"} + ) by (job, namespace, route) + / + sum( + job_namespace_route_statuscode:loki_request_duration_seconds_count:irate1m + ) by (job, namespace, route) + * 100 + > 10 + for: 15m + labels: + severity: warning + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_LokiStorageSlowWrite + annotations: + message: The storage path is experiencing slow write response rates. + runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#Loki-Storage-Slow-Write + summary: The storage path is experiencing slow write response rates. + expr: | + histogram_quantile(0.99, + sum( + job_le_namespace_operation:loki_boltdb_shipper_request_duration_seconds_bucket:rate5m{operation="WRITE"} + ) by (job, le, namespace) + ) + > 1 + for: 15m + labels: + severity: warning + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_LokiStorageSlowRead + annotations: + message: The storage path is experiencing slow read response rates. + runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#Loki-Storage-Slow-Read + summary: The storage path is experiencing slow read response rates. + expr: | + histogram_quantile(0.99, + sum( + job_le_namespace_operation:loki_boltdb_shipper_request_duration_seconds_bucket:rate5m{operation="Shipper.Query"} + ) by (job, le, namespace) + ) + > 5 + for: 15m + labels: + severity: warning + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_LokiWritePathHighLoad + annotations: + message: The write path is experiencing high load. + runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#Loki-Write-Path-High-Load + summary: The write path is experiencing high load, causing backpressure + storage flushing. + expr: | + sum( + loki_ingester_wal_replay_flushing + ) by (job, namespace) + > 0 + for: 15m + labels: + severity: warning + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_LokiReadPathHighLoad + annotations: + message: The read path is experiencing high load. + runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#Loki-Read-Path-High-Load + summary: The read path has high volume of queries, causing longer response + times. + expr: | + histogram_quantile(0.99, + sum( + rate( + loki_logql_querystats_latency_seconds_bucket[5m] + ) + ) by (job, le, namespace) + ) + > 30 + for: 15m + labels: + severity: warning + syn: 'true' + syn_component: openshift4-logging + - alert: SYN_LokistackSchemaUpgradesRequired + annotations: + message: |- + The LokiStack "{{ $labels.stack_name }}" in namespace "{{ $labels.stack_namespace }}" is using a storage schema + configuration that does not contain the latest schema version. It is recommended to update the schema + configuration to update the schema version to the latest version in the future. + runbook_url: https://github.com/grafana/loki/blob/main/operator/docs/lokistack/sop.md#Lokistack-Schema-Upgrades-Required + summary: One or more of the deployed LokiStacks contains an outdated storage + schema configuration. + expr: | + sum ( + lokistack_status_condition{reason="StorageNeedsSchemaUpdate",status="true"} + ) by (stack_namespace, stack_name) + > 0 + for: 1m + labels: + severity: warning + syn: 'true' + syn_component: openshift4-logging diff --git a/tests/legacy.yml b/tests/legacy.yml index 7d5565b..5f3291b 100644 --- a/tests/legacy.yml +++ b/tests/legacy.yml @@ -51,6 +51,13 @@ parameters: detectMultilineErrors: true clusterLogForwarder: + filters: + - name: legacy-filter + type: drop + drop: + - test: + - field: .message + matches: '.*DEBUG.*' pipelines: application-logs: outputRefs: diff --git a/tests/log-forwarding.yml b/tests/log-forwarding.yml index a4da5b7..4fc8f59 100644 --- a/tests/log-forwarding.yml +++ b/tests/log-forwarding.yml @@ -1,3 +1,53 @@ -# Overwrite parameters here +applications: + - openshift4-operators as openshift-operators-redhat + - openshift4-monitoring -# parameters: {...} +parameters: + kapitan: + dependencies: + - type: https + source: https://raw.githubusercontent.com/appuio/component-openshift4-operators/v1.0.2/lib/openshift4-operators.libsonnet + output_path: vendor/lib/openshift4-operators.libsonnet + - type: https + source: https://raw.githubusercontent.com/appuio/component-openshift4-monitoring/v2.9.0/lib/openshift4-monitoring-alert-patching.libsonnet + output_path: vendor/lib/alert-patching.libsonnet + compile: + - input_type: jsonnet + input_paths: + - tests/console-patch.jsonnet + output_path: console-patching/ + + openshift4_operators: + defaultInstallPlanApproval: Automatic + defaultSource: openshift-operators-redhat + defaultSourceNamespace: openshift-operators-redhat + + openshift4_logging: + clusterLogForwarder: + inputs: + keycloak-logs: + application: + namespaces: + - vshn-keycloak-prod + outputs: + appuio-loki-keycloak: + type: loki + url: https://my-loki-url.com + loki: + tenantKey: "keycloak" + filters: + keycloak-filter: + type: drop + drop: + - test: + - field: .message + notMatches: '.*org\.keycloak\.events.*' + pipelines: + keycloak-logs: + inputRefs: + - keycloak-logs + filterRefs: + - keycloak-filter + outputRefs: + - appuio-loki-keycloak + - default