diff --git a/modules/prometheus/istio.hcl b/modules/prometheus/istio.hcl index c8341db..e77bf30 100644 --- a/modules/prometheus/istio.hcl +++ b/modules/prometheus/istio.hcl @@ -41,8 +41,8 @@ ingester prometheus_istio_workload module { unit = "count" source prometheus "throughput" { - //query = "sum by (cluster, destination_canonical_service, destination_workload, destination_workload_namespace, destination_version, pod_name) (increase(istio_requests_total{reporter='source', source_canonical_service!='unknown', destination_service_name!='PassthroughCluster'}[1m]))" - query = "label_set(sum by (cluster, destination_canonical_service, destination_workload, destination_workload_namespace, destination_version, pod_name) (increase(istio_requests_total{reporter='source', source_canonical_service!='unknown', destination_service_name!='PassthroughCluster'}[1m])), 'cluster', '$input{cluster}')" + //query = "sum by (cluster, destination_canonical_service, destination_workload, destination_workload_namespace, destination_version, pod_name) (increase(istio_requests_total{reporter='destination', source_canonical_service!='unknown', destination_service_name!='PassthroughCluster'}[1m]))" + query = "label_set(sum by (cluster, destination_canonical_service, destination_workload, destination_workload_namespace, destination_version, pod_name) (increase(istio_requests_total{reporter='destination', source_canonical_service!='unknown', destination_service_name!='PassthroughCluster'}[1m])), 'cluster', '$input{cluster}')" join_on = { "$output{cluster}" = "$input{cluster}" @@ -54,7 +54,7 @@ ingester prometheus_istio_workload module { unit = "count" source prometheus "status_2xx" { - query = "label_set(sum by (cluster, destination_canonical_service, destination_workload, destination_workload_namespace, destination_version, pod_name) (increase(istio_requests_total{reporter='source', source_canonical_service!='unknown', response_code=~'^2.*', destination_service_name!='PassthroughCluster'}[1m])), 'cluster', '$input{cluster}')" + query = "label_set(sum by (cluster, destination_canonical_service, destination_workload, destination_workload_namespace, destination_version, pod_name) (increase(istio_requests_total{reporter='destination', source_canonical_service!='unknown', response_code=~'^2.*', destination_service_name!='PassthroughCluster'}[1m])), 'cluster', '$input{cluster}')" join_on = { "$output{cluster}" = "$input{cluster}" @@ -66,7 +66,7 @@ ingester prometheus_istio_workload module { unit = "count" source prometheus "status_3xx" { - query = "label_set(sum by (cluster, destination_canonical_service, destination_workload, destination_workload_namespace, destination_version, pod_name) (increase(istio_requests_total{reporter='source', source_canonical_service!='unknown', destination_service_name!='PassthroughCluster', response_code=~'^3.*'}[1m])), 'cluster', '$input{cluster}')" + query = "label_set(sum by (cluster, destination_canonical_service, destination_workload, destination_workload_namespace, destination_version, pod_name) (increase(istio_requests_total{reporter='destination', source_canonical_service!='unknown', destination_service_name!='PassthroughCluster', response_code=~'^3.*'}[1m])), 'cluster', '$input{cluster}')" join_on = { "$output{cluster}" = "$input{cluster}" @@ -78,7 +78,7 @@ ingester prometheus_istio_workload module { unit = "count" source prometheus "status_4xx" { - query = "label_set(sum by (cluster, destination_canonical_service, destination_workload, destination_workload_namespace, destination_version, pod_name) (increase(istio_requests_total{reporter='source', source_canonical_service!='unknown', destination_service_name!='PassthroughCluster', response_code=~'^4.*'}[1m])), 'cluster', '$input{cluster}')" + query = "label_set(sum by (cluster, destination_canonical_service, destination_workload, destination_workload_namespace, destination_version, pod_name) (increase(istio_requests_total{reporter='destination', source_canonical_service!='unknown', destination_service_name!='PassthroughCluster', response_code=~'^4.*'}[1m])), 'cluster', '$input{cluster}')" join_on = { "$output{cluster}" = "$input{cluster}" @@ -90,7 +90,7 @@ ingester prometheus_istio_workload module { unit = "count" source prometheus "status_5xx" { - query = "label_set(sum by (cluster, destination_canonical_service, destination_workload, destination_workload_namespace, destination_version, pod_name) (increase(istio_requests_total{reporter='source', source_canonical_service!='unknown', destination_service_name!='PassthroughCluster', response_code=~'^5.*'}[1m])), 'cluster', '$input{cluster}')" + query = "label_set(sum by (cluster, destination_canonical_service, destination_workload, destination_workload_namespace, destination_version, pod_name) (increase(istio_requests_total{reporter='destination', source_canonical_service!='unknown', destination_service_name!='PassthroughCluster', response_code=~'^5.*'}[1m])), 'cluster', '$input{cluster}')" join_on = { "$output{cluster}" = "$input{cluster}" @@ -140,7 +140,7 @@ ingester prometheus_istio_workload module { unit = "bytes" source prometheus "bytes_in" { - query = "label_set(sum by (cluster, destination_canonical_service, destination_workload, destination_workload_namespace, destination_version, pod_name) (increase(istio_request_bytes_sum{reporter='source', source_canonical_service!='unknown', destination_service_name!='PassthroughCluster'}[1m])), 'cluster', '$input{cluster}')" + query = "label_set(sum by (cluster, destination_canonical_service, destination_workload, destination_workload_namespace, destination_version, pod_name) (increase(istio_request_bytes_sum{reporter='destination', source_canonical_service!='unknown', destination_service_name!='PassthroughCluster'}[1m])), 'cluster', '$input{cluster}')" join_on = { "$output{cluster}" = "$input{cluster}" @@ -152,7 +152,7 @@ ingester prometheus_istio_workload module { unit = "bytes" source prometheus "bytes_out" { - query = "label_set(sum by (cluster, destination_canonical_service, destination_workload, destination_workload_namespace, destination_version, pod_name) (increase(istio_response_bytes_sum{reporter='source', source_canonical_service!='unknown', destination_service_name!='PassthroughCluster'}[1m])), 'cluster', '$input{cluster}')" + query = "label_set(sum by (cluster, destination_canonical_service, destination_workload, destination_workload_namespace, destination_version, pod_name) (increase(istio_response_bytes_sum{reporter='destination', source_canonical_service!='unknown', destination_service_name!='PassthroughCluster'}[1m])), 'cluster', '$input{cluster}')" join_on = { "$output{cluster}" = "$input{cluster}" @@ -199,7 +199,7 @@ ingester prometheus_istio_cluster module { unit = "count" source prometheus "throughput" { - query = "label_set(sum by (cluster) (increase(istio_requests_total{reporter='source'}[1m])), 'cluster', '$input{cluster}')" + query = "label_set(sum by (cluster) (increase(istio_requests_total{reporter='destination'}[1m])), 'cluster', '$input{cluster}')" join_on = { "$output{cluster}" = "$input{cluster}" @@ -211,7 +211,7 @@ ingester prometheus_istio_cluster module { unit = "count" source prometheus "status_2xx" { - query = "label_set(sum by (cluster) (increase(istio_requests_total{reporter='source', response_code=~'^2.*'}[1m])), 'cluster', '$input{cluster}')" + query = "label_set(sum by (cluster) (increase(istio_requests_total{reporter='destination', response_code=~'^2.*'}[1m])), 'cluster', '$input{cluster}')" join_on = { "$output{cluster}" = "$input{cluster}" @@ -223,7 +223,7 @@ ingester prometheus_istio_cluster module { unit = "count" source prometheus "status_3xx" { - query = "label_set(sum by (cluster) (increase(istio_requests_total{reporter='source', response_code=~'^3.*'}[1m])), 'cluster', '$input{cluster}')" + query = "label_set(sum by (cluster) (increase(istio_requests_total{reporter='destination', response_code=~'^3.*'}[1m])), 'cluster', '$input{cluster}')" join_on = { "$output{cluster}" = "$input{cluster}" @@ -235,7 +235,7 @@ ingester prometheus_istio_cluster module { unit = "count" source prometheus "status_4xx" { - query = "label_set(sum by (cluster) (increase(istio_requests_total{reporter='source', response_code=~'^4.*'}[1m])), 'cluster', '$input{cluster}')" + query = "label_set(sum by (cluster) (increase(istio_requests_total{reporter='destination', response_code=~'^4.*'}[1m])), 'cluster', '$input{cluster}')" join_on = { "$output{cluster}" = "$input{cluster}" @@ -247,7 +247,7 @@ ingester prometheus_istio_cluster module { unit = "count" source prometheus "status_5xx" { - query = "label_set(sum by (cluster) (increase(istio_requests_total{reporter='source', response_code=~'^5.*'}[1m])), 'cluster', '$input{cluster}')" + query = "label_set(sum by (cluster) (increase(istio_requests_total{reporter='destination', response_code=~'^5.*'}[1m])), 'cluster', '$input{cluster}')" join_on = { "$output{cluster}" = "$input{cluster}" @@ -259,7 +259,7 @@ ingester prometheus_istio_cluster module { unit = "bytes" source prometheus "bytes_in" { - query = "label_set(sum by (cluster) (increase(istio_request_bytes_sum{reporter='source'}[1m])), 'cluster', '$input{cluster}')" + query = "label_set(sum by (cluster) (increase(istio_request_bytes_sum{reporter='destination'}[1m])), 'cluster', '$input{cluster}')" join_on = { "$output{cluster}" = "$input{cluster}" @@ -271,7 +271,7 @@ ingester prometheus_istio_cluster module { unit = "bytes" source prometheus "bytes_out" { - query = "label_set(sum by (cluster) (increase(istio_response_bytes_sum{reporter='source'}[1m])), 'cluster', '$input{cluster}')" + query = "label_set(sum by (cluster) (increase(istio_response_bytes_sum{reporter='destination'}[1m])), 'cluster', '$input{cluster}')" join_on = { "$output{cluster}" = "$input{cluster}" @@ -338,7 +338,7 @@ ingester prometheus_istio_k8s_pod module { unit = "count" source prometheus "throughput" { - query = "label_set(sum by (cluster, pod_name, destination_canonical_service, destination_workload_namespace) (increase(istio_requests_total{reporter='source', destination_service_name!='PassthroughCluster', source_canonical_service!='unknown'}[1m])), 'cluster', '$input{cluster}')" + query = "label_set(sum by (cluster, pod_name, destination_canonical_service, destination_workload_namespace) (increase(istio_requests_total{reporter='destination', destination_service_name!='PassthroughCluster', source_canonical_service!='unknown'}[1m])), 'cluster', '$input{cluster}')" join_on = { "$output{cluster}" = "$input{cluster}" @@ -350,7 +350,7 @@ ingester prometheus_istio_k8s_pod module { unit = "count" source prometheus "status_2xx" { - query = "label_set(sum by (cluster, pod_name, destination_canonical_service, destination_workload_namespace) (increase(istio_requests_total{reporter='source', destination_service_name!='PassthroughCluster', source_canonical_service!='unknown', response_code=~'^2.*'}[1m])), 'cluster', '$input{cluster}')" + query = "label_set(sum by (cluster, pod_name, destination_canonical_service, destination_workload_namespace) (increase(istio_requests_total{reporter='destination', destination_service_name!='PassthroughCluster', source_canonical_service!='unknown', response_code=~'^2.*'}[1m])), 'cluster', '$input{cluster}')" join_on = { "$output{cluster}" = "$input{cluster}" @@ -362,7 +362,7 @@ ingester prometheus_istio_k8s_pod module { unit = "count" source prometheus "status_3xx" { - query = "label_set(sum by (cluster, pod_name, destination_canonical_service, destination_workload_namespace) (increase(istio_requests_total{reporter='source', destination_service_name!='PassthroughCluster', source_canonical_service!='unknown', response_code=~'^3.*'}[1m])), 'cluster', '$input{cluster}')" + query = "label_set(sum by (cluster, pod_name, destination_canonical_service, destination_workload_namespace) (increase(istio_requests_total{reporter='destination', destination_service_name!='PassthroughCluster', source_canonical_service!='unknown', response_code=~'^3.*'}[1m])), 'cluster', '$input{cluster}')" join_on = { "$output{cluster}" = "$input{cluster}" @@ -374,7 +374,7 @@ ingester prometheus_istio_k8s_pod module { unit = "count" source prometheus "status_4xx" { - query = "label_set(sum by (cluster, pod_name, destination_canonical_service, destination_workload_namespace) (increase(istio_requests_total{reporter='source', destination_service_name!='PassthroughCluster', source_canonical_service!='unknown', response_code=~'^4.*'}[1m])), 'cluster', '$input{cluster}')" + query = "label_set(sum by (cluster, pod_name, destination_canonical_service, destination_workload_namespace) (increase(istio_requests_total{reporter='destination', destination_service_name!='PassthroughCluster', source_canonical_service!='unknown', response_code=~'^4.*'}[1m])), 'cluster', '$input{cluster}')" join_on = { "$output{cluster}" = "$input{cluster}" @@ -386,7 +386,7 @@ ingester prometheus_istio_k8s_pod module { unit = "count" source prometheus "status_5xx" { - query = "label_set(sum by (cluster, pod_name, destination_canonical_service, destination_workload_namespace) (increase(istio_requests_total{reporter='source', destination_service_name!='PassthroughCluster', source_canonical_service!='unknown', response_code=~'^5.*'}[1m])), 'cluster', '$input{cluster}')" + query = "label_set(sum by (cluster, pod_name, destination_canonical_service, destination_workload_namespace) (increase(istio_requests_total{reporter='destination', destination_service_name!='PassthroughCluster', source_canonical_service!='unknown', response_code=~'^5.*'}[1m])), 'cluster', '$input{cluster}')" join_on = { "$output{cluster}" = "$input{cluster}" @@ -398,7 +398,7 @@ ingester prometheus_istio_k8s_pod module { unit = "bytes" source prometheus "bytes_in" { - query = "label_set(sum by (cluster, pod_name, destination_canonical_service, destination_workload_namespace) (increase(istio_request_bytes_sum{reporter='source', destination_service_name!='PassthroughCluster', source_canonical_service!='unknown'}[1m])), 'cluster', '$input{cluster}')" + query = "label_set(sum by (cluster, pod_name, destination_canonical_service, destination_workload_namespace) (increase(istio_request_bytes_sum{reporter='destination', destination_service_name!='PassthroughCluster', source_canonical_service!='unknown'}[1m])), 'cluster', '$input{cluster}')" join_on = { "$output{cluster}" = "$input{cluster}" @@ -410,7 +410,7 @@ ingester prometheus_istio_k8s_pod module { unit = "bytes" source prometheus "bytes_out" { - query = "label_set(sum by (cluster, pod_name, destination_canonical_service, destination_workload_namespace) (increase(istio_response_bytes_sum{reporter='source', destination_service_name!='PassthroughCluster', source_canonical_service!='unknown'}[1m])), 'cluster', '$input{cluster}')" + query = "label_set(sum by (cluster, pod_name, destination_canonical_service, destination_workload_namespace) (increase(istio_response_bytes_sum{reporter='destination', destination_service_name!='PassthroughCluster', source_canonical_service!='unknown'}[1m])), 'cluster', '$input{cluster}')" join_on = { "$output{cluster}" = "$input{cluster}" diff --git a/modules/prometheus/k8s_via_kube_state.hcl b/modules/prometheus/k8s_via_kube_state.hcl index 5b9a81c..d54f201 100644 --- a/modules/prometheus/k8s_via_kube_state.hcl +++ b/modules/prometheus/k8s_via_kube_state.hcl @@ -121,7 +121,6 @@ ingester prometheus_kube_cluster module { } } - ingester prometheus_kube_cluster_with_namespace module { frequency = 600 lookback = 600 @@ -156,55 +155,44 @@ ingester prometheus_kube_cluster_with_namespace module { "default" : "$input{using}" } - gauge "total_memory_requested" { - unit = "bytes" - - source prometheus "total_memory_requested" { - query = "label_set(sum by (cluster, namespace)(kube_pod_container_resource_requests{resource='memory', unit='byte'}), 'cluster', '$input{cluster}')" - join_on = { - "$output{cluster}" = "$input{cluster}" - } - } - } - - gauge "total_cpu_requested" { + gauge "unscheduled_pods" { unit = "count" - source prometheus "total_cpu_requested" { - query = "label_set(sum by (cluster, namespace) (kube_pod_container_resource_requests{resource='cpu', unit='core'}), 'cluster', '$input{cluster}')" + source prometheus "unscheduled_pods" { + query = "label_set(sum by (cluster, namespace) (increase(kube_pod_status_unschedulable{}[1m])))" join_on = { "$output{cluster}" = "$input{cluster}" } } } - gauge "total_unscheduled_pods" { + gauge "desired_pods" { unit = "count" - source prometheus "total_unscheduled_pods" { - query = "label_set(sum by (cluster, namespace) (kube_pod_status_unschedulable{}), 'cluster', '$input{cluster}')" + source prometheus "desired_pods" { + query = "label_set(sum by (cluster, namespace) (increase(kube_pod_status_phase{}[1m])))" join_on = { "$output{cluster}" = "$input{cluster}" } } } - gauge "total_failed_and_unknown_pods" { + gauge "failed_and_unknown_pods" { unit = "count" - source prometheus "total_failed_and_unknown_pods" { - query = "label_set(sum by (cluster, namespace) (kube_pod_status_phase{phase=~'Failed|Unknown'}), 'cluster', '$input{cluster}')" + source prometheus "failed_and_unknown_pods" { + query = "label_set(sum by (cluster, namespace) (kube_pod_status_phase{phase=~'Failed|Unknown'}))" join_on = { "$output{cluster}" = "$input{cluster}" } } } - gauge "total_container_restarts" { + gauge "container_restarts" { unit = "count" - source prometheus "total_container_restarts" { - query = "label_set(sum by (cluster, namespace) (kube_pod_container_status_restarts_total{}), 'cluster', '$input{cluster}')" + source prometheus "container_restarts" { + query = "label_set(sum by (cluster, namespace) (kube_pod_container_status_restarts_total{}))" join_on = { "$output{cluster}" = "$input{cluster}" } @@ -212,7 +200,6 @@ ingester prometheus_kube_cluster_with_namespace module { } } - ingester prometheus_kube_node module { frequency = 600 lookback = 600 @@ -252,66 +239,58 @@ ingester prometheus_kube_node module { "default" : "$input{using}" } - gauge "total_cpu_for_scheduling" { - unit = "count" - - source prometheus "total_cpu_for_scheduling" { - query = "label_set(sum by (cluster, node) (kube_node_status_allocatable{resource='cpu', unit='core'}) - sum by (cluster, node) (kube_pod_container_resource_limits{resource='cpu', unit='core'}), 'cluster', '$input{cluster}')" - join_on = { - "$output{cluster}" = "$input{cluster}" - } - } - } - - gauge "total_memory_for_scheduling" { - unit = "bytes" - - source prometheus "total_memory_for_scheduling" { - query = "label_set(sum by (cluster, node) (kube_node_status_allocatable{resource='memory', unit='byte'}) - sum by (cluster, node) (kube_pod_container_resource_limits{resource='memory', unit='byte'}), 'cluster', '$input{cluster}')" - join_on = { - "$output{cluster}" = "$input{cluster}" - } - } - } - - gauge "out_of_pods" { + gauge "disk_pressure" { unit = "count" - source prometheus "out_of_pods" { - query = "label_set(sum by (cluster, node) (kube_node_spec_unschedulable{}), 'cluster', '$input{cluster}')" + source prometheus "disk_pressure" { + query = <