Skip to content

Commit

Permalink
Set Single Controller Mode as Default (#26)
Browse files Browse the repository at this point in the history
  • Loading branch information
jmcgrath207 authored Oct 23, 2023
1 parent 7922d24 commit 50d1fde
Show file tree
Hide file tree
Showing 12 changed files with 122 additions and 39 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
name: ci

on:
push:
branches:
- master
pull_request:
types:
- closed
- ready_for_review


jobs:
Expand Down
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,15 @@ issue [Here](https://github.com/kubernetes/kubernetes/issues/69507)

```bash
helm repo add k8s-ephemeral-storage-metrics https://jmcgrath207.github.io/k8s-ephemeral-storage-metrics/chart
helm repo update
helm upgrade --install my-deployment k8s-ephemeral-storage-metrics/k8s-ephemeral-storage-metrics
```

## Values

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| deploy_type | string | `"DaemonSet"` | |
| deploy_type | string | `"Deployment"` | Set to Deployment to run a single controller that queries all nodes, or to DaemonSet to run one collector per node |
| dev.enabled | bool | `false` | |
| extra.adjusted_polling_rate | bool | `false` | Create the ephemeral_storage_adjusted_polling_rate metrics to report Adjusted Poll Rate in milliseconds. Typically used for testing. |
| image.imagePullPolicy | string | `"IfNotPresent"` | |
Expand Down
4 changes: 2 additions & 2 deletions chart/Chart.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
apiVersion: v2
name: k8s-ephemeral-storage-metrics
version: 1.0.2
appVersion: 1.0.2
version: 1.1.0
appVersion: 1.1.0
kubeVersion: ">=1.21.0-0"
description: Ephemeral storage metrics for prometheus operator.
home: https://github.com/jmcgrath207/k8s-ephemeral-storage-metrics
Expand Down
3 changes: 2 additions & 1 deletion chart/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@

```bash
helm repo add k8s-ephemeral-storage-metrics https://jmcgrath207.github.io/k8s-ephemeral-storage-metrics/chart
helm repo update
helm upgrade --install my-deployment k8s-ephemeral-storage-metrics/k8s-ephemeral-storage-metrics
```

## Values

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| deploy_type | string | `"DaemonSet"` | |
| deploy_type | string | `"Deployment"` | Set to Deployment to run a single controller that queries all nodes, or to DaemonSet to run one collector per node |
| dev.enabled | bool | `false` | |
| extra.adjusted_polling_rate | bool | `false` | Create the ephemeral_storage_adjusted_polling_rate metrics to report Adjusted Poll Rate in milliseconds. Typically used for testing. |
| image.imagePullPolicy | string | `"IfNotPresent"` | |
Expand Down
1 change: 1 addition & 0 deletions chart/README.md.gotmpl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

```bash
helm repo add k8s-ephemeral-storage-metrics https://jmcgrath207.github.io/k8s-ephemeral-storage-metrics/chart
helm repo update
helm upgrade --install my-deployment k8s-ephemeral-storage-metrics/k8s-ephemeral-storage-metrics
```

Expand Down
24 changes: 23 additions & 1 deletion chart/index.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,28 @@
apiVersion: v1
entries:
k8s-ephemeral-storage-metrics:
- annotations:
artifacthub.io/license: MIT
artifacthub.io/links: |
- name: Documentation
url: https://github.com/jmcgrath207/k8s-ephemeral-storage-metrics
artifacthub.io/prerelease: "false"
apiVersion: v2
appVersion: 1.1.0
created: "2023-10-22T19:40:25.810173641-05:00"
description: Ephemeral storage metrics for prometheus operator.
digest: fe6be3c20af159cb4e7adb25fe96fe578d07fa49a6f8af5097c334571f644ec4
home: https://github.com/jmcgrath207/k8s-ephemeral-storage-metrics
keywords:
- kubernetes
- metrics
kubeVersion: '>=1.21.0-0'
name: k8s-ephemeral-storage-metrics
sources:
- https://github.com/jmcgrath207/k8s-ephemeral-storage-metrics
urls:
- https://github.com/jmcgrath207/k8s-ephemeral-storage-metrics/releases/download/1.1.0/k8s-ephemeral-storage-metrics-1.1.0.tgz
version: 1.1.0
- annotations:
artifacthub.io/license: MIT
artifacthub.io/links: |
Expand Down Expand Up @@ -67,4 +89,4 @@ entries:
urls:
- https://github.com/jmcgrath207/k8s-ephemeral-storage-metrics/releases/download/1.0.0/k8s-ephemeral-storage-metrics-1.0.0.tgz
version: 1.0.0
generated: "2023-10-20T13:24:12.479459598-05:00"
generated: "2023-10-22T19:40:25.809884043-05:00"
4 changes: 4 additions & 0 deletions chart/templates/DeployType.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ spec:
successThreshold: 1
timeoutSeconds: 1
env:
- name: DEPLOY_TYPE
value: "{{ .Values.deploy_type }}"
- name: SCRAPE_INTERVAL
value: "{{ .Values.interval }}"
- name: LOG_LEVEL
Expand All @@ -61,8 +63,10 @@ spec:
- name: ADJUSTED_POLLING_RATE
value: "{{ .Values.extra.adjusted_polling_rate }}"
{{ end }}
{{ if eq .Values.deploy_type "DaemonSet" }}
- name: CURRENT_NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
{{ end }}

4 changes: 2 additions & 2 deletions chart/templates/RBAC.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ metadata:
{{- include "chart.labels" . | nindent 4 }}
rules:
- apiGroups: [""]
resources: ["nodes/proxy"]
verbs: ["get"]
resources: ["nodes","nodes/proxy"]
verbs: ["get","list"]

---

Expand Down
5 changes: 3 additions & 2 deletions chart/values.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
image:
repository: ghcr.io/jmcgrath207/k8s-ephemeral-storage-metrics
tag: 1.0.2
tag: 1.1.0
imagePullPolicy: IfNotPresent
log_level: info
deploy_type: DaemonSet
# -- Set to Deployment to run a single controller that queries all nodes, or to DaemonSet to run one collector per node
deploy_type: Deployment
# Note in testing, Kube API does not refresh faster than 10 seconds
# -- Polling rate for exporter
interval: 15 # Seconds
Expand Down
103 changes: 76 additions & 27 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"
Expand All @@ -18,16 +19,19 @@ import (
"path/filepath"
"runtime"
"strconv"
"sync"
"time"
)

var (
inCluster string
clientset *kubernetes.Clientset
currentNode string
sampleInterval int64
adjustedPollingRate bool
adjustedTimeGauge prometheus.Gauge
deployType string
nodeSlice []string
nodeWaitGroup sync.WaitGroup
)

func getEnv(key, fallback string) string {
Expand Down Expand Up @@ -97,8 +101,34 @@ type ephemeralStorageMetrics struct {
}
}

func getMetrics() {
// getNodes maintains nodeSlice, the list of node names whose kubelet
// stats/summary endpoint getMetrics scrapes.
//
// When deployType is anything other than "Deployment", nodeSlice receives the
// single node named by the CURRENT_NODE_NAME environment variable and the
// function returns. In "Deployment" mode it never returns: it re-lists the
// cluster's nodes once a minute so that added or removed nodes are picked up.
//
// nodeWaitGroup is held while a listing is in progress and released once
// nodeSlice has been populated; getMetrics waits on it before its first scrape.
func getNodes() {
	nodeWaitGroup.Add(1)
	if deployType != "Deployment" {
		nodeSlice = append(nodeSlice, getEnv("CURRENT_NODE_NAME", ""))
		nodeWaitGroup.Done()
		return
	}
	for {
		nodes, err := clientset.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
		if err != nil {
			// Keep the last successfully fetched list: wiping nodeSlice on a
			// transient API error would make getMetrics drop every pod metric
			// until the next successful refresh.
			log.Error().Msgf("listing nodes: %s", err.Error())
		} else {
			// Build the new list locally and publish it with one assignment,
			// rather than truncating and re-appending the shared slice while
			// getMetrics may be ranging over it.
			// NOTE(review): the assignment itself is still unsynchronized with
			// the read in getMetrics; guard both with a mutex if the race
			// detector flags it.
			names := make([]string, 0, len(nodes.Items))
			for _, node := range nodes.Items {
				names = append(names, node.Name)
			}
			nodeSlice = names
		}
		nodeWaitGroup.Done()
		time.Sleep(1 * time.Minute)
		nodeWaitGroup.Add(1)
	}
}

// CollectMetric is one pending sample for the ephemeral_storage_pod_usage
// gauge. getMetrics buffers these while it walks every node, then resets the
// gauge and applies the whole batch.
//
// The fields are filled positionally by a composite literal in getMetrics, so
// their order must not change.
type CollectMetric struct {
// usedBytes is the pod's EphemeralStorage.UsedBytes value from the node's stats summary.
usedBytes float64
// labels carries the pod_namespace, pod_name and node_name label values for the sample.
labels prometheus.Labels
}

func getMetrics() {
nodeWaitGroup.Wait()
var labelsList []CollectMetric
opsQueued := prometheus.NewGaugeVec(prometheus.GaugeOpts{
Name: "ephemeral_storage_pod_usage",
Help: "Used to expose Ephemeral Storage metrics for pod in bytes ",
Expand All @@ -116,7 +146,6 @@ func getMetrics() {
prometheus.MustRegister(opsQueued)

log.Debug().Msg(fmt.Sprintf("getMetrics has been invoked"))
currentNode = getEnv("CURRENT_NODE_NAME", "")

if adjustedPollingRate {
adjustedTimeGauge = prometheus.NewGauge(prometheus.GaugeOpts{
Expand All @@ -132,37 +161,55 @@ func getMetrics() {
for {
start := time.Now()

content, err := clientset.RESTClient().Get().AbsPath(fmt.Sprintf("/api/v1/nodes/%s/proxy/stats/summary", currentNode)).DoRaw(context.Background())
if err != nil {
log.Error().Msg(fmt.Sprintf("ErrorBadRequst : %s\n", err.Error()))
os.Exit(1)
}
log.Debug().Msg(fmt.Sprintf("Fetched proxy stats from node : %s", currentNode))
var data ephemeralStorageMetrics
_ = json.Unmarshal(content, &data)

opsQueued.Reset() // reset this metrics in the Exporter to flush dead pods

nodeName := data.Node.NodeName
for _, pod := range data.Pods {
podName := pod.PodRef.Name
podNamespace := pod.PodRef.Namespace
usedBytes := pod.EphemeralStorage.UsedBytes
if podNamespace == "" || (usedBytes == 0 && pod.EphemeralStorage.AvailableBytes == 0 && pod.EphemeralStorage.CapacityBytes == 0) {
log.Warn().Msg(fmt.Sprintf("pod %s/%s on %s has no metrics on its ephemeral storage usage", podName, podNamespace, nodeName))
log.Warn().Msg(fmt.Sprintf("raw content %v", content))
for _, node := range nodeSlice {

content, err := clientset.RESTClient().Get().AbsPath(fmt.Sprintf("/api/v1/nodes/%s/proxy/stats/summary", node)).DoRaw(context.Background())
if err != nil {
log.Error().Msg(fmt.Sprintf("ErrorBadRequst : %s\n", err.Error()))
os.Exit(1)
}
opsQueued.With(prometheus.Labels{"pod_namespace": podNamespace,
"pod_name": podName, "node_name": nodeName}).Set(usedBytes)
if adjustedPollingRate {
adjustedTimeGauge.Set(float64(adjustTime))
log.Debug().Msg(fmt.Sprintf("Fetched proxy stats from node : %s", node))
var data ephemeralStorageMetrics
_ = json.Unmarshal(content, &data)

nodeName := data.Node.NodeName
for _, pod := range data.Pods {
podName := pod.PodRef.Name
podNamespace := pod.PodRef.Namespace
usedBytes := pod.EphemeralStorage.UsedBytes
if podNamespace == "" || (usedBytes == 0 && pod.EphemeralStorage.AvailableBytes == 0 && pod.EphemeralStorage.CapacityBytes == 0) {
log.Warn().Msg(fmt.Sprintf("pod %s/%s on %s has no metrics on its ephemeral storage usage", podName, podNamespace, nodeName))
log.Warn().Msg(fmt.Sprintf("raw content %v", content))
}
labelsList = append(labelsList, CollectMetric{
usedBytes,
prometheus.Labels{"pod_namespace": podNamespace,
"pod_name": podName, "node_name": nodeName},
})

log.Debug().Msg(fmt.Sprintf("pod %s/%s on %s with usedBytes: %f", podNamespace, podName, nodeName, usedBytes))
}
}

log.Debug().Msg(fmt.Sprintf("pod %s/%s on %s with usedBytes: %f", podNamespace, podName, nodeName, usedBytes))
// reset this metrics in the Exporter to flush dead pods
opsQueued.Reset()
// Push new metrics to exporter
for _, x := range labelsList {
opsQueued.With(x.labels).Set(x.usedBytes)
}
// Zero out collection list
labelsList = nil

elapsedTime := time.Now().Sub(start).Milliseconds()
adjustTime = sampleInterval - elapsedTime
if adjustTime <= 0.0 {
log.Error().Msgf("Adjusted Poll Rate: %d ms", adjustTime)
log.Error().Msgf("Polling Rate could not keep up. Adjust your Interval to a higher number than %d", sampleInterval)
os.Exit(1)
}
if adjustedPollingRate {
adjustedTimeGauge.Set(float64(adjustTime))
}
log.Debug().Msgf("Adjusted Poll Rate: %d ms", adjustTime)
time.Sleep(time.Duration(adjustTime) * time.Millisecond)
}
Expand Down Expand Up @@ -193,9 +240,11 @@ func main() {
flag.Parse()
setLogger()
getK8sClient()
go getNodes()
go getMetrics()
port := getEnv("METRICS_PORT", "9100")
adjustedPollingRate, _ = strconv.ParseBool(getEnv("ADJUSTED_POLLING_RATE", "false"))
deployType = getEnv("DEPLOY_TYPE", "DaemonSet")
http.Handle("/metrics", promhttp.Handler())
log.Info().Msg(fmt.Sprintf("Starting server listening on :%s", port))
err := http.ListenAndServe(fmt.Sprintf(":%s", port), nil)
Expand Down
2 changes: 1 addition & 1 deletion scripts/create_kind.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ kubectl get nodes -o wide
# Deploy Service Monitor CRD
kubectl apply --server-side -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.65.1/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml

if [[ ! $ENV =~ "e2e" ]]; then
if ! [[ $ENV =~ "e2e" ]]; then
# Deploy Prometheus
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo update
Expand Down
3 changes: 2 additions & 1 deletion tests/e2e/deployment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,8 @@ var _ = ginkgo.Describe("Test Metrics\n", func() {
ginkgo.Specify("\nReturn A Record IP addresses and Proxy IP address", func() {
var checkSlice []string
checkSlice = append(checkSlice, "ephemeral_storage_pod_usage",
"pod_name=\"k8s-ephemeral-storage", "ephemeral_storage_adjusted_polling_rate")
"pod_name=\"k8s-ephemeral-storage", "ephemeral_storage_adjusted_polling_rate",
"node_name=\"ephemeral-metrics-cluster-worker", "node_name=\"ephemeral-metrics-cluster-control-plane")
checkPrometheus(checkSlice)
})
})
Expand Down

0 comments on commit 50d1fde

Please sign in to comment.