Commit 04fe405: testing
jmcgrath207 committed Nov 5, 2024 (1 parent: dda180f)

Showing 8 changed files with 90 additions and 76 deletions.
5 changes: 1 addition & 4 deletions cmd/app/main.go
@@ -49,8 +49,8 @@ func setMetrics(nodeName string) {
 	start := time.Now()

 	content, err := Node.Query(nodeName)
 	// Skip node query if there is an error.
 	if err != nil {
 		log.Warn().Msgf("Could not query node %s for ephemeral storage", nodeName)
 		return
 	}

@@ -83,7 +83,6 @@ func setMetrics(nodeName string) {

 func getMetrics() {

-	Node.WaitGroup.Wait()
 	Pod.WaitGroup.Wait()

 	p, _ := ants.NewPoolWithFunc(Node.MaxNodeQueryConcurrency, func(node interface{}) {
@@ -120,8 +119,6 @@ func main() {
 	if pprofEnabled {
 		go dev.EnablePprof()
 	}
-	go Node.Get()
-	go Node.Watch()
 	go getMetrics()
 	http.Handle("/metrics", promhttp.Handler())
 	log.Info().Msg(fmt.Sprintf("Starting server listening on :%s", port))
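Note: the surviving main() lines are the stock client_golang exposition pattern. A minimal, self-contained sketch of that pattern follows; the port and the plain log package are placeholders, not this repo's exact wiring:

```go
package main

import (
	"log"
	"net/http"

	"github.com/prometheus/client_golang/prometheus/promhttp"
)

func main() {
	// promhttp.Handler() serves every metric registered with the
	// default Prometheus registry.
	http.Handle("/metrics", promhttp.Handler())
	log.Println("Starting server listening on :9100")
	log.Fatal(http.ListenAndServe(":9100", nil))
}
```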
7 changes: 7 additions & 0 deletions pkg/node/factory.go
@@ -63,5 +63,12 @@ func NewCollector(sampleInterval int64) Node {
 		WaitGroup: &waitGroup,
 	}
 	node.createMetrics()
+
+	if node.deployType != "Deployment" {
+		node.Set.Add(dev.GetEnv("CURRENT_NODE_NAME", ""))
+	} else {
+		go node.Watch()
+	}
+
 	return node
 }
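Note: with this change the collector picks its watch strategy once at construction: non-Deployment installs (e.g. a DaemonSet) track only the node named in CURRENT_NODE_NAME, while Deployment installs watch the whole cluster. dev.GetEnv is not shown in this commit; a sketch of what an env-with-default helper of that shape usually looks like (an assumption, not the repo's code):

```go
package dev

import "os"

// GetEnv returns the value of key, or fallback when the variable
// is unset or empty. Sketch only; the real helper may differ.
func GetEnv(key, fallback string) string {
	if v := os.Getenv(key); v != "" {
		return v
	}
	return fallback
}
```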
49 changes: 26 additions & 23 deletions pkg/node/k8s.go
@@ -14,7 +14,6 @@ import (
 	"github.com/jmcgrath207/k8s-ephemeral-storage-metrics/pkg/dev"
 	"github.com/rs/zerolog/log"
 	v1 "k8s.io/api/core/v1"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/client-go/informers"
 	"k8s.io/client-go/tools/cache"
 )
@@ -31,24 +30,14 @@ func (n *Node) getKubeletEndpoint(node *v1.Node) string {
 	return ""
 }

-func (n *Node) Get() {
-	n.WaitGroup.Add(1)
-	if n.deployType != "Deployment" {
-		n.Set.Add(dev.GetEnv("CURRENT_NODE_NAME", ""))
-		n.WaitGroup.Done()
-		return
-	}
-
-	// Init Node slice
-	startNodes, _ := dev.Clientset.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
-	for _, node := range startNodes.Items {
-		n.Set.Add(node.Name)
-		if n.scrapeFromKubelet {
-			n.KubeletEndpoint.Store(node.Name, n.getKubeletEndpoint(&node))
+func checkKubeletStatus(nodeStatusConditions *[]v1.NodeCondition) bool {
+	// Ensure the Kubelet service is ready.
+	for _, nodeCon := range *nodeStatusConditions {
+		if nodeCon.Reason == "KubeletReady" {
+			return true
 		}
 	}
-	n.WaitGroup.Done()
-
+	return false
 }

 func (n *Node) Query(node string) ([]byte, error) {
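Note: readiness here is keyed off the condition Reason rather than the Ready status value; the kubelet reports Reason "KubeletReady" on a node's Ready condition. A small usage sketch of the helper above, with assumed condition values:

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// Mirrors the helper in this commit: a node counts as ready when any
// status condition carries the kubelet's "KubeletReady" reason.
func checkKubeletStatus(conds *[]v1.NodeCondition) bool {
	for _, c := range *conds {
		if c.Reason == "KubeletReady" {
			return true
		}
	}
	return false
}

func main() {
	ready := []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionTrue, Reason: "KubeletReady"}}
	notReady := []v1.NodeCondition{{Type: v1.NodeReady, Status: v1.ConditionFalse, Reason: "KubeletNotReady"}}
	fmt.Println(checkKubeletStatus(&ready), checkKubeletStatus(&notReady)) // true false
}
```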
@@ -95,7 +84,10 @@ func (n *Node) Query(node string) ([]byte, error) {
 	err := backoff.Retry(operation, bo)

 	if err != nil {
-		log.Warn().Msg(fmt.Sprintf("Failed fetched proxy stats from node : %s: %v", node, err))
+		log.Warn().Msg(fmt.Sprintf("Failed to fetch proxy stats from node %s: %v", node, err))
+		// Assume the node status is not ready, so evict all pods tracked by that node. The Update func in the
+		// Node Watcher will pick the node back up for monitoring again once the kubelet status reports ready.
+		n.evict(node)
 		return nil, err
 	}
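Note: the retry wrapper here looks like the cenkalti/backoff pattern (bo and operation are defined earlier in Query, outside this hunk). A hedged, self-contained sketch of that pattern, with the operation, backoff policy, and retry cap all assumed:

```go
package main

import (
	"errors"
	"fmt"

	"github.com/cenkalti/backoff/v4"
)

func main() {
	attempts := 0
	// operation is retried with exponentially growing delays until it
	// succeeds or the retry budget is exhausted.
	operation := func() error {
		attempts++
		if attempts < 3 {
			return errors.New("kubelet not reachable yet")
		}
		return nil
	}

	bo := backoff.WithMaxRetries(backoff.NewExponentialBackOff(), 5)
	if err := backoff.Retry(operation, bo); err != nil {
		fmt.Println("giving up:", err)
		return
	}
	fmt.Println("succeeded after", attempts, "attempts")
}
```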

@@ -104,24 +96,35 @@
 }

 func (n *Node) Watch() {
-	n.WaitGroup.Wait()
 	stopCh := make(chan struct{})
 	defer close(stopCh)
 	// TODO: break out the sampleInterval into Groups. E.g. nodeSampleInterval, podSampleInterval, metricsSampleInterval
 	sharedInformerFactory := informers.NewSharedInformerFactory(dev.Clientset, time.Duration(n.sampleInterval)*time.Second)
 	nodeInformer := sharedInformerFactory.Core().V1().Nodes().Informer()

 	// Define event handlers for Node events
 	eventHandler := cache.ResourceEventHandlerFuncs{
 		AddFunc: func(obj interface{}) {
 			p := obj.(*v1.Node)
-			n.Set.Add(p.Name)
-			if n.scrapeFromKubelet {
-				n.KubeletEndpoint.Store(p.Name, n.getKubeletEndpoint(p))
+			if checkKubeletStatus(&p.Status.Conditions) {
+				n.Set.Add(p.Name)
+				if n.scrapeFromKubelet {
+					n.KubeletEndpoint.Store(p.Name, n.getKubeletEndpoint(p))
+				}
 			}
 		},
+		UpdateFunc: func(oldObj, newObj interface{}) {
+			p := newObj.(*v1.Node)
+			// Add nodes back that have changed readiness status.
+			if checkKubeletStatus(&p.Status.Conditions) {
+				n.Set.Add(p.Name)
+				if n.scrapeFromKubelet {
+					n.KubeletEndpoint.Store(p.Name, n.getKubeletEndpoint(p))
+				}
+			}
+		},
 		DeleteFunc: func(obj interface{}) {
 			p := obj.(*v1.Node)
-			n.Set.Remove(p.Name)
+			n.evict(p.Name)
 			if n.scrapeFromKubelet {
 				n.KubeletEndpoint.Delete(p.Name)
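Note: the hunk is cut off here; informer setups of this shape are normally finished by registering the handler, starting the factory, and waiting for the initial cache sync. A sketch of that closing boilerplate (standard client-go calls, not the repo's exact tail):

```go
package node

import (
	"k8s.io/client-go/informers"
	"k8s.io/client-go/tools/cache"
)

// runNodeInformer shows the usual tail of a shared-informer setup:
// register the handler, start the factory, wait for sync, then block.
func runNodeInformer(factory informers.SharedInformerFactory, handler cache.ResourceEventHandlerFuncs, stopCh chan struct{}) {
	informer := factory.Core().V1().Nodes().Informer()
	informer.AddEventHandler(handler)
	factory.Start(stopCh)
	cache.WaitForCacheSync(stopCh, informer.HasSynced)
	<-stopCh
}
```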
4 changes: 2 additions & 2 deletions pkg/node/metrics.go
@@ -92,12 +92,12 @@ func (n *Node) SetMetrics(nodeName string, availableBytes float64, capacityBytes
 }

 func (n *Node) evict(node string) {
-
+	n.Set.Remove(node)
 	nodeAvailableGaugeVec.DeletePartialMatch(prometheus.Labels{"node_name": node})
 	nodeCapacityGaugeVec.DeletePartialMatch(prometheus.Labels{"node_name": node})
 	nodePercentageGaugeVec.DeletePartialMatch(prometheus.Labels{"node_name": node})
 	if n.AdjustedPollingRate {
 		AdjustedPollingRateGaugeVec.DeletePartialMatch(prometheus.Labels{"node_name": node})
 	}
-	log.Info().Msgf("Node %s does not exist. Removed from monitoring", node)
+	log.Info().Msgf("Node %s does not exist or is unresponsive. Removed from monitoring", node)
 }
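Note: DeletePartialMatch (client_golang v1.12+) removes every series whose label set contains the given labels, which is what lets evict clear all of a node's series with one call per vec. A small sketch with made-up metric and label values:

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	gv := prometheus.NewGaugeVec(
		prometheus.GaugeOpts{Name: "demo_node_bytes", Help: "demo"},
		[]string{"node_name", "mount"},
	)
	gv.WithLabelValues("node-a", "/var").Set(1)
	gv.WithLabelValues("node-a", "/tmp").Set(2)
	gv.WithLabelValues("node-b", "/var").Set(3)

	// Removes both node-a series in one call; node-b is untouched.
	deleted := gv.DeletePartialMatch(prometheus.Labels{"node_name": "node-a"})
	fmt.Println("deleted series:", deleted) // deleted series: 2
}
```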
34 changes: 32 additions & 2 deletions scripts/create-minikube.sh
@@ -8,12 +8,42 @@ c=$(docker ps -q) && [[ $c ]] && docker kill $c
 docker network prune -f
 minikube start \
   --kubernetes-version="${K8S_VERSION}" \
-  --insecure-registry "10.0.0.0/24" \
+  --insecure-registry "0.0.0.0/0" \
   --addons="registry" \
   --cpus=2 \
   --cni='calico' \
   --memory=3900MB \
   --driver="${DRIVER}"

+minikube addons enable registry
+# minikube registry-proxy doesn't work well on other nodes.
+# kubectl patch daemonset -n kube-system registry-proxy -p '{"spec":{"template":{"spec":{"nodeSelector":{"kubernetes.io/hostname":"minikube"}}}}}'
+
+# Wait until the registry pod comes up.
+while [ "$(kubectl get pods -n kube-system -l actual-registry=true -o=jsonpath='{.items[*].status.phase}')" != "Running" ]; do
+  echo "Waiting for registry pod to start. Sleep 10" && sleep 10
+done
+
+# Wait until the registry-proxy pod comes up.
+while [ "$(kubectl get pods -n kube-system -l registry-proxy=true -o=jsonpath='{.items[*].status.phase}')" != "Running" ]; do
+  echo "Waiting for registry proxy pod to start. Sleep 10" && sleep 10
+done
+
+# Poll the registry endpoint until it reports healthy.
+while true; do
+  # Send a GET request to the endpoint and capture the HTTP status code.
+  status_code=$(curl -s -o /dev/null -w "%{http_code}" "http://$(minikube ip):5000/v2/_catalog")
+
+  # Check if the status code is 200.
+  if [ "$status_code" -eq 200 ]; then
+    echo "Registry endpoint is healthy. Status code: $status_code"
+    break
+  else
+    echo "Registry endpoint is not healthy. Status code: $status_code. Retrying..."
+    sleep 5 # Wait for 5 seconds before retrying.
+  fi
+done
+
 # Deploy Service Monitor and Prometheus Rule CRDs
 for crd in monitoring.coreos.com_{servicemonitors,prometheusrules}.yaml; do
49 changes: 4 additions & 45 deletions scripts/deploy.sh
@@ -14,39 +14,14 @@ function main() {
 	local common_set_values_arr
 	local grow_repo_image
 	local shrink_repo_image
+	local main_repo_image
 	local e2e_values_arr
 	local external_registry
 	local internal_registry
-	local status_code
 	local time_tag

 	trap 'trap_func' EXIT ERR

-	# Wait until registry pod come up
-	while [ "$(kubectl get pods -n kube-system -l actual-registry=true -o=jsonpath='{.items[*].status.phase}')" != "Running" ]; do
-		echo "Waiting for registry pod to start. Sleep 10" && sleep 10
-	done
-
-	# Wait until registry-proxy pod come up
-	while [ "$(kubectl get pods -n kube-system -l registry-proxy=true -o=jsonpath='{.items[*].status.phase}')" != "Running" ]; do
-		echo "Waiting for registry proxy pod to start. Sleep 10" && sleep 10
-	done
-
-	# Use a while loop to repeatedly check the registry endpoint until health
-	while true; do
-		# Send a GET request to the endpoint and capture the HTTP status code
-		status_code=$(curl -s -o /dev/null -w "%{http_code}" "http://$(minikube ip):5000/v2/_catalog")
-
-		# Check if the status code is 200
-		if [ "$status_code" -eq 200 ]; then
-			echo "Registry endpoint is healthy. Status code: $status_code"
-			break
-		else
-			echo "Registry endpoint is not healthy. Status code: $status_code. Retrying..."
-			sleep 5 # Wait for 5 seconds before retrying
-		fi
-	done
-
 	# Need both. External to push and internal for pods to pull from registry in cluster
 	external_registry="$(minikube ip):5000"
 	internal_registry="$(kubectl get service -n kube-system registry --template='{{.spec.clusterIP}}')"
@@ -56,22 +31,11 @@ function main() {
 	time_tag=$(date +%Y%m%d%H%M%S)

 	grow_repo_image="k8s-ephemeral-storage-grow-test:${time_tag}"
-
-	docker build --build-arg TARGETOS=linux --build-arg TARGETARCH=amd64 -f ../DockerfileTestGrow \
-		-t "${external_registry}/${grow_repo_image}" -t "${internal_registry}/${grow_repo_image}" ../.
-
-	docker save "${external_registry}/${grow_repo_image}" >/tmp/image.tar
-	"${LOCALBIN}/crane" push --insecure /tmp/image.tar "${external_registry}/${grow_repo_image}"
-	rm /tmp/image.tar
+	dev_image_builder "${grow_repo_image}" "DockerfileTestGrow" "${external_registry}" "${internal_registry}"

 	shrink_repo_image="k8s-ephemeral-storage-shrink-test:${time_tag}"
-
-	docker build --build-arg TARGETOS=linux --build-arg TARGETARCH=amd64 -f ../DockerfileTestShrink \
-		-t "${external_registry}/${shrink_repo_image}" -t "${internal_registry}/${shrink_repo_image}" ../.
-
-	docker save "${external_registry}/${shrink_repo_image}" >/tmp/image.tar
-	${LOCALBIN}/crane push --insecure /tmp/image.tar "${external_registry}/${shrink_repo_image}"
-	rm /tmp/image.tar
+	dev_image_builder "${shrink_repo_image}" "DockerfileTestShrink" "${external_registry}" "${internal_registry}"

 	if [[ $ENV == "debug" ]]; then
 		image_tag="debug-${time_tag}"
@@ -83,12 +47,7 @@ function main() {

 	# Main image
 	main_repo_image="${DEPLOYMENT_NAME}:${image_tag}"
-	docker build --build-arg TARGETOS=linux --build-arg TARGETARCH=amd64 -f ../${dockerfile} \
-		-t "${external_registry}/${main_repo_image}" -t "${internal_registry}/${main_repo_image}" ../.
-
-	docker save "${external_registry}/${main_repo_image}" >/tmp/image.tar
-	${LOCALBIN}/crane push --insecure /tmp/image.tar "${external_registry}/${main_repo_image}"
-	rm /tmp/image.tar
+	dev_image_builder "${main_repo_image}" "${dockerfile}" "${external_registry}" "${internal_registry}"

 	### Install Chart ###
16 changes: 16 additions & 0 deletions scripts/helpers.sh
@@ -107,3 +107,19 @@ function wait_pods() {
 		echo ""
 	fi
 }
+
+function dev_image_builder() {
+	# Build the image with docker and upload it with crane.
+
+	local image_name=$1
+	local dockerfile=$2
+	local external_registry=$3
+	local internal_registry=$4
+
+	docker build --build-arg TARGETOS=linux --build-arg TARGETARCH=amd64 -f "../${dockerfile}" \
+		-t "${external_registry}/${image_name}" -t "${internal_registry}/${image_name}" ../.
+
+	docker save "${external_registry}/${image_name}" >/tmp/image.tar
+	"${LOCALBIN}/crane" push --insecure /tmp/image.tar "${external_registry}/${image_name}"
+	rm /tmp/image.tar
+}
2 changes: 2 additions & 0 deletions steps.md
@@ -0,0 +1,2 @@
+
+