From d57c4436e868106e3d7fc7e46f84e4e31af2c46e Mon Sep 17 00:00:00 2001 From: Shehbaj Dhillon Date: Wed, 30 Oct 2024 17:49:45 -0700 Subject: [PATCH] utils prometheusmetrics: convert gauges to counters (#3093) --- go.mod | 1 + pkg/awsutils/awsutils_test.go | 30 +++++ pkg/ipamd/datastore/data_store_test.go | 73 +++++++++++ pkg/ipamd/ipamd_test.go | 128 +++++++++++++++---- pkg/ipamd/rpc_handler_test.go | 13 ++ utils/prometheusmetrics/prometheusmetrics.go | 20 +-- 6 files changed, 230 insertions(+), 35 deletions(-) diff --git a/go.mod b/go.mod index 0201b99b3f..9b4d4f648f 100644 --- a/go.mod +++ b/go.mod @@ -104,6 +104,7 @@ require ( github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect github.com/klauspost/compress v1.17.9 // indirect + github.com/kylelemons/godebug v1.1.0 // indirect github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 // indirect github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 // indirect github.com/lib/pq v1.10.9 // indirect diff --git a/pkg/awsutils/awsutils_test.go b/pkg/awsutils/awsutils_test.go index e923a82bcc..65bf4ee7d1 100644 --- a/pkg/awsutils/awsutils_test.go +++ b/pkg/awsutils/awsutils_test.go @@ -26,6 +26,8 @@ import ( "github.com/aws/aws-sdk-go/aws/request" "github.com/aws/aws-sdk-go/service/ec2" "github.com/golang/mock/gomock" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" "github.com/stretchr/testify/assert" "github.com/aws/aws-sdk-go/aws" @@ -33,6 +35,7 @@ import ( mock_ec2wrapper "github.com/aws/amazon-vpc-cni-k8s/pkg/ec2wrapper/mocks" "github.com/aws/amazon-vpc-cni-k8s/pkg/utils/eventrecorder" + "github.com/aws/amazon-vpc-cni-k8s/utils/prometheusmetrics" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -780,6 +783,33 @@ func TestFreeENIRetry(t *testing.T) { assert.NoError(t, err) } +func TestAwsAPIErrInc(t *testing.T) { + // Reset metrics before test + prometheusmetrics.AwsAPIErr.Reset() + + // Test case 1: AWS error + awsErr := awserr.New("InvalidParameterException", "The parameter is invalid", nil) + awsAPIErrInc("CreateNetworkInterface", awsErr) + + // Verify metric was incremented with correct labels + count := testutil.ToFloat64(prometheusmetrics.AwsAPIErr.With(prometheus.Labels{ + "api": "CreateNetworkInterface", + "error": "InvalidParameterException", + })) + assert.Equal(t, float64(1), count) + + // Test case 2: Non-AWS error + regularErr := errors.New("some other error") + awsAPIErrInc("CreateNetworkInterface", regularErr) + + // Verify metric was not incremented for non-AWS error + count = testutil.ToFloat64(prometheusmetrics.AwsAPIErr.With(prometheus.Labels{ + "api": "CreateNetworkInterface", + "error": "InvalidParameterException", + })) + assert.Equal(t, float64(1), count) +} + func TestFreeENIRetryMax(t *testing.T) { ctrl, mockEC2 := setup(t) defer ctrl.Finish() diff --git a/pkg/ipamd/datastore/data_store_test.go b/pkg/ipamd/datastore/data_store_test.go index 5e1925fdc5..3efd872dbd 100644 --- a/pkg/ipamd/datastore/data_store_test.go +++ b/pkg/ipamd/datastore/data_store_test.go @@ -22,6 +22,7 @@ import ( mock_netlinkwrapper "github.com/aws/amazon-vpc-cni-k8s/pkg/netlinkwrapper/mocks" "github.com/aws/amazon-vpc-cni-k8s/pkg/networkutils" + "github.com/aws/amazon-vpc-cni-k8s/utils/prometheusmetrics" "github.com/golang/mock/gomock" "github.com/vishvananda/netlink" "golang.org/x/sys/unix" @@ -31,6 +32,8 @@ import ( "github.com/aws/amazon-vpc-cni-k8s/pkg/utils/logger" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" "github.com/stretchr/testify/assert" ) @@ -1542,3 +1545,73 @@ func TestDataStore_validateAllocationByPodVethExistence(t *testing.T) { }) } } + +func TestForceRemovalMetrics(t *testing.T) { + // Reset metrics by creating new counters + prometheusmetrics.ForceRemovedENIs = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "awscni_force_removed_enis_total", + Help: "The total number of ENIs force removed", + }) + prometheusmetrics.ForceRemovedIPs = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "awscni_force_removed_ips_total", + Help: "The total number of IPs force removed", + }) + + ds := NewDataStore(Testlog, NullCheckpoint{}, false) + + // Add an ENI and IP + err := ds.AddENI("eni-1", 1, true, false, false) + assert.NoError(t, err) + + ipv4Addr := net.IPNet{IP: net.ParseIP("1.1.1.1"), Mask: net.IPv4Mask(255, 255, 255, 255)} + err = ds.AddIPv4CidrToStore("eni-1", ipv4Addr, false) + assert.NoError(t, err) + + // Assign IP to a pod + key := IPAMKey{"net0", "sandbox-1", "eth0"} + ip, device, err := ds.AssignPodIPv4Address(key, IPAMMetadata{K8SPodNamespace: "default", K8SPodName: "sample-pod-1"}) + assert.NoError(t, err) + assert.Equal(t, "1.1.1.1", ip) + assert.Equal(t, 1, device) + + // Test force removal of IP + err = ds.DelIPv4CidrFromStore("eni-1", ipv4Addr, false) + assert.Error(t, err) // Should fail without force + assert.Contains(t, err.Error(), "IP is used and can not be deleted") + + ipCount := testutil.ToFloat64(prometheusmetrics.ForceRemovedIPs) + assert.Equal(t, float64(0), ipCount) + + // Force remove the IP + err = ds.DelIPv4CidrFromStore("eni-1", ipv4Addr, true) + assert.NoError(t, err) // Should succeed with force + + ipCount = testutil.ToFloat64(prometheusmetrics.ForceRemovedIPs) + assert.Equal(t, float64(1), ipCount) + + // Add another IP and assign to pod for ENI removal test + ipv4Addr2 := net.IPNet{IP: net.ParseIP("1.1.1.2"), Mask: net.IPv4Mask(255, 255, 255, 255)} + err = ds.AddIPv4CidrToStore("eni-1", ipv4Addr2, false) + assert.NoError(t, err) + + key2 := IPAMKey{"net0", "sandbox-2", "eth0"} + ip, device, err = ds.AssignPodIPv4Address(key2, IPAMMetadata{K8SPodNamespace: "default", K8SPodName: "sample-pod-2"}) + assert.NoError(t, err) + assert.Equal(t, "1.1.1.2", ip) + assert.Equal(t, 1, device) + + // Test force removal of ENI + err = ds.RemoveENIFromDataStore("eni-1", false) + assert.Error(t, err) // Should fail without force + assert.Contains(t, err.Error(), "datastore: ENI is used and can not be deleted") // Updated error message + + eniCount := testutil.ToFloat64(prometheusmetrics.ForceRemovedENIs) + assert.Equal(t, float64(0), eniCount) + + // Force remove the ENI + err = ds.RemoveENIFromDataStore("eni-1", true) + assert.NoError(t, err) // Should succeed with force + + eniCount = testutil.ToFloat64(prometheusmetrics.ForceRemovedENIs) + assert.Equal(t, float64(1), eniCount) +} diff --git a/pkg/ipamd/ipamd_test.go b/pkg/ipamd/ipamd_test.go index 6053cb0b18..7999a5e4a8 100644 --- a/pkg/ipamd/ipamd_test.go +++ b/pkg/ipamd/ipamd_test.go @@ -48,7 +48,10 @@ import ( mock_eniconfig "github.com/aws/amazon-vpc-cni-k8s/pkg/eniconfig/mocks" "github.com/aws/amazon-vpc-cni-k8s/pkg/ipamd/datastore" mock_networkutils "github.com/aws/amazon-vpc-cni-k8s/pkg/networkutils/mocks" + "github.com/aws/amazon-vpc-cni-k8s/utils/prometheusmetrics" rcscheme "github.com/aws/amazon-vpc-resource-controller-k8s/apis/vpcresources/v1alpha1" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" ) const ( @@ -590,7 +593,7 @@ func testIncreaseIPPool(t *testing.T, useENIConfig bool, unschedulabeNode bool, labels := map[string]string{ "k8s.amazonaws.com/eniConfig": "az1", } - //Create a Fake Node + // Create a Fake Node fakeNode := v1.Node{ TypeMeta: metav1.TypeMeta{Kind: "Node"}, ObjectMeta: metav1.ObjectMeta{Name: myNodeName, Labels: labels}, @@ -754,7 +757,7 @@ func testIncreasePrefixPool(t *testing.T, useENIConfig, subnetDiscovery bool) { labels := map[string]string{ "k8s.amazonaws.com/eniConfig": "az1", } - //Create a Fake Node + // Create a Fake Node fakeNode := v1.Node{ TypeMeta: metav1.TypeMeta{Kind: "Node"}, ObjectMeta: metav1.ObjectMeta{Name: myNodeName, Labels: labels}, @@ -763,7 +766,7 @@ func testIncreasePrefixPool(t *testing.T, useENIConfig, subnetDiscovery bool) { } m.k8sClient.Create(ctx, &fakeNode) - //Create a dummy ENIConfig + // Create a dummy ENIConfig fakeENIConfig := v1alpha1.ENIConfig{ TypeMeta: metav1.TypeMeta{}, ObjectMeta: metav1.ObjectMeta{Name: "az1"}, @@ -825,7 +828,7 @@ func TestDecreaseIPPool(t *testing.T) { assert.Equal(t, 0, over) // after the above deallocation this should be zero assert.Equal(t, true, enabled) // there is warm ip target enabled with the value of 1 - //make another call just to ensure that more deallocations do not happen + // make another call just to ensure that more deallocations do not happen mockContext.decreaseDatastorePool(10 * time.Second) short, over, enabled = mockContext.datastoreTargetState(nil) @@ -899,7 +902,7 @@ func TestTryAddIPToENI(t *testing.T) { mockContext.myNodeName = myNodeName - //Create a Fake Node + // Create a Fake Node fakeNode := v1.Node{ TypeMeta: metav1.TypeMeta{Kind: "Node"}, ObjectMeta: metav1.ObjectMeta{Name: myNodeName}, @@ -1061,13 +1064,13 @@ func TestNodePrefixPoolReconcile(t *testing.T) { PrivateIpAddress: &testAddr1, Primary: &primary, }, }, - //IPv4Prefixes: make([]*ec2.Ipv4PrefixSpecification, 0), + // IPv4Prefixes: make([]*ec2.Ipv4PrefixSpecification, 0), IPv4Prefixes: nil, }, } m.awsutils.EXPECT().GetAttachedENIs().Return(oneIPUnassigned, nil) m.awsutils.EXPECT().GetIPv4PrefixesFromEC2(primaryENIid).Return(oneIPUnassigned[0].IPv4Prefixes, nil) - //m.awsutils.EXPECT().GetIPv4sFromEC2(primaryENIid).Return(oneIPUnassigned[0].IPv4Addresses, nil) + // m.awsutils.EXPECT().GetIPv4sFromEC2(primaryENIid).Return(oneIPUnassigned[0].IPv4Addresses, nil) mockContext.nodeIPPoolReconcile(ctx, 0) curENIs = mockContext.dataStore.GetENIInfos() @@ -1432,16 +1435,20 @@ func TestIPAMContext_filterUnmanagedENIs(t *testing.T) { Test1TagMap := map[string]awsutils.TagMap{eni1.ENIID: {"hi": "tag", eniNoManageTagKey: "true"}} Test2TagMap := map[string]awsutils.TagMap{ eni2.ENIID: {"hi": "tag", eniNoManageTagKey: "true"}, - eni3.ENIID: {"hi": "tag", eniNoManageTagKey: "true"}} + eni3.ENIID: {"hi": "tag", eniNoManageTagKey: "true"}, + } Test3TagMap := map[string]awsutils.TagMap{ eni2.ENIID: {"hi": "tag", eniNoManageTagKey: "true"}, - eni3.ENIID: {"hi": "tag", eniNoManageTagKey: "false"}} + eni3.ENIID: {"hi": "tag", eniNoManageTagKey: "false"}, + } Test4TagMap := map[string]awsutils.TagMap{ eni2.ENIID: {"hi": "tag", eniNoManageTagKey: "true"}, - eni3.ENIID: {"hi": "tag", eniNodeTagKey: instanceID}} + eni3.ENIID: {"hi": "tag", eniNodeTagKey: instanceID}, + } Test5TagMap := map[string]awsutils.TagMap{ eni2.ENIID: {"hi": "tag", eniNodeTagKey: "i-abcdabcdabcd"}, - eni3.ENIID: {"hi": "tag", eniNodeTagKey: instanceID}} + eni3.ENIID: {"hi": "tag", eniNodeTagKey: instanceID}, + } tests := []struct { name string @@ -1468,7 +1475,8 @@ func TestIPAMContext_filterUnmanagedENIs(t *testing.T) { c := &IPAMContext{ awsClient: mockAWSUtils, - enableManageUntaggedMode: true} + enableManageUntaggedMode: true, + } mockAWSUtils.EXPECT().SetUnmanagedENIs(gomock.Any()). Do(func(args []string) { @@ -1495,13 +1503,11 @@ func TestIPAMContext_filterUnmanagedENIs(t *testing.T) { } } return false - }).AnyTimes() mockAWSUtils.EXPECT().IsMultiCardENI(gomock.Any()).DoAndReturn( func(eni string) (unmanaged bool) { return false - }).AnyTimes() if got := c.filterUnmanagedENIs(tt.enis); !reflect.DeepEqual(got, tt.want) { @@ -1512,7 +1518,6 @@ func TestIPAMContext_filterUnmanagedENIs(t *testing.T) { } func TestIPAMContext_filterUnmanagedENIs_disableManageUntaggedMode(t *testing.T) { - eni1, eni2, eni3 := getDummyENIMetadata() allENIs := []awsutils.ENIMetadata{eni1, eni2, eni3} primaryENIonly := []awsutils.ENIMetadata{eni1} @@ -1520,16 +1525,20 @@ func TestIPAMContext_filterUnmanagedENIs_disableManageUntaggedMode(t *testing.T) Test1TagMap := map[string]awsutils.TagMap{eni1.ENIID: {"hi": "tag", eniNoManageTagKey: "true"}} Test2TagMap := map[string]awsutils.TagMap{ eni2.ENIID: {"hi": "tag", eniNoManageTagKey: "true"}, - eni3.ENIID: {"hi": "tag", eniNoManageTagKey: "true"}} + eni3.ENIID: {"hi": "tag", eniNoManageTagKey: "true"}, + } Test3TagMap := map[string]awsutils.TagMap{ eni2.ENIID: {"hi": "tag", eniNoManageTagKey: "true"}, - eni3.ENIID: {"hi": "tag", eniNoManageTagKey: "false"}} + eni3.ENIID: {"hi": "tag", eniNoManageTagKey: "false"}, + } Test4TagMap := map[string]awsutils.TagMap{ eni2.ENIID: {"hi": "tag", eniNoManageTagKey: "true"}, - eni3.ENIID: {"hi": "tag", eniNodeTagKey: instanceID}} + eni3.ENIID: {"hi": "tag", eniNodeTagKey: instanceID}, + } Test5TagMap := map[string]awsutils.TagMap{ eni2.ENIID: {"hi": "tag", eniNodeTagKey: "i-abcdabcdabcd"}, - eni3.ENIID: {"hi": "tag", eniNodeTagKey: instanceID}} + eni3.ENIID: {"hi": "tag", eniNodeTagKey: instanceID}, + } tests := []struct { name string @@ -1557,7 +1566,8 @@ func TestIPAMContext_filterUnmanagedENIs_disableManageUntaggedMode(t *testing.T) c := &IPAMContext{ awsClient: mockAWSUtils, - enableManageUntaggedMode: false} + enableManageUntaggedMode: false, + } mockAWSUtils.EXPECT().GetPrimaryENI().Times(tt.expectedGetPrimaryENICalls).Return(eni1.ENIID) mockAWSUtils.EXPECT().GetInstanceID().Times(tt.expectedGetInstanceIDCalls).Return(instanceID) @@ -1586,13 +1596,11 @@ func TestIPAMContext_filterUnmanagedENIs_disableManageUntaggedMode(t *testing.T) } } return false - }).AnyTimes() mockAWSUtils.EXPECT().IsMultiCardENI(gomock.Any()).DoAndReturn( func(eni string) (unmanaged bool) { return false - }).AnyTimes() if got := c.filterUnmanagedENIs(tt.enis); !reflect.DeepEqual(got, tt.want) { @@ -1798,6 +1806,7 @@ func TestNodePrefixPoolReconcileBadIMDSData(t *testing.T) { assert.Equal(t, 1, len(curENIs.ENIs)) assert.Equal(t, 16, curENIs.TotalIPs) } + func getPrimaryENIMetadata() awsutils.ENIMetadata { primary := true notPrimary := false @@ -1905,7 +1914,7 @@ func TestIPAMContext_setupENI(t *testing.T) { primaryIP: make(map[string]string), terminating: int32(0), } - //mockContext.primaryIP[] + // mockContext.primaryIP[] mockContext.dataStore = testDatastore() primary := true @@ -1951,7 +1960,7 @@ func TestIPAMContext_setupENIwithPDenabled(t *testing.T) { primaryIP: make(map[string]string), terminating: int32(0), } - //mockContext.primaryIP[] + // mockContext.primaryIP[] mockContext.dataStore = testDatastorewithPrefix() primary := true @@ -2193,7 +2202,6 @@ func TestIsConfigValid(t *testing.T) { assert.Equal(t, tt.want, resp) }) } - } func TestAnnotatePod(t *testing.T) { @@ -2390,3 +2398,73 @@ func TestAddFeatureToCNINode(t *testing.T) { }) } } + +func TestPodENIErrInc(t *testing.T) { + // Reset metrics before test + prometheusmetrics.PodENIErr.Reset() + + m := setup(t) + defer m.ctrl.Finish() + ctx := context.Background() + + mockContext := &IPAMContext{ + awsClient: m.awsutils, + k8sClient: m.k8sClient, + networkClient: m.network, + primaryIP: make(map[string]string), + terminating: int32(0), + dataStore: testDatastore(), + enableIPv4: true, + enableIPv6: false, + enablePodENI: true, + } + + // Create a test pod + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "test-namespace", + }, + } + err := mockContext.k8sClient.Create(ctx, pod) + assert.NoError(t, err) + + // Mock AWS API error + m.awsutils.EXPECT().AllocENI(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()). + Return("", errors.New("API error")).Times(2) // Expect 2 calls + + // Test case 1: First error + err = mockContext.tryAssignPodENI(ctx, pod, "test-function") + assert.Error(t, err) + + // Verify metric was incremented + count := testutil.ToFloat64(prometheusmetrics.PodENIErr.With(prometheus.Labels{ + "fn": "test-function", + })) + assert.Equal(t, float64(1), count, "Expected error count to be 1 for test-function") + + // Test case 2: Second error with different function + err = mockContext.tryAssignPodENI(ctx, pod, "another-function") + assert.Error(t, err) + + // Verify counts for both functions + count = testutil.ToFloat64(prometheusmetrics.PodENIErr.With(prometheus.Labels{ + "fn": "another-function", + })) + assert.Equal(t, float64(1), count, "Expected error count to be 1 for another-function") + + count = testutil.ToFloat64(prometheusmetrics.PodENIErr.With(prometheus.Labels{ + "fn": "test-function", + })) + assert.Equal(t, float64(1), count, "Expected error count to remain 1 for test-function") +} + +func (c *IPAMContext) tryAssignPodENI(ctx context.Context, pod *corev1.Pod, fnName string) error { + // Mock implementation for the test + _, err := c.awsClient.AllocENI(false, nil, "", 0) + if err != nil { + prometheusmetrics.PodENIErr.With(prometheus.Labels{"fn": fnName}).Inc() + return err + } + return nil +} diff --git a/pkg/ipamd/rpc_handler_test.go b/pkg/ipamd/rpc_handler_test.go index 4ddb74c93f..53389ba9ca 100644 --- a/pkg/ipamd/rpc_handler_test.go +++ b/pkg/ipamd/rpc_handler_test.go @@ -22,6 +22,9 @@ import ( pb "github.com/aws/amazon-vpc-cni-k8s/rpc" + "github.com/aws/amazon-vpc-cni-k8s/utils/prometheusmetrics" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" "github.com/stretchr/testify/assert" ) @@ -238,6 +241,12 @@ func TestServer_AddNetwork(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + // Reset the counter for each test case + prometheusmetrics.AddIPCnt = prometheus.NewCounter(prometheus.CounterOpts{ + Name: "awscni_add_ip_req_count", + Help: "Number of add IP address requests", + }) + m := setup(t) defer m.ctrl.Finish() @@ -302,6 +311,10 @@ func TestServer_AddNetwork(t *testing.T) { assert.NoError(t, err) assert.Equal(t, tt.want, resp) } + + // Add more detailed assertion messages + assert.Equal(t, float64(1), testutil.ToFloat64(prometheusmetrics.AddIPCnt), + "AddIPCnt should be incremented exactly once for test case: %s", tt.name) }) } } diff --git a/utils/prometheusmetrics/prometheusmetrics.go b/utils/prometheusmetrics/prometheusmetrics.go index fadda0a094..bd404875aa 100644 --- a/utils/prometheusmetrics/prometheusmetrics.go +++ b/utils/prometheusmetrics/prometheusmetrics.go @@ -61,8 +61,8 @@ var ( }, []string{"fn"}, ) - AddIPCnt = prometheus.NewGauge( - prometheus.GaugeOpts{ + AddIPCnt = prometheus.NewCounter( + prometheus.CounterOpts{ Name: "awscni_add_ip_req_count", Help: "The number of add IP address requests", }, @@ -74,8 +74,8 @@ var ( }, []string{"reason"}, ) - PodENIErr = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ + PodENIErr = prometheus.NewCounterVec( + prometheus.CounterOpts{ Name: "awscni_pod_eni_error_count", Help: "The number of errors encountered for pod ENIs", }, @@ -88,8 +88,8 @@ var ( }, []string{"api", "error", "status"}, ) - AwsAPIErr = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ + AwsAPIErr = prometheus.NewCounterVec( + prometheus.CounterOpts{ Name: "awscni_aws_api_error_count", Help: "The number of times AWS API returns an error", }, @@ -134,14 +134,14 @@ var ( Help: "The number of IP addresses assigned to pods", }, ) - ForceRemovedENIs = prometheus.NewGauge( - prometheus.GaugeOpts{ + ForceRemovedENIs = prometheus.NewCounter( + prometheus.CounterOpts{ Name: "awscni_force_removed_enis", Help: "The number of ENIs force removed while they had assigned pods", }, ) - ForceRemovedIPs = prometheus.NewGauge( - prometheus.GaugeOpts{ + ForceRemovedIPs = prometheus.NewCounter( + prometheus.CounterOpts{ Name: "awscni_force_removed_ips", Help: "The number of IPs force removed while they had assigned pods", },