From 68565384e532fb82153d27612cee31f3ad521ea2 Mon Sep 17 00:00:00 2001 From: "Mengyi Zhou (bjrara)" Date: Wed, 4 Dec 2024 12:01:43 -0800 Subject: [PATCH 1/3] Unify Application Signals export template (#1430) --- .../appsignals_and_ecs_config.yaml | 30 ++++++++++++ .../sampleConfig/base_appsignals_config.yaml | 30 ++++++++++++ .../base_appsignals_fallback_config.yaml | 30 ++++++++++++ .../awsemf/appsignals_config_generic.yaml | 41 ----------------- .../awsemf/appsignals_config_k8s.yaml | 46 ------------------- ...ks.yaml => awsemf_default_appsignals.yaml} | 0 .../otel/exporter/awsemf/translator.go | 36 +-------------- .../otel/exporter/awsemf/translator_test.go | 16 +++---- 8 files changed, 100 insertions(+), 129 deletions(-) delete mode 100644 translator/translate/otel/exporter/awsemf/appsignals_config_generic.yaml delete mode 100644 translator/translate/otel/exporter/awsemf/appsignals_config_k8s.yaml rename translator/translate/otel/exporter/awsemf/{appsignals_config_eks.yaml => awsemf_default_appsignals.yaml} (100%) diff --git a/translator/tocwconfig/sampleConfig/appsignals_and_ecs_config.yaml b/translator/tocwconfig/sampleConfig/appsignals_and_ecs_config.yaml index a97027c87f..c2a36602fe 100644 --- a/translator/tocwconfig/sampleConfig/appsignals_and_ecs_config.yaml +++ b/translator/tocwconfig/sampleConfig/appsignals_and_ecs_config.yaml @@ -32,6 +32,7 @@ exporters: - dimensions: - - Environment - Operation + - RemoteEnvironment - RemoteOperation - RemoteResourceIdentifier - RemoteResourceType @@ -39,10 +40,39 @@ exporters: - Service - - Environment - Operation + - RemoteEnvironment - RemoteOperation - RemoteService - Service - - Environment + - Operation + - RemoteOperation + - RemoteResourceIdentifier + - RemoteResourceType + - RemoteService + - Service + - - Environment + - Operation + - RemoteOperation + - RemoteService + - Service + - - Environment + - RemoteEnvironment + - RemoteService + - Service + - - Environment + - RemoteService + - Service + - - Environment + - RemoteEnvironment + - RemoteOperation + - RemoteResourceIdentifier + - RemoteResourceType + - RemoteService + - Service + - - Environment + - RemoteEnvironment + - RemoteOperation - RemoteService - Service - - Environment diff --git a/translator/tocwconfig/sampleConfig/base_appsignals_config.yaml b/translator/tocwconfig/sampleConfig/base_appsignals_config.yaml index 24afc038e6..84aa3bac0f 100644 --- a/translator/tocwconfig/sampleConfig/base_appsignals_config.yaml +++ b/translator/tocwconfig/sampleConfig/base_appsignals_config.yaml @@ -32,6 +32,7 @@ exporters: - dimensions: - - Environment - Operation + - RemoteEnvironment - RemoteOperation - RemoteResourceIdentifier - RemoteResourceType @@ -39,10 +40,39 @@ exporters: - Service - - Environment - Operation + - RemoteEnvironment - RemoteOperation - RemoteService - Service - - Environment + - Operation + - RemoteOperation + - RemoteResourceIdentifier + - RemoteResourceType + - RemoteService + - Service + - - Environment + - Operation + - RemoteOperation + - RemoteService + - Service + - - Environment + - RemoteEnvironment + - RemoteService + - Service + - - Environment + - RemoteService + - Service + - - Environment + - RemoteEnvironment + - RemoteOperation + - RemoteResourceIdentifier + - RemoteResourceType + - RemoteService + - Service + - - Environment + - RemoteEnvironment + - RemoteOperation - RemoteService - Service - - Environment diff --git a/translator/tocwconfig/sampleConfig/base_appsignals_fallback_config.yaml b/translator/tocwconfig/sampleConfig/base_appsignals_fallback_config.yaml index 9791f903dd..ddb31057dd 100644 --- a/translator/tocwconfig/sampleConfig/base_appsignals_fallback_config.yaml +++ b/translator/tocwconfig/sampleConfig/base_appsignals_fallback_config.yaml @@ -32,6 +32,7 @@ exporters: - dimensions: - - Environment - Operation + - RemoteEnvironment - RemoteOperation - RemoteResourceIdentifier - RemoteResourceType @@ -39,10 +40,39 @@ exporters: - Service - - Environment - Operation + - RemoteEnvironment - RemoteOperation - RemoteService - Service - - Environment + - Operation + - RemoteOperation + - RemoteResourceIdentifier + - RemoteResourceType + - RemoteService + - Service + - - Environment + - Operation + - RemoteOperation + - RemoteService + - Service + - - Environment + - RemoteEnvironment + - RemoteService + - Service + - - Environment + - RemoteService + - Service + - - Environment + - RemoteEnvironment + - RemoteOperation + - RemoteResourceIdentifier + - RemoteResourceType + - RemoteService + - Service + - - Environment + - RemoteEnvironment + - RemoteOperation - RemoteService - Service - - Environment diff --git a/translator/translate/otel/exporter/awsemf/appsignals_config_generic.yaml b/translator/translate/otel/exporter/awsemf/appsignals_config_generic.yaml deleted file mode 100644 index 57d10c5c6b..0000000000 --- a/translator/translate/otel/exporter/awsemf/appsignals_config_generic.yaml +++ /dev/null @@ -1,41 +0,0 @@ -log_group_name: "/aws/application-signals/data" -namespace: "ApplicationSignals" -middleware: agenthealth/logs -dimension_rollup_option: "NoDimensionRollup" -metric_declarations: - - dimensions: - - [Environment, Service, Operation] - - [Environment, Service] - label_matchers: - - label_names: - - Telemetry.Source - regex: '^(ServerSpan|LocalRootSpan)$' - metric_name_selectors: - - Latency - - Fault - - Error - - dimensions: - - [Environment, Service, Operation, RemoteService, RemoteOperation, RemoteResourceIdentifier, RemoteResourceType] - - [Environment, Service, Operation, RemoteService, RemoteOperation] - - [Environment, Service, RemoteService] - - [Environment, Service, RemoteService, RemoteOperation, RemoteResourceIdentifier, RemoteResourceType] - - [Environment, Service, RemoteService, RemoteOperation] - - [Environment, Service, RemoteService, RemoteResourceIdentifier, RemoteResourceType] - - [RemoteService, RemoteResourceIdentifier, RemoteResourceType] - - [RemoteService] - label_matchers: - - label_names: - - Telemetry.Source - regex: '^(ClientSpan|ProducerSpan|ConsumerSpan)$' - metric_name_selectors: - - Latency - - Fault - - Error - - dimensions: - - [Environment, Service] - label_matchers: - - label_names: - - Telemetry.Source - regex: '^RuntimeMetric$' - metric_name_selectors: - - '^.*$' \ No newline at end of file diff --git a/translator/translate/otel/exporter/awsemf/appsignals_config_k8s.yaml b/translator/translate/otel/exporter/awsemf/appsignals_config_k8s.yaml deleted file mode 100644 index 05ea848fff..0000000000 --- a/translator/translate/otel/exporter/awsemf/appsignals_config_k8s.yaml +++ /dev/null @@ -1,46 +0,0 @@ -log_group_name: "/aws/application-signals/data" -namespace: "ApplicationSignals" -middleware: agenthealth/logs -dimension_rollup_option: "NoDimensionRollup" -metric_declarations: - - dimensions: - - [Environment, Service, Operation] - - [Environment, Service] - label_matchers: - - label_names: - - Telemetry.Source - regex: ^(ServerSpan|LocalRootSpan)$ - metric_name_selectors: - - Latency - - Fault - - Error - - dimensions: - - [Environment, Service, Operation, RemoteService, RemoteOperation, RemoteEnvironment, RemoteResourceIdentifier, RemoteResourceType] - - [Environment, Service, Operation, RemoteService, RemoteOperation, RemoteEnvironment] - - [Environment, Service, Operation, RemoteService, RemoteOperation, RemoteResourceIdentifier, RemoteResourceType] - - [Environment, Service, Operation, RemoteService, RemoteOperation] - - [Environment, Service, RemoteService, RemoteEnvironment] - - [Environment, Service, RemoteService] - - [Environment, Service, RemoteService, RemoteOperation, RemoteEnvironment, RemoteResourceIdentifier, RemoteResourceType] - - [Environment, Service, RemoteService, RemoteOperation, RemoteEnvironment] - - [Environment, Service, RemoteService, RemoteOperation, RemoteResourceIdentifier, RemoteResourceType] - - [Environment, Service, RemoteService, RemoteOperation] - - [Environment, Service, RemoteService, RemoteResourceIdentifier, RemoteResourceType] - - [RemoteService, RemoteResourceIdentifier, RemoteResourceType] - - [RemoteService] - label_matchers: - - label_names: - - Telemetry.Source - regex: '^(ClientSpan|ProducerSpan|ConsumerSpan)$' - metric_name_selectors: - - Latency - - Fault - - Error - - dimensions: - - [Environment, Service] - label_matchers: - - label_names: - - Telemetry.Source - regex: '^RuntimeMetric$' - metric_name_selectors: - - '^.*$' \ No newline at end of file diff --git a/translator/translate/otel/exporter/awsemf/appsignals_config_eks.yaml b/translator/translate/otel/exporter/awsemf/awsemf_default_appsignals.yaml similarity index 100% rename from translator/translate/otel/exporter/awsemf/appsignals_config_eks.yaml rename to translator/translate/otel/exporter/awsemf/awsemf_default_appsignals.yaml diff --git a/translator/translate/otel/exporter/awsemf/translator.go b/translator/translate/otel/exporter/awsemf/translator.go index 18ed408bed..24ef1dcd82 100644 --- a/translator/translate/otel/exporter/awsemf/translator.go +++ b/translator/translate/otel/exporter/awsemf/translator.go @@ -22,7 +22,6 @@ import ( "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/common" "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/extension/agenthealth" "github.com/aws/amazon-cloudwatch-agent/translator/translate/otel/receiver/awscontainerinsight" - "github.com/aws/amazon-cloudwatch-agent/translator/util/ecsutil" ) const ( @@ -44,13 +43,7 @@ var defaultKubernetesKueueConfig string //go:embed awsemf_default_prometheus.yaml var defaultPrometheusConfig string -//go:embed appsignals_config_eks.yaml -var appSignalsConfigEks string - -//go:embed appsignals_config_k8s.yaml -var appSignalsConfigK8s string - -//go:embed appsignals_config_generic.yaml +//go:embed awsemf_default_appsignals.yaml var appSignalsConfigGeneric string //go:embed awsemf_jmx_config.yaml @@ -92,7 +85,7 @@ func (t *translator) Translate(c *confmap.Conf) (component.Config, error) { defaultConfig := defaultGenericConfig if t.isAppSignals(c) { - defaultConfig = getAppSignalsConfig() + defaultConfig = appSignalsConfigGeneric } else if t.isCiJMX(c) { defaultConfig = defaultJmxConfig } else if isEcs(c) { @@ -163,31 +156,6 @@ func (t *translator) Translate(c *confmap.Conf) (component.Config, error) { return cfg, nil } -func getAppSignalsConfig() string { - ctx := context.CurrentContext() - - mode := ctx.KubernetesMode() - if mode == "" { - mode = ctx.Mode() - } - if mode == config.ModeEC2 { - if ecsutil.GetECSUtilSingleton().IsECS() { - mode = config.ModeECS - } - } - - switch mode { - case config.ModeEKS: - return appSignalsConfigEks - case config.ModeK8sEC2, config.ModeK8sOnPrem: - return appSignalsConfigK8s - case config.ModeEC2, config.ModeECS: - return appSignalsConfigGeneric - default: - return appSignalsConfigGeneric - } -} - func (t *translator) isAppSignals(conf *confmap.Conf) bool { return (t.name == common.AppSignals || t.name == common.AppSignalsFallback) && (conf.IsSet(common.AppSignalsMetrics) || conf.IsSet(common.AppSignalsTraces) || conf.IsSet(common.AppSignalsMetricsFallback) || conf.IsSet(common.AppSignalsTracesFallback)) } diff --git a/translator/translate/otel/exporter/awsemf/translator_test.go b/translator/translate/otel/exporter/awsemf/translator_test.go index b0c779198a..5d9e6c101c 100644 --- a/translator/translate/otel/exporter/awsemf/translator_test.go +++ b/translator/translate/otel/exporter/awsemf/translator_test.go @@ -928,7 +928,7 @@ func TestTranslateAppSignals(t *testing.T) { "application_signals": map[string]any{}, }, }}, - want: testutil.GetConfWithOverrides(t, filepath.Join("appsignals_config_eks.yaml"), map[string]any{ + want: testutil.GetConfWithOverrides(t, filepath.Join("awsemf_default_appsignals.yaml"), map[string]any{ "local_mode": "false", "region": "us-east-1", "role_arn": "global_arn", @@ -944,7 +944,7 @@ func TestTranslateAppSignals(t *testing.T) { "application_signals": map[string]any{}, }, }}, - want: testutil.GetConfWithOverrides(t, filepath.Join("appsignals_config_k8s.yaml"), map[string]any{ + want: testutil.GetConfWithOverrides(t, filepath.Join("awsemf_default_appsignals.yaml"), map[string]any{ "local_mode": "true", "region": "us-east-1", "role_arn": "global_arn", @@ -960,7 +960,7 @@ func TestTranslateAppSignals(t *testing.T) { "application_signals": map[string]any{}, }, }}, - want: testutil.GetConfWithOverrides(t, filepath.Join("appsignals_config_generic.yaml"), map[string]any{ + want: testutil.GetConfWithOverrides(t, filepath.Join("awsemf_default_appsignals.yaml"), map[string]any{ "local_mode": "true", "region": "us-east-1", "role_arn": "global_arn", @@ -976,7 +976,7 @@ func TestTranslateAppSignals(t *testing.T) { "application_signals": map[string]any{}, }, }}, - want: testutil.GetConfWithOverrides(t, filepath.Join("appsignals_config_generic.yaml"), map[string]any{ + want: testutil.GetConfWithOverrides(t, filepath.Join("awsemf_default_appsignals.yaml"), map[string]any{ "local_mode": "false", "region": "us-east-1", "role_arn": "global_arn", @@ -992,7 +992,7 @@ func TestTranslateAppSignals(t *testing.T) { "app_signals": map[string]any{}, }, }}, - want: testutil.GetConfWithOverrides(t, filepath.Join("appsignals_config_eks.yaml"), map[string]any{ + want: testutil.GetConfWithOverrides(t, filepath.Join("awsemf_default_appsignals.yaml"), map[string]any{ "local_mode": "false", "region": "us-east-1", "role_arn": "global_arn", @@ -1008,7 +1008,7 @@ func TestTranslateAppSignals(t *testing.T) { "app_signals": map[string]any{}, }, }}, - want: testutil.GetConfWithOverrides(t, filepath.Join("appsignals_config_k8s.yaml"), map[string]any{ + want: testutil.GetConfWithOverrides(t, filepath.Join("awsemf_default_appsignals.yaml"), map[string]any{ "local_mode": "true", "region": "us-east-1", "role_arn": "global_arn", @@ -1024,7 +1024,7 @@ func TestTranslateAppSignals(t *testing.T) { "app_signals": map[string]any{}, }, }}, - want: testutil.GetConfWithOverrides(t, filepath.Join("appsignals_config_generic.yaml"), map[string]any{ + want: testutil.GetConfWithOverrides(t, filepath.Join("awsemf_default_appsignals.yaml"), map[string]any{ "local_mode": "true", "region": "us-east-1", "role_arn": "global_arn", @@ -1040,7 +1040,7 @@ func TestTranslateAppSignals(t *testing.T) { "app_signals": map[string]any{}, }, }}, - want: testutil.GetConfWithOverrides(t, filepath.Join("appsignals_config_generic.yaml"), map[string]any{ + want: testutil.GetConfWithOverrides(t, filepath.Join("awsemf_default_appsignals.yaml"), map[string]any{ "local_mode": "false", "region": "us-east-1", "role_arn": "global_arn", From a44d8830d74fb19962c21d12c2f25a056bb38921 Mon Sep 17 00:00:00 2001 From: Hyunsoo Kim <884273+movence@users.noreply.github.com> Date: Wed, 4 Dec 2024 15:11:21 -0500 Subject: [PATCH 2/3] Drop beta var and replace kubectl rollout check with simple sleep (#1457) --- .github/workflows/integration-test.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml index 6ad082a8a3..18936b8eb5 100644 --- a/.github/workflows/integration-test.yml +++ b/.github/workflows/integration-test.yml @@ -1303,10 +1303,11 @@ jobs: echo "Cluster name is ${EKS_CLUSTER_NAME}" kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.17.0/deployments/static/nvidia-device-plugin.yml kubectl patch amazoncloudwatchagents -n amazon-cloudwatch cloudwatch-agent --type='json' -p='[{"op": "replace", "path": "/spec/image", "value": ${{ secrets.AWS_ECR_PRIVATE_REGISTRY }}/${{ env.ECR_INTEGRATION_TEST_REPO }}:${{ github.sha }}}]' - kubectl rollout status daemonset nvidia-device-plugin-daemonset -n kube-system --timeout 10s + # wait nvidia device plugin to be ready + sleep 10 kubectl apply -f ./gpuBurner.yaml else - terraform destroy -var="beta=${{ github.event.inputs.run_in_beta }}" -auto-approve && exit 1 + terraform destroy -auto-approve && exit 1 fi - name: Run Go tests with retry @@ -1345,4 +1346,4 @@ jobs: else cd terraform/eks/addon/gpu fi - terraform destroy -var="beta=${{ github.event.inputs.run_in_beta }}" -auto-approve + terraform destroy -auto-approve From a8ef6f778b075e3eaf51e033a15313edaca33d89 Mon Sep 17 00:00:00 2001 From: zhihonl <61301537+zhihonl@users.noreply.github.com> Date: Wed, 4 Dec 2024 15:39:07 -0500 Subject: [PATCH 3/3] [Bug Fix] Fix Excessive IMDS related error logging (#1440) --- extension/entitystore/ec2Info.go | 51 ++++++----------------- extension/entitystore/ec2Info_test.go | 33 +++++++++++++++ extension/entitystore/extension.go | 8 +++- extension/entitystore/extension_test.go | 52 ++++++++++++++++++++++++ extension/entitystore/retryer.go | 2 +- extension/entitystore/serviceprovider.go | 2 +- internal/retryer/imdsretryer.go | 1 - 7 files changed, 105 insertions(+), 44 deletions(-) diff --git a/extension/entitystore/ec2Info.go b/extension/entitystore/ec2Info.go index cfb2eccba8..e646fca78c 100644 --- a/extension/entitystore/ec2Info.go +++ b/extension/entitystore/ec2Info.go @@ -14,6 +14,7 @@ import ( "github.com/aws/amazon-cloudwatch-agent/internal/ec2metadataprovider" "github.com/aws/amazon-cloudwatch-agent/plugins/processors/ec2tagger" + "github.com/aws/amazon-cloudwatch-agent/translator/config" ) const ( @@ -32,7 +33,8 @@ type EC2Info struct { AutoScalingGroup string // region is used while making call to describeTags Ec2 API for AutoScalingGroup - Region string + Region string + kubernetesMode string metadataProvider ec2metadataprovider.MetadataProvider logger *zap.Logger @@ -48,8 +50,11 @@ func (ei *EC2Info) initEc2Info() { if err := ei.setInstanceIDAccountID(); err != nil { return } - if err := ei.setAutoScalingGroup(); err != nil { - return + // Instance metadata tags is not usable for EKS nodes + // https://github.com/kubernetes/cloud-provider-aws/issues/762 + if ei.kubernetesMode != config.ModeEKS { + limitedRetryer := NewRetryer(true, true, defaultJitterMin, defaultJitterMax, ec2tagger.BackoffSleepArray, maxRetry, ei.done, ei.logger) + limitedRetryer.refreshLoop(ei.retrieveAsgName) } ei.logger.Debug("Finished initializing EC2Info") } @@ -99,49 +104,16 @@ func (ei *EC2Info) setInstanceIDAccountID() error { } } -func (ei *EC2Info) setAutoScalingGroup() error { - retry := 0 - for { - var waitDuration time.Duration - if retry < len(ec2tagger.BackoffSleepArray) { - waitDuration = ec2tagger.BackoffSleepArray[retry] - } else { - waitDuration = ec2tagger.BackoffSleepArray[len(ec2tagger.BackoffSleepArray)-1] - } - - wait := time.NewTimer(waitDuration) - select { - case <-ei.done: - wait.Stop() - return errors.New("shutdown signal received") - case <-wait.C: - } - - if retry > 0 { - ei.logger.Debug("Initial retrieval of tags and volumes", zap.Int("retry", retry)) - } - - if err := ei.retrieveAsgName(); err != nil { - ei.logger.Debug("Unable to fetch instance tags with imds", zap.Int("retry", retry), zap.Error(err)) - } else { - ei.logger.Debug("Retrieval of auto-scaling group tags succeeded") - return nil - } - - retry++ - } - -} - func (ei *EC2Info) retrieveAsgName() error { tags, err := ei.metadataProvider.InstanceTags(context.Background()) if err != nil { - ei.logger.Debug("Failed to get tags through metadata provider", zap.Error(err)) + ei.logger.Debug("Failed to get AutoScalingGroup from instance tags. This is likely because instance tag is not enabled for IMDS but will not affect agent functionality.") return err } else if strings.Contains(tags, ec2tagger.Ec2InstanceTagKeyASG) { asg, err := ei.metadataProvider.InstanceTagValue(context.Background(), ec2tagger.Ec2InstanceTagKeyASG) if err != nil { ei.logger.Error("Failed to get AutoScalingGroup through metadata provider", zap.Error(err)) + return err } else { ei.logger.Debug("AutoScalingGroup retrieved through IMDS") ei.mutex.Lock() @@ -156,9 +128,10 @@ func (ei *EC2Info) retrieveAsgName() error { return nil } -func newEC2Info(metadataProvider ec2metadataprovider.MetadataProvider, done chan struct{}, region string, logger *zap.Logger) *EC2Info { +func newEC2Info(metadataProvider ec2metadataprovider.MetadataProvider, kubernetesMode string, done chan struct{}, region string, logger *zap.Logger) *EC2Info { return &EC2Info{ metadataProvider: metadataProvider, + kubernetesMode: kubernetesMode, done: done, Region: region, logger: logger, diff --git a/extension/entitystore/ec2Info_test.go b/extension/entitystore/ec2Info_test.go index 6602752c5a..9cc4efd896 100644 --- a/extension/entitystore/ec2Info_test.go +++ b/extension/entitystore/ec2Info_test.go @@ -15,6 +15,7 @@ import ( "go.uber.org/zap" "github.com/aws/amazon-cloudwatch-agent/internal/ec2metadataprovider" + "github.com/aws/amazon-cloudwatch-agent/translator/config" ) var mockedInstanceIdentityDoc = &ec2metadata.EC2InstanceIdentityDocument{ @@ -236,3 +237,35 @@ func TestNotInitIfMetadataProviderIsEmpty(t *testing.T) { }) } } + +func TestNoASGRetrievalInKubernetesMode(t *testing.T) { + type args struct { + metadataProvider ec2metadataprovider.MetadataProvider + kubernetesMode string + } + tests := []struct { + name string + args args + wantErr bool + want string + }{ + { + name: "EKSNoASGFromEC2Info", + args: args{ + metadataProvider: &mockMetadataProvider{InstanceIdentityDocument: mockedInstanceIdentityDoc, Tags: map[string]string{"aws:autoscaling:groupName": tagVal3}}, + kubernetesMode: config.ModeEKS, + }, + wantErr: false, + want: "", + }, + } + for _, tt := range tests { + logger, _ := zap.NewDevelopment() + t.Run(tt.name, func(t *testing.T) { + ei := &EC2Info{metadataProvider: tt.args.metadataProvider, kubernetesMode: tt.args.kubernetesMode, logger: logger} + go ei.initEc2Info() + time.Sleep(3 * time.Second) + assert.Equal(t, tt.want, ei.GetAutoScalingGroup()) + }) + } +} diff --git a/extension/entitystore/extension.go b/extension/entitystore/extension.go index a6af693cb3..a486134507 100644 --- a/extension/entitystore/extension.go +++ b/extension/entitystore/extension.go @@ -94,9 +94,13 @@ func (e *EntityStore) Start(ctx context.Context, host component.Host) error { e.serviceprovider = newServiceProvider(e.mode, e.config.Region, &e.ec2Info, e.metadataprovider, getEC2Provider, ec2CredentialConfig, e.done, e.logger) switch e.mode { case config.ModeEC2: - e.ec2Info = *newEC2Info(e.metadataprovider, e.done, e.config.Region, e.logger) + e.ec2Info = *newEC2Info(e.metadataprovider, e.kubernetesMode, e.done, e.config.Region, e.logger) go e.ec2Info.initEc2Info() - go e.serviceprovider.startServiceProvider() + // Instance metadata tags is not usable for EKS nodes + // https://github.com/kubernetes/cloud-provider-aws/issues/762 + if e.kubernetesMode == "" { + go e.serviceprovider.startServiceProvider() + } } if e.kubernetesMode != "" { e.eksInfo = newEKSInfo(e.logger) diff --git a/extension/entitystore/extension_test.go b/extension/entitystore/extension_test.go index 5662cf1d88..02cdff56d3 100644 --- a/extension/entitystore/extension_test.go +++ b/extension/entitystore/extension_test.go @@ -626,6 +626,58 @@ func TestEntityStore_LogMessageDoesNotIncludeResourceInfo(t *testing.T) { } } +func TestEntityStore_ServiceProviderInDifferentEnv(t *testing.T) { + type args struct { + mode string + kubernetesMode string + } + tests := []struct { + name string + args args + }{ + { + name: "EC2inEKS", + args: args{ + mode: config.ModeEC2, + kubernetesMode: config.ModeEKS, + }, + }, + { + name: "EC2Only", + args: args{ + mode: config.ModeEC2, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + + esConfig := &Config{ + Mode: tt.args.mode, + KubernetesMode: tt.args.kubernetesMode, + } + getMetaDataProvider = mockMetadataProviderFunc + e := EntityStore{ + logger: zap.NewNop(), + config: esConfig, + } + e.Start(context.TODO(), nil) + time.Sleep(3 * time.Second) + + name, source := e.serviceprovider.getServiceNameAndSource() + if tt.args.mode == config.ModeEC2 && tt.args.kubernetesMode != "" { + assert.Equal(t, name, ServiceNameUnknown) + assert.Equal(t, source, ServiceNameSourceUnknown) + } else if tt.args.mode == config.ModeEC2 && tt.args.kubernetesMode == "" { + assert.Equal(t, name, "TestRole") + assert.Equal(t, source, ServiceNameSourceClientIamRole) + } + + }) + } + +} + func assertIfNonEmpty(t *testing.T, message string, pattern string) { if pattern != "" { assert.NotContains(t, message, pattern) diff --git a/extension/entitystore/retryer.go b/extension/entitystore/retryer.go index cefa06d374..65829f8970 100644 --- a/extension/entitystore/retryer.go +++ b/extension/entitystore/retryer.go @@ -77,7 +77,7 @@ func (r *Retryer) refreshLoop(updateFunc func() error) int { if err != nil { retry++ - r.logger.Debug("there was an error when retrieving service attribute.", zap.Error(err)) + r.logger.Debug("there was an issue when retrieving entity attributes but will not affect agent functionality", zap.Error(err)) } else { retry = 1 } diff --git a/extension/entitystore/serviceprovider.go b/extension/entitystore/serviceprovider.go index c65a0daf62..9f36dd9005 100644 --- a/extension/entitystore/serviceprovider.go +++ b/extension/entitystore/serviceprovider.go @@ -240,7 +240,7 @@ func (s *serviceprovider) scrapeIAMRole() error { func (s *serviceprovider) scrapeImdsServiceName() error { tags, err := s.metadataProvider.InstanceTags(context.Background()) if err != nil { - s.logger.Debug("Failed to get tags through metadata provider", zap.Error(err)) + s.logger.Debug("Failed to get service name from instance tags. This is likely because instance tag is not enabled for IMDS but will not affect agent functionality.") return err } // This will check whether the tags contains SERVICE, APPLICATION, APP, in that order. diff --git a/internal/retryer/imdsretryer.go b/internal/retryer/imdsretryer.go index 29dec2976f..5a4322c479 100644 --- a/internal/retryer/imdsretryer.go +++ b/internal/retryer/imdsretryer.go @@ -43,7 +43,6 @@ func (r IMDSRetryer) ShouldRetry(req *request.Request) bool { if awsError, ok := req.Error.(awserr.Error); r.DefaultRetryer.ShouldRetry(req) || (ok && awsError != nil && awsError.Code() == "EC2MetadataError") { shouldRetry = true } - fmt.Printf("D! should retry %t for imds error : %v", shouldRetry, req.Error) return shouldRetry }