From cf0f7179b646e411abe4679c90bd0a2b3dafecee Mon Sep 17 00:00:00 2001
From: Harrryr
Date: Tue, 19 Dec 2023 00:51:27 -0800
Subject: [PATCH] Add E2E test to integration-test.yml

---
 .github/actions/patch-dependencies/action.yml |  97 +++++
 .github/workflows/appsignals-e2e-eks-test.yml | 338 ++++++++++++++++++
 .github/workflows/integration-test.yml        |  11 +
 3 files changed, 446 insertions(+)
 create mode 100644 .github/actions/patch-dependencies/action.yml
 create mode 100644 .github/workflows/appsignals-e2e-eks-test.yml

diff --git a/.github/actions/patch-dependencies/action.yml b/.github/actions/patch-dependencies/action.yml
new file mode 100644
index 0000000000..57d66eccef
--- /dev/null
+++ b/.github/actions/patch-dependencies/action.yml
@@ -0,0 +1,97 @@
+name: "Patch dependencies"
+description: |
+  Patches direct dependencies of this project leveraging maven local to publish the results.
+
+  This workflow supports patching opentelemetry-java and opentelemetry-java-instrumentation repositories by executing
+  the `patch.sh` script that will try to patch those repositories and after that will optionally test and then publish
+  the artifacts to maven local.
+  To add a patch you have to add a file in the `.github/patches/` directory with the name of the repository that must
+  be patched.
+  This action assumes that java was set correctly.
+inputs:
+  run_tests:
+    default: "false"
+    required: false
+    description: "If the workflow should run tests of the dependencies. Anything different than false will evaluate to true"
+
+runs:
+  using: "composite"
+  steps:
+    - name: check patches
+      run: |
+        if [[ -f .github/patches/opentelemetry-java.patch ]]; then
+          echo 'patch_otel_java=true' >> $GITHUB_ENV
+        fi
+        if [[ -f .github/patches/opentelemetry-java-instrumentation.patch ]]; then
+          echo 'patch_otel_java_instrumentation=true' >> $GITHUB_ENV
+        fi
+        if [[ -f .github/patches/opentelemetry-java-contrib.patch ]]; then
+          echo 'patch_otel_java_contrib=true' >> $GITHUB_ENV
+        fi
+      shell: bash
+
+    - name: Clone and patch repositories
+      run: .github/scripts/patch.sh
+      if: ${{ env.patch_otel_java == 'true' ||
+        env.patch_otel_java_instrumentation == 'true' ||
+        env.patch_otel_java_contrib == 'true' }}
+      shell: bash
+
+    - name: Build opentelemetry-java with tests
+      uses: gradle/gradle-build-action@v2
+      if: ${{ env.patch_otel_java == 'true' && inputs.run_tests != 'false' }}
+      with:
+        arguments: build publishToMavenLocal
+        build-root-directory: opentelemetry-java
+
+    - name: Build opentelemetry-java
+      uses: gradle/gradle-build-action@v2
+      if: ${{ env.patch_otel_java == 'true' && inputs.run_tests == 'false' }}
+      with:
+        arguments: publishToMavenLocal
+        build-root-directory: opentelemetry-java
+
+    - name: cleanup opentelemetry-java
+      run: rm -rf opentelemetry-java
+      if: ${{ env.patch_otel_java == 'true' }}
+      shell: bash
+
+    - name: Build opentelemetry-java-contrib with tests
+      uses: gradle/gradle-build-action@v2
+      if: ${{ env.patch_otel_java_contrib == 'true' && inputs.run_tests != 'false' }}
+      with:
+        arguments: build publishToMavenLocal
+        build-root-directory: opentelemetry-java-contrib
+
+    - name: Build opentelemetry-java-contrib
+      uses: gradle/gradle-build-action@v2
+      if: ${{ env.patch_otel_java_contrib == 'true' && inputs.run_tests == 'false' }}
+      with:
+        arguments: publishToMavenLocal
+        build-root-directory: opentelemetry-java-contrib
+
+    - name: cleanup opentelemetry-java-contrib
+      run: rm -rf opentelemetry-java-contrib
+      if: ${{ env.patch_otel_java_contrib == 'true' }}
+      shell: bash
+
+    - name: Build opentelemetry-java-instrumentation with tests
+      uses: gradle/gradle-build-action@v2
+      if: ${{ env.patch_otel_java_instrumentation == 'true' && inputs.run_tests != 'false' }}
+      with:
+        arguments: check -x spotlessCheck publishToMavenLocal
+        build-root-directory: opentelemetry-java-instrumentation
+        cache-read-only: false
+
+    - name: Build opentelemetry java instrumentation
+      uses: gradle/gradle-build-action@v2
+      if: ${{ env.patch_otel_java_instrumentation == 'true' && inputs.run_tests == 'false' }}
+      with:
+        arguments: publishToMavenLocal
+        build-root-directory: opentelemetry-java-instrumentation
+        cache-read-only: false
+
+    - name: cleanup opentelemetry-java-instrumentation
+      run: rm -rf opentelemetry-java-instrumentation
+      if: ${{ env.patch_otel_java_instrumentation == 'true' }}
+      shell: bash
diff --git a/.github/workflows/appsignals-e2e-eks-test.yml b/.github/workflows/appsignals-e2e-eks-test.yml
new file mode 100644
index 0000000000..44838a19fb
--- /dev/null
+++ b/.github/workflows/appsignals-e2e-eks-test.yml
@@ -0,0 +1,338 @@
+# This is a reusable workflow for running the E2E test for App Signals.
+# It is meant to be called from another workflow.
+# This E2E test is responsible for validating setting up a sample application on an EKS cluster and enabling
+# App Signals using the staging image of CloudWatch Agent. It validates the generated telemetry
+# including logs, metrics, and traces, then cleans up the cluster. The testing resources can be found in the
+# ADOT java instrumentation repo: https://github.com/aws-observability/aws-otel-java-instrumentation/tree/main/testing
+# Read more about reusable workflows: https://docs.github.com/en/actions/using-workflows/reusing-workflows#overview
+name: App Signals Enablement E2E Testing
+on:
+  workflow_call:
+    inputs:
+      # Ensure two tests do not run on the same cluster at the same time through GitHub Action concurrency
+      test-cluster-name:
+        required: true
+        type: string
+
+permissions:
+  id-token: write
+  contents: read
+
+env:
+  AWS_DEFAULT_REGION: us-east-1
+  APP_SIGNALS_E2E_TEST_ACCOUNT_ID: ${{ secrets.APP_SIGNALS_E2E_TEST_ACCOUNT_ID }}
+  SAMPLE_APP_NAMESPACE: sample-app-namespace
+  APP_SIGNALS_E2E_SAMPLE_APP_FRONTEND_SVC_IMG: ${{ secrets.APP_SIGNALS_E2E_SAMPLE_APP_FRONTEND_SVC_IMG }}
+  APP_SIGNALS_E2E_SAMPLE_APP_REMOTE_SVC_IMG: ${{ secrets.APP_SIGNALS_E2E_SAMPLE_APP_REMOTE_SVC_IMG }}
+  METRIC_NAMESPACE: AppSignals
+  LOG_GROUP_NAME: /aws/appsignals/eks
+  ECR_INTEGRATION_TEST_REPO: "cwagent-integration-test"
+
+jobs:
+  appsignals-e2e-test:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Get testing resources from ADOT
+        uses: actions/checkout@v4
+        with:
+          repository: aws-observability/aws-otel-java-instrumentation
+          ref: main
+
+      # With no `repository` input, actions/checkout fetches the repository this workflow run belongs to (not ADOT).
+      # Only its patch action is checked out; the next step copies it over the ADOT copy in the workspace.
+      - name: Download patch action script
+        uses: actions/checkout@v4
+        with:
+          path: patch-dependencies
+          sparse-checkout: |
+            .github/actions/patch-dependencies/action.yml
+
+      - name: Replace patch dependency action.yml
+        run: |
+          cp -f ./patch-dependencies/.github/actions/patch-dependencies/action.yml ./.github/actions/patch-dependencies/action.yml
+
+      - name: Download enablement script
+        uses: actions/checkout@v4
+        with:
+          repository: aws-observability/application-signals-demo
+          ref: main
+          path: enablement-script
+          sparse-checkout: |
+            scripts/eks/appsignals/enable-app-signals.sh
+            scripts/eks/appsignals/clean-app-signals.sh
+          sparse-checkout-cone-mode: false
+
+      - uses: actions/setup-java@v4
+        with:
+          java-version: '17'
+          distribution: temurin
+
+      - name: Generate testing id
+        run: echo TESTING_ID="${{ env.AWS_DEFAULT_REGION }}-${{ github.run_id }}-${{ github.run_number }}" >> $GITHUB_ENV
+
+      - name: Configure AWS Credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: ${{ secrets.APP_SIGNALS_E2E_IAM_ROLE }}
+          aws-region: ${{ env.AWS_DEFAULT_REGION }}
+
+      # local directory to store the kubernetes config
+      - name: Create kubeconfig directory
+        run: mkdir -p ${{ github.workspace }}/.kube
+
+      - name: Set KUBECONFIG environment variable
+        run: echo KUBECONFIG="${{ github.workspace }}/.kube/config" >> $GITHUB_ENV
+
+      - name: Set up kubeconfig
+        run: aws eks update-kubeconfig --name ${{ inputs.test-cluster-name }} --region ${{ env.AWS_DEFAULT_REGION }}
+
+      - name: Install eksctl
+        run: |
+          mkdir ${{ github.workspace }}/eksctl
+          curl -sLO "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_Linux_amd64.tar.gz"
+          tar -xzf eksctl_Linux_amd64.tar.gz -C ${{ github.workspace }}/eksctl && rm eksctl_Linux_amd64.tar.gz
+          echo "${{ github.workspace }}/eksctl" >> $GITHUB_PATH
+
+      - name: Create role for AWS access from the sample app
+        id: create_service_account
+        run: |
+          eksctl create iamserviceaccount \
+            --name service-account-${{ env.TESTING_ID }} \
+            --namespace ${{ env.SAMPLE_APP_NAMESPACE }} \
+            --cluster ${{ inputs.test-cluster-name }} \
+            --role-name eks-s3-access-${{ env.TESTING_ID }} \
+            --attach-policy-arn arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess \
+            --region ${{ env.AWS_DEFAULT_REGION }} \
+            --approve
+
+      - name: Set up terraform
+        uses: hashicorp/setup-terraform@v3
+        with:
+          terraform_wrapper: false
+
+      - name: Deploy sample app via terraform
+        working-directory: testing/terraform/eks
+        run: |
+          terraform init
+          terraform validate
+          terraform apply -auto-approve \
+            -var="test_id=${{ env.TESTING_ID }}" \
+            -var="aws_region=${{ env.AWS_DEFAULT_REGION }}" \
+            -var="kube_directory_path=${{ github.workspace }}/.kube" \
+            -var="eks_cluster_name=${{ inputs.test-cluster-name }}" \
+            -var="eks_cluster_context_name=$(kubectl config current-context)" \
+            -var="test_namespace=${{ env.SAMPLE_APP_NAMESPACE }}" \
+            -var="service_account_aws_access=service-account-${{ env.TESTING_ID }}" \
+            -var="sample_app_image=${{ env.APP_SIGNALS_E2E_SAMPLE_APP_FRONTEND_SVC_IMG }}" \
+            -var="sample_remote_app_image=${{ env.APP_SIGNALS_E2E_SAMPLE_APP_REMOTE_SVC_IMG }}"
+
+      # Enable App Signals on the test cluster
+      - name: Enable App Signals
+        working-directory: enablement-script/scripts/eks/appsignals
+        run: |
+          ./enable-app-signals.sh \
+            ${{ inputs.test-cluster-name }} \
+            ${{ env.AWS_DEFAULT_REGION }} \
+            ${{ env.SAMPLE_APP_NAMESPACE }}
+
+      - name: Save CloudWatch image to environment before patching
+        run: |
+          echo "OLD_CW_AGENT_IMAGE=$(kubectl get amazoncloudwatchagents -n amazon-cloudwatch -o json | jq -r '.items[0].spec.image')" >> $GITHUB_ENV
+
+      # The image reference must be a quoted JSON string, otherwise the JSON patch document is invalid
+      - name: Patch the CloudWatch Agent image and restart CloudWatch pods
+        run: |
+          kubectl patch amazoncloudwatchagents -n amazon-cloudwatch cloudwatch-agent --type='json' -p='[{"op": "replace", "path": "/spec/image", "value": "${{ secrets.AWS_ECR_PRIVATE_REGISTRY }}/${{ env.ECR_INTEGRATION_TEST_REPO }}:${{ github.sha }}"}]'
+          kubectl delete pods --all -n amazon-cloudwatch
+          kubectl wait --for=condition=Ready pod --all -n amazon-cloudwatch
+
+      # Application pods need to be restarted for the
+      # app signals instrumentation to take effect
+      - name: Restart the app pods
+        run: kubectl delete pods --all -n ${{ env.SAMPLE_APP_NAMESPACE }}
+
+      - name: Wait for sample app pods to come up
+        run: |
+          kubectl wait --for=condition=Ready pod --all -n ${{ env.SAMPLE_APP_NAMESPACE }}
+
+      - name: Get remote service deployment name and IP
+        run: |
+          echo "REMOTE_SERVICE_DEPLOYMENT_NAME=$(kubectl get deployments -n ${{ env.SAMPLE_APP_NAMESPACE }} --selector=app=remote-app -o jsonpath='{.items[0].metadata.name}')" >> $GITHUB_ENV
+          echo "REMOTE_SERVICE_POD_IP=$(kubectl get pods -n ${{ env.SAMPLE_APP_NAMESPACE }} --selector=app=remote-app -o jsonpath='{.items[0].status.podIP}')" >> $GITHUB_ENV
+
+      - name: Log pod ADOT image ID
+        run: |
+          kubectl get pods -n ${{ env.SAMPLE_APP_NAMESPACE }} --output json | \
+            jq '.items[0].status.initContainerStatuses[0].imageID'
+
+      - name: Log pod CWAgent image ID
+        run: |
+          kubectl get pods -n amazon-cloudwatch -l app.kubernetes.io/name=cloudwatch-agent -o json | \
+            jq '.items[0].status.containerStatuses[0].imageID'
+
+      - name: Log pod Fluent Bit image ID
+        run: |
+          kubectl get pods -n amazon-cloudwatch -l k8s-app=fluent-bit -o json | \
+            jq '.items[0].status.containerStatuses[0].imageID'
+
+      - name: Log pod CWAgent image ID and save image to the environment
+        run: |
+          echo "NEW_CW_AGENT_IMAGE=$(kubectl get amazoncloudwatchagents -n amazon-cloudwatch -o json | jq -r '.items[0].spec.image')" >> $GITHUB_ENV
+
+      # Fail the job if the image recorded after patching equals the one recorded before patching
+      - name: Check if CW Agent image has changed
+        run: |
+          if [ "$OLD_CW_AGENT_IMAGE" = "$NEW_CW_AGENT_IMAGE" ]; then
+            echo "CloudWatch Agent image did not change"
+            exit 1
+          fi
+
+      # cache local patch outputs
+      - name: Cache local Maven repository
+        id: cache-local-maven-repo
+        uses: actions/cache@v3
+        with:
+          path: |
+            ~/.m2/repository/io/opentelemetry/
+          key: ${{ runner.os }}-maven-local-${{ hashFiles('.github/patches/opentelemetry-java*.patch') }}
+
+      - name: Publish patched dependencies to maven local
+        uses: ./.github/actions/patch-dependencies
+        if: steps.cache-local-maven-repo.outputs.cache-hit != 'true'
+
+      - name: Get the sample app endpoint
+        run: |
+          echo "APP_ENDPOINT=$(terraform output sample_app_endpoint)" >> $GITHUB_ENV
+        working-directory: testing/terraform/eks
+
+      - name: Wait for app endpoint to come online
+        id: endpoint-check
+        run: |
+          attempt_counter=0
+          max_attempts=30
+          until $(curl --output /dev/null --silent --head --fail http://${{ env.APP_ENDPOINT }}); do
+            if [ ${attempt_counter} -eq ${max_attempts} ];then
+              echo "Max attempts reached"
+              exit 1
+            fi
+
+            printf '.'
+            attempt_counter=$(($attempt_counter+1))
+            sleep 10
+          done
+
+      # This step speeds up the validation by generating the telemetry data in advance
+      - name: Call all test APIs
+        continue-on-error: true
+        run: |
+          curl -S -s -o /dev/null http://${{ env.APP_ENDPOINT }}/outgoing-http-call/
+          curl -S -s -o /dev/null http://${{ env.APP_ENDPOINT }}/aws-sdk-call/
+          curl -S -s -o /dev/null http://${{ env.APP_ENDPOINT }}/remote-service?ip=${{ env.REMOTE_SERVICE_POD_IP }}/
+          curl -S -s -o /dev/null http://${{ env.APP_ENDPOINT }}/client-call/
+
+      # Validation for app signals telemetry data
+      - name: Call endpoint and validate generated EMF logs
+        id: log-validation
+        if: steps.endpoint-check.outcome == 'success' && !cancelled()
+        run: ./gradlew testing:validator:run --args='-c eks/log-validation.yml
+          --testing-id ${{ env.TESTING_ID }}
+          --endpoint http://${{ env.APP_ENDPOINT }}
+          --region ${{ env.AWS_DEFAULT_REGION }}
+          --account-id ${{ env.APP_SIGNALS_E2E_TEST_ACCOUNT_ID }}
+          --metric-namespace ${{ env.METRIC_NAMESPACE }}
+          --log-group ${{ env.LOG_GROUP_NAME }}
+          --app-namespace ${{ env.SAMPLE_APP_NAMESPACE }}
+          --platform-info ${{ inputs.test-cluster-name }}
+          --service-name sample-application-${{ env.TESTING_ID }}
+          --remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }}
+          --request-body ip=${{ env.REMOTE_SERVICE_POD_IP }}
+          --rollup'
+
+      - name: Call endpoints and validate generated metrics
+        id: metric-validation
+        if: (success() || steps.log-validation.outcome == 'failure') && !cancelled()
+        run: ./gradlew testing:validator:run --args='-c eks/metric-validation.yml
+          --testing-id ${{ env.TESTING_ID }}
+          --endpoint http://${{ env.APP_ENDPOINT }}
+          --region ${{ env.AWS_DEFAULT_REGION }}
+          --account-id ${{ env.APP_SIGNALS_E2E_TEST_ACCOUNT_ID }}
+          --metric-namespace ${{ env.METRIC_NAMESPACE }}
+          --log-group ${{ env.LOG_GROUP_NAME }}
+          --app-namespace ${{ env.SAMPLE_APP_NAMESPACE }}
+          --platform-info ${{ inputs.test-cluster-name }}
+          --service-name sample-application-${{ env.TESTING_ID }}
+          --remote-service-name sample-remote-application-${{ env.TESTING_ID }}
+          --remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }}
+          --request-body ip=${{ env.REMOTE_SERVICE_POD_IP }}
+          --rollup'
+
+      - name: Call endpoints and validate generated traces
+        id: trace-validation
+        if: (success() || steps.log-validation.outcome == 'failure' || steps.metric-validation.outcome == 'failure') && !cancelled()
+        run: ./gradlew testing:validator:run --args='-c eks/trace-validation.yml
+          --testing-id ${{ env.TESTING_ID }}
+          --endpoint http://${{ env.APP_ENDPOINT }}
+          --region ${{ env.AWS_DEFAULT_REGION }}
+          --account-id ${{ env.APP_SIGNALS_E2E_TEST_ACCOUNT_ID }}
+          --metric-namespace ${{ env.METRIC_NAMESPACE }}
+          --log-group ${{ env.LOG_GROUP_NAME }}
+          --app-namespace ${{ env.SAMPLE_APP_NAMESPACE }}
+          --platform-info ${{ inputs.test-cluster-name }}
+          --service-name sample-application-${{ env.TESTING_ID }}
+          --remote-service-deployment-name ${{ env.REMOTE_SERVICE_DEPLOYMENT_NAME }}
+          --request-body ip=${{ env.REMOTE_SERVICE_POD_IP }}
+          --rollup'
+
+      # Clean up Procedures
+
+      # Strip the log group deletion command from the cleanup script so running it below keeps the log group
+      - name: Remove log group deletion command
+        if: always()
+        working-directory: enablement-script/scripts/eks/appsignals
+        run: |
+          delete_log_group="aws logs delete-log-group --log-group-name '${{ env.LOG_GROUP_NAME }}' --region \$REGION"
+          sed -i "s#$delete_log_group##g" clean-app-signals.sh
+
+      - name: Clean Up App Signals
+        if: always()
+        continue-on-error: true
+        working-directory: enablement-script/scripts/eks/appsignals
+        run: |
+          ./clean-app-signals.sh \
+            ${{ inputs.test-cluster-name }} \
+            ${{ env.AWS_DEFAULT_REGION }} \
+            ${{ env.SAMPLE_APP_NAMESPACE }}
+
+      # This step also deletes lingering resources from previous test runs
+      - name: Delete all sample app resources
+        if: always()
+        continue-on-error: true
+        timeout-minutes: 10
+        run: kubectl delete namespace ${{ env.SAMPLE_APP_NAMESPACE }}
+
+      # Pass the same variables as the apply step so the configuration resolves to the same values on destroy
+      - name: Terraform destroy
+        if: always()
+        continue-on-error: true
+        working-directory: testing/terraform/eks
+        run: |
+          terraform destroy -auto-approve \
+            -var="test_id=${{ env.TESTING_ID }}" \
+            -var="aws_region=${{ env.AWS_DEFAULT_REGION }}" \
+            -var="kube_directory_path=${{ github.workspace }}/.kube" \
+            -var="eks_cluster_name=${{ inputs.test-cluster-name }}" \
+            -var="eks_cluster_context_name=$(kubectl config current-context)" \
+            -var="test_namespace=${{ env.SAMPLE_APP_NAMESPACE }}" \
+            -var="service_account_aws_access=service-account-${{ env.TESTING_ID }}" \
+            -var="sample_app_image=${{ env.APP_SIGNALS_E2E_SAMPLE_APP_FRONTEND_SVC_IMG }}" \
+            -var="sample_remote_app_image=${{ env.APP_SIGNALS_E2E_SAMPLE_APP_REMOTE_SVC_IMG }}"
+
+      - name: Remove aws access service account
+        if: always()
+        continue-on-error: true
+        run: |
+          eksctl delete iamserviceaccount \
+            --name service-account-${{ env.TESTING_ID }} \
+            --namespace ${{ env.SAMPLE_APP_NAMESPACE }} \
+            --cluster ${{ inputs.test-cluster-name }} \
+            --region ${{ env.AWS_DEFAULT_REGION }}
diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml
index ef27fe34d5..11b1dfb9fa 100644
--- a/.github/workflows/integration-test.yml
+++ b/.github/workflows/integration-test.yml
@@ -1170,3 +1170,14 @@ jobs:
           timeout_minutes: 8
           retry_wait_seconds: 5
           command: cd terraform/stress && terraform destroy --auto-approve
+
+  E2ETest:
+    name: "E2ETest"
+    needs: [ BuildAndUpload ]
+    uses: ./.github/workflows/appsignals-e2e-eks-test.yml
+    permissions:
+      id-token: write
+      contents: read
+    secrets: inherit
+    with:
+      test-cluster-name: 'e2e-cw-agent-test'