From 0b3592c3a37150355d7f26f41d3ee2658b802bf1 Mon Sep 17 00:00:00 2001
From: Xinran Zhang
Date: Thu, 22 Aug 2024 16:40:01 -0700
Subject: [PATCH] fix asg and remove comments

---
Notes:
- ASG workflow: replaces the execute_and_retry based deploy step with an inline
  retry loop and builds the GET_ADOT_DISTRO_COMMAND / GET_CW_AGENT_RPM_COMMAND
  values per calling repository. Terraform init and deploy both run in
  terraform/dotnet/ec2/asg, the same directory the following step reads from.
- Default workflow: only deletes commented-out steps.
- NOTE(review): the ADOT step still compares the repository name with
  "aws-otel-python-instrumentation"; confirm whether the .NET instrumentation
  repository was intended.
- NOTE(review): added lines were corrected during review, so the post-image
  blob id on the first "index" line is informational only.

 .github/workflows/dotnet-ec2-asg-e2e-test.yml | 88 ++++++++++++-------
 .../workflows/dotnet-ec2-default-e2e-test.yml | 25 ------
 2 files changed, 58 insertions(+), 55 deletions(-)

diff --git a/.github/workflows/dotnet-ec2-asg-e2e-test.yml b/.github/workflows/dotnet-ec2-asg-e2e-test.yml
index 3bd3073c0..4b5a60190 100644
--- a/.github/workflows/dotnet-ec2-asg-e2e-test.yml
+++ b/.github/workflows/dotnet-ec2-asg-e2e-test.yml
@@ -75,24 +75,23 @@ jobs:
           role-to-assume: arn:aws:iam::${{ env.ACCOUNT_ID }}:role/${{ env.E2E_TEST_ROLE_NAME }}
           aws-region: ${{ env.E2E_TEST_AWS_REGION }}
 
-      - uses: actions/download-artifact@v3
-        if: inputs.caller-workflow-name == 'main-build'
-        with:
-          name: ${{ env.ADOT_DISTRO_NAME }}
-
-      - name: Upload main-build distro file to s3
-        if: inputs.caller-workflow-name == 'main-build'
-        run: aws s3 cp ${{ env.ADOT_DISTRO_NAME }} s3://adot-main-build-staging-jar/${{ env.ADOT_DISTRO_NAME }}
+      - name: Set Get ADOT Distro command environment variable
+        run: |
+          if [ "${{ github.event.repository.name }}" = "aws-otel-python-instrumentation" ]; then
+            # Reusing the adot-main-build-staging-jar bucket to store the dotnet distro file
+            echo GET_ADOT_DISTRO_COMMAND="aws s3 cp s3://adot-main-build-staging-jar/${{ env.ADOT_DISTRO_NAME }} ./${{ env.ADOT_DISTRO_NAME }} && unzip -d dotnet-distro ${{ env.ADOT_DISTRO_NAME }}" >> $GITHUB_ENV
+          else
+            echo GET_ADOT_DISTRO_COMMAND="wget -O aws-distro-opentelemetry-dotnet-instrumentation-linux-glibc-x64.zip https://github.com/aws-observability/aws-otel-dotnet-instrumentation/releases/download/v1.2.0/aws-distro-opentelemetry-dotnet-instrumentation-linux-glibc-x64.zip && unzip -d dotnet-distro aws-distro-opentelemetry-dotnet-instrumentation-linux-glibc-x64.zip" >> $GITHUB_ENV
+          fi
 
-      - name: Set Get ADOT Distro command environment variable
-        working-directory: terraform/dotnet/ec2/asg
-        run: echo GET_ADOT_DISTRO_COMMAND="aws s3 cp s3://adot-main-build-staging-jar/${{ env.ADOT_DISTRO_NAME }} ./${{ env.ADOT_DISTRO_NAME }} && unzip -d dotnet-distro ${{ env.ADOT_DISTRO_NAME }}" >> $GITHUB_ENV
-#          if [ "${{ inputs.caller-workflow-name }}" = "main-build" ]; then
-#            # Reusing the adot-main-build-staging-jar bucket to store the python wheel file
-#            echo GET_ADOT_WHEEL_COMMAND="aws s3 cp s3://adot-main-build-staging-jar/${{ env.ADOT_DISTRO_NAME }} ./${{ env.ADOT_DISTRO_NAME }} && python3.9 -m pip install ${{ env.ADOT_DISTRO_NAME }}" >> $GITHUB_ENV
-#          else
-#            echo GET_ADOT_WHEEL_COMMAND="python3.9 -m pip install ${{ env.ADOT_DISTRO_NAME }}" >> $GITHUB_ENV
-#          fi
+      - name: Set Get CW Agent command environment variable
+        run: |
+          if [ "${{ github.event.repository.name }}" = "amazon-cloudwatch-agent" ]; then
+            # Fetch the agent RPM stored under this commit's SHA in the integration-test S3 bucket
+            echo GET_CW_AGENT_RPM_COMMAND="aws s3 cp s3://${{ secrets.S3_INTEGRATION_BUCKET }}/integration-test/binary/${{ github.sha }}/amazon_linux/amd64/latest/amazon-cloudwatch-agent.rpm ./cw-agent.rpm" >> $GITHUB_ENV
+          else
+            echo GET_CW_AGENT_RPM_COMMAND="wget -O cw-agent.rpm https://amazoncloudwatch-agent-${{ env.E2E_TEST_AWS_REGION }}.s3.${{ env.E2E_TEST_AWS_REGION }}.amazonaws.com/amazon_linux/amd64/latest/amazon-cloudwatch-agent.rpm" >> $GITHUB_ENV
+          fi
 
       - name: Set up terraform
         uses: ./.github/workflows/actions/execute_and_retry
@@ -105,24 +104,53 @@ jobs:
       - name: Initiate Terraform
         uses: ./.github/workflows/actions/execute_and_retry
         with:
-          command: "terraform init && terraform validate"
+          command: "cd ${{ env.TEST_RESOURCES_FOLDER }}/terraform/dotnet/ec2/asg && terraform init && terraform validate"
           cleanup: "rm -rf .terraform && rm -rf .terraform.lock.hcl"
           max_retry: 6
           sleep_time: 60
-          working-directory: ./terraform/dotnet/ec2/asg
 
       - name: Deploy sample app via terraform and wait for endpoint to come online
-        uses: ./.github/workflows/actions/execute_and_retry
-        with:
-          command: 'terraform apply -auto-approve
-            -var="aws_region=${{ env.E2E_TEST_AWS_REGION }}"
-            -var="test_id=${{ env.TESTING_ID }}"
-            -var="sample_app_zip=${{ env.SAMPLE_APP_ZIP }}"
-            -var="get_cw_agent_rpm_command=${{ env.GET_CW_AGENT_RPM_COMMAND }}"
-            -var="get_adot_wheel_command=${{ env.GET_ADOT_DISTRO_COMMAND }}"'
-          cleanup: 'terraform destroy -auto-approve -var="test_id=${{ env.TESTING_ID }}'
-          max_retry: 2
-          working-directory: ./terraform/dotnet/ec2/asg
+        working-directory: terraform/dotnet/ec2/asg
+        run: |
+          # Attempt to deploy the sample app on an EC2 instance and wait for its endpoint to come online.
+          # There may be occasional failures due to transient issues, so try up to 2 times.
+          # deployment_failed of 0 indicates that both the terraform deployment and the endpoint are running, while
+          # any non-zero value (the exit status of terraform apply) indicates that it failed at some point
+          retry_counter=0
+          max_retry=2
+          while [ $retry_counter -lt $max_retry ]; do
+            echo "Attempt $retry_counter"
+            deployment_failed=0
+            terraform apply -auto-approve \
+              -var="aws_region=${{ env.E2E_TEST_AWS_REGION }}" \
+              -var="test_id=${{ env.TESTING_ID }}" \
+              -var="sample_app_zip=${{ env.SAMPLE_APP_ZIP }}" \
+              -var="get_cw_agent_rpm_command=${{ env.GET_CW_AGENT_RPM_COMMAND }}" \
+              -var="get_adot_distro_command=${{ env.GET_ADOT_DISTRO_COMMAND }}" \
+            || deployment_failed=$?
+
+            if [ $deployment_failed -ne 0 ]; then
+              echo "Terraform deployment was unsuccessful. Will attempt to retry deployment."
+            fi
+
+            # If deployment_failed is non-zero then the terraform deployment or the endpoint connection failed, so
+            # first destroy the resources created from terraform and try again.
+            if [ $deployment_failed -ne 0 ]; then
+              echo "Destroying terraform"
+              terraform destroy -auto-approve \
+                -var="test_id=${{ env.TESTING_ID }}"
+
+              retry_counter=$(($retry_counter+1))
+            else
+              # If deployment succeeded, then exit the loop
+              break
+            fi
+
+            if [ $retry_counter -eq $max_retry ]; then
+              echo "Max retry reached, failed to deploy terraform and connect to the endpoint. Exiting code"
+              exit 1
+            fi
+          done
 
       - name: Get the sample app and EC2 instance information
         working-directory: terraform/dotnet/ec2/asg
diff --git a/.github/workflows/dotnet-ec2-default-e2e-test.yml b/.github/workflows/dotnet-ec2-default-e2e-test.yml
index 4432130b0..e03bd2cad 100644
--- a/.github/workflows/dotnet-ec2-default-e2e-test.yml
+++ b/.github/workflows/dotnet-ec2-default-e2e-test.yml
@@ -92,20 +92,6 @@ jobs:
             echo GET_CW_AGENT_RPM_COMMAND="wget -O cw-agent.rpm https://amazoncloudwatch-agent-${{ env.E2E_TEST_AWS_REGION }}.s3.${{ env.E2E_TEST_AWS_REGION }}.amazonaws.com/amazon_linux/amd64/latest/amazon-cloudwatch-agent.rpm" >> $GITHUB_ENV
           fi
 
-#      - name: Upload main-build adot distro to s3
-#        if: inputs.caller-workflow-name == 'main-build'
-#        run: aws s3 cp ${{ env.ADOT_WHEEL_NAME }} s3://adot-main-build-staging-jar/${{ env.ADOT_WHEEL_NAME }}
-
-#      - name: Set Get ADOT Distro command environment variable
-#        working-directory: terraform/dotnet/ec2/default
-#        run: echo GET_ADOT_DISTRO_COMMAND="aws s3 cp s3://adot-main-build-staging-jar/${{ env.ADOT_WHEEL_NAME }} ./${{ env.ADOT_WHEEL_NAME }} && unzip -d dotnet-distro ${{ env.ADOT_WHEEL_NAME }}" >> $GITHUB_ENV
-#        #          if [ "${{ inputs.caller-workflow-name }}" = "main-build" ]; then
-##            # Reusing the adot-main-build-staging-jar bucket to store the dotnet wheel file
-##            echo GET_ADOT_DISTRO_COMMAND="aws s3 cp s3://adot-main-build-staging-jar/${{ env.ADOT_WHEEL_NAME }} ./${{ env.ADOT_WHEEL_NAME }} && unzip ${{ env.ADOT_WHEEL_NAME }}" >> $GITHUB_ENV
-##          else
-##            echo GET_ADOT_DISTRO_COMMAND="dotnet3.9 -m pip install ${{ env.ADOT_WHEEL_NAME }}" >> $GITHUB_ENV
-##          fi
-
       - name: Set up terraform
         uses: ./.github/workflows/actions/execute_and_retry
         with:
@@ -164,17 +150,6 @@ jobs:
               exit 1
             fi
           done
-#        uses: ./.github/workflows/actions/execute_and_retry
-#        with:
-#          command: 'terraform apply -auto-approve
-#            -var="aws_region=${{ env.E2E_TEST_AWS_REGION }}"
-#            -var="test_id=${{ env.TESTING_ID }}"
-#            -var="sample_app_zip=${{ env.SAMPLE_APP_ZIP }}"
-#            -var="get_cw_agent_rpm_command=${{ env.GET_CW_AGENT_RPM_COMMAND }}"
-#            -var="get_adot_distro_command=${{ env.GET_ADOT_DISTRO_COMMAND }}"'
-#          cleanup: 'terraform destroy -auto-approve -var="test_id=${{ env.TESTING_ID }}'
-#          max_retry: 2
-#          working-directory: ./terraform/dotnet/ec2/default
 
       - name: Get the ec2 instance ami id
         run: |