diff --git a/contrib/aws/Jenkinsfile b/contrib/aws/Jenkinsfile index cf7e5429339..b58c0815821 100644 --- a/contrib/aws/Jenkinsfile +++ b/contrib/aws/Jenkinsfile @@ -5,6 +5,144 @@ def buildNumber = env.BUILD_NUMBER as int if (buildNumber > 1) milestone(buildNumber - 1) milestone(buildNumber) + +import groovy.transform.Field +@Field boolean build_ok = true + + +def get_portafiducia_download_path() { + /* Stable Portafiducia tarball */ + def AWS_ACCOUNT_ID = sh ( + script: "aws sts get-caller-identity --query Account --output text | tr -dc 0-9", + returnStdout: true + ) + return "s3://libfabric-ci-$AWS_ACCOUNT_ID-us-west-2/portafiducia/portafiducia.tar.gz" +} + +def download_and_extract_portafiducia(outputDir) { + /* Download PortaFiducia tarball from S3 and extract to outputDir */ + def tempPath = "/tmp/portafiducia.tar.gz" + def downloadPath = this.get_portafiducia_download_path() + + def ret = sh ( + script: "mkdir -p ${outputDir} && aws s3 cp ${downloadPath} ${tempPath} && " + + "tar xf ${tempPath} -C ${outputDir}", + returnStatus: true, + ) + + if (ret != 0) { + unstable('Failed to download and extract PortaFiducia') + } +} + +def install_porta_fiducia() { + /* + * Install PortaFiducia in a (new) virtual environment. + */ + sh ''' + python3 -m venv venv + . venv/bin/activate + pip install --upgrade pip + pip install --upgrade awscli + pip install -e PortaFiducia + ''' +} + +def run_test_orchestrator_once(run_name, build_tag, os, instance_type, instance_count, region, test_config_file, addl_args) { + /* + * Run PortaFiducia/tests/test_orchestrator.py with given command line arguments + * param@ args: str, the command line arguments + */ + def cluster_name = get_cluster_name(build_tag, os, instance_type) + def args = "--config configs/${test_config_file} --os ${os} --instance-type ${instance_type} --instance-count ${instance_count} --region ${region} --cluster-name ${cluster_name} ${addl_args} --junit-xml outputs/${cluster_name}.xml" + def ret = sh ( + script: ". 
venv/bin/activate; cd PortaFiducia/tests && ./test_orchestrator.py ${args}", + returnStatus: true + ) + if (ret == 65) + unstable('Scripts exited with status 65') + else if (ret != 0) + build_ok = false + catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE') { + sh "exit ${ret}" + } +} + +def get_random_string(len) { + def s = sh ( + script: "cat /dev/urandom | LC_ALL=C tr -dc A-Za-z0-9 | head -c ${len}", + returnStdout: true + ) + return s +} + +def get_cluster_name(build_tag, os, instance_type) { + /* + * Compose the cluster name. Pcluster requires a cluster name under 60 characters. + * cluster name cannot have ".". + * Jenkins does not allow groovy to use the replace() method + * of string. Therefore we use the shell commands sed and tr to strip the "jenkins-" prefix, spaces, "." and newlines + */ + build_tag = sh( + script: "echo ${build_tag} | sed \"s/^jenkins-//g\" | sed \"s/ //g\"", + returnStdout: true + ) + + def cluster_name = sh( + script: "echo '${build_tag.take(28)}-${os.take(10)}-${instance_type.take(10)}-'${get_random_string(8)} | tr -d '.\\n'", + returnStdout: true + ) + + return cluster_name +} + +def get_single_node_windows_test_stage(stage_name) { + /* + * Get Windows Stage + */ + return { + stage("${stage_name}") { + def ret = sh ( + script: """ + . venv/bin/activate; + cd PortaFiducia/scripts; + export PULL_REQUEST_ID=${env.CHANGE_ID}; + env AWS_DEFAULT_REGION=us-west-2 ./test_orchestrator_windows.py --ci public --s3-bucket-name libfabric-ci-windows-prod-test-output --pull-request-id ${env.CHANGE_ID}; + """, + returnStatus: true + ) + if (ret == 65) + unstable('Scripts exited with status 65') + else if (ret != 0) + build_ok = false + catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE') { + sh "exit ${ret}" + } + } + } + +} + +def get_test_stage(stage_name, build_tag, os, instance_type, instance_count, region, test_config, addl_args) { + /* + * Generate a single test stage that runs test_orchestrator.py with the given parameters. 
+ * param@ stage_name: the name of the stage + * param@ build_tag: the BUILD_TAG env generated by Jenkins + * param@ os: the operating system for the test stage. + * param@ instance_type: the instance type for the test stage. + * param@ instance_count: number of instances to use + * param@ region: the (default) aws region where the tests are run. + * param@ test_config: the name of test config file in PortaFiducia/tests/configs/ + * param@ addl_args: additional arguments passed to test_orchestrator.py + * return@: the test stage. + */ + return { + stage("${stage_name}") { + this.run_test_orchestrator_once(stage_name, build_tag, os, instance_type, instance_count, region, test_config, addl_args) + } + } +} + pipeline { agent { ecs { @@ -35,16 +173,14 @@ pipeline { steps { script { sh 'printenv' - def common = load "contrib/aws/common.groovy" - common.download_and_extract_portafiducia('PortaFiducia') + download_and_extract_portafiducia('PortaFiducia') } } } stage("Install PortaFiducia") { steps { script { - def common = load "contrib/aws/common.groovy" - common.install_porta_fiducia() + install_porta_fiducia() } } @@ -52,41 +188,40 @@ pipeline { stage("Test EFA provider") { steps { script { - def common = load "contrib/aws/common.groovy" def stages = [:] // This needs the extra space at the end def addl_args_pr = "--test-libfabric-pr $env.CHANGE_ID " // Single Node Tests - EFA - stages["1_g4dn_alinux2-efa"] = common.get_test_stage("1_g4dn_alinux2_efa", env.BUILD_TAG, "alinux2", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr) - stages["1_g4dn_ubuntu2004-efa"] = common.get_test_stage("1_g4dn_ubuntu2004_efa", env.BUILD_TAG, "ubuntu2004", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr) - stages["1_g4dn_rhel8-efa"] = common.get_test_stage("1_g4dn_rhel8_efa", env.BUILD_TAG, "rhel8", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr) - stages["1_g4dn_centos7-efa"] = 
common.get_test_stage("1_g4dn_centos7_efa", env.BUILD_TAG, "centos7", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr) + stages["1_g4dn_alinux2-efa"] = get_test_stage("1_g4dn_alinux2_efa", env.BUILD_TAG, "alinux2", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr) + stages["1_g4dn_ubuntu2004-efa"] = get_test_stage("1_g4dn_ubuntu2004_efa", env.BUILD_TAG, "ubuntu2004", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr) + stages["1_g4dn_rhel8-efa"] = get_test_stage("1_g4dn_rhel8_efa", env.BUILD_TAG, "rhel8", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr) + stages["1_g4dn_centos7-efa"] = get_test_stage("1_g4dn_centos7_efa", env.BUILD_TAG, "centos7", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr) // Single Node Tests - SHM - stages["1_g4dn_alinux2_shm"] = common.get_test_stage("1_g4dn_alinux2_shm", env.BUILD_TAG, "alinux2", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider shm") - stages["1_g4dn_ubuntu2004_shm"] = common.get_test_stage("1_g4dn_ubuntu2004_shm", env.BUILD_TAG, "ubuntu2004", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider shm") - stages["1_g4dn_rhel8_shm"] = common.get_test_stage("1_g4dn_rhel8_shm", env.BUILD_TAG, "rhel8", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider shm") - stages["1_g4dn_centos7_shm"] = common.get_test_stage("1_g4dn_centos7_shm", env.BUILD_TAG, "centos7", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider shm") - stages["1_g4dn_ubuntu2004_shm_disable-cma"] = common.get_test_stage("1_g4dn_ubuntu2004_shm_disable-cma", env.BUILD_TAG, "ubuntu2004", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider shm --enable-cma false") + stages["1_g4dn_alinux2_shm"] = 
get_test_stage("1_g4dn_alinux2_shm", env.BUILD_TAG, "alinux2", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider shm") + stages["1_g4dn_ubuntu2004_shm"] = get_test_stage("1_g4dn_ubuntu2004_shm", env.BUILD_TAG, "ubuntu2004", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider shm") + stages["1_g4dn_rhel8_shm"] = get_test_stage("1_g4dn_rhel8_shm", env.BUILD_TAG, "rhel8", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider shm") + stages["1_g4dn_centos7_shm"] = get_test_stage("1_g4dn_centos7_shm", env.BUILD_TAG, "centos7", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider shm") + stages["1_g4dn_ubuntu2004_shm_disable-cma"] = get_test_stage("1_g4dn_ubuntu2004_shm_disable-cma", env.BUILD_TAG, "ubuntu2004", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider shm --enable-cma false") // Single Node Windows Test - stages["EFA_Windows_Test"] = common.get_single_node_windows_test_stage("EFA_Windows_Test") + stages["EFA_Windows_Test"] = get_single_node_windows_test_stage("EFA_Windows_Test") // Multi Node Tests - EFA - stages["2_hpc6a_alinux2_efa"] = common.get_test_stage("2_hpc6a_alinux2_efa", env.BUILD_TAG, "alinux2", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr) - stages["2_hpc6a_ubuntu2004_efa"] = common.get_test_stage("2_hpc6a_ubuntu2004_efa", env.BUILD_TAG, "ubuntu2004", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr) - stages["2_hpc6a_rhel8_efa"] = common.get_test_stage("2_hpc6a_rhel8_efa", env.BUILD_TAG, "rhel8", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr) + stages["2_hpc6a_alinux2_efa"] = get_test_stage("2_hpc6a_alinux2_efa", env.BUILD_TAG, "alinux2", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr) + 
stages["2_hpc6a_ubuntu2004_efa"] = get_test_stage("2_hpc6a_ubuntu2004_efa", env.BUILD_TAG, "ubuntu2004", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr) + stages["2_hpc6a_rhel8_efa"] = get_test_stage("2_hpc6a_rhel8_efa", env.BUILD_TAG, "rhel8", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr) // Multi Node Tests - TCP - stages["2_hpc6a_alinux2_tcp"] = common.get_test_stage("2_hpc6a_alinux2_tcp", env.BUILD_TAG, "alinux2", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider tcp") - stages["2_hpc6a_ubuntu2004_tcp"] = common.get_test_stage("2_hpc6a_ubuntu2004_tcp", env.BUILD_TAG, "ubuntu2004", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider tcp") - stages["2_hpc6a_rhel8_tcp"] = common.get_test_stage("2_hpc6a_rhel8_tcp", env.BUILD_TAG, "rhel8", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider tcp") + stages["2_hpc6a_alinux2_tcp"] = get_test_stage("2_hpc6a_alinux2_tcp", env.BUILD_TAG, "alinux2", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider tcp") + stages["2_hpc6a_ubuntu2004_tcp"] = get_test_stage("2_hpc6a_ubuntu2004_tcp", env.BUILD_TAG, "ubuntu2004", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider tcp") + stages["2_hpc6a_rhel8_tcp"] = get_test_stage("2_hpc6a_rhel8_tcp", env.BUILD_TAG, "rhel8", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider tcp") // Multi Node Tests - SOCKETS - stages["2_hpc6a_alinux2_sockets"] = common.get_test_stage("2_hpc6a_alinux2_sockets", env.BUILD_TAG, "alinux2", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider sockets") - stages["2_hpc6a_ubuntu2004_sockets"] = common.get_test_stage("2_hpc6a_ubuntu2004_sockets", 
env.BUILD_TAG, "ubuntu2004", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider sockets") - stages["2_hpc6a_rhel8_sockets"] = common.get_test_stage("2_hpc6a_rhel8_sockets", env.BUILD_TAG, "rhel8", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider sockets") + stages["2_hpc6a_alinux2_sockets"] = get_test_stage("2_hpc6a_alinux2_sockets", env.BUILD_TAG, "alinux2", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider sockets") + stages["2_hpc6a_ubuntu2004_sockets"] = get_test_stage("2_hpc6a_ubuntu2004_sockets", env.BUILD_TAG, "ubuntu2004", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider sockets") + stages["2_hpc6a_rhel8_sockets"] = get_test_stage("2_hpc6a_rhel8_sockets", env.BUILD_TAG, "rhel8", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider sockets") parallel stages } @@ -95,8 +230,7 @@ pipeline { stage('check build_ok') { steps { script { - def common = load "contrib/aws/common.groovy" - if (common.build_ok) { + if (build_ok) { currentBuild.result = "SUCCESS" } else { diff --git a/contrib/aws/common.groovy b/contrib/aws/common.groovy deleted file mode 100644 index d9c6db36e4a..00000000000 --- a/contrib/aws/common.groovy +++ /dev/null @@ -1,142 +0,0 @@ -/* SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0-only */ -/* SPDX-FileCopyrightText: Copyright Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ -/* This file contains variables and functions that can be shared across different jobs */ -import groovy.transform.Field -@Field boolean build_ok = true - -def get_portafiducia_download_path() { - /* Stable Portafiducia tarball */ - def AWS_ACCOUNT_ID = sh ( - script: "aws sts get-caller-identity --query Account --output text | tr -dc 0-9", - returnStdout: true - ) - return "s3://libfabric-ci-$AWS_ACCOUNT_ID-us-west-2/portafiducia/portafiducia.tar.gz" -} - -def download_and_extract_portafiducia(outputDir) { - /* Download PortaFiducia tarball from S3 and extract to outputDir */ - def tempPath = "/tmp/portafiducia.tar.gz" - def downloadPath = this.get_portafiducia_download_path() - - def ret = sh ( - script: "mkdir -p ${outputDir} && aws s3 cp ${downloadPath} ${tempPath} && " + - "tar xf ${tempPath} -C ${outputDir}", - returnStatus: true, - ) - - if (ret != 0) { - unstable('Failed to download and extract PortaFiducia') - } -} - -def install_porta_fiducia() { - /* - * Install PortaFiducia in a (new) virtual environment. - */ - sh ''' - python3 -m venv venv - . venv/bin/activate - pip install --upgrade pip - pip install --upgrade awscli - pip install -e PortaFiducia - ''' -} - -def run_test_orchestrator_once(run_name, build_tag, os, instance_type, instance_count, region, test_config_file, addl_args) { - /* - * Run PortaFiducia/tests/test_orchestrator.py with given command line arguments - * param@ args: str, the command line arguments - */ - def cluster_name = get_cluster_name(build_tag, os, instance_type) - def args = "--config configs/${test_config_file} --os ${os} --instance-type ${instance_type} --instance-count ${instance_count} --region ${region} --cluster-name ${cluster_name} ${addl_args} --junit-xml outputs/${cluster_name}.xml" - def ret = sh ( - script: ". 
venv/bin/activate; cd PortaFiducia/tests && ./test_orchestrator.py ${args}", - returnStatus: true - ) - if (ret == 65) - unstable('Scripts exited with status 65') - else if (ret != 0) - build_ok = false - catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE') { - sh "exit ${ret}" - } -} - -def get_random_string(len) { - def s = sh ( - script: "cat /dev/urandom | LC_ALL=C tr -dc A-Za-z0-9 | head -c ${len}", - returnStdout: true - ) - return s -} - -def get_cluster_name(build_tag, os, instance_type) { - /* - * Compose the cluster name. Pcluster requires a cluster name under 60 characters. - * cluster name cannot have ".". - * Jenkins does not allow groovy to use the replace() method - * of string. Therefore we used shell command sed to replace "." with "" - */ - build_tag = sh( - script: "echo ${build_tag} | sed \"s/^jenkins-//g\" | sed \"s/ //g\"", - returnStdout: true - ) - - def cluster_name = sh( - script: "echo '${build_tag.take(28)}-${os.take(10)}-${instance_type.take(10)}-'${get_random_string(8)} | tr -d '.\\n'", - returnStdout: true - ) - - return cluster_name -} - -def get_single_node_windows_test_stage(stage_name) { - /* - * Get Windows Stage - */ - return { - stage("${stage_name}") { - def ret = sh ( - script: """ - . venv/bin/activate; - cd PortaFiducia/scripts; - export PULL_REQUEST_ID=${env.CHANGE_ID}; - env AWS_DEFAULT_REGION=us-west-2 ./test_orchestrator_windows.py --ci public --s3-bucket-name libfabric-ci-windows-prod-test-output --pull-request-id ${env.CHANGE_ID}; - """, - returnStatus: true - ) - if (ret == 65) - unstable('Scripts exited with status 65') - else if (ret != 0) - build_ok = false - catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE') { - sh "exit ${ret}" - } - } - } - -} - -def get_test_stage(stage_name, build_tag, os, instance_type, instance_count, region, test_config, addl_args) { - /* - * Generate a single test stage that run test_orchestrator.py with the given parameters. 
- * param@ stage_name: the name of the stage - * param@ build_tag: the BUILD_TAG env generated by Jenkins - * param@ os: the operating system for the test stage. - * param@ instance_type: the instance type for the test stage. - * param@ instance_count: number of intances to use - * param@ region: the (default) aws region where the tests are run. - * param@ test_config: the name of test config file in PortaFiducia/tests/configs/ - * param@ addl_args: additional arguments passed to test_orchestrator.py - * return@: the test stage. - */ - return { - stage("${stage_name}") { - this.run_test_orchestrator_once(stage_name, build_tag, os, instance_type, instance_count, region, test_config, addl_args) - } - } -} - - - -return this