Skip to content

Commit

Permalink
contrib/aws: Merge common.groovy with Jenkinsfile
Browse files Browse the repository at this point in the history
Signed-off-by: Seth Zegelstein <szegel@amazon.com>
(cherry picked from commit d2f4852)
  • Loading branch information
a-szegel authored and shijin-aws committed Jul 30, 2024
1 parent 08ef670 commit 656c380
Show file tree
Hide file tree
Showing 2 changed files with 160 additions and 168 deletions.
186 changes: 160 additions & 26 deletions contrib/aws/Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,144 @@ def buildNumber = env.BUILD_NUMBER as int
if (buildNumber > 1) milestone(buildNumber - 1)
milestone(buildNumber)


import groovy.transform.Field
// Script-wide pass/fail flag. The test stages set it to false when a test script
// exits with a status other than 0 or 65; the 'check build_ok' stage reads it.
@Field boolean build_ok = true


def get_portafiducia_download_path() {
    /*
     * Build the S3 URI of the stable PortaFiducia tarball.
     * The bucket name embeds the AWS account id of the current caller, looked up
     * with `aws sts get-caller-identity`. `tr -dc 0-9` keeps only the digits, so
     * the captured output has no trailing newline.
     * return@: the s3:// URI of portafiducia.tar.gz
     */
    def account_id = sh(
        returnStdout: true,
        script: "aws sts get-caller-identity --query Account --output text | tr -dc 0-9"
    )
    def bucket = "libfabric-ci-${account_id}-us-west-2"
    return "s3://${bucket}/portafiducia/portafiducia.tar.gz"
}

def download_and_extract_portafiducia(outputDir) {
    /*
     * Fetch the PortaFiducia tarball from S3 and unpack it.
     * param@ outputDir: directory that is created if missing and extracted into
     * A failing download or extraction is reported through the `unstable` step
     * rather than by throwing.
     */
    def tarball = "/tmp/portafiducia.tar.gz"
    def s3_uri = get_portafiducia_download_path()
    def fetch_cmd = "mkdir -p ${outputDir} && aws s3 cp ${s3_uri} ${tarball} && tar xf ${tarball} -C ${outputDir}"

    def status = sh(script: fetch_cmd, returnStatus: true)
    if (status == 0) {
        return
    }
    unstable('Failed to download and extract PortaFiducia')
}

def install_porta_fiducia() {
    /*
     * Create a Python virtual environment in ./venv, upgrade pip and awscli
     * inside it, then install the PortaFiducia checkout in editable mode.
     */
    def setup_script = '''
python3 -m venv venv
. venv/bin/activate
pip install --upgrade pip
pip install --upgrade awscli
pip install -e PortaFiducia
'''
    sh(script: setup_script)
}

def run_test_orchestrator_once(run_name, build_tag, os, instance_type, instance_count, region, test_config_file, addl_args) {
    /*
     * Run PortaFiducia/tests/test_orchestrator.py once inside the venv.
     * param@ run_name: accepted for the caller's convenience; not used in this function
     * param@ build_tag: used, with os and instance_type, to derive the cluster name
     * param@ os: value passed as --os
     * param@ instance_type: value passed as --instance-type
     * param@ instance_count: value passed as --instance-count
     * param@ region: value passed as --region
     * param@ test_config_file: file name under configs/, passed as --config
     * param@ addl_args: extra command line text inserted before --junit-xml
     *
     * Exit status 65 marks the run unstable; any other non-zero status clears
     * build_ok. In every case the status is then replayed inside catchError so
     * that a non-zero status flags the stage as FAILURE without changing the
     * overall build result.
     */
    def cluster_name = get_cluster_name(build_tag, os, instance_type)
    def junit_report = "outputs/${cluster_name}.xml"
    def orchestrator_args = "--config configs/${test_config_file} --os ${os} " +
        "--instance-type ${instance_type} --instance-count ${instance_count} " +
        "--region ${region} --cluster-name ${cluster_name} ${addl_args} " +
        "--junit-xml ${junit_report}"

    def exit_code = sh(
        returnStatus: true,
        script: ". venv/bin/activate; cd PortaFiducia/tests && ./test_orchestrator.py ${orchestrator_args}"
    )

    if (exit_code == 65) {
        unstable('Scripts exited with status 65')
    } else if (exit_code != 0) {
        build_ok = false
    }

    // Runs unconditionally: a zero status is a no-op, anything else fails the stage.
    catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE') {
        sh "exit ${exit_code}"
    }
}

def get_random_string(len) {
    /*
     * Produce a random alphanumeric string by filtering /dev/urandom in a shell.
     * param@ len: number of characters to keep
     * return@: the captured shell output (head -c emits no trailing newline)
     */
    return sh(
        returnStdout: true,
        script: "cat /dev/urandom | LC_ALL=C tr -dc A-Za-z0-9 | head -c ${len}"
    )
}

def get_cluster_name(build_tag, os, instance_type) {
    /*
     * Compose the cluster name as <tag>-<os>-<instance type>-<8 random chars>.
     * Per the original note, Pcluster requires a name under 60 characters and
     * without "."; the pieces are capped at 28, 10 and 10 characters.
     * The original note also states that Jenkins does not allow Groovy's string
     * replace(), so the text is edited in the shell: sed strips the leading
     * "jenkins-" and any spaces from the tag, and tr deletes "." and newlines
     * from the assembled name.
     * param@ build_tag: the BUILD_TAG value to shorten
     * param@ os: operating system label
     * param@ instance_type: instance type label
     * return@: the cluster name, without a trailing newline
     */
    def short_tag = sh(
        returnStdout: true,
        script: "echo ${build_tag} | sed \"s/^jenkins-//g\" | sed \"s/ //g\""
    )

    def tag_part = short_tag.take(28)
    def os_part = os.take(10)
    def type_part = instance_type.take(10)
    def random_suffix = get_random_string(8)

    return sh(
        returnStdout: true,
        script: "echo '${tag_part}-${os_part}-${type_part}-'${random_suffix} | tr -d '.\\n'"
    )
}

def get_single_node_windows_test_stage(stage_name) {
    /*
     * Build the closure for the Windows test stage.
     * param@ stage_name: the name of the stage
     * return@: a closure that runs test_orchestrator_windows.py from
     *          PortaFiducia/scripts for the current pull request (env.CHANGE_ID)
     *
     * Exit status 65 marks the run unstable; any other non-zero status clears
     * build_ok. The status is then replayed inside catchError so that a
     * non-zero status flags the stage as FAILURE.
     */
    def windows_stage = {
        stage("${stage_name}") {
            def pull_request_id = env.CHANGE_ID
            def exit_code = sh(
                returnStatus: true,
                script: """
. venv/bin/activate;
cd PortaFiducia/scripts;
export PULL_REQUEST_ID=${pull_request_id};
env AWS_DEFAULT_REGION=us-west-2 ./test_orchestrator_windows.py --ci public --s3-bucket-name libfabric-ci-windows-prod-test-output --pull-request-id ${pull_request_id};
"""
            )

            if (exit_code == 65) {
                unstable('Scripts exited with status 65')
            } else if (exit_code != 0) {
                build_ok = false
            }

            // Runs unconditionally: a zero status is a no-op, anything else fails the stage.
            catchError(buildResult: 'SUCCESS', stageResult: 'FAILURE') {
                sh "exit ${exit_code}"
            }
        }
    }
    return windows_stage
}

def get_test_stage(stage_name, build_tag, os, instance_type, instance_count, region, test_config, addl_args) {
    /*
     * Build the closure for one test stage; running it calls
     * run_test_orchestrator_once with the parameters given here.
     * param@ stage_name: the name of the stage (also passed on as the run name)
     * param@ build_tag: the BUILD_TAG env generated by Jenkins
     * param@ os: the operating system for the test stage
     * param@ instance_type: the instance type for the test stage
     * param@ instance_count: number of instances to use
     * param@ region: the (default) aws region where the tests are run
     * param@ test_config: the name of test config file in PortaFiducia/tests/configs/
     * param@ addl_args: additional arguments passed to test_orchestrator.py
     * return@: the test stage closure
     */
    def test_stage = {
        stage("${stage_name}") {
            this.run_test_orchestrator_once(
                stage_name, build_tag, os, instance_type,
                instance_count, region, test_config, addl_args
            )
        }
    }
    return test_stage
}

pipeline {
agent {
ecs {
Expand Down Expand Up @@ -35,58 +173,55 @@ pipeline {
steps {
script {
sh 'printenv'
def common = load "contrib/aws/common.groovy"
common.download_and_extract_portafiducia('PortaFiducia')
download_and_extract_portafiducia('PortaFiducia')
}
}
}
stage("Install PortaFiducia") {
steps {
script {
def common = load "contrib/aws/common.groovy"
common.install_porta_fiducia()
install_porta_fiducia()
}

}
}
stage("Test EFA provider") {
steps {
script {
def common = load "contrib/aws/common.groovy"
def stages = [:]
// This needs the extra space at the end
def addl_args_pr = "--test-libfabric-pr $env.CHANGE_ID "

// Single Node Tests - EFA
stages["1_g4dn_alinux2-efa"] = common.get_test_stage("1_g4dn_alinux2_efa", env.BUILD_TAG, "alinux2", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr)
stages["1_g4dn_ubuntu2004-efa"] = common.get_test_stage("1_g4dn_ubuntu2004_efa", env.BUILD_TAG, "ubuntu2004", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr)
stages["1_g4dn_rhel8-efa"] = common.get_test_stage("1_g4dn_rhel8_efa", env.BUILD_TAG, "rhel8", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr)
stages["1_g4dn_centos7-efa"] = common.get_test_stage("1_g4dn_centos7_efa", env.BUILD_TAG, "centos7", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr)
stages["1_g4dn_alinux2-efa"] = get_test_stage("1_g4dn_alinux2_efa", env.BUILD_TAG, "alinux2", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr)
stages["1_g4dn_ubuntu2004-efa"] = get_test_stage("1_g4dn_ubuntu2004_efa", env.BUILD_TAG, "ubuntu2004", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr)
stages["1_g4dn_rhel8-efa"] = get_test_stage("1_g4dn_rhel8_efa", env.BUILD_TAG, "rhel8", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr)
stages["1_g4dn_centos7-efa"] = get_test_stage("1_g4dn_centos7_efa", env.BUILD_TAG, "centos7", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr)

// Single Node Tests - SHM
stages["1_g4dn_alinux2_shm"] = common.get_test_stage("1_g4dn_alinux2_shm", env.BUILD_TAG, "alinux2", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider shm")
stages["1_g4dn_ubuntu2004_shm"] = common.get_test_stage("1_g4dn_ubuntu2004_shm", env.BUILD_TAG, "ubuntu2004", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider shm")
stages["1_g4dn_rhel8_shm"] = common.get_test_stage("1_g4dn_rhel8_shm", env.BUILD_TAG, "rhel8", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider shm")
stages["1_g4dn_centos7_shm"] = common.get_test_stage("1_g4dn_centos7_shm", env.BUILD_TAG, "centos7", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider shm")
stages["1_g4dn_ubuntu2004_shm_disable-cma"] = common.get_test_stage("1_g4dn_ubuntu2004_shm_disable-cma", env.BUILD_TAG, "ubuntu2004", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider shm --enable-cma false")
stages["1_g4dn_alinux2_shm"] = get_test_stage("1_g4dn_alinux2_shm", env.BUILD_TAG, "alinux2", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider shm")
stages["1_g4dn_ubuntu2004_shm"] = get_test_stage("1_g4dn_ubuntu2004_shm", env.BUILD_TAG, "ubuntu2004", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider shm")
stages["1_g4dn_rhel8_shm"] = get_test_stage("1_g4dn_rhel8_shm", env.BUILD_TAG, "rhel8", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider shm")
stages["1_g4dn_centos7_shm"] = get_test_stage("1_g4dn_centos7_shm", env.BUILD_TAG, "centos7", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider shm")
stages["1_g4dn_ubuntu2004_shm_disable-cma"] = get_test_stage("1_g4dn_ubuntu2004_shm_disable-cma", env.BUILD_TAG, "ubuntu2004", "g4dn.8xlarge", 1, "us-east-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider shm --enable-cma false")

// Single Node Windows Test
stages["EFA_Windows_Test"] = common.get_single_node_windows_test_stage("EFA_Windows_Test")
stages["EFA_Windows_Test"] = get_single_node_windows_test_stage("EFA_Windows_Test")

// Multi Node Tests - EFA
stages["2_hpc6a_alinux2_efa"] = common.get_test_stage("2_hpc6a_alinux2_efa", env.BUILD_TAG, "alinux2", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr)
stages["2_hpc6a_ubuntu2004_efa"] = common.get_test_stage("2_hpc6a_ubuntu2004_efa", env.BUILD_TAG, "ubuntu2004", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr)
stages["2_hpc6a_rhel8_efa"] = common.get_test_stage("2_hpc6a_rhel8_efa", env.BUILD_TAG, "rhel8", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr)
stages["2_hpc6a_alinux2_efa"] = get_test_stage("2_hpc6a_alinux2_efa", env.BUILD_TAG, "alinux2", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr)
stages["2_hpc6a_ubuntu2004_efa"] = get_test_stage("2_hpc6a_ubuntu2004_efa", env.BUILD_TAG, "ubuntu2004", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr)
stages["2_hpc6a_rhel8_efa"] = get_test_stage("2_hpc6a_rhel8_efa", env.BUILD_TAG, "rhel8", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr)

// Multi Node Tests - TCP
stages["2_hpc6a_alinux2_tcp"] = common.get_test_stage("2_hpc6a_alinux2_tcp", env.BUILD_TAG, "alinux2", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider tcp")
stages["2_hpc6a_ubuntu2004_tcp"] = common.get_test_stage("2_hpc6a_ubuntu2004_tcp", env.BUILD_TAG, "ubuntu2004", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider tcp")
stages["2_hpc6a_rhel8_tcp"] = common.get_test_stage("2_hpc6a_rhel8_tcp", env.BUILD_TAG, "rhel8", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider tcp")
stages["2_hpc6a_alinux2_tcp"] = get_test_stage("2_hpc6a_alinux2_tcp", env.BUILD_TAG, "alinux2", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider tcp")
stages["2_hpc6a_ubuntu2004_tcp"] = get_test_stage("2_hpc6a_ubuntu2004_tcp", env.BUILD_TAG, "ubuntu2004", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider tcp")
stages["2_hpc6a_rhel8_tcp"] = get_test_stage("2_hpc6a_rhel8_tcp", env.BUILD_TAG, "rhel8", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider tcp")

// Multi Node Tests - SOCKETS
stages["2_hpc6a_alinux2_sockets"] = common.get_test_stage("2_hpc6a_alinux2_sockets", env.BUILD_TAG, "alinux2", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider sockets")
stages["2_hpc6a_ubuntu2004_sockets"] = common.get_test_stage("2_hpc6a_ubuntu2004_sockets", env.BUILD_TAG, "ubuntu2004", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider sockets")
stages["2_hpc6a_rhel8_sockets"] = common.get_test_stage("2_hpc6a_rhel8_sockets", env.BUILD_TAG, "rhel8", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider sockets")
stages["2_hpc6a_alinux2_sockets"] = get_test_stage("2_hpc6a_alinux2_sockets", env.BUILD_TAG, "alinux2", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider sockets")
stages["2_hpc6a_ubuntu2004_sockets"] = get_test_stage("2_hpc6a_ubuntu2004_sockets", env.BUILD_TAG, "ubuntu2004", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider sockets")
stages["2_hpc6a_rhel8_sockets"] = get_test_stage("2_hpc6a_rhel8_sockets", env.BUILD_TAG, "rhel8", "hpc6a.48xlarge", 2, "eu-north-1", "libfabric_pr_test.yaml", addl_args_pr + "--test-libfabric-provider sockets")

parallel stages
}
Expand All @@ -95,8 +230,7 @@ pipeline {
stage('check build_ok') {
steps {
script {
def common = load "contrib/aws/common.groovy"
if (common.build_ok) {
if (build_ok) {
currentBuild.result = "SUCCESS"
}
else {
Expand Down
Loading

0 comments on commit 656c380

Please sign in to comment.