diff --git a/contrib/intel/jenkins/Jenkinsfile b/contrib/intel/jenkins/Jenkinsfile deleted file mode 100644 index fa45f9d0413..00000000000 --- a/contrib/intel/jenkins/Jenkinsfile +++ /dev/null @@ -1,676 +0,0 @@ -import groovy.transform.Field - -properties([disableConcurrentBuilds(abortPrevious: true)]) -@Field def DO_RUN=true -@Field def TARGET="main" -@Field def SCRIPT_LOCATION="py_scripts/contrib/intel/jenkins" -@Field def RELEASE=false -@Field def BUILD_MODES=["reg", "dbg", "dl"] -@Field def PYTHON_VERSION="3.9" -@Field def TIMEOUT="3600" - -def run_python(version, command, output=null) { - if (output != null) - sh "python$version $command >> $output" - else - sh "python$version $command" -} - -def slurm_batch(partition, node_num, output, command) { - - try { - sh """timeout $TIMEOUT sbatch --partition=${partition} -N ${node_num} \ - --wait -o ${output} --open-mode=append --wrap=\'env; ${command}\' - """ - } catch (Exception e) { - sh "scancel \$(cat ${output} | grep SLURM_JOBID | cut -d \"=\" -f 2)" - sh "cat ${output}" - error("Build failed ${e}") - } - sh "cat ${output}" -} - -def run_fabtests(stage_name, partition, node_num, prov, util=null, - user_env=null, way=null) { - def command = "python3.9 ${RUN_LOCATION}/runtests.py" - def opts = "--prov=${prov} --test=fabtests" - def modes = BUILD_MODES - if (util) - opts = "${opts} --util=${util}" - - if (user_env) - opts = "${opts} --user_env ${user_env}" - - if (way) { - opts = "${opts} --way ${way}" - modes = ["reg"] - } - - for (mode in modes) { - echo "Running $stage_name fabtests $mode" - slurm_batch("${partition}", "${node_num}", - "${env.LOG_DIR}/${stage_name}_fabtests_${mode}", - "${command} ${opts} --ofi_build_mode=${mode}") - } - - echo "${stage_name} completed." -} - -def run_middleware(providers, stage_name, test, partition, node_num, mpi=null, - imb_grp=null) { - def base_cmd = "python3.9 ${RUN_LOCATION}/runtests.py --test=${test}" - def opts = "" - def prefix = "${env.LOG_DIR}/${stage_name}_" - def suffix = "_${test}_reg" - if (mpi) { - base_cmd = "${base_cmd} --mpi=${mpi}" - suffix = "_${mpi}${suffix}" - } - - if (imb_grp) - base_cmd = "${base_cmd} --imb_grp=${imb_grp}" - - if (env.WEEKLY.toBoolean()) - base_cmd = "${base_cmd} --weekly=${env.WEEKLY}" - - for (prov in providers) { - if (prov[1]) { - echo "Running ${prov[0]}-${prov[1]} ${stage_name}" - opts = "--prov=${prov[0]} --util=${prov[1]}" - output = "${prefix}${prov[0]}-${prov[1]}${suffix}" - } else { - echo "Running ${prov[0]} ${stage_name}" - opts = "--prov=${prov[0]}" - output = "${prefix}${prov[0]}${suffix}" - } - - slurm_batch("${partition}", "${node_num}", "${output}", - "${base_cmd} ${opts}") - } -} - -def gather_logs(cluster, key, dest, source) { - def address = "${env.USER}@${cluster}" - - try { - sh "scp -i ${key} ${address}:${source}/* ${dest}/" - } catch (Exception e) { - echo "Caught exception ${e} when transfering files from ${cluster}" - } -} - -def summarize(item, verbose=false, release=false, send_mail=false) { - def cmd = "${RUN_LOCATION}/summary.py --summary_item=all" - if (verbose) { - cmd = "${cmd} -v " - } - if (release) { - cmd = "${cmd} --release " - } - if (send_mail.toBoolean()) { - cmd = "${cmd} --send_mail " - } - - run_python(PYTHON_VERSION, cmd) -} - -def save_summary() { - sh """ - mkdir -p ${env.WORKSPACE}/internal - rm -rf ${env.WORKSPACE}/internal/* - git clone https://${env.PAT}@github.com/${env.INTERNAL} ${env.WORKSPACE}/internal - cd ${env.WORKSPACE}/internal - mkdir -p ${env.WORKSPACE}/internal/summaries - cp ${env.WORKSPACE}/summary_*.log ${env.WORKSPACE}/internal/summaries/ - git add ${env.WORKSPACE}/internal/summaries/ - git commit -am \"add ${env.JOB_NAME}'s summary\" - git pull -r origin master - git push origin master - """ -} - -def checkout_py_scripts() { - sh """ - if [[ ! -d ${env.WORKSPACE}/py_scripts ]]; then - mkdir ${env.WORKSPACE}/py_scripts - else - rm -rf ${env.WORKSPACE}/py_scripts && mkdir ${env.WORKSPACE}/py_scripts - fi - - git clone --branch ${TARGET} ${env.UPSTREAM} ${env.WORKSPACE}/py_scripts - """ -} - -def checkout_ci_resources() { - sh """ - if [[ ! -d ${env.WORKSPACE}/py_scripts ]]; then - mkdir ${env.WORKSPACE}/ci_resources - else - rm -rf ${env.WORKSPACE}/ci_resources && mkdir ${env.WORKSPACE}/ci_resources - fi - - git clone ${env.CI_RESOURCES} ${env.WORKSPACE}/ci_resources - - """ -} - -def checkout_external_resources() { - checkout_ci_resources() - checkout_py_scripts() -} - -def generate_diff(def branch_name, def output_loc) { - sh """ - git remote add mainRepo ${env.UPSTREAM} - git fetch mainRepo - git diff --name-only HEAD..mainRepo/${branch_name} > ${output_loc}/commit_id - git remote remove mainRepo - """ -} - -def generate_release_num(def branch_name, def output_loc) { - sh """ - git remote add mainRepo ${env.UPSTREAM} - git fetch mainRepo - git diff mainRepo/${branch_name}:Makefile.am Makefile.am > \ - ${output_loc}/Makefile.am.diff - git diff mainRepo/${branch_name}:configure.ac configure.ac > \ - ${output_loc}/configure.ac.diff - cat configure.ac | grep AC_INIT | cut -d ' ' -f 2 | \ - cut -d '[' -f 2 | cut -d ']' -f 1 > ${output_loc}/release_num.txt - git remote remove mainRepo - """ -} - -def build(item, mode=null, cluster=null, release=false, additional_args=null) { - def cmd = "${RUN_LOCATION}/build.py --build_item=${item}" - if (mode) { - cmd = "${cmd} --ofi_build_mode=${mode} " - } - - if (cluster) { - cmd = "${cmd} --build_cluster=${cluster} " - } - - if (release) { - cmd = "${cmd} --release " - } - - if (additional_args) { - cmd = "${cmd} ${additional_args} " - } - - run_python(PYTHON_VERSION, cmd) -} - -def check_target() { - echo "CHANGE_TARGET = ${env.CHANGE_TARGET}" - if (changeRequest()) { - TARGET = env.CHANGE_TARGET - } - - if (TARGET) { - return TARGET - } - - return "main" -} - -def release() { - def file = "${env.WORKSPACE}/commit_id" - if (!fileExists(file)) { - echo "CI Run has not rebased with ofiwg/libfabric. Please Rebase." - return 1 - } - - def changes = readFile file - def changeStrings = new ArrayList() - - for (line in changes.readLines()) { - changeStrings.add(line) - } - - if ((changeStrings.toArray().any { it =~ /(Makefile\.am)\b/ }) || - (changeStrings.toArray().any { it =~ /(configure\.ac)\b/ })) { - echo "This is probably a release" - return true - } - - return false -} - -def skip() { - def file = "${env.WORKSPACE}/commit_id" - if (!fileExists(file)) { - echo "CI Run has not rebased with ofiwg/libfabric. Please Rebase." - return 1 - } - - def changes = readFile file - def changeStrings = new ArrayList() - - for (line in changes.readLines()) { - changeStrings.add(line) - } - - echo "Changeset is: ${changeStrings.toArray()}" - if (changeStrings.toArray().every { it =~ /(?:fabtests\/pytests|man|prov\/efa|prov\/opx).*$/ }) { - echo "DONT RUN!" - return true - } - - if (changeStrings.isEmpty()) { - echo "DONT RUN!" - return true - } - - return false -} - -pipeline { - agent { - node { - label 'main' - customWorkspace "workspace/${JOB_NAME}/${env.BUILD_NUMBER}" - } - } - options { - timestamps() - timeout(activity: true, time: 6, unit: 'HOURS') - } - environment { - JOB_CADENCE = 'PR' - LOG_DIR = "${env.JOB_INSTALL_DIR}/${env.JOB_NAME}/${env.BUILD_NUMBER}/log_dir" - WITH_ENV="'PATH+EXTRA=/usr/sbin:/usr/bin:/sbin:/bin:/usr/local/bin:$PYTHONPATH'" - DELETE_LOCATION="${env.JOB_INSTALL_DIR}/${env.JOB_NAME}/${env.BUILD_NUMBER}" - RUN_LOCATION="${env.WORKSPACE}/${SCRIPT_LOCATION}/" - CUSTOM_WORKSPACE="${CB_HOME}/workspace/${JOB_NAME}/${env.BUILD_NUMBER}" - } - stages { - stage ('opt-out') { - steps { - script { - TARGET=check_target() - checkout_external_resources() - generate_diff("${TARGET}", "${env.WORKSPACE}") - generate_release_num("${TARGET}", "${env.WORKSPACE}") - - if (env.WEEKLY == null) { - weekly = false - } else { - weekly = env.WEEKLY.toBoolean() - } - if (weekly) { - TIMEOUT="21600" - } - skip = skip() - RELEASE = release() - if (skip && !weekly) { - DO_RUN=false - } - } - } - } - stage ('prepare build') { - when { equals expected: true, actual: DO_RUN } - steps { - script { - echo "Copying build dirs." - build("builddir") - echo "Copying log dirs." - build("logdir", null, null, RELEASE) - build("extract_mpich") - build("extract_impi_mpich") - } - } - } - stage ('parallel-builds') { - when { equals expected: true, actual: DO_RUN } - parallel { - stage ('build') { - steps { - script { - dir (CUSTOM_WORKSPACE) { - for (mode in BUILD_MODES) { - echo "Building Libfabric $mode" - build("libfabric", "$mode") - echo "Building Fabtests $mode" - build("fabtests", "$mode") - } - } - } - } - } - stage ('buildmpich-libfabric') { - steps { - script { - dir("${CUSTOM_WORKSPACE}/mpich"){ - checkout scm - echo "Building Libfabric reg" - slurm_batch("squirtle,totodile", "1", - "${env.LOG_DIR}/libfabric_mpich_log", - """python$PYTHON_VERSION ${RUN_LOCATION}/build.py \ - --build_item=libfabric_mpich """ - ) - slurm_batch("squirtle,totodile", "1", - "${env.LOG_DIR}/build_mpich_log", - """python$PYTHON_VERSION ${RUN_LOCATION}/build.py \ - --build_item=mpich """ - ) - } - } - } - } - stage ('build-daos') { - agent { - node { - label 'daos_head' - customWorkspace CUSTOM_WORKSPACE - } - } - steps { - script { - checkout_external_resources() - dir (CUSTOM_WORKSPACE) { - build("logdir") - build("libfabric", "reg", "daos") - build("fabtests", "reg") - } - } - } - } - stage ('build-gpu') { - agent { - node { - label 'ze' - customWorkspace CUSTOM_WORKSPACE - } - } - steps { - script { - checkout_external_resources() - dir (CUSTOM_WORKSPACE) { - build("logdir") - build("builddir") - build("libfabric", "reg", "gpu") - build("fabtests", "reg") - } - } - } - } - } - } - stage('parallel-tests') { - when { equals expected: true, actual: DO_RUN } - parallel { - stage('MPI_verbs-rxm_IMB') { - steps { - script { - dir (RUN_LOCATION) { - def providers = [["verbs", "rxm"]] - for (def mpi in ["impi"]) { - for (imb_grp = 1; imb_grp < 4; imb_grp++) { - run_middleware(providers, "MPI", "IMB", - "squirtle,totodile", "2", "${mpi}", - "${imb_grp}") - } - } - } - } - } - } - stage('MPI_verbs-rxm_OSU') { - steps { - script { - dir (RUN_LOCATION) { - def providers = [["verbs", "rxm"]] - for (def mpi in ["impi", "mpich"]) { - run_middleware(providers, "MPI", "osu", "squirtle,totodile", - "2", "${mpi}") - } - } - } - } - } - stage('MPI_tcp') { - steps { - script { - dir (RUN_LOCATION) { - def providers = [["tcp", null]] - for (imb_grp = 1; imb_grp < 4; imb_grp++) { - run_middleware(providers, "MPI", "IMB", - "bulbasaur", "2", "impi", "${imb_grp}") - } - for (def mpi in ["impi", "mpich"]) { - run_middleware(providers, "MPI", "osu", "bulbasaur", "2", - "${mpi}") - } - } - } - } - } - stage('tcp') { - steps { - script { - dir (RUN_LOCATION) { - run_fabtests("tcp", "bulbasaur", "2", "tcp") - } - } - } - } - stage('verbs-rxm') { - steps { - script { - dir (RUN_LOCATION) { - run_fabtests("verbs-rxm", "squirtle,totodile", "2", "verbs", - "rxm") - run_fabtests("verbs-rxm", "squirtle,totodile", "2", "verbs", - "rxm", "FI_MR_CACHE_MAX_COUNT=0") - run_fabtests("verbs-rxm", "squirtle,totodile", "2", "verbs", - "rxm", "FI_MR_CACHE_MONITOR=userfaultfd") - } - } - } - } - stage('verbs-rxd') { - steps { - script { - dir (RUN_LOCATION) { - run_fabtests("verbs-rxd", "squirtle", "2", "verbs", - "rxd") - run_fabtests("verbs-rxd", "squirtle", "2", "verbs", - "rxd", "FI_MR_CACHE_MAX_COUNT=0") - run_fabtests("verbs-rxd", "squirtle", "2", "verbs", - "rxd", "FI_MR_CACHE_MONITOR=userfaultfd") - } - } - } - } - stage('udp') { - steps { - script { - dir (RUN_LOCATION) { - run_fabtests("udp", "bulbasaur", "2", "udp") - } - } - } - } - stage('shm') { - steps { - script { - dir (RUN_LOCATION) { - run_fabtests("shm", "bulbasaur", "1", "shm") - run_fabtests("shm", "bulbasaur", "1", "shm", null, - "FI_SHM_DISABLE_CMA=1") - } - } - } - } - stage('sockets') { - steps { - script { - dir (RUN_LOCATION) { - run_fabtests("sockets", "bulbasaur", "2", "sockets") - } - } - } - } - stage('psm3') { - steps { - script { - dir (RUN_LOCATION) { - run_fabtests("psm3", "squirtle", "2", "psm3", null, - "PSM3_IDENTIFY=1") - } - } - } - } - stage('SHMEM') { - steps { - script { - dir (RUN_LOCATION) { - run_middleware([["verbs", null], ["tcp", null], - ["sockets", null]], "SHMEM", "shmem", - "squirtle,totodile", "2") - } - } - } - } - stage ('multinode_performance') { - steps { - script { - dir (RUN_LOCATION) { - run_middleware([["tcp", null]], "multinode_performance", - "multinode", "bulbasaur", "2") - } - } - } - } - stage ('oneCCL-GPU-v3') { - agent { node { label 'ze' } } - options { skipDefaultCheckout() } - steps { - script { - dir (RUN_LOCATION) { - run_middleware([["verbs", "rxm"]], "oneCCL-GPU-v3", "onecclgpu", - "fabrics-ci", "2") - } - } - } - } - stage('daos_tcp') { - agent { node { label 'daos_tcp' } } - options { skipDefaultCheckout() } - steps { - script { - dir (RUN_LOCATION) { - run_python(PYTHON_VERSION, - """runtests.py --prov='tcp' --util='rxm' \ - --test=daos \ - --log_file=${env.LOG_DIR}/daos_tcp-rxm_reg""") - } - } - } - } - stage('daos_verbs') { - agent { node { label 'daos_verbs' } } - options { skipDefaultCheckout() } - steps { - script { - dir (RUN_LOCATION) { - run_python(PYTHON_VERSION, - """runtests.py --prov='verbs' --util='rxm' \ - --test=daos \ - --log_file=${env.LOG_DIR}/daos_verbs-rxm_reg""") - } - } - } - } - stage ('DMABUF-Tests') { - agent { node { label 'ze' } } - options { skipDefaultCheckout() } - steps { - script { - dir ("${env.WORKSPACE}/${SCRIPT_LOCATION}/") { - dmabuf_output = "${LOG_DIR}/DMABUF-Tests_verbs-rxm_dmabuf" - cmd = """ python3.9 runtests.py --test=dmabuf \ - --prov=verbs --util=rxm""" - slurm_batch("fabrics-ci", "1", "${dmabuf_output}_1_reg", - "${cmd}") - slurm_batch("fabrics-ci", "2", "${dmabuf_output}_2_reg", - "${cmd}") - } - } - } - } - stage('dsa') { - when { equals expected: true, actual: DO_RUN } - steps { - script { - dir (RUN_LOCATION) { - run_fabtests("shm_dsa", "pikachu", "1", "shm", null, - """FI_SHM_DISABLE_CMA=1 FI_SHM_USE_DSA_SAR=1 \ - FI_LOG_LEVEL=warn""") - } - } - } - } - } - } - stage ('Summary') { - when { equals expected: true, actual: DO_RUN } - steps { - script { - gather_logs("${env.DAOS_ADDR}", "${env.DAOS_KEY}", "${env.LOG_DIR}", - "${env.LOG_DIR}") - gather_logs("${env.ZE_ADDR}", "${env.ZE_KEY}", "${env.LOG_DIR}", - "${env.LOG_DIR}") - - summarize("all", verbose=false, release=RELEASE, - send_mail=env.WEEKLY.toBoolean()) - if (RELEASE) { - save_summary() - } - } - } - } - } - - post { - always { - script { - summarize("all") - } - } - success { - script { - summarize("all", verbose=true, release=false, - send_mail=env.WEEKLY.toBoolean()) - } - } - aborted { - node ('daos_head') { - dir ("${DELETE_LOCATION}/middlewares") { deleteDir() } - } - node ('ze') { - dir ("${DELETE_LOCATION}/middlewares") { deleteDir() } - } - dir ("${DELETE_LOCATION}/middlewares") { deleteDir() } - } - cleanup { - node ('daos_head') { - dir ("${DELETE_LOCATION}") { deleteDir() } - dir("${env.WORKSPACE}") { deleteDir() } - dir("${env.WORKSPACE}@tmp") { deleteDir() } - } - node ('ze') { - dir("${DELETE_LOCATION}") { deleteDir() } - dir("${env.WORKSPACE}") { deleteDir() } - dir("${env.WORKSPACE}@tmp") { deleteDir() } - } - dir("${DELETE_LOCATION}") { deleteDir() } - dir("${env.WORKSPACE}") { deleteDir() } - dir("${env.WORKSPACE}@tmp") { deleteDir() } - } - } -} \ No newline at end of file diff --git a/contrib/intel/jenkins/build.py b/contrib/intel/jenkins/build.py deleted file mode 100755 index e9e62caab05..00000000000 --- a/contrib/intel/jenkins/build.py +++ /dev/null @@ -1,212 +0,0 @@ -import os -import sys - -# add jenkins config location to PATH -sys.path.append(f"{os.environ['WORKSPACE']}/ci_resources/configs/{os.environ['CLUSTER']}") -import cloudbees_config - -import argparse -import subprocess -import shlex -import common -import re -import shutil - -def build_libfabric(libfab_install_path, mode, cluster=None, ucx=None): - - if (os.path.exists(libfab_install_path) != True): - os.makedirs(libfab_install_path) - - config_cmd = ['./configure', f'--prefix={libfab_install_path}'] - enable_prov_val = 'yes' - - if (mode == 'dbg'): - config_cmd.append('--enable-debug') - elif (mode == 'dl'): - enable_prov_val='dl' - - if (cluster == 'daos'): - prov_list = common.daos_prov_list - elif (cluster == 'gpu'): - prov_list = common.gpu_prov_list - else: - prov_list = common.default_prov_list - - for prov in prov_list: - if (ucx): - config_cmd.append('--enable-ucx=yes') - break - else: - config_cmd.append(f'--enable-{prov}={enable_prov_val}') - - for op in common.common_disable_list: - config_cmd.append(f'--enable-{op}=no') - - if (cluster == 'default' and build_item != 'libfabric_mpich' and not ucx): - for op in common.default_enable_list: - config_cmd.append(f'--enable-{op}') - - common.run_command(['./autogen.sh']) - common.run_command(shlex.split(" ".join(config_cmd))) - common.run_command(['make','clean']) - common.run_command(['make', '-j32']) - common.run_command(['make','install']) - - -def build_fabtests(libfab_install_path, mode): - - os.chdir(f'{workspace}/fabtests') - if (mode == 'dbg'): - config_cmd = ['./configure', '--enable-debug', - f'--prefix={libfab_install_path}', - f'--with-libfabric={libfab_install_path}'] - else: - config_cmd = ['./configure', f'--prefix={libfab_install_path}', - f'--with-libfabric={libfab_install_path}'] - - common.run_command(['./autogen.sh']) - common.run_command(config_cmd) - common.run_command(['make','clean']) - common.run_command(['make', '-j32']) - common.run_command(['make', 'install']) - -def extract_mpich(mpitype): - - dest = f'{install_path}/middlewares/{mpitype}_mpichtest' - if (mpitype == 'mpich'): - src_dir = 'mpich' - mpich_tar = cloudbees_config.mpich_tar - elif (mpitype == 'impi'): - src_dir = 'impi_mpichtest' - mpich_tar = cloudbees_config.impi_mpichtest_tar - else: - print(f"Invalid mpi type {mpitype}") - sys.exit(-1) - - cwd = os.getcwd() - if (os.path.exists(dest)): - shutil.rmtree(dest) - os.makedirs(f'{dest}/{mpitype}_mpichsuite') - os.chdir(f'{cloudbees_config.scm_dir}/{src_dir}/') - common.run_command(['tar', '-xvf', - f"{cloudbees_config.scm_dir}/{src_dir}/{mpich_tar}", - '-C', f'{dest}/{mpitype}_mpichsuite', - '--strip-components', '1']) - os.chdir(cwd) - -def build_mpich(libfab_installpath_mpich): - mpich_build_dir = f'{install_path}/middlewares/mpich_mpichtest' - mpich_path = f"{mpich_build_dir}/mpich_mpichsuite" - cwd = os.getcwd() - if (os.path.exists(f"{mpich_build_dir}/bin") !=True): - print("configure mpich") - os.chdir(mpich_path) - configure_cmd = f"./configure " - configure_cmd += f"--prefix={mpich_build_dir} " - configure_cmd += f"--with-libfabric={libfab_installpath_mpich} " - configure_cmd += "--disable-oshmem " - configure_cmd += "--disable-fortran " - configure_cmd += "--without-ch4-shmmods " - configure_cmd += "--with-device=ch4:ofi " - configure_cmd += "--without-ze " - print(configure_cmd) - common.run_command(['./autogen.sh']) - common.run_command(shlex.split(configure_cmd)) - common.run_command(['make','-j']) - common.run_command(['make','install']) - os.chdir(cwd) - -def copy_build_dir(install_path): - middlewares_path = f'{install_path}/middlewares' - if (os.path.exists(middlewares_path) != True): - os.makedirs(f'{install_path}/middlewares') - - shutil.copytree(f'{cloudbees_config.build_dir}/shmem', - f'{middlewares_path}/shmem') - shutil.copytree(f'{cloudbees_config.build_dir}/oneccl', - f'{middlewares_path}/oneccl') - - os.symlink(f'{cloudbees_config.build_dir}/mpich', - f'{middlewares_path}/mpich') - os.symlink(f'{cloudbees_config.build_dir}/impi', - f'{middlewares_path}/impi') - os.symlink(f'{cloudbees_config.build_dir}/ompi', - f'{middlewares_path}/ompi') - os.symlink(f'{cloudbees_config.build_dir}/oneccl_gpu', - f'{middlewares_path}/oneccl_gpu') - -def copy_file(file_name): - if (os.path.exists(f'{workspace}/{file_name}')): - shutil.copyfile(f'{workspace}/{file_name}', - f'{install_path}/log_dir/{file_name}') - -def log_dir(install_path, release=False): - if (os.path.exists(f'{install_path}/log_dir') != True): - os.makedirs(f'{install_path}/log_dir') - - if (release): - copy_file('Makefile.am.diff') - copy_file('configure.ac.diff') - copy_file('release_num.txt') - -if __name__ == "__main__": -#read Jenkins environment variables - # In Jenkins, JOB_NAME = 'ofi_libfabric/master' vs BRANCH_NAME = 'master' - # job name is better to use to distinguish between builds of different - # jobs but with same branch name. - jobname = os.environ['JOB_NAME'] - buildno = os.environ['BUILD_NUMBER'] - workspace = os.environ['WORKSPACE'] - - parser = argparse.ArgumentParser() - parser.add_argument('--build_item', help="build libfabric or fabtests", \ - choices=['libfabric', 'libfabric_mpich', 'fabtests', \ - 'builddir', 'logdir', 'extract_mpich', \ - 'extract_impi_mpich', 'mpich']) - parser.add_argument('--ofi_build_mode', help="select buildmode libfabric "\ - "build mode", choices=['reg', 'dbg', 'dl']) - parser.add_argument('--build_cluster', help="build libfabric on specified cluster", \ - choices=['daos', 'gpu'], default='default') - parser.add_argument('--release', help="This job is likely testing a "\ - "release and will be checked into a git tree.", - action='store_true') - parser.add_argument('--ucx', help="build with ucx", default=False, \ - action='store_true') - - args = parser.parse_args() - build_item = args.build_item - cluster = args.build_cluster - release = args.release - ucx = args.ucx - - if (args.ofi_build_mode): - ofi_build_mode = args.ofi_build_mode - else: - ofi_build_mode = 'reg' - - install_path = f'{cloudbees_config.install_dir}/{jobname}/{buildno}' - libfab_install_path = f'{cloudbees_config.install_dir}/{jobname}/{buildno}/{ofi_build_mode}' - - if (ucx): - libfab_install_path += '/ucx' - workspace += '/ucx' - - p = re.compile('mpi*') - - if (build_item == 'libfabric'): - build_libfabric(libfab_install_path, ofi_build_mode, cluster, ucx) - elif (build_item == 'libfabric_mpich'): - build_libfabric(f'{libfab_install_path}/libfabric_mpich', - ofi_build_mode, cluster) - elif (build_item == 'mpich'): - build_mpich(f'{libfab_install_path}/libfabric_mpich') - elif (build_item == 'fabtests'): - build_fabtests(libfab_install_path, ofi_build_mode) - elif (build_item == 'extract_mpich'): - extract_mpich('mpich') - elif (build_item == 'extract_impi_mpich'): - extract_mpich('impi') - elif (build_item == 'builddir'): - copy_build_dir(install_path) - elif (build_item == 'logdir'): - log_dir(install_path, release) diff --git a/contrib/intel/jenkins/common.py b/contrib/intel/jenkins/common.py deleted file mode 100755 index 88732504691..00000000000 --- a/contrib/intel/jenkins/common.py +++ /dev/null @@ -1,145 +0,0 @@ -import collections -import subprocess -import sys -import os -from subprocess import Popen, TimeoutExpired -from time import sleep - -def get_node_name(host, interface): - return '%s-%s' % (host, interface) - -def run_command(command): - print(" ".join(command)) - p = subprocess.Popen(command, stdout=subprocess.PIPE, text=True) - print(p.returncode) - while True: - out = p.stdout.read(1) - if (out == '' and p.poll() != None): - break - if (out != ''): - sys.stdout.write(out) - sys.stdout.flush() - - print(f"Return code is {p.returncode}") - if (p.returncode != 0): - print("exiting with " + str(p.poll())) - sys.exit(p.returncode) - -def run_logging_command(command, log_file): - print("filename: ".format(log_file)) - f = open(log_file, 'a') - print(" ".join(command)) - p = subprocess.Popen(command, stdout=subprocess.PIPE, text=True) - print(p.returncode) - f.write(" ".join(command) + '\n') - while True: - out = p.stdout.read(1) - f.write(out) - if (out == '' and p.poll() != None): - break - if (out != ''): - sys.stdout.write(out) - sys.stdout.flush() - - print(f"Return code is {p.returncode}") - if (p.returncode != 0): - print("exiting with " + str(p.poll())) - f.close() - sys.exit(p.returncode) - f.close() - -def read_file(file_name): - with open(file_name) as file_out: - output = file_out.read() - return output - -class ClientServerTest: - def __init__(self, server_cmd, client_cmd, server_log, client_log, - timeout=None): - self.server_cmd = server_cmd - self.client_cmd = client_cmd - self.server_log = server_log - self.client_log = client_log - self._timeout = timeout - - def run(self): - server_process = Popen( - f"{self.server_cmd} > {self.server_log} 2>&1", - shell=True, close_fds=True - ) - sleep(1) - client_process = Popen( - f"{self.client_cmd} > {self.client_log} 2>&1", - shell=True, close_fds=True - ) - - try: - server_process.wait(timeout=self._timeout) - except TimeoutExpired: - server_process.terminate() - - try: - client_process.wait(timeout=self._timeout) - except TimeoutExpired: - client_process.terminate() - - server_output = read_file(self.server_log) - client_output = read_file(self.client_log) - - print("") - print(f"server_command: {self.server_cmd}") - print('server_stdout:') - print(server_output) - print(f"client_command: {self.client_cmd}") - print('client_stdout:') - print(client_output) - - return (server_process.returncode, client_process.returncode) - -Prov = collections.namedtuple('Prov', 'core util') -prov_list = [ - Prov('psm3', None), - Prov('verbs', None), - Prov('verbs', 'rxd'), - Prov('verbs', 'rxm'), - Prov('sockets', None), - Prov('tcp', None), - Prov('udp', None), - Prov('udp', 'rxd'), - Prov('shm', None), - Prov('ucx', None) -] -default_prov_list = [ - 'verbs', - 'tcp', - 'sockets', - 'udp', - 'shm', - 'psm3' -] -daos_prov_list = [ - 'verbs', - 'tcp' -] -dsa_prov_list = [ - 'shm' -] -gpu_prov_list = [ - 'verbs', - 'shm' -] -common_disable_list = [ - 'usnic', - 'efa', - 'perf', - 'rstream', - 'hook_debug', - 'bgq', - 'mrail', - 'opx' -] -default_enable_list = [ - 'ze-dlopen' -] - -cloudbees_log_start_string = "Begin Cloudbees Test Output" diff --git a/contrib/intel/jenkins/run.py b/contrib/intel/jenkins/run.py deleted file mode 100755 index 88472c954da..00000000000 --- a/contrib/intel/jenkins/run.py +++ /dev/null @@ -1,228 +0,0 @@ -import tests -import subprocess -import sys -import argparse -import os -import common - -sys.path.append(f"{os.environ['WORKSPACE']}/ci_resources/configs/{os.environ['CLUSTER']}") -import cloudbees_config - -# read Jenkins environment variables -# In Jenkins, JOB_NAME = 'ofi_libfabric/master' vs BRANCH_NAME = 'master' -# job name is better to use to distinguish between builds of different -# jobs but with the same branch name. -fab = os.environ['FABRIC'] -if 'slurm' in fab: - fab = cloudbees_config.fabric_map[f"{os.environ['SLURM_JOB_PARTITION']}"] - -jbname = os.environ['JOB_NAME']#args.jobname -bno = os.environ['BUILD_NUMBER']#args.buildno - -def fi_info_test(core, hosts, mode, user_env, log_file, util): - - fi_info_test = tests.FiInfoTest(jobname=jbname,buildno=bno, - testname='fi_info', core_prov=core, - fabric=fab, hosts=hosts, - ofi_build_mode=mode, user_env=user_env, - log_file=log_file, util_prov=util) - print('-------------------------------------------------------------------') - print(f"Running fi_info test for {core}-{util}-{fab}") - fi_info_test.execute_cmd() - print('-------------------------------------------------------------------') - -def fabtests(core, hosts, mode, user_env, log_file, util, way): - - runfabtest = tests.Fabtest(jobname=jbname,buildno=bno, - testname='runfabtests', core_prov=core, - fabric=fab, hosts=hosts, ofi_build_mode=mode, - user_env=user_env, log_file=log_file, - util_prov=util, way=way) - - print('-------------------------------------------------------------------') - if (runfabtest.execute_condn): - print(f"Running Fabtests for {core}-{util}-{fab}") - runfabtest.execute_cmd() - else: - print(f"Skipping {core} {runfabtest.testname} as execute condition fails") - print('-------------------------------------------------------------------') - -def shmemtest(core, hosts, mode, user_env, log_file, util): - - runshmemtest = tests.ShmemTest(jobname=jbname,buildno=bno, - testname="shmem test", core_prov=core, - fabric=fab, hosts=hosts, - ofi_build_mode=mode, user_env=user_env, - log_file=log_file, util_prov=util) - - print('-------------------------------------------------------------------') - if (runshmemtest.execute_condn): -# skip unit because it is failing shmem_team_split_2d -# print(f"Running shmem unit test for {core}-{util}-{fab}") -# runshmemtest.execute_cmd("unit") - print(f"Running shmem PRK test for {core}-{util}-{fab}") - runshmemtest.execute_cmd("prk") - - print('--------------------------------------------------------------') - print(f"Running shmem ISx test for {core}-{util}-{fab}") - runshmemtest.execute_cmd("isx") - - print('---------------------------------------------------------------') - print(f"Running shmem uh test for {core}-{util}-{fab}") - runshmemtest.execute_cmd("uh") - else: - print(f"Skipping {core} {runshmemtest.testname} as execute condition fails") - print('-------------------------------------------------------------------') - -def multinodetest(core, hosts, mode, user_env, log_file, util): - - runmultinodetest = tests.MultinodeTests(jobname=jbname,buildno=bno, - testname="multinode performance test", - core_prov=core, fabric=fab, hosts=hosts, - ofi_build_mode=mode, user_env=user_env, - log_file=log_file, util_prov=util) - - print("-------------------------------------------------------------------") - if (runmultinodetest.execute_condn): - print("Running multinode performance test for {}-{}-{}" \ - .format(core, util, fab)) - runmultinodetest.execute_cmd() - - print("---------------------------------------------------------------") - else: - print("Skipping {} as execute condition fails" \ - .format(runmultinodetest.testname)) - print("-------------------------------------------------------------------") - -def intel_mpi_benchmark(core, hosts, mpi, mode, group, user_env, log_file, util): - - imb = tests.IMBtests(jobname=jbname, buildno=bno, - testname='IntelMPIbenchmark', core_prov=core, - fabric=fab, hosts=hosts, mpitype=mpi, - ofi_build_mode=mode, user_env=user_env, - log_file=log_file, test_group=group, util_prov=util) - - print('-------------------------------------------------------------------') - if (imb.execute_condn == True): - print(f"Running IMB-tests for {core}-{util}-{fab}-{mpi}") - imb.execute_cmd() - else: - print(f"Skipping {mpi.upper} {imb.testname} as execute condition fails") - print('-------------------------------------------------------------------') - -def mpich_test_suite(core, hosts, mpi, mode, user_env, log_file, util, weekly=None): - - mpich_tests = tests.MpichTestSuite(jobname=jbname,buildno=bno, - testname="MpichTestSuite",core_prov=core, - fabric=fab, mpitype=mpi, hosts=hosts, - ofi_build_mode=mode, user_env=user_env, - log_file=log_file, util_prov=util, - weekly=weekly) - - print('-------------------------------------------------------------------') - if (mpich_tests.execute_condn == True): - print(f"Running mpichtestsuite for {core}-{util}-{fab}-{mpi}") - mpich_tests.execute_cmd() - else: - print(f"Skipping {mpi.upper()} {mpich_tests.testname} as exec condn fails") - print('-------------------------------------------------------------------') - -def osu_benchmark(core, hosts, mpi, mode, user_env, log_file, util): - - osu_test = tests.OSUtests(jobname=jbname, buildno=bno, - testname='osu-benchmarks', core_prov=core, - fabric=fab, mpitype=mpi, hosts=hosts, - ofi_build_mode=mode, user_env=user_env, - log_file=log_file, util_prov=util) - - print('-------------------------------------------------------------------') - if (osu_test.execute_condn == True): - print(f"Running OSU-Test for {core}-{util}-{fab}-{mpi}") - osu_test.execute_cmd() - else: - print(f"Skipping {mpi.upper()} {osu_test.testname} as exec condn fails") - print('-------------------------------------------------------------------') - -def oneccltest(core, hosts, mode, user_env, log_file, util): - - runoneccltest = tests.OneCCLTests(jobname=jbname,buildno=bno, - testname="oneccl test", core_prov=core, - fabric=fab, hosts=hosts, - ofi_build_mode=mode, user_env=user_env, - log_file=log_file, util_prov=util) - - print('-------------------------------------------------------------------') - if (runoneccltest.execute_condn): - print(f"Running oneCCL cpu tests for {core}-{util}-{fab}") - runoneccltest.execute_cmd() - else: - print(f"Skipping {runoneccltest.testname} as execute condition fails") - print('-------------------------------------------------------------------') - -def oneccltestgpu(core, hosts, mode, user_env, log_file, util): - - runoneccltestgpu = tests.OneCCLTestsGPU(jobname=jbname,buildno=bno, - testname="oneccl GPU test", - core_prov=core, fabric=fab, - hosts=hosts, ofi_build_mode=mode, - user_env=user_env, log_file=log_file, - util_prov=util) - - print('-------------------------------------------------------------------') - if (runoneccltestgpu.execute_condn): - print(f"Running oneCCL GPU examples test for {core}-{util}-{fab}") - runoneccltestgpu.execute_cmd('examples') - - print('---------------------------------------------------------------') - print(f"Running oneCCL GPU functional test for {core}-{util}-{fab}") - runoneccltestgpu.execute_cmd('functional') - else: - print(f"Skipping {runoneccltestgpu.testname} as execute condition fails") - print('-------------------------------------------------------------------') - -def daos_cart_tests(core, hosts, mode, user_env, log_file, util): - - runcarttests = tests.DaosCartTest(jobname=jbname, buildno=bno, - testname="Daos Cart Test", core_prov=core, - fabric=fab, hosts=hosts, - ofi_build_mode=mode, user_env=user_env, - log_file=log_file, util_prov=util) - - print('-------------------------------------------------------------------') - if (runcarttests.execute_condn): - print(f"Running cart test for {core}-{util}-{fab}") - runcarttests.execute_cmd() - print('-------------------------------------------------------------------') - -def dmabuftests(core, hosts, mode, user_env, log_file, util): - - rundmabuftests = tests.DMABUFTest(jobname=jbname,buildno=bno, - testname="DMABUF Tests", core_prov=core, - fabric=fab, hosts=hosts, - ofi_build_mode=mode, user_env=user_env, - log_file=log_file, util_prov=util) - - print('-------------------------------------------------------------------') - if (rundmabuftests.execute_condn): - print(f"Running dmabuf H->H tests for {core}-{util}-{fab}") - rundmabuftests.execute_cmd('H2H') - - print('---------------------------------------------------------------') - print(f"Running dmabuf H->D tests for {core}-{util}-{fab}") - rundmabuftests.execute_cmd('H2D') - - print('---------------------------------------------------------------') - print(f"Running dmabuf D->H tests for {core}-{util}-{fab}") - rundmabuftests.execute_cmd('D2H') - - print('---------------------------------------------------------------') - print(f"Running dmabuf D->D tests for {core}-{util}-{fab}") - rundmabuftests.execute_cmd('D2D') - - print('---------------------------------------------------------------') - else: - print(f"Skipping {rundmabuftests.testname} as execute condition fails") - print('-------------------------------------------------------------------') - -if __name__ == "__main__": - pass diff --git a/contrib/intel/jenkins/runtests.py b/contrib/intel/jenkins/runtests.py deleted file mode 100755 index 51f5a0c2ef6..00000000000 --- a/contrib/intel/jenkins/runtests.py +++ /dev/null @@ -1,151 +0,0 @@ -import argparse -import os -import sys -sys.path.append(f"{os.environ['WORKSPACE']}/ci_resources/configs/{os.environ['CLUSTER']}") -import cloudbees_config -import subprocess -import run -import common -import shlex - -class ParseDict(argparse.Action): - def __call__(self, parser, namespace, values, option_string=None): - setattr(namespace, self.dest, dict()) - for value in values: - key, value = value.split('=') - getattr(namespace, self.dest)[key] = value - -parser = argparse.ArgumentParser() -parser.add_argument('--prov', help="core provider", choices=['verbs', \ - 'tcp', 'udp', 'sockets', 'shm', 'psm3', 'ucx']) -parser.add_argument('--util', help="utility provider", choices=['rxd', 'rxm']) -parser.add_argument('--ofi_build_mode', help="specify the build configuration",\ - choices = ['reg', 'dbg', 'dl'], default='reg') -parser.add_argument('--test', help="specify test to execute", \ - choices = ['all', 'shmem', 'IMB', 'osu', 'oneccl', \ - 'mpichtestsuite', 'fabtests', 'onecclgpu', \ - 'fi_info', 'daos', 'multinode', 'dmabuf']) - -parser.add_argument('--imb_grp', help="IMB test group 1:[MPI1, P2P], \ - 2:[EXT, IO], 3:[NBC, RMA, MT]", choices=['1', '2', '3']) -parser.add_argument('--way', help="direction to run with device option", - choices=['h2d', 'd2d', 'xd2d'], default=None) -parser.add_argument('--user_env', help="Run with additional environment " \ - "variables", nargs='*', action=ParseDict, default={}) -parser.add_argument('--mpi', help="Select mpi to use for middlewares", - choices=['impi', 'mpich', 'ompi'], default='impi') -parser.add_argument('--log_file', help="Full path to log file", - default=os.environ['DEFAULT_LOG_LOCATION'], type=str) -parser.add_argument('--weekly', help="run weekly", default=False, type=bool) - -args = parser.parse_args() -args_core = args.prov - -args_util = args.util -user_env = args.user_env -log_file = args.log_file -weekly = args.weekly - -if (args.ofi_build_mode): - ofi_build_mode = args.ofi_build_mode -else: - ofi_build_mode='reg' - -if (args.test): - run_test = args.test -else: - run_test = 'all' - -if (args.imb_grp): - imb_group = args.imb_grp -else: - imb_group = '1' - -mpi = args.mpi -way = args.way - -hosts = [] -if 'slurm' in os.environ['FABRIC']: - slurm_nodes = os.environ['SLURM_JOB_NODELIST'] # example cb[1-4,11] - common.run_command(shlex.split(f"sinfo --Format=Features -n {slurm_nodes}")) - if int(os.environ['SLURM_NNODES']) == 1: - hosts.append(slurm_nodes) - else: - prefix = slurm_nodes[0:slurm_nodes.find('[')] - nodes = slurm_nodes[slurm_nodes.find('[') + 1 : - slurm_nodes.find(']')].split(',') # ['1-4', '11'] - for item in nodes: # ['1-4', '11'] -> ['cb1', 'cb2', 'cb3', 'cb4', 'cb11'] - if '-' in item: - rng = item.split('-') - node_list = list(range(int(rng[0]), int(rng[1]) + 1)) - for node in node_list: - hosts.append(f'{prefix}{node}') - else: - hosts.append(f'{prefix}{item}') -else: - node = (os.environ['NODE_NAME']).split('_')[0] - hosts = [node] - for host in cloudbees_config.node_map[node]: - hosts.append(host) - print(f"hosts = {hosts}") - -print(common.cloudbees_log_start_string) - -#this script is executed from /tmp -#this is done since some mpi tests -#look for a valid location before running -# the test on the secondary host(client) -# but jenkins only creates a valid path on -# the primary host (server/test node) - -os.chdir('/tmp/') - -if(args_core): - if (run_test == 'all' or run_test == 'fi_info'): - run.fi_info_test(args_core, hosts, ofi_build_mode, - user_env, log_file, util=args.util) - - if (run_test == 'all' or run_test == 'fabtests'): - run.fabtests(args_core, hosts, ofi_build_mode, user_env, log_file, - args_util, way) - - if (run_test == 'all' or run_test == 'shmem'): - run.shmemtest(args_core, hosts, ofi_build_mode, user_env, log_file, - args_util) - - if (run_test == 'all' or run_test == 'oneccl'): - run.oneccltest(args_core, hosts, ofi_build_mode, user_env, log_file, - args_util) - - if (run_test == 'all' or run_test == 'onecclgpu'): - run.oneccltestgpu(args_core, hosts, ofi_build_mode, - user_env, log_file, args_util) - - if (run_test == 'all' or run_test == 'daos'): - run.daos_cart_tests(args_core, hosts, ofi_build_mode, - user_env, log_file, args_util) - - if (run_test == 'all' or run_test == 'multinode'): - run.multinodetest(args_core, hosts, ofi_build_mode, - user_env, log_file, args_util) - - if (run_test == 'all' or run_test == 'mpichtestsuite'): - run.mpich_test_suite(args_core, hosts, mpi, - ofi_build_mode, user_env, log_file, - args_util, weekly) - - if (run_test == 'all' or run_test == 'IMB'): - run.intel_mpi_benchmark(args_core, hosts, mpi, - ofi_build_mode, imb_group, - user_env, log_file, args_util) - - if (run_test == 'all' or run_test == 'osu'): - run.osu_benchmark(args_core, hosts, mpi, - ofi_build_mode, user_env, log_file, - args_util) - - if (run_test == 'all' or run_test == 'dmabuf'): - run.dmabuftests(args_core, hosts, ofi_build_mode, - user_env, log_file, args_util) -else: - print("Error : Specify a core provider to run tests") diff --git a/contrib/intel/jenkins/summary.py b/contrib/intel/jenkins/summary.py deleted file mode 100755 index 43199fc2a51..00000000000 --- a/contrib/intel/jenkins/summary.py +++ /dev/null @@ -1,992 +0,0 @@ -from abc import ABC, abstractmethod -import shutil -from datetime import datetime -from typing import Tuple -import os -from pickle import FALSE -import sys -import smtplib -from email.mime.multipart import MIMEMultipart -from email.mime.text import MIMEText -from email.mime.base import MIMEBase -from email import encoders - -# add jenkins config location to PATH -sys.path.append(f"{os.environ['WORKSPACE']}/ci_resources/configs/{os.environ['CLUSTER']}") - -import cloudbees_config -import argparse -import common - -verbose = False - -class SendEmail: - def __init__(self, sender=None, receivers=None, attachment=None): - self.sender = sender if sender is not None else os.environ['SENDER'] - self.receivers = (receivers if receivers is not None else \ - f"{os.environ['RECEIVER']}").split(',') - self.attachment = attachment - self.work_week = datetime.today().isocalendar()[1] - self.msg = MIMEMultipart() - - def __add_attachments(self): - print(f"Attachment is {self.attachment}") - if self.attachment is None: - return - - attachment = MIMEBase('application', 'octet-stream') - attachment.set_payload(open(self.attachment, 'rb').read()) - encoders.encode_base64(attachment) - name = f"Jenkins_Summary_ww{self.work_week}" - if (verbose): - name = f"{name}_all" - attachment.add_header('Content-Disposition', - f"attachment; filename={name}") - self.msg.attach(attachment) - - def __write_msg(self): - self.msg['Subject'] = f"Cloudbees Summary {os.environ['JOB_NAME']}" - self.msg['From'] = self.sender - self.msg['To'] = ", ".join(self.receivers) - self.msg.attach(MIMEText(f"WW{self.work_week} Summary for Libfabric "\ - "From Cloudbees")) - - def send_mail(self): - self.__write_msg() - self.__add_attachments() - server = smtplib.SMTP(os.environ['SMTP_SERVER'], - os.environ['SMTP_PORT']) - server.sendmail(self.sender, self.receivers, self.msg.as_string()) - server.quit() - -class Release: - def __init__(self, log_dir, output_file, logger, release_num): - self.log_dir = log_dir - self.output_file = output_file - self.logger = logger - self.release_num = release_num - - def __log_entire_file(self, file_name): - with open(file_name) as f: - for line in f: - self.logger.log(line, end_delimiter = '') - - def __append_release_changes(self, file_name): - if os.path.exists(file_name): - self.__log_entire_file(file_name) - - def add_release_changes(self): - self.logger.log(F"Release number: {self.release_num}") - self.__append_release_changes(f'{self.log_dir}/Makefile.am.diff') - self.__append_release_changes(f'{self.log_dir}/configure.ac.diff') - -class Logger: - def __init__(self, output_file, release): - self.output_file = output_file - self.release = release - self.padding = '\t' - - def log(self, line, end_delimiter='\n', lpad=0, ljust=0): - print(f'{self.padding * lpad}{line}'.ljust(ljust), end = end_delimiter) - self.output_file.write(f'{self.padding * lpad}{line}{end_delimiter}') - -class Summarizer(ABC): - @classmethod - def __subclasshook__(cls, subclass): - return ( - hasattr(subclass, "print_results") - and callable(subclass.print_results) - and hasattr(subclass, "check_features") - and callable(subclass.check_features) - and hasattr(subclass, "check_node") - and callable(subclass.check_node) - and hasattr(subclass, "check_name") - and callable(subclass.check_name) - and hasattr(subclass, "check_pass") - and callable(subclass.check_pass) - and hasattr(subclass, "check_fail") - and callable(subclass.check_fail) - and hasattr(subclass, "check_exclude") - and callable(subclass.check_exclude) - and hasattr(subclass, "fast_forward") - and callable(subclass.fast_forward) - and hasattr(subclass, "read_file") - and callable(subclass.read_file) - and hasattr(subclass, "run") - and callable(subclass.run) - or NotImplemented - ) - - @abstractmethod - def __init__(self, logger, log_dir, prov, file_name, stage_name): - self.logger = logger - self.log_dir = log_dir - self.prov = prov - self.file_name = file_name - self.stage_name = stage_name - self.file_path = os.path.join(self.log_dir, self.file_name) - self.exists = os.path.exists(self.file_path) - self.log = None - self.passes = 0 - self.passed_tests = [] - self.fails = 0 - self.failed_tests = [] - self.excludes = 0 - self.excluded_tests = [] - self.error = 0 - self.errored_tests = [] - self.test_name ='no_test' - self.name = 'no_name' - self.features = "no_features_found" - self.node = "no_node_found" - - def print_results(self): - total = self.passes + self.fails - # log was empty or not valid - if not total: - return - - percent = self.passes/total * 100 - if (verbose): - self.logger.log( - f"<>{self.stage_name} : ", lpad=1, ljust=50, end_delimiter = '' - ) - else: - self.logger.log( - f"{self.stage_name} : ", - lpad=1, ljust=50, end_delimiter = '' - ) - self.logger.log( - f"{self.node} : ", - lpad=1, ljust=20, end_delimiter = '' - ) - self.logger.log( - f"[{self.features}] : ", - lpad=1, ljust=30, end_delimiter = '' - ) - self.logger.log(f"{self.passes}:{total} ", ljust=10, end_delimiter = '') - self.logger.log(f": {percent:.2f}% : ", ljust=12, end_delimiter = '') - self.logger.log("Pass", end_delimiter = '') - if (self.excludes > 0): - self.logger.log(f" : {self.excludes:3.0f} : Excluded/Notrun") - else: - self.logger.log("") - - if (verbose and self.passes): - self.logger.log(f"Passed tests: {self.passes}", lpad=2) - for test in self.passed_tests: - self.logger.log(f'{test}', lpad=3) - if self.fails: - self.logger.log(f"Failed tests: {self.fails}", lpad=2) - for test in self.failed_tests: - self.logger.log(f'{test}', lpad=3) - if (verbose): - if self.excludes: - self.logger.log( - f"Excluded/Notrun tests: {self.excludes} ", lpad=2 - ) - for test in self.excluded_tests: - self.logger.log(f'{test}', lpad=3) - - if self.error: - self.logger.log( - "Errored, Interrupt, or Canceled Tests: "\ - f"{self.excludes} ", lpad=2 - ) - for test in self.errored_tests: - self.logger.log(f'{test}', lpad=3) - - def check_features(self, previous, line): - if ('avail_features') in previous: - self.features = line.strip() - - def check_node(self, line): - if ('slurm_nodelist' in line): - self.node = line.strip().split('=')[1] - - def check_name(self, line): - return - - def check_pass(self, line): - return - - def check_fail(self, line): - if "exiting with" in line: - self.fails += 1 - - def check_exclude(self, line): - return - - def check_line(self, line): - self.check_name(line) - self.check_pass(line) - self.check_fail(line) - self.check_exclude(line) - - def fast_forward(self, log_file): - previous = "" - line = log_file.readline().lower() - while line != "": - self.check_node(line) - self.check_features(previous, line) - if common.cloudbees_log_start_string.lower() in line: - break - - previous = line - line = log_file.readline().lower() - - def read_file(self): - with open(self.file_path, 'r') as log_file: - self.fast_forward(log_file) - for line in log_file: - self.check_line(line.lower()) - - def summarize(self): - if not self.exists: - return 0 - - self.read_file() - self.print_results() - return int(self.fails) - -class FiInfoSummarizer(Summarizer): - def __init__(self, logger, log_dir, prov, file_name, stage_name): - super().__init__(logger, log_dir, prov, file_name, stage_name) - - def check_fail(self, line): - if "exiting with" in line: - self.fails += 1 - self.failed_tests.append(f"fi_info {self.prov}") - - def read_file(self): - super().read_file() - - if not self.fails: - self.passes += 1 - self.passed_tests.append(f"fi_info {self.prov}") - -class FabtestsSummarizer(Summarizer): - def __init__(self, logger, log_dir, prov, file_name, stage_name): - super().__init__(logger, log_dir, prov, file_name, stage_name) - self.trace = False - - def check_name(self, line): - # don't double count ubertest output and don't count fi_ubertest's - # invocation - if 'ubertest' in line and 'client_cmd:' in line: - self.test_name = 'no_test' - if 'name:' not in line: # skip past client output in ubertest - return - - test_name = line.split("name:") - if len(test_name) > 1: - self.test_name = test_name[-1].lower().strip() - - def get_result_line(self, line) -> Tuple[str,str]: - result = line.split("result:") - if len(result) > 1: - return (result[-1].lower().strip(), line.split()) - return None, None - - def check_pass(self, line): - result, result_line = self.get_result_line(line) - if result == 'pass' or result == 'success' or result == 'passed': - self.passes += 1 - if 'ubertest' in self.test_name: - idx = (result_line.index('result:') - 1) - try: - int((result_line[idx].split(',')[0])) - except: - return - - ubertest_number = int((result_line[idx].split(',')[0])) - self.passed_tests.append(f"{self.test_name}: "\ - f"{ubertest_number}") - else: - self.passed_tests.append(self.test_name) - - def check_fail(self, line): - result, result_line = self.get_result_line(line) - if result == 'fail': - self.fails += 1 - if 'ubertest' in self.test_name: - idx = (result_line.index('result:') - 1) - try: - int((result_line[idx].split(',')[0])) - except: - return - ubertest_number = int((result_line[idx].split(',')[0])) - self.failed_tests.append(f"{self.test_name}: " \ - f"{ubertest_number}") - else: - self.failed_tests.append(self.test_name) - - if "exiting with" in line: - self.fails += 1 - self.failed_tests.append(self.test_name) - - def check_exclude(self, line): - result, _ = self.get_result_line(line) - if result == 'excluded' or result == 'notrun': - self.excludes += 1 - self.excluded_tests.append(self.test_name) - - def check_trace(self, line): - if not self.trace: - cmd_count = 0 - faults_count = 0 - if ("user to sar buffer" in line): - tokens = line.split(' ') - for i in range(0, len(tokens)): - if 'cmd' in tokens[i]: - cmd_count += int(tokens[i + 1]) - if 'faults' in tokens[i]: - faults_count += int(tokens[i + 1]) - - if (cmd_count > 0 or faults_count > 0): - self.trace = True - - def check_line(self, line): - self.check_name(line) - if (self.test_name != 'no_test'): - self.check_pass(line) - self.check_fail(line) - self.check_exclude(line) - if ('dsa' in self.file_name): - self.check_trace(line) - - def summarize(self): - if not self.exists: - return 0 - - self.read_file() - self.print_results() - if ('dsa' in self.file_name and not self.trace): - exit("Expected: DSA to run. Actual: DSA Not Run") - - return int(self.fails) - -class MultinodePerformanceSummarizer(Summarizer): - def __init__(self, logger, log_dir, prov, file_name, stage_name): - super().__init__(logger, log_dir, prov, file_name, stage_name) - - def check_name(self, line): - #name lines look like "starting ... " - if 'starting' in line and '...' in line: - self.test_name = line.split()[1].split('.')[0] - - def check_pass(self, line): - if 'pass' in line: - self.passes += 1 - self.passed_tests.append(self.test_name) - - def check_fail(self, line): - if 'fail' in line: - self.fails += 1 - self.failed_tests.append(self.test_name) - - if "exiting with" in line: - self.fails += 1 - self.failed_tests.append(self.test_name) - -class OnecclSummarizer(Summarizer): - def __init__(self, logger, log_dir, prov, file_name, stage_name): - super().__init__(logger, log_dir, prov, file_name, stage_name) - self.file_path = os.path.join(self.log_dir, self.file_name) - self.exists = os.path.exists(self.file_path) - self.name = 'no_test' - - def check_name(self, line): - #lines look like path/run_oneccl.sh ..... -test examples ..... test_name - if " -test" in line: - tokens = line.split() - self.name = f"{tokens[tokens.index('-test') + 1]} " \ - f"{tokens[len(tokens) - 1]}" - - def check_pass(self, line): - if 'passed' in line or "all done" in line: - self.passes += 1 - self.passed_tests.append(self.name) - - def check_fail(self, line): - if 'failed' in line or "exiting with" in line: - self.fails += 1 - self.failed_tests.append(self.name) - -class ShmemSummarizer(Summarizer): - def __init__(self, logger, log_dir, prov, file_name, stage_name): - super().__init__(logger, log_dir, prov, file_name, stage_name) - self.shmem_type = { - 'uh' : { 'func' : self.check_uh, - 'keyphrase' : 'summary', - 'passes' : 0, - 'fails' : 0 - }, - 'isx' : { 'func' : self.check_isx, - 'keyphrase' : 'scaling', - 'passes' : 0, - 'fails' : 0 - }, - 'prk' : { 'func' : self.check_prk, - 'keyphrase' : 'solution', - 'passes' : 0, - 'fails' : 0 - } - } - self.test_type = 'prk' - self.keyphrase = self.shmem_type[self.test_type]['keyphrase'] - self.name = 'no_test' - - def check_uh(self, line, log_file): - # (test_002) Running test_shmem_atomics.x: Test all atomics... OK - # (test_003) Running test_shmem_barrier.x: Tests barrier ... Failed - if "running test_" in line: - tokens = line.split() - for token in tokens: - if 'test_' in token: - self.name = token - if tokens[len(tokens) - 1] == 'ok': - self.shmem_type[self.test_type]['passes'] += 1 - self.passed_tests.append(self.name) - else: - self.shmem_type[self.test_type]['fails'] += 1 - self.failed_tests.append(self.name) - # Summary - # x/z Passed. - # y/z Failed. - if self.keyphrase in line: #double check - passed = log_file.readline().lower() - failed = log_file.readline().lower() - token = int(passed.split()[1].split('/')[0]) - if self.shmem_type[self.test_type]['passes'] != token: - self.logger.log( - f"passes {self.shmem_type[self.test_type]['passes']} do " \ - f"not match log reported passes {token}" - ) - token = int(failed.split()[1].split('/')[0]) - if self.shmem_type[self.test_type]['fails'] != int(token): - self.logger.log( - f"fails {self.shmem_type[self.test_type]['fails']} does "\ - f"not match log fails {token}" - ) - - def check_prk(self, line, log_file=None): - if self.keyphrase in line: - self.shmem_type[self.test_type]['passes'] += 1 - if 'error:' in line or "exiting with" in line: - self.shmem_type[self.test_type]['fails'] += 1 - p = self.shmem_type[self.test_type]['passes'] - f = self.shmem_type[self.test_type]['fails'] - self.failed_tests.append(f"{self.prov} {p + f}") - if 'test(s)' in line: - token = line.split()[0] - if self.fails != int(token): - self.logger.log( - f"fails {self.fails} does not match log reported fails " \ - f"{token}" - ) - - def check_isx(self, line, log_file=None): - if self.keyphrase in line: - self.shmem_type[self.test_type]['passes'] += 1 - if ('failed' in line and 'test(s)' not in line) or \ - "exiting with" in line: - self.shmem_type[self.test_type]['fails'] += 1 - p = self.shmem_type[self.test_type]['passes'] - f = self.shmem_type[self.test_type]['fails'] - self.failed_tests.append(f"{self.prov} {p + f}") - if 'test(s)' in line: - token = line.split()[0] - if int(token) != self.shmem_type[self.test_type]['fails']: - self.logger.log( - f"fails {self.shmem_type[self.test_type]['fails']} does " \ - f"not match log reported fails {int(token)}" - ) - - def check_fails(self, line): - if "exiting with" in line: - self.shmem_type[self.test_type]['fails'] += 1 - p = self.shmem_type[self.test_type]['passes'] - f = self.shmem_type[self.test_type]['fails'] - self.failed_tests.append(f"{self.prov} {p + f}") - - def check_test_type(self, line): - if "running shmem" in line: - self.test_type = line.split(' ')[2].lower() - self.keyphrase = self.shmem_type[self.test_type]['keyphrase'] - - def check_line(self, line, log_file): - self.check_test_type(line) - if self.test_type is not None: - self.shmem_type[self.test_type]['func'](line, log_file) - self.check_fails(line) - - def read_file(self): - with open(self.file_path, 'r') as log_file: - super().fast_forward(log_file) - for line in log_file: - self.check_line(line.lower(), log_file) - - for key in self.shmem_type.keys(): - self.passes += self.shmem_type[key]['passes'] - self.fails += self.shmem_type[key]['fails'] - -class MpichTestSuiteSummarizer(Summarizer): - def __init__(self, logger, log_dir, prov, mpi, file_name, stage_name): - super().__init__(logger, log_dir, prov, file_name, stage_name) - - self.mpi = mpi - self.run = 'mpiexec' - - def read_file(self): - with open(self.file_path,'r') as log_file: - super().fast_forward(log_file) - for line in log_file: - super().check_line(line.lower().strip()) - - def check_exclude(self, line): - if line.startswith('excluding:'): - test = line.split(':')[-1] - self.excludes += 1 - self.excluded_tests.append(test) - - def check_name(self, line): - if (line.startswith('ok') or - line.startswith('not ok')): - self.name = line.split('-')[1].split('#')[0].strip() - - def check_pass(self, line): - if (line.startswith('ok') and not - line.split('#')[1].strip().startswith('skip')): - self.passes += 1 - self.passed_tests.append(self.name) - - def check_fail(self, line): - if (line.startswith('not ok') and not - line.split('#')[1].strip().startswith('skip')): - self.fails += 1 - self.failed_tests.append(self.name) - - -class ImbSummarizer(Summarizer): - def __init__(self, logger, log_dir, prov, mpi, file_name, stage_name): - super().__init__(logger, log_dir, prov, file_name, stage_name) - - self.mpi = mpi - if self.mpi == 'impi': - self.run = 'mpiexec' - else: - self.run = 'mpirun' - self.test_type = '' - - def check_type(self, line): - if 'part' in line: - self.test_type = line.split()[len(line.split()) - 2] - - def check_name(self, line): - if "benchmarking" in line: - self.name = line.split()[len(line.split()) - 1] - - def check_pass(self, line): - if "benchmarking" in line: - self.passes += 1 - self.passed_tests.append(self.name) - - def check_fail(self, line): - if "exiting with" in line: - self.fails += 1 - self.failed_tests.append(f"{self.test_type} {self.name}") - self.passes -= 1 - - def check_line(self, line): - self.check_type(line) - self.check_name(line) - self.check_pass(line) - self.check_fail(line) - super().check_exclude(line) - -class OsuSummarizer(Summarizer): - def __init__(self, logger, log_dir, prov, mpi, file_name, stage_name): - super().__init__(logger, log_dir, prov, file_name, stage_name) - self.mpi = mpi - if self.mpi == 'impi': - self.run = 'mpiexec' - else: - self.run = 'mpirun' - - self.type = '' - self.tokens = [] - - def get_tokens(self, line): - if "# osu" in line: - self.tokens = line.split() - else: - self.tokens = [] - - def check_name(self, line): - if 'osu' in self.tokens: - self.name = " ".join(self.tokens[self.tokens.index('osu') + \ - 1:self.tokens.index('test')]) - - def check_type(self): - if self.tokens: - self.test_type = self.tokens[1] - - def check_pass(self, line): - if 'osu' in self.tokens: - # Assume pass - self.passes += 1 - self.passed_tests.append(self.name) - - def check_fail(self, line): - if "exiting with" in line: - self.fails += 1 - self.failed_tests.append(f"{self.test_type} {self.name}") - # Remove assumed pass - self.passes -= 1 - - def check_line(self, line): - self.get_tokens(line) - self.check_name(line) - self.check_type() - self.check_pass(line) - self.check_fail(line) - super().check_exclude(line) - -class DaosSummarizer(Summarizer): - def __init__(self, logger, log_dir, prov, file_name, stage_name): - super().__init__(logger, log_dir, prov, file_name, stage_name) - - if (self.exists): - if ('verbs' in file_name): - self.node = cloudbees_config.daos_prov_node_map['verbs'] - if ('tcp' in file_name): - self.node = cloudbees_config.daos_prov_node_map['tcp'] - - self.features = cloudbees_config.daos_node_features - - def check_name(self, line): - if "reading ." in line: - self.test_name = line.split('/')[len(line.split('/')) - 1] \ - .rstrip('.yaml\n') - - def check_pass(self, line): - res_string = line.lstrip("results :").rstrip() - res_list = res_string.split(' | ') - for elem in res_list: - if 'pass' in elem: - self.passes += [int(s) for s in elem.split() if s.isdigit()][0] - display_testname = self.test_name.ljust(20) - self.passed_tests.append(f"{display_testname} : {res_string}") - - def check_fail(self, line): - res_list = line.lstrip("results :").rstrip().split('|') - for elem in res_list: - total = [int(s) for s in elem.split() if s.isdigit()][0] - if total != 0: - if 'fail' in elem: - self.fails += total - self.failed_tests.append(f'{self.test_name}') - if 'error' in elem: - self.error += total - self.errored_tests.append(f'error: {self.test_name}') - if 'interrupt' in elem: - self.error += total - self.errored_tests.append(f'interrupt: {self.test_name}') - if 'cancel' in elem: - self.error += total - self.errored_tests.append(f'cancel: {self.test_name}') - - def check_exclude(self, line): - res_list = line.lstrip("results :").rstrip().split('|') - for elem in res_list: - total = [int(s) for s in elem.split() if s.isdigit()][0] - if total != 0: - if 'skip' in elem: - self.excludes += total - self.excluded_tests.append(f'skip: {self.test_name}') - if 'warn' in elem: - self.excludes += total - self.excluded_tests.append(f'warn: {self.test_name}') - - def check_line(self, line): - self.check_name(line) - if "results :" in line: - self.check_pass(line) - self.check_fail(line) - self.check_exclude(line) - -class DmabufSummarizer(Summarizer): - def __init__(self, logger, log_dir, prov, file_name, stage_name): - super().__init__(logger, log_dir, prov, file_name, stage_name) - - self.test_type = '' - - def check_type(self, line): - if "Running" in line: - self.test_type = line.split()[2] - - def check_num_node(self, line): - if "SLURM_NNODES" in line: - self.num_nodes = line.split("=")[-1].strip() - self.num_nodes = ' '.join([self.num_nodes, 'node']) - - def check_name(self, line): - if "client_command" in line: - name_list = line.split()[-2:] - name_list.insert(0, str(self.num_nodes)) - name_list.insert(1, str(self.test_type)) - self.test_name = name_list - - def check_pass(self, line): - if "TEST COMPLETED" in line: - self.passes += 1 - self.passed_tests.append(self.test_name) - - def check_fail(self, line): - if "TEST FAILED" in line: - self.fails += 1 - self.failed_tests.append(self.test_name) - - def fast_forward(self, log_file): - previous = "" - line = log_file.readline() - while line != "": - self.check_num_node(line) - self.check_node(line.lower()) - self.check_features(previous.lower(), line.lower()) - if common.cloudbees_log_start_string.lower() in line.lower(): - break - - previous = line - line = log_file.readline() - - def read_file(self): - with open(self.file_path, 'r') as log_file: - self.fast_forward(log_file) - for line in log_file: - self.check_type(line) - self.check_line(line) - -def get_release_num(log_dir): - file_name = f'{log_dir}/release_num.txt' - if os.path.exists(file_name): - with open(file_name) as f: - num = f.readline() - - return num.strip() - - raise Exception("No release num") - -def summarize_items(summary_item, logger, log_dir, mode): - err = 0 - mpi_list = ['impi', 'mpich', 'ompi'] - logger.log(f"Summarizing {mode} build mode:") - if summary_item == 'fabtests' or summary_item == 'all': - for prov,util in common.prov_list: - if util: - prov = f'{prov}-{util}' - ret = FabtestsSummarizer( - logger, log_dir, prov, - f'{prov}_fabtests_{mode}', - f"{prov} fabtests {mode}" - ).summarize() - err += ret if ret else 0 - ret = FiInfoSummarizer( - logger, log_dir, prov, - f'{prov}_fi_info_{mode}', - f"{prov} fi_info {mode}" - ).summarize() - err += ret if ret else 0 - - if ((summary_item == 'daos' or summary_item == 'all') - and mode == 'reg'): - for prov in ['tcp-rxm', 'verbs-rxm']: - ret = DaosSummarizer( - logger, log_dir, prov, - f'daos_{prov}_{mode}', - f"{prov} daos {mode}" - ).summarize() - err += ret if ret else 0 - - if summary_item == 'imb' or summary_item == 'all': - for mpi in mpi_list: - for item in ['tcp-rxm', 'verbs-rxm', 'tcp']: - ret = ImbSummarizer( - logger, log_dir, item, mpi, - f'MPI_{item}_{mpi}_IMB_{mode}', - f"{item} {mpi} IMB {mode}" - ).summarize() - err += ret if ret else 0 - - if summary_item == 'osu' or summary_item == 'all': - for mpi in mpi_list: - for item in ['tcp-rxm', 'verbs-rxm', 'tcp']: - ret = OsuSummarizer( - logger, log_dir, item, mpi, - f'MPI_{item}_{mpi}_osu_{mode}', - f"{item} {mpi} OSU {mode}" - ).summarize() - err += ret if ret else 0 - - if summary_item == 'mpichtestsuite' or summary_item == 'all': - for mpi in mpi_list: - for item in ['tcp', 'verbs-rxm']: - ret = MpichTestSuiteSummarizer( - logger, log_dir, item, mpi, - f'mpichtestsuite_{item}_{mpi}_'\ - f'mpichtestsuite_{mode}', - f"{item} {mpi} mpichtestsuite {mode}" - ).summarize() - err += ret if ret else 0 - if summary_item == 'multinode' or summary_item == 'all': - for prov,util in common.prov_list: - if util: - prov = f'{prov}-{util}' - - ret = MultinodePerformanceSummarizer( - logger, log_dir, prov, - f'multinode_performance_{prov}_multinode_{mode}', - f"multinode performance {prov} {mode}" - ).summarize() - err += ret if ret else 0 - - if summary_item == 'oneccl' or summary_item == 'all': - for prov in ['tcp-rxm', 'verbs-rxm']: - ret = OnecclSummarizer( - logger, log_dir, 'oneCCL', - f'oneCCL_{prov}_oneccl_{mode}', - f'oneCCL {prov} {mode}' - ).summarize() - err += ret if ret else 0 - ret = OnecclSummarizer( - logger, log_dir, 'oneCCL-GPU', - f'oneCCL-GPU_{prov}_onecclgpu_{mode}', - f'oneCCL-GPU {prov} {mode}' - ).summarize() - err += ret if ret else 0 - - if summary_item == 'shmem' or summary_item == 'all': - for prov in ['tcp', 'verbs', 'sockets']: - ret= ShmemSummarizer( - logger, log_dir, prov, - f'SHMEM_{prov}_shmem_{mode}', - f'shmem {prov} {mode}' - ).summarize() - err += ret if ret else 0 - - if summary_item == 'v3' or summary_item == 'all': - test_types = ['h2d', 'd2d', 'xd2d'] - for type in test_types: - ret = FabtestsSummarizer( - logger, log_dir, 'shm', - f'ze_v3_shm_{type}_{mode}', - f"ze v3 shm {type} {mode}" - ).summarize() - err += ret if ret else 0 - - ret = OnecclSummarizer( - logger, log_dir, 'oneCCL-GPU', - f'oneCCL-GPU-v3_verbs-rxm_onecclgpu_{mode}', - f'oneCCL-GPU-v3 verbs-rxm {mode}' - ).summarize() - err += ret if ret else 0 - - if summary_item == 'dsa' or summary_item == 'all': - for prov in ['shm']: - ret = FabtestsSummarizer( - logger, log_dir, 'shm', - f'{prov}_dsa_fabtests_{mode}', - f"{prov} dsa fabtests {mode}" - ).summarize() - err += ret if ret else 0 - - if summary_item == 'dmabuf' or summary_item == 'all': - for prov in ['verbs-rxm']: - for num_nodes in range(1,3): - ret = DmabufSummarizer( - logger, log_dir, 'verbs-rxm', - f'DMABUF-Tests_{prov}_dmabuf_{num_nodes}_{mode}', - f"DMABUF-Tests {prov} dmabuf {num_nodes} node {mode}" - ).summarize() - err += ret if ret else 0 - - return err - -if __name__ == "__main__": -#read Jenkins environment variables - # In Jenkins, JOB_NAME = 'ofi_libfabric/master' vs BRANCH_NAME = 'master' - # job name is better to use to distinguish between builds of different - # jobs but with same branch name. - jobname = os.environ['JOB_NAME'] - buildno = os.environ['BUILD_NUMBER'] - workspace = os.environ['WORKSPACE'] - - parser = argparse.ArgumentParser() - parser.add_argument('--summary_item', help="functional test to summarize", - choices=['fabtests', 'imb', 'osu', 'mpichtestsuite', - 'oneccl', 'shmem', 'multinode', 'daos', 'v3', - 'dsa', 'dmabuf', 'all']) - parser.add_argument('--ofi_build_mode', help="select buildmode debug or dl", - choices=['dbg', 'dl', 'reg'], default='all') - parser.add_argument('-v', help="Verbose mode. Print all tests", \ - action='store_true') - parser.add_argument('--release', help="This job is testing a release."\ - "It will be saved and checked into a git tree.", - action='store_true') - parser.add_argument('--send_mail', help="Email mailing list with summary "\ - "results", action='store_true') - - args = parser.parse_args() - verbose = args.v - summary_item = args.summary_item - release = args.release - ofi_build_mode = args.ofi_build_mode - send_mail = args.send_mail - - mpi_list = ['impi', 'mpich', 'ompi'] - log_dir = f'{cloudbees_config.install_dir}/{jobname}/{buildno}/log_dir' - if (not os.path.exists(log_dir)): - os.makedirs(log_dir) - - job_name = os.environ['JOB_NAME'].replace('/', '_') - - print(f"Files to be summarized: {os.listdir(log_dir)}") - - if (release): - release_num = get_release_num(log_dir) - date = datetime.now().strftime("%Y%m%d%H%M%S") - output_name = f'summary_{release_num}_{job_name}_{date}.log' - else: - output_name = f'summary_{job_name}.log' - - full_file_name = f'{log_dir}/{output_name}' - - with open(full_file_name, 'a') as output_file: - if (ofi_build_mode == 'all'): - output_file.truncate(0) - - logger = Logger(output_file, release) - if (release): - Release( - log_dir, output_file, logger, release_num - ).add_release_changes() - - err = 0 - build_modes = ['reg', 'dbg', 'dl'] - for mode in build_modes: - if ofi_build_mode != 'all' and mode != ofi_build_mode: - continue - - err += summarize_items(summary_item, logger, log_dir, mode) - - if (release): - shutil.copyfile(f'{full_file_name}', f'{workspace}/{output_name}') - - if (send_mail): - SendEmail(sender = os.environ['SENDER'], - receivers = os.environ['mailrecipients'], - attachment = full_file_name - ).send_mail() - - exit(err) diff --git a/contrib/intel/jenkins/tests.py b/contrib/intel/jenkins/tests.py deleted file mode 100755 index 17cf063a2a5..00000000000 --- a/contrib/intel/jenkins/tests.py +++ /dev/null @@ -1,1141 +0,0 @@ -import sys -import os -import io - -sys.path.append(f"{os.environ['WORKSPACE']}/ci_resources/configs/{os.environ['CLUSTER']}") - -import subprocess -import re -import cloudbees_config -import common -import shlex -import time - -# A Jenkins env variable for job name is composed of the name of the jenkins job and the branch name -# it is building for. for e.g. in our case jobname = 'ofi_libfabric/master' -class Test: - - def __init__ (self, jobname, buildno, testname, core_prov, fabric, - hosts, ofi_build_mode, user_env, log_file, mpitype=None, - util_prov=None, way=None): - self.jobname = jobname - self.buildno = buildno - self.testname = testname - self.core_prov = core_prov - self.util_prov = f'ofi_{util_prov}' if util_prov != None else '' - self.fabric = fabric - self.hosts = hosts - self.log_file = log_file - self.mpi_type = mpitype - self.ofi_build_mode = ofi_build_mode - if (len(hosts) == 1): - self.server = hosts[0] - self.client = hosts[0] - elif (len(hosts) == 2): - self.server = hosts[0] - self.client = hosts[1] - - self.nw_interface = cloudbees_config.interface_map[self.fabric] - self.libfab_installpath = f'{cloudbees_config.install_dir}/'\ - f'{self.jobname}/{self.buildno}/'\ - f'{self.ofi_build_mode}' - if (self.core_prov == 'ucx'): - self.libfab_installpath += "/ucx" - - self.middlewares_path = f'{cloudbees_config.install_dir}/'\ - f'{self.jobname}/{self.buildno}/'\ - 'middlewares' - self.ci_logdir_path = f'{cloudbees_config.install_dir}/'\ - f'{self.jobname}/{self.buildno}/'\ - 'log_dir' - self.env = user_env - self.way = way - - self.mpi = '' - if (self.mpi_type == 'impi'): - self.mpi = IMPI(self.core_prov, self.hosts, - self.libfab_installpath, self.nw_interface, - self.server, self.client, self.env, - self.middlewares_path, self.util_prov) - elif (self.mpi_type == 'ompi'): - self.mpi = OMPI(self.core_prov, self.hosts, - self.libfab_installpath, self.nw_interface, - self.server, self.client, self.env, - self.middlewares_path, self.util_prov) - elif (self.mpi_type == 'mpich'): - self.mpi = MPICH(self.core_prov, self.hosts, - self.libfab_installpath, self.nw_interface, - self.server, self.client, self.env, - self.middlewares_path, self.util_prov) - - -class FiInfoTest(Test): - - def __init__(self, jobname, buildno, testname, core_prov, fabric, - hosts, ofi_build_mode, user_env, log_file, util_prov=None): - - super().__init__(jobname, buildno, testname, core_prov, fabric, - hosts, ofi_build_mode, user_env, log_file, None, util_prov) - - self.fi_info_testpath = f'{self.libfab_installpath}/bin' - - @property - def cmd(self): - return f"{self.fi_info_testpath}/fi_info " - - @property - def options(self): - if (self.util_prov): - opts = f"-f {self.fabric} -p {self.core_prov};{self.util_prov}" - elif (self.core_prov == 'psm3'): - opts = f"-p {self.core_prov}" - else: - opts = f"-f {self.fabric} -p {self.core_prov}" - - return opts - - def execute_cmd(self): - command = self.cmd + self.options - outputcmd = shlex.split(command) - common.run_command(outputcmd) - - -class Fabtest(Test): - - def __init__(self, jobname, buildno, testname, core_prov, fabric, - hosts, ofi_build_mode, user_env, log_file, util_prov=None, - way=None): - - super().__init__(jobname, buildno, testname, core_prov, fabric, - hosts, ofi_build_mode, user_env, log_file, None, - util_prov, way) - self.fabtestpath = f'{self.libfab_installpath}/bin' - self.fabtestconfigpath = f'{self.libfab_installpath}/share/fabtests' - - def get_exclude_file(self): - path = self.libfab_installpath - efile_path = f'{path}/share/fabtests/test_configs' - - prov = self.util_prov if self.util_prov else self.core_prov - efile_old = f'{efile_path}/{prov}/{prov}.exclude' - - if self.util_prov: - efile = f'{efile_path}/{self.util_prov}/{self.core_prov}/exclude' - else: - efile = f'{efile_path}/{self.core_prov}/exclude' - - if os.path.isfile(efile): - return efile - elif os.path.isfile(efile_old): - return efile_old - else: - print(f"Exclude file: {efile} not found!") - return None - - @property - def cmd(self): - return f"{self.fabtestpath}/runfabtests.sh " - - @property - def options(self): - opts = f"-T 300 -vvv -p {self.fabtestpath} -S " - if (self.core_prov != 'shm' and self.nw_interface): - opts += f"-s {common.get_node_name(self.server, self.nw_interface)} " - opts += f"-c {common.get_node_name(self.client, self.nw_interface)} " - - if (self.core_prov == 'shm'): - opts += f"-s {self.server} " - opts += f"-c {self.client} " - opts += "-N " - - if (self.core_prov == 'ucx'): - opts += "-b " - - if (self.ofi_build_mode == 'dl'): - opts += "-t short " - else: - opts += "-t all " - - if (self.way == 'h2d'): - opts += "-C \"-H\" -L \"-D ze\" " - elif (self.way == 'd2d'): - opts += "-C \"-D ze\" -L \"-D ze\" " - elif (self.way == 'xd2d'): - opts += "-C \"-D ze\" -L \"-D ze -i 1\" " - - if (self.core_prov == 'sockets' and self.ofi_build_mode == 'reg'): - complex_test_file = f'{self.libfab_installpath}/share/fabtests/'\ - f'test_configs/{self.core_prov}/quick.test' - if (os.path.isfile(complex_test_file)): - opts += "-u {complex_test_file} " - else: - print(f"{self.core_prov} Complex test file not found") - - if (self.ofi_build_mode != 'reg' or self.core_prov == 'udp'): - opts += "-e \'ubertest,multinode\' " - - efile = self.get_exclude_file() - if efile: - opts += "-R " - opts += f"-f {efile} " - - for key in self.env: - opts += f"-E {key}={self.env[key]} " - - if self.util_prov: - opts += f"{self.core_prov};{self.util_prov} " - else: - opts += f"{self.core_prov} " - - if (self.core_prov == 'shm'): - opts += f"{self.server} {self.server} " - else: - opts += f"{self.server} {self.client} " - - return opts - - @property - def execute_condn(self): - return True - - def execute_cmd(self): - curdir = os.getcwd() - os.chdir(self.fabtestconfigpath) - command = self.cmd + self.options - outputcmd = shlex.split(command) - common.run_command(outputcmd) - os.chdir(curdir) - - -class ShmemTest(Test): - - def __init__(self, jobname, buildno, testname, core_prov, fabric, - hosts, ofi_build_mode, user_env, log_file, util_prov=None): - - super().__init__(jobname, buildno, testname, core_prov, fabric, - hosts, ofi_build_mode, user_env, log_file, None, - util_prov) - - self.n = 4 - self.ppn = 2 - self.shmem_dir = f'{self.middlewares_path}/shmem' - self.hydra = f'{cloudbees_config.hydra}' - self.shmem_testname = '' - self.threshold = '1' - self.isx_shmem_total_size = 33554432 - self.isx_shmem_kernel_max = 134217728 - self.prk_iterations = 10 - self.prk_first_arr_dim = 1000 - self.prk_second_arr_dim = 1000 - if self.util_prov: - self.prov = f'{self.core_prov};{self.util_prov}' - else: - self.prov = self.core_prov - - self.test_dir = { - 'unit' : 'SOS', - 'uh' : 'tests-uh', - 'isx' : 'ISx/SHMEM', - 'prk' : 'PRK/SHMEM' - } - - self.make = { - 'unit' : 'make VERBOSE=1', - 'uh' : 'make C_feature_tests-run', - 'isx' : '', - 'prk' : '' - } - - self.shmem_environ = { - 'SHMEM_OFI_USE_PROVIDER': self.prov, - 'OSHRUN_LAUNCHER' : self.hydra, - 'PATH' : f'{self.shmem_dir}/bin:$PATH', - 'LD_LIBRARY_PATH' : f'{self.shmem_dir}/lib:'\ - f'{self.libfab_installpath}/lib', - 'SHMEM_SYMMETRIC_SIZE' : '4G', - 'LD_PRELOAD' : f'{self.libfab_installpath}'\ - '/lib/libfabric.so', - 'threshold' : self.threshold - } - - def export_env(self): - environ = '' - if self.shmem_testname == 'isx' or self.shmem_testname == 'prk': - self.threshold = '0' - - for key,val in self.shmem_environ.items(): - environ += f"export {key}={val}; " - return environ - - def cmd(self): - cmd = '' - if self.shmem_testname == 'unit': - cmd += f"{self.make[self.shmem_testname]} " - cmd += "mpiexec.hydra " - cmd += f"-n {self.n} " - cmd += f"-np {self.ppn} " - cmd += 'check' - elif self.shmem_testname == 'uh': - cmd += f'{self.make[self.shmem_testname]}' - elif self.shmem_testname == 'isx': - cmd += f"oshrun -np 4 ./bin/isx.strong {self.isx_shmem_kernel_max}"\ - " output_strong; " - cmd += f"oshrun -np 4 ./bin/isx.weak {self.isx_shmem_total_size} "\ - "output_weak; " - cmd += f"oshrun -np 4 ./bin/isx.weak_iso "\ - f"{self.isx_shmem_total_size} output_weak_iso " - elif self.shmem_testname == 'prk': - cmd += f"oshrun -np 4 ./Stencil/stencil {self.prk_iterations} "\ - f"{self.prk_first_arr_dim}; " - cmd += f"oshrun -np 4 ./Synch_p2p/p2p {self.prk_iterations} "\ - f"{self.prk_first_arr_dim} {self.prk_second_arr_dim}; " - cmd += f"oshrun -np 4 ./Transpose/transpose {self.prk_iterations} "\ - f"{self.prk_first_arr_dim} " - - return cmd - - - @property - def execute_condn(self): - #make always true when verbs and sockets are passing - return True if (self.core_prov == 'tcp') \ - else False - - def execute_cmd(self, shmem_testname): - self.shmem_testname = shmem_testname - cwd = os.getcwd() - os.chdir(f'{self.shmem_dir}/{self.test_dir[self.shmem_testname]}') - print("Changed directory to "\ - f'{self.shmem_dir}/{self.test_dir[self.shmem_testname]}') - command = f"bash -c \'{self.export_env()} {self.cmd()}\'" - outputcmd = shlex.split(command) - common.run_command(outputcmd) - os.chdir(cwd) - -class MultinodeTests(Test): - - def __init__(self, jobname, buildno, testname, core_prov, fabric, - hosts, ofi_build_mode, user_env, log_file, util_prov=None): - - super().__init__(jobname, buildno, testname, core_prov, fabric, - hosts, ofi_build_mode, user_env, log_file, None, util_prov) - self.fabtestpath = f'{self.libfab_installpath}/bin' - self.fabtestconfigpath = f'{self.libfab_installpath}/share/fabtests' - self.n = 2 - self.ppn = 64 - self.iterations = 1 - self.method = 'msg' - self.pattern = "full_mesh" - - @property - def cmd(self): - return f"{self.fabtestpath}/runmultinode.sh " - - @property - def options(self): - opts = f"-h {common.get_node_name(self.server, self.nw_interface)}" - opts += f",{common.get_node_name(self.client, self.nw_interface)}" - opts += f" -n {self.ppn}" - opts += f" -I {self.iterations}" - opts += f" -z {self.pattern}" - opts += f" -C {self.method}" - if self.util_prov: - opts += f" -p {self.core_prov};{self.util_prov}" - else: - opts += f" -p {self.core_prov}" - opts += f" --ci {self.fabtestpath}/" #enable ci mode to disable tput - - return opts - - @property - def execute_condn(self): - return True - - def execute_cmd(self): - if self.util_prov: - prov = f"{self.core_prov}-{self.util_prov} " - else: - prov = self.core_prov - curdir = os.getcwd() - os.chdir(self.fabtestconfigpath) - command = self.cmd + self.options - outputcmd = shlex.split(command) - common.run_command(outputcmd) - os.chdir(curdir) - -class OMPI: - def __init__(self, core_prov, hosts, libfab_installpath, nw_interface, - server, client, environ, middlewares_path, util_prov=None): - - self.ompi_src = f'{middlewares_path}/ompi' - self.core_prov = core_prov - self.hosts = hosts - self.util_prov = util_prov - self.libfab_installpath = libfab_installpath - self.nw_interface = nw_interface - self.server = server - self.client = client - self.environ = environ - self.n = 4 - self.ppn = 2 - - @property - def env(self): - cmd = "bash -c \'" - if (self.util_prov): - cmd += f"export FI_PROVIDER={self.core_prov}\\;{self.util_prov}; " - else: - cmd += f"export FI_PROVIDER={self.core_prov}; " - cmd += "export I_MPI_FABRICS=ofi; " - cmd += f"export LD_LIBRARY_PATH={self.ompi_src}/lib:$LD_LIBRARY_PATH; " - cmd += f"export LD_LIBRARY_PATH={self.libfab_installpath}/lib/:"\ - "$LD_LIBRARY_PATH; " - cmd += f"export PATH={self.ompi_src}/bin:$PATH; " - cmd += f"export PATH={self.libfab_installpath}/bin:$PATH; " - return cmd - - @property - def options(self): - opts = f"-np {self.n} " - hosts = '\',\''.join([':'.join([common.get_node_name(host, \ - self.nw_interface), str(self.ppn)]) \ - for host in self.hosts]) - opts += f"--host \'{hosts}\' " - if self.util_prov: - opts += f"--mca mtl_ofi_provider_include {self.core_prov}\\;"\ - f"{self.util_prov} " - opts += f"--mca btl_ofi_provider_include {self.core_prov}\\;"\ - f"{self.util_prov} " - else: - opts += f"--mca mtl_ofi_provider_include {self.core_prov} " - opts += f"--mca btl_ofi_provider_include {self.core_prov} " - opts += "--mca orte_base_help_aggregate 0 " - # This is necessary to prevent verbs from printing warning messages - # The test still uses libfabric verbs even when enabled. - # if (self.core_prov == 'verbs'): - # opts += "--mca btl_openib_allow_ib 1 " - opts += "--mca mtl ofi " - opts += "--mca pml cm -tag-output " - for key in self.environ: - opts += f"-x {key}={self.environ[key]} " - - return opts - - @property - def cmd(self): - return f"{self.ompi_src}/bin/mpirun {self.options}" - -class MPICH: - def __init__(self, core_prov, hosts, libfab_installpath, nw_interface, - server, client, environ, middlewares_path, util_prov=None): - - self.mpich_dir = f'{middlewares_path}/mpich_mpichtest' - self.mpichpath = f'{self.mpich_dir}/mpich_mpichsuite' - self.core_prov = core_prov - self.hosts = hosts - self.util_prov = util_prov - self.libfab_installpath = f'{libfab_installpath}/libfabric_mpich' - self.nw_interface = nw_interface - self.server = server - self.client = client - self.environ = environ - self.n = 4 - self.ppn = 1 - - @property - def env(self): - cmd = "bash -c \'" - if (self.util_prov): - cmd += f"export FI_PROVIDER={self.core_prov}\\;{self.util_prov}; " - else: - cmd += f"export FI_PROVIDER={self.core_prov}; " - cmd += "export I_MPI_FABRICS=ofi; " - cmd += "export MPIR_CVAR_CH4_OFI_ENABLE_ATOMICS=0; " - cmd += "export MPIR_CVAR_CH4_OFI_CAPABILITY_SETS_DEBUG=0; " - cmd += f"export LD_LIBRARY_PATH={self.mpich_dir}/lib:$LD_LIBRARY_PATH; " - cmd += f"export LD_LIBRARY_PATH={self.libfab_installpath}/lib/:"\ - "$LD_LIBRARY_PATH; " - cmd += f"export PATH={self.mpich_dir}/bin:$PATH; " - cmd += f"export PATH={self.libfab_installpath}/bin:$PATH; " - return cmd - - @property - def options(self): - opts = f"-n {self.n} " - opts += f"-ppn {self.ppn} " - opts += "-launcher ssh " - # Removed because sbatch does this for us whenwe use mpirun - # opts += f"-hosts {common.get_node_name(self.server, self.nw_interface)},"\ - # f"{common.get_node_name(self.client, self.nw_interface)} " - for key in self.environ: - opts += f"-genv {key} {self.environ[key]} " - - return opts - - @property - def cmd(self): - return f"{self.mpich_dir}/bin/mpirun {self.options}" - -class IMPI: - def __init__(self, core_prov, hosts, libfab_installpath, nw_interface, - server, client, environ, middlewares_path, util_prov=None): - - self.impi_src = f'{cloudbees_config.impi_root}' - self.mpichpath = f"{middlewares_path}/impi_mpichtest/" \ - f"impi_mpichsuite/" - self.core_prov = core_prov - self.hosts = hosts - self.util_prov = util_prov - self.libfab_installpath = libfab_installpath - self.nw_interface = nw_interface - self.server = server - self.client = client - self.environ = environ - self.n = 4 - self.ppn = 1 - - @property - def env(self): - cmd = f"bash -c \'source {self.impi_src}/env/vars.sh "\ - "-i_mpi_ofi_internal=0; " - cmd += f"source {cloudbees_config.intel_compiler_root}/env/vars.sh; " - if (self.util_prov): - cmd += f"export FI_PROVIDER={self.core_prov}\\;{self.util_prov}; " - else: - cmd += f"export FI_PROVIDER={self.core_prov}; " - if (self.core_prov == 'tcp'): - cmd += "export FI_IFACE=eth0; " - elif (self.core_prov == 'verbs'): - cmd += "export FI_IFACE=ib0; " - cmd += "export I_MPI_FABRICS=ofi; " - cmd += f"export LD_LIBRARY_PATH={self.impi_src}/lib:$LD_LIBRARY_PATH; " - cmd += f"export LD_LIBRARY_PATH={self.impi_src}/lib/release:"\ - "$LD_LIBRARY_PATH; " - cmd += f"export LD_LIBRARY_PATH={self.libfab_installpath}/lib/:"\ - "$LD_LIBRARY_PATH; " - cmd += f"export PATH={self.libfab_installpath}/bin:$PATH; " - return cmd - - @property - def options(self): - opts = f"-n {self.n} " - opts += f"-ppn {self.ppn} " - opts += f"-hosts {common.get_node_name(self.server, self.nw_interface)},"\ - f"{common.get_node_name(self.client, self.nw_interface)} " - for key in self.environ: - opts += f"-genv {key} {self.environ[key]} " - - return opts - - @property - def cmd(self): - return f"{self.impi_src}/bin/mpiexec {self.options}" - - -class IMBtests(Test): - def __init__(self, jobname, buildno, testname, core_prov, fabric, - hosts, mpitype, ofi_build_mode, user_env, log_file, test_group, - util_prov=None): - - super().__init__(jobname, buildno, testname, core_prov, - fabric, hosts, ofi_build_mode, user_env, log_file, mpitype, - util_prov) - - self.test_group = test_group - self.mpi_type = mpitype - self.imb_src = '' - self.imb_tests = { - '1' :[ - 'MPI1', - 'P2P' - ], - '2' :[ - 'EXT', - 'IO' - ], - '3' :[ - 'NBC', - 'RMA', - 'MT' - ] - } - self.iter = 100 - self.include = { - 'MPI1':[ - 'Biband', - 'Uniband', - 'PingPongAnySource', - 'PingPingAnySource', - 'PingPongSpecificSource', - 'PingPingSpecificSource' - ], - 'P2P':[], - 'EXT':[], - 'IO':[], - 'NBC':[], - 'RMA':[], - 'MT':[] - } - self.exclude = { - 'MPI1':[], - 'P2P':[], - 'EXT':[ - 'Accumulate' - ], - 'IO':[], - 'NBC':[], - 'RMA':[ - 'Accumulate', - 'Get_accumulate', - 'Fetch_and_op', - 'Compare_and_swap', - 'All_put_all', - 'All_get_all' - ], - 'MT':[] - } - self.imb_src = f'{self.middlewares_path}/{self.mpi_type}/imb' - - @property - def execute_condn(self): - # Mpich and ompi are excluded to save time. Run manually if needed - return (self.mpi_type == 'impi') - - def imb_cmd(self, imb_test): - print(f"Running IMB-{imb_test}") - cmd = f"{self.imb_src}/IMB-{imb_test} " - if (imb_test != 'MT'): - cmd += f"-iter {self.iter} " - - if (len(self.include[imb_test]) > 0): - cmd += f"-include {','.join(self.include[imb_test])}" - - if (len(self.exclude[imb_test]) > 0): - cmd += f"-exclude {','.join(self.exclude[imb_test])}" - - return cmd - - def execute_cmd(self): - for test_type in self.imb_tests[self.test_group]: - outputcmd = shlex.split(self.mpi.env + self.mpi.cmd + \ - self.imb_cmd(test_type) + '\'') - common.run_command(outputcmd) - - -class OSUtests(Test): - - def __init__(self, jobname, buildno, testname, core_prov, fabric, - hosts, mpitype, ofi_build_mode, user_env, log_file, util_prov=None): - - super().__init__(jobname, buildno, testname, core_prov, - fabric, hosts, ofi_build_mode, user_env, log_file, mpitype, - util_prov) - - self.n_ppn = { - 'pt2pt': (2, 1), - 'collective': (4, 2), - 'one-sided': (2, 1), - 'startup': (2, 1) - } - self.osu_src = f'{self.middlewares_path}/{mpitype}/osu/libexec/'\ - 'osu-micro-benchmarks/mpi/' - self.mpi_type = mpitype - - @property - def execute_condn(self): - # mpich-tcp, ompi are the only osu test combinations failing - return False if ((self.mpi_type == 'mpich' and self.core_prov == 'tcp') or \ - self.mpi_type == 'ompi') \ - else True - - def osu_cmd(self, test_type, test): - print(f"Running OSU-{test_type}-{test}") - cmd = f'{self.osu_src}/{test_type}/{test} ' - return cmd - - def execute_cmd(self): - assert(self.osu_src) - p = re.compile('osu_put*') - for root, dirs, tests in os.walk(self.osu_src): - for test in tests: - self.mpi.n = self.n_ppn[os.path.basename(root)][0] - self.mpi.ppn = self.n_ppn[os.path.basename(root)][1] - - if (test == 'osu_latency_mp' and self.core_prov == 'verbs'): - self.env['IBV_FORK_SAFE'] = '1' - - if(p.search(test) == None): - osu_command = self.osu_cmd(os.path.basename(root), test) - outputcmd = shlex.split(self.mpi.env + self.mpi.cmd + \ - osu_command + '\'') - common.run_command(outputcmd) - - if (test == 'osu_latency_mp' and self.core_prov == 'verbs'): - self.env.pop('IBV_FORK_SAFE') - - -class MpichTestSuite(Test): - - def __init__(self, jobname, buildno, testname, core_prov, fabric, - hosts, mpitype, ofi_build_mode, user_env, log_file, util_prov=None, weekly=None): - - super().__init__(jobname, buildno, testname, core_prov, - fabric, hosts, ofi_build_mode, user_env, log_file, mpitype, - util_prov) - self.mpi_type = mpitype - if (mpitype != 'ompi'): - self.mpichsuitepath = f'{self.mpi.mpichpath}/test/mpi/' - self.pwd = os.getcwd() - self.weekly = weekly - self.mpichtests_exclude = { - 'tcp' : { '.' : [('spawn','dir')], - 'rma' : [('win_shared_put_flush_load 3', 'test')], - 'threads' : [('spawn','dir')], - 'threads/comm' : [('idup_nb 4','test'), - ('idup_comm_gen 4','test')], - 'errors' : [('spawn','dir')] - }, - 'verbs' : { '.' : [('spawn','dir')], - 'threads/comm' : [('idup_nb 4','test')], - 'threads' : [('spawn','dir'), ('rma','dir')], - 'pt2pt' : [('sendrecv3 2','test'), - ('sendrecv3 2 arg=-isendrecv','test')], - 'threads/pt2pt': [(f"mt_improbe_sendrecv_huge 2 " - f"arg=-iter=64 arg=-count=4194304 " - f"env=MPIR_CVAR_CH4_OFI_EAGER_MAX_MSG_SIZE" - f"=16384", 'test')] - } - } - - def create_hostfile(self, file, hostlist): - with open(file, "w") as f: - for host in hostlist: - f.write(f"{host}\n") - - def update_testlists(self, filename, category): - with open(filename, 'r') as file: - lines = file.read().splitlines() - for line in lines: - if (line == category): - lines[lines.index(line)] = f'#{line}' - else: - continue - with open(filename, 'w') as file: - file.write('\n'.join(lines)) - - def exclude_tests(self, test_root, provider): - for path,exclude_list in self.mpichtests_exclude[f'{provider}'].items(): - for item in exclude_list: - self.update_testlists(f'{test_root}/{path}/testlist', item[0]) - if (item[1] == 'dir'): - filename = f'{test_root}/{path}/{item[0]}/testlist' - with open(filename,'r') as file: - for line in file: - line = line.strip() - if (not line.startswith('#')): - print(f'excluding:{path}/{item[0]}:{line}') - else: #item[1]=test - print(f'excluding:{path}/{item[0]}') - - @property - def execute_condn(self): - return ((self.mpi_type == 'impi' and self.weekly) or \ - self.mpi_type == 'mpich') - - def execute_cmd(self): - if (self.mpi_type == 'mpich'): - configure_cmd = f"./configure --with-mpi={self.mpi.mpich_dir} " - if (self.weekly): - print(f'Weekly {self.mpi_type} mpichsuite tests') - os.chdir(self.mpichsuitepath) - common.run_command(shlex.split(self.mpi.env + - configure_cmd + '\'')) - self.exclude_tests(self.mpichsuitepath, self.core_prov) - testcmd = 'make testing' - outputcmd = shlex.split(self.mpi.env + testcmd + '\'') - common.run_command(outputcmd) - common.run_command(shlex.split(f"cat {self.mpichsuitepath}/" \ - f"summary.tap")) - os.chdir(self.pwd) - else: - print(f"PR {self.mpi_type} mpichsuite tests") - os.chdir(self.mpichsuitepath) - common.run_command(shlex.split(self.mpi.env + - configure_cmd + '\'')) - common.run_command(['make', '-j']) - self.exclude_tests(self.mpichsuitepath, self.core_prov) - testcmd = "./runtests -tests=testlist " - testcmd += f" -xmlfile=summary.xml -tapfile=summary.tap " \ - f"-junitfile=summary.junit.xml " - common.run_command(shlex.split(self.mpi.env + testcmd + '\'')) - common.run_command(shlex.split(f"cat {self.mpichsuitepath}/" \ - f"summary.tap")) - os.chdir(self.pwd) - if (self.mpi_type == 'impi' and self.weekly == True): - print (f'Weekly {self.mpi_type} mpichsuite tests') - os.chdir(self.mpi.mpichpath) - print(self.hosts) - self.create_hostfile(f'{self.mpi.mpichpath}/hostfile', - self.hosts) - os.environ["I_MPI_HYDRA_HOST_FILE"] = \ - f'{self.mpi.mpichpath}/hostfile' - test_cmd = f"export I_MPI_HYDRA_HOST_FILE=" \ - f"{self.mpi.mpichpath}/hostfile; " - test_cmd += f"./test.sh --exclude lin,{self.core_prov},*,*,*,*; " - common.run_command(shlex.split(self.mpi.env + test_cmd + '\'')) - common.run_command(shlex.split(f"cat {self.mpichsuitepath}/" \ - f"summary.tap")) - os.chdir(self.pwd) - -class OneCCLTests(Test): - - def __init__(self, jobname, buildno, testname, core_prov, fabric, - hosts, ofi_build_mode, user_env, log_file, util_prov=None): - super().__init__(jobname, buildno, testname, core_prov, fabric, - hosts, ofi_build_mode, user_env, log_file, None, util_prov) - - self.oneccl_path = f'{self.middlewares_path}/oneccl/' - self.test_dir = f'{self.middlewares_path}/oneccl/ci_tests' - if self.util_prov: - self.prov = f"{self.core_prov}\;{self.util_prov}" - else: - self.prov = self.core_prov - self.oneccl_environ = { - 'FI_PROVIDER' : f"\"{self.prov}\"", - 'CCL_ATL_TRANSPORT' : 'ofi', - 'CCL_ATL_TRANSPORT_LIST' : 'ofi' - } - - self.ld_library = [ - f'{self.libfab_installpath}/lib', - f'{self.oneccl_path}/build/_install/lib' - ] - - def export_env(self): - environ = f"source {cloudbees_config.oneapi_root}/setvars.sh; " - environ += f"source {self.oneccl_path}/build/_install/env/vars.sh; " - if self.core_prov == 'psm3': - self.oneccl_environ['PSM3_MULTI_EP'] = '1' - - for key, val in self.oneccl_environ.items(): - environ += f"export {key}={val}; " - - ld_library_path = 'LD_LIBRARY_PATH=' - for item in self.ld_library: - ld_library_path += f'{item}:' - - environ += f"export {ld_library_path}$LD_LIBRARY_PATH; " - return environ - - def cmd(self): - return './run.sh ' - - def options(self): - opts = "--mode cpu " - return opts - - @property - def execute_condn(self): - return True - - @property - def execute_condn(self): - return True - - def execute_cmd(self): - curr_dir = os.getcwd() - os.chdir(self.test_dir) - command = f"bash -c \'{self.export_env()} {self.cmd()} "\ - f"{self.options()}\'" - outputcmd = shlex.split(command) - common.run_command(outputcmd) - os.chdir(curr_dir) - -class OneCCLTestsGPU(Test): - - def __init__(self, jobname, buildno, testname, core_prov, fabric, - hosts, ofi_build_mode, user_env, log_file, util_prov=None): - super().__init__(jobname, buildno, testname, core_prov, fabric, - hosts, ofi_build_mode, user_env, log_file, None, util_prov) - - self.n = 2 - self.ppn = 1 - self.oneccl_path = f'{self.middlewares_path}/oneccl_gpu/build' - if self.util_prov: - self.prov = f"{self.core_prov}\;{self.util_prov}" - else: - self.prov = self.core_prov - - self.onecclgpu_environ = { - 'FI_PROVIDER' : self.prov, - # 'LD_PRELOAD' : f"{self.libfab_installpath}/lib/libfabric.so", - 'CCL_ATL_TRANSPORT' : 'ofi', - 'CCL_ROOT' : f"{self.oneccl_path}/_install" - } - - self.ld_library = [ - f'{self.libfab_installpath}/lib', - '$LD_LIBRARY_PATH', - f'{self.oneccl_path}/_install/lib' - ] - - self.tests = { - 'examples' : [ - 'sycl_allgatherv_custom_usm_test', - 'sycl_allgatherv_inplace_test', - 'sycl_allgatherv_inplace_usm_test', - 'sycl_allgatherv_test', - 'sycl_allgatherv_usm_test', - 'sycl_allreduce_inplace_usm_test', - 'sycl_allreduce_test', - 'sycl_allreduce_usm_test', - 'sycl_alltoall_test', - 'sycl_alltoall_usm_test', - 'sycl_alltoallv_test', - 'sycl_alltoallv_usm_test', - 'sycl_broadcast_test', - 'sycl_broadcast_usm_test', - 'sycl_reduce_inplace_usm_test', - 'sycl_reduce_scatter_test', - 'sycl_reduce_scatter_usm_test', - 'sycl_reduce_test', - 'sycl_reduce_usm_test' - ], - 'functional' : [ - 'allgatherv_test', - 'alltoall_test', - 'alltoallv_test', - 'bcast_test', - 'reduce_scatter_test', - 'reduce_test' - ] - } - - def export_env(self): - environ = f"source {cloudbees_config.impi_root}/env/vars.sh "\ - "-i_mpi_internal=0; " - environ += f"source {cloudbees_config.intel_compiler_root}/env/vars.sh; " - for key, val in self.onecclgpu_environ.items(): - environ += f"export {key}={val}; " - - ld_library_path = 'LD_LIBRARY_PATH=' - for item in self.ld_library: - ld_library_path += f'{item}:' - - environ += f"export {ld_library_path}$LD_LIBRARY_PATH; " - return environ - - def cmd(self): - return f"{self.oneccl_path}/_install/bin/mpiexec " - - def options(self): - opts = "-l " - opts += f"-n {self.n} " - opts += f"-ppn {self.ppn} " - opts += f"-hosts {self.server},{self.client} " - return opts - - @property - def execute_condn(self): - return True - - - def execute_cmd(self, oneccl_test_gpu): - curr_dir = os.getcwd() - if 'examples' in oneccl_test_gpu: - os.chdir(f"{self.oneccl_path}/_install/examples/sycl") - else: - os.chdir(f"{self.oneccl_path}/tests/functional") - - for test in self.tests[oneccl_test_gpu]: - if '_usm_' in test: - gpu_selector = 'device' - else: - gpu_selector = 'default' - - command = f"bash -c \'{self.export_env()} {self.cmd()} "\ - f"{self.options()} ./{test} " - if 'examples' in oneccl_test_gpu: - command += f"gpu {gpu_selector}" - command += "\'" - - outputcmd = shlex.split(command) - common.run_command(outputcmd) - os.chdir(curr_dir) - -class DaosCartTest(Test): - - def __init__(self, jobname, buildno, testname, core_prov, fabric, - hosts, ofi_build_mode, user_env, log_file, util_prov=None): - super().__init__(jobname, buildno, testname, core_prov, fabric, - hosts, ofi_build_mode, user_env, log_file, None, util_prov) - - - self.set_paths(core_prov) - print(core_prov) - self.daos_nodes = cloudbees_config.prov_node_map[core_prov] - print(self.daos_nodes) - self.launch_node = self.daos_nodes[0] - - self.cart_tests = { - 'corpc_one_node' : {'tags' :'cart,corpc,one_node', 'numservers':1, 'numclients':0}, - 'corpc_two_node' : {'tags' :'cart,corpc,two_node', 'numservers':2, 'numclients':0}, - 'ctl_one_node' : {'tags' :'cart,ctl,one_node', 'numservers':1, 'numclients':1}, - 'ghost_rank_rpc_one_node' : {'tags' :'cart,ghost_rank_rpc,one_node', 'numservers':1, 'numclients':0}, - 'group_test' : {'tags' :'cart,group_test,one_node', 'numservers':1, 'numclients':0}, - 'iv_one_node' : {'tags' :'cart,iv,one_node', 'numservers':1, 'numclients':1}, - 'iv_two_node' : {'tags' :'cart,iv,two_node', 'numservers':2, 'numclients':1}, - 'launcher_one_node' : {'tags' :'cart,no_pmix_launcher,one_node','numservers':1, 'numclients':1}, - 'multictx_one_node' : {'tags' :'cart,no_pmix,one_node', 'numservers':1, 'numclients':0}, - 'rpc_one_node' : {'tags' :'cart,rpc,one_node', 'numservers':1, 'numclients':1}, - 'rpc_two_node' : {'tags' :'cart,rpc,two_node','numservers':2, 'numclients':1}, - 'swim_notification' : {'tags' :'cart,rpc,swim_rank_eviction,one_node', 'numservers':1, 'numclients':1} - } - - - def set_paths(self, core_prov): - self.ci_middlewares_path = f'{cloudbees_config.build_dir}/{core_prov}' - self.daos_install_root = f'{self.ci_middlewares_path}/daos/install' - self.cart_test_scripts = f'{self.daos_install_root}/lib/daos/TESTING/ftest' - self.mpipath = f'{cloudbees_config.daos_mpi}/bin' - self.pathlist = [f'{self.daos_install_root}/bin/', self.cart_test_scripts, self.mpipath, \ - f'{self.daos_install_root}/lib/daos/TESTING/tests'] - self.daos_prereq = f'{self.daos_install_root}/prereq' - common.run_command(['rm', '-rf', f'{self.ci_middlewares_path}/daos_logs/*']) - common.run_command(['rm','-rf', f'{self.daos_prereq}/debug/ofi']) - common.run_command(['ln', '-sfn', self.libfab_installpath, f'{self.daos_prereq}/debug/ofi']) - - @property - def cmd(self): - return f"env; echo {common.cloudbees_log_start_string}; "\ - "python3.6 launch.py " - - def remote_launch_cmd(self, testname): - -# The following env variables must be set appropriately prior -# to running the daos/cart tests OFI_DOMAIN, OFI_INTERFACE, -# CRT_PHY_ADDR_STR, PATH, DAOS_TEST_SHARED_DIR DAOS_TEST_LOG_DIR, -# LD_LIBRARY_PATH in the script being sourced below. - launch_cmd = f"ssh {self.launch_node} \"source {self.ci_middlewares_path}/daos_ci_env_setup.sh && \ - cd {self.cart_test_scripts} &&\" " - return launch_cmd - - def options(self, testname): - opts = "-s " - opts += f"{self.cart_tests[testname]['tags']} " - - if (self.cart_tests[testname]['numservers'] != 0): - servers = ",".join(self.daos_nodes[:self.cart_tests[testname]['numservers']]) - opts += f"--test_servers={servers} " - if (self.cart_tests[testname]['numclients'] != 0): - clients = ",".join(self.daos_nodes[:self.cart_tests[testname]['numclients']]) - opts += f"--test_clients={clients}" - return opts - - @property - def execute_condn(self): - return True - def execute_cmd(self): - sys.path.append(f'{self.daos_install_root}/lib64/python3.6/site-packages') - os.environ['PYTHONPATH']=f'{self.daos_install_root}/lib64/python3.6/site-packages' - - test_dir=self.cart_test_scripts - curdir=os.getcwd() - os.chdir(test_dir) - for test in self.cart_tests: - print(test) - command = self.remote_launch_cmd(test) + self.cmd + self.options(test) - outputcmd = shlex.split(command) - common.run_logging_command(outputcmd, self.log_file) - print("--------------------TEST COMPLETED----------------------") - os.chdir(curdir) - -class DMABUFTest(Test): - - def __init__(self, jobname, buildno, testname, core_prov, fabric, - hosts, ofi_build_mode, user_env, log_file, util_prov=None): - - super().__init__(jobname, buildno, testname, core_prov, fabric, - hosts, ofi_build_mode, user_env, log_file, - None, util_prov) - self.DMABUFtestpath = f'{self.libfab_installpath}/bin' - self.timeout = 300 - self.n = os.environ['SLURM_NNODES'] if 'SLURM_NNODES' \ - in os.environ.keys() \ - else 0 - - if util_prov: - self.prov = f'{self.core_prov}\;{self.util_prov}' - else: - self.prov = self.core_prov - - self.dmabuf_environ = { - 'ZEX_NUMBER_OF_CCS' : '0:4,1:4', - 'NEOReadDebugKeys' : '1', - 'EnableImplicitScaling' : '0', - 'MLX5_SCATTER_TO_CQE' : '0' - } - - self.tests = { - 'H2H' : [ - 'write', - 'read', - 'send' - ], - 'H2D' : [ - 'write', - 'read', - 'send' - ], - 'D2H' : [ - 'write', - 'read', - 'send' - ], - 'D2D' : [ - 'write', - 'read', - 'send' - ] - } - - @property - def execute_condn(self): - return True if (self.core_prov == 'verbs') \ - else False - - @property - def cmd(self): - return f"{self.DMABUFtestpath}/fi-rdmabw-xe" - - def dmabuf_env(self): - return ' '.join([f"{key}={self.dmabuf_environ[key]}" \ - for key in self.dmabuf_environ]) - - def execute_cmd(self, test_type): - os.chdir(self.DMABUFtestpath) - base_cmd = '' - log_prefix = f"{os.environ['LOG_DIR']}/dmabuf_{self.n}" - if 'H2H' in test_type or 'D2H' in test_type: - base_cmd = f"{self.cmd} -m malloc -p {self.core_prov}" - else: - base_cmd = f"{self.cmd} -m device -d 0 -p {self.core_prov}" - - for test in self.tests[test_type]: - client_command = f"{base_cmd} -t {test} {self.server}" - if 'send' in test: - server_command = f"{base_cmd} -t {test} " - else: - server_command = f"{base_cmd} " - RC = common.ClientServerTest( - f"ssh {self.server} {self.dmabuf_env()} {server_command}", - f"ssh {self.client} {self.dmabuf_env()} {client_command}", - f"{log_prefix}_server.log", f"{log_prefix}_client.log", - self.timeout - ).run() - - if RC == (0, 0): - print("------------------ TEST COMPLETED -------------------") - else: - print("------------------ TEST FAILED -------------------") - sys.exit(f"Exiting with returncode: {RC}")