Skip to content

Commit

Permalink
sync with 2023.06-software.eessi.io branch
Browse files Browse the repository at this point in the history
  • Loading branch information
bedroge committed May 7, 2024
2 parents ab535f6 + e729115 commit 96e7bb6
Show file tree
Hide file tree
Showing 15 changed files with 145 additions and 80 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/scorecards.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ jobs:
persist-credentials: false

- name: "Run analysis"
uses: ossf/scorecard-action@99c53751e09b9529366343771cc321ec74e9bd3d # v2.0.6
uses: ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 # v2.3.1
with:
results_file: results.sarif
results_format: sarif
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/tests_archdetect.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ jobs:
- x86_64/intel/skylake_avx512/archspec-linux-6132
- x86_64/amd/zen2/Azure-CentOS7-7V12
- x86_64/amd/zen3/Azure-CentOS7-7V73X
- x86_64/amd/zen4/Azure-Alma8-9V33X
- x86_64/amd/zen4/Shinx-RHEL8-9654
- aarch64/neoverse_n1/Azure-Ubuntu20-Altra
- aarch64/neoverse_n1/AWS-awslinux-graviton2
- aarch64/neoverse_v1/AWS-awslinux-graviton3
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/tests_scripts.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ name: Tests for scripts
on:
push:
paths:
- build_container.sh
- create_directory_tarballs.sh
- create_lmodsitepackage.py
- eessi_container.sh
- EESSI-install-software.sh
- install_software_layer.sh
- load_easybuild_module.sh
Expand All @@ -15,9 +15,9 @@ on:

pull_request:
paths:
- build_container.sh
- create_directory_tarballs.sh
- create_lmodsitepackage.py
- eessi_container.sh
- EESSI-install-software.sh
- install_software_layer.sh
- load_easybuild_module.sh
Expand Down
31 changes: 30 additions & 1 deletion EESSI-install-software.sh
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,24 @@ else
mkdir -p ${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE}
fi

# if we run the script for the first time, e.g., to start building for a new
# stack, we need to ensure certain files are present in
# ${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE}
# - .lmod/lmodrc.lua
# - .lmod/SitePackage.lua
_eessi_software_path=${EESSI_PREFIX}/software/${EESSI_OS_TYPE}/${EESSI_SOFTWARE_SUBDIR_OVERRIDE}
_lmod_cfg_dir=${_eessi_software_path}/.lmod
_lmod_rc_file=${_lmod_cfg_dir}/lmodrc.lua
if [ ! -f ${_lmod_rc_file} ]; then
command -V python3
python3 ${TOPDIR}/create_lmodrc.py ${_eessi_software_path}
fi
_lmod_sitepackage_file=${_lmod_cfg_dir}/SitePackage.lua
if [ ! -f ${_lmod_sitepackage_file} ]; then
command -V python3
python3 ${TOPDIR}/create_lmodsitepackage.py ${_eessi_software_path}
fi

# Set all the EESSI environment variables (respecting $EESSI_SOFTWARE_SUBDIR_OVERRIDE)
# $EESSI_SILENT - don't print any messages
# $EESSI_BASIC_ENV - give a basic set of environment variables
Expand Down Expand Up @@ -203,10 +221,21 @@ ${TOPDIR}/install_scripts.sh --prefix ${EESSI_PREFIX}
# Hardcode this for now, see if it works
# TODO: We should make a nice yaml and loop over all CUDA versions in that yaml to figure out what to install
# Allow skipping CUDA SDK install in e.g. CI environments
# The install_cuda... script uses EasyBuild. So, we need to check if we have EB
# or skip this step.
module_avail_out=$TMPDIR/ml.out
module avail 2>&1 | grep EasyBuild &> ${module_avail_out}
if [[ $? -eq 0 ]]; then
echo_green ">> Found an EasyBuild module"
else
echo_yellow ">> No EasyBuild module found: skipping step to install CUDA (see output in ${module_avail_out})"
export skip_cuda_install=True
fi

if [ -z "${skip_cuda_install}" ] || [ ! "${skip_cuda_install}" ]; then
${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_host_injections.sh -c 12.1.1 --accept-cuda-eula
else
echo "Skipping installation of CUDA SDK in host_injections, since the --skip-cuda-install flag was passed"
echo "Skipping installation of CUDA SDK in host_injections, since the --skip-cuda-install flag was passed OR no EasyBuild module was found"
fi

# Install drivers in host_injections
Expand Down
69 changes: 0 additions & 69 deletions build_container.sh

This file was deleted.

29 changes: 27 additions & 2 deletions create_lmodsitepackage.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,28 @@
return content
end
local function from_eessi_prefix(t)
-- eessi_prefix is the prefix with official EESSI modules
-- e.g. /cvmfs/software.eessi.io/versions/2023.06
local eessi_prefix = os.getenv("EESSI_PREFIX")
-- If EESSI_PREFIX wasn't defined, we cannot check if this module was from the EESSI environment
-- In that case, we assume it isn't, otherwise EESSI_PREFIX would (probably) have been set
if eessi_prefix == nil then
return False
else
-- NOTE: exact paths for site so may need to be updated later.
-- See https://github.com/EESSI/software-layer/pull/371
-- eessi_prefix_host_injections is the prefix with site-extensions (i.e. additional modules)
-- to the official EESSI modules, e.g. /cvmfs/software.eessi.io/host_injections/2023.06
local eessi_prefix_host_injections = string.gsub(eessi_prefix, 'versions', 'host_injections')
-- Check if the full modulepath starts with the eessi_prefix_*
return string.find(t.fn, "^" .. eessi_prefix) ~= nil or string.find(t.fn, "^" .. eessi_prefix_host_injections) ~= nil
end
end
local function load_site_specific_hooks()
-- This function will be run after the EESSI hooks are registered
-- It will load a local SitePackage.lua that is architecture independent (if it exists) from e.g.
Expand Down Expand Up @@ -153,10 +175,13 @@
-- Combine both functions into a single one, as we can only register one function as load hook in lmod
-- Also: make it non-local, so it can be imported and extended by other lmodrc files if needed
function eessi_load_hook(t)
eessi_cuda_enabled_load_hook(t)
-- Only apply CUDA hooks if the loaded module is in the EESSI prefix
-- This avoids getting an Lmod Error when trying to load a CUDA module from a local software stack
if from_eessi_prefix(t) then
eessi_cuda_enabled_load_hook(t)
end
end
hook.register("load", eessi_load_hook)
-- Note that this needs to happen at the end, so that any EESSI specific hooks can be overwritten by the site
Expand Down
20 changes: 18 additions & 2 deletions eb_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,21 @@ def pre_configure_hook(self, *args, **kwargs):
PRE_CONFIGURE_HOOKS[self.name](self, *args, **kwargs)


def pre_configure_hook_gromacs(self, *args, **kwargs):
"""
Pre-configure hook for GROMACS:
- avoid building with SVE instructions on Neoverse V1 as workaround for failing tests,
see https://gitlab.com/gromacs/gromacs/-/issues/5057 + https://gitlab.com/eessi/support/-/issues/47
"""
if self.name == 'GROMACS':
cpu_target = get_eessi_envvar('EESSI_SOFTWARE_SUBDIR')
if LooseVersion(self.version) <= LooseVersion('2024.1') and cpu_target == CPU_TARGET_NEOVERSE_V1:
self.cfg.update('configopts', '-DGMX_SIMD=ARM_NEON_ASIMD')
print_msg("Avoiding use of SVE instructions for GROMACS %s by using ARM_NEON_ASIMD as GMX_SIMD value", self.version)
else:
raise EasyBuildError("GROMACS-specific hook triggered for non-GROMACS easyconfig?!")


def pre_configure_hook_openblas_optarch_generic(self, *args, **kwargs):
"""
Pre-configure hook for OpenBLAS: add DYNAMIC_ARCH=1 to build/test/install options when using --optarch=GENERIC
Expand Down Expand Up @@ -597,8 +612,8 @@ def post_sanitycheck_cuda(self, *args, **kwargs):
full_path = os.path.join(dir_path, filename)
# we only really care about real files, i.e. not symlinks
if not os.path.islink(full_path):
# check if the current file is part of the allowlist
basename = os.path.splitext(filename)[0]
# check if the current file name stub is part of the allowlist
basename = filename.split('.')[0]
if basename in allowlist:
self.log.debug("%s is found in allowlist, so keeping it: %s", basename, full_path)
else:
Expand Down Expand Up @@ -665,6 +680,7 @@ def inject_gpu_property(ec):
}

PRE_CONFIGURE_HOOKS = {
'GROMACS': pre_configure_hook_gromacs,
'libfabric': pre_configure_hook_libfabric_disable_psm3_x86_64_generic,
'MetaBAT': pre_configure_hook_metabat_filtered_zlib_dep,
'OpenBLAS': pre_configure_hook_openblas_optarch_generic,
Expand Down
3 changes: 3 additions & 0 deletions eessi-2023.06-known-issues.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
- FFTW.MPI-3.3.10-gompi-2023b:
- issue: https://github.com/EESSI/software-layer/issues/325
- info: "Flaky FFTW tests, random failures"
- GROMACS-2024.1-foss-2023b:
- issue: https://github.com/EESSI/software-layer/issues/557
- info: "SVE disabled due to known bug which causes test failures"
- Highway-1.0.3-GCCcore-12.2.0.eb:
- issue: https://github.com/EESSI/software-layer/issues/469
- info: "failing SVE test due to wrong expected value"
Expand Down
7 changes: 4 additions & 3 deletions init/arch_specs/eessi_arch_x86.spec
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# x86_64 CPU architecture specifications
# Software path in EESSI | Vendor ID | List of defining CPU features
"x86_64/intel/haswell" "GenuineIntel" "avx2 fma" # Intel Haswell, Broadwell
"x86_64/intel/haswell" "GenuineIntel" "avx2 fma" # Intel Haswell, Broadwell
"x86_64/intel/skylake_avx512" "GenuineIntel" "avx2 fma avx512f avx512bw avx512cd avx512dq avx512vl" # Intel Skylake, Cascade Lake
"x86_64/amd/zen2" "AuthenticAMD" "avx2 fma" # AMD Rome
"x86_64/amd/zen3" "AuthenticAMD" "avx2 fma vaes" # AMD Milan, Milan-X
"x86_64/amd/zen2" "AuthenticAMD" "avx2 fma" # AMD Rome
"x86_64/amd/zen3" "AuthenticAMD" "avx2 fma vaes" # AMD Milan, Milan-X
"x86_64/amd/zen4" "AuthenticAMD" "avx2 fma vaes avx512f avx512ifma" # AMD Genoa, Genoa-X
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
x86_64/amd/zen4:x86_64/amd/zen3:x86_64/amd/zen2:x86_64/generic
27 changes: 27 additions & 0 deletions tests/archdetect/x86_64/amd/zen4/Azure-Alma8-9V33X.cpuinfo
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
processor : 0
vendor_id : AuthenticAMD
cpu family : 25
model : 17
model name : AMD EPYC 9V33X 96-Core Processor
stepping : 1
microcode : 0xffffffff
cpu MHz : 3705.853
cache size : 1024 KB
physical id : 0
siblings : 88
core id : 0
cpu cores : 88
apicid : 0
initial apicid : 0
fpu : yes
fpu_exception : yes
cpuid level : 13
wp : yes
flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl tsc_reliable nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm cmp_legacy svm cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw topoext perfctr_core invpcid_single vmmcall fsgsbase bmi1 avx2 smep bmi2 erms invpcid avx512f avx512dq rdseed adx smap avx512ifma clflushopt clwb avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves avx512_bf16 clzero xsaveerptr arat npt nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold v_vmsave_vmload avx512vbmi umip avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq rdpid fsrm
bugs : sysret_ss_attrs null_seg spectre_v1 spectre_v2 spec_store_bypass
bogomips : 5100.08
TLB size : 3584 4K pages
clflush size : 64
cache_alignment : 64
address sizes : 48 bits physical, 48 bits virtual
power management:
1 change: 1 addition & 0 deletions tests/archdetect/x86_64/amd/zen4/Azure-Alma8-9V33X.output
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
x86_64/amd/zen4
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
x86_64/amd/zen4:x86_64/amd/zen3:x86_64/amd/zen2:x86_64/generic
27 changes: 27 additions & 0 deletions tests/archdetect/x86_64/amd/zen4/Shinx-RHEL8-9654.cpuinfo
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
processor : 0
vendor_id : AuthenticAMD
cpu family : 25
model : 17
model name : AMD EPYC 9654 96-Core Processor
stepping : 1
microcode : 0xa10113e
cpu MHz : 3699.993
cache size : 1024 KB
physical id : 0
siblings : 96
core id : 0
cpu cores : 96
apicid : 0
initial apicid : 0
fpu : yes
fpu_exception : yes
cpuid level : 16
wp : yes
flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 invpcid_single hw_pstate ssbd mba perfmon_v2 ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx avx512ifma clflushopt clwb avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local avx512_bf16 clzero irperf xsaveerptr wbnoinvd amd_ppin cppc arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl avx512vbmi umip pku ospke avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq la57 rdpid overflow_recov succor smca fsrm flush_l1d
bugs : sysret_ss_attrs spectre_v1 spectre_v2 spec_store_bypass
bogomips : 4799.99
TLB size : 3584 4K pages
clflush size : 64
cache_alignment : 64
address sizes : 52 bits physical, 57 bits virtual
power management: ts ttp tm hwpstate cpb eff_freq_ro [13] [14]
1 change: 1 addition & 0 deletions tests/archdetect/x86_64/amd/zen4/Shinx-RHEL8-9654.output
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
x86_64/amd/zen4

0 comments on commit 96e7bb6

Please sign in to comment.