Skip to content

Commit

Permalink
Gaea C6 support for UFSWM (#2448)
Browse files Browse the repository at this point in the history
* UFSWM - Gaea C6 Support

---------

Co-authored-by: JONG KIM <jong.kim@noaa.gov>
Co-authored-by: Ratko Vasic <ratko.vasic@noaa.gov>
  • Loading branch information
3 people authored Dec 18, 2024
1 parent 6ec6b45 commit e119370
Show file tree
Hide file tree
Showing 47 changed files with 1,372 additions and 785 deletions.
File renamed without changes.
File renamed without changes.
4 changes: 4 additions & 0 deletions cmake/configure_gaeac6.intel.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
set(PARALLEL_NETCDF ON CACHE BOOL "Enable parallel NetCDF" FORCE)
set(MOM6_Extra_FORTRAN_FLAGS "-xsse2")
set(HYCOM_Extra_FORTRAN_FLAGS "-xSSE4.2")
set(HYCOM_Extra_C_FLAGS "-xSSE4.2")
2 changes: 2 additions & 0 deletions cmake/configure_gaeac6.intelllvm.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
set(PARALLEL_NETCDF ON CACHE BOOL "Enable parallel NetCDF" FORCE)
set(AVX2 OFF CACHE BOOL "Enable AVX2 instruction set" FORCE)
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ help([[
on the NOAA RDHPC machine Gaea C5 using Intel-2023.1.0.
]])

whatis([===[Loads libraries needed for building the UFS Weather Model on Gaea ]===])
whatis([===[Loads libraries needed for building the UFS Weather Model on Gaea C5 ]===])

prepend_path("MODULEPATH", "/ncrc/proj/epic/spack-stack/spack-stack-1.6.0/envs/fms-2024.01/install/modulefiles/Core")

Expand All @@ -30,4 +30,4 @@ unload("cray-libsci")
setenv("CC","cc")
setenv("CXX","CC")
setenv("FC","ftn")
setenv("CMAKE_Platform","gaea.intel")
setenv("CMAKE_Platform","gaeac5.intel")
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ help([[
on the NOAA RDHPC machine Gaea C5 using Intel-2023.1.0.
]])

whatis([===[Loads libraries needed for building the UFS Weather Model on Gaea ]===])
whatis([===[Loads libraries needed for building the UFS Weather Model on Gaea C5]===])

prepend_path("MODULEPATH", "/ncrc/proj/epic/spack-stack/spack-stack-1.6.0/envs/fms-2024.01/install/modulefiles/Core")

Expand Down Expand Up @@ -36,4 +36,4 @@ setenv("I_MPI_F90", "ifx")
setenv("CC","cc")
setenv("CXX","CC")
setenv("FC","ftn")
setenv("CMAKE_Platform","gaea.intelllvm")
setenv("CMAKE_Platform","gaeac5.intelllvm")
33 changes: 33 additions & 0 deletions modulefiles/ufs_gaeac6.intel.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
help([[
This module loads libraries required for building and running UFS Weather Model
on the NOAA RDHPC machine Gaea C6 using Intel-2023.2.0.
]])

whatis([===[Loads libraries needed for building the UFS Weather Model on Gaea C6]===])

prepend_path("MODULEPATH", "/ncrc/proj/epic/spack-stack/c6/spack-stack-1.6.0/envs/fms-2024.01/install/modulefiles/Core")

stack_intel_ver=os.getenv("stack_intel_ver") or "2023.2.0"
load(pathJoin("stack-intel", stack_intel_ver))

stack_cray_mpich_ver=os.getenv("stack_cray_mpich_ver") or "8.1.29"
load(pathJoin("stack-cray-mpich", stack_cray_mpich_ver))

stack_python_ver=os.getenv("stack_python_ver") or "3.10.13"
load(pathJoin("stack-python", stack_python_ver))

cmake_ver=os.getenv("cmake_ver") or "3.23.1"
load(pathJoin("cmake", cmake_ver))

load("ufs_common")

nccmp_ver=os.getenv("nccmp_ver") or "1.9.0.1"
load(pathJoin("nccmp", nccmp_ver))

unload("darshan-runtime")
unload("cray-libsci")

setenv("CC","cc")
setenv("CXX","CC")
setenv("FC","ftn")
setenv("CMAKE_Platform","gaeac6.intel")
40 changes: 40 additions & 0 deletions modulefiles/ufs_gaeac6.intelllvm.lua
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
help([[
This module loads libraries required for building and running UFS Weather Model
on the NOAA RDHPC machine Gaea C6 using Intel-2023.2.0.
]])

whatis([===[Loads libraries needed for building the UFS Weather Model on Gaea C6]===])

prepend_path("MODULEPATH", "/ncrc/proj/epic/spack-stack/c6/spack-stack-1.6.0/envs/fms-2024.01/install/modulefiles/Core")

stack_intel_ver=os.getenv("stack_intel_ver") or "2023.2.0"
load(pathJoin("stack-intel", stack_intel_ver))

stack_cray_mpich_ver=os.getenv("stack_cray_mpich_ver") or "8.1.29"
load(pathJoin("stack-cray-mpich", stack_cray_mpich_ver))

stack_python_ver=os.getenv("stack_python_ver") or "3.10.13"
load(pathJoin("stack-python", stack_python_ver))

cmake_ver=os.getenv("cmake_ver") or "3.23.1"
load(pathJoin("cmake", cmake_ver))

load("ufs_common")

nccmp_ver=os.getenv("nccmp_ver") or "1.9.0.1"
load(pathJoin("nccmp", nccmp_ver))

unload("darshan-runtime")
unload("cray-libsci")

unload("intel-classic/2023.2.0")
load("intel-oneapi/2023.2.0")

setenv("I_MPI_CC", "icx")
setenv("I_MPI_CXX", "icpx")
setenv("I_MPI_F90", "ifx")

setenv("CC","cc")
setenv("CXX","CC")
setenv("FC","ftn")
setenv("CMAKE_Platform","gaeac6.intelllvm")
16 changes: 4 additions & 12 deletions tests/compile.sh
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,10 @@ case ${MACHINE_ID} in
;;
*)
# Activate lua environment for gaea c5
if [[ ${MACHINE_ID} == gaea ]]; then
if [[ ${MACHINE_ID} == gaeac5 ]]; then
module reset
fi
if [[ ${MACHINE_ID} == gaeac6 ]]; then
module reset
elif [[ ${MACHINE_ID} == hercules ]]; then
module purge
Expand Down Expand Up @@ -97,17 +100,6 @@ SUITES=$(grep -Po "\-DCCPP_SUITES=\K[^ ]*" <<< "${MAKE_OPT}")
export SUITES
set -ex

# Valid applications
if [[ ${MACHINE_ID} != gaea ]] || [[ ${RT_COMPILER} != intelllvm ]]; then # skip MOM6SOLO on gaea with intelllvm
if [[ "${MAKE_OPT}" == *"-DAPP=S2S"* ]]; then
CMAKE_FLAGS+=" -DMOM6SOLO=ON"
fi

if [[ "${MAKE_OPT}" == *"-DAPP=NG-GODAS"* ]]; then
CMAKE_FLAGS+=" -DMOM6SOLO=ON"
fi
fi

CMAKE_FLAGS=$(set -e; trim "${CMAKE_FLAGS}")
echo "CMAKE_FLAGS = ${CMAKE_FLAGS}"

Expand Down
22 changes: 21 additions & 1 deletion tests/default_vars.sh
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ elif [[ ${MACHINE_ID} = s4 ]]; then
export ICE_tasks_cpl_bmrk=48
export WAV_tasks_cpl_bmrk=80

elif [[ ${MACHINE_ID} = gaea ]]; then
elif [[ ${MACHINE_ID} = gaeac5 ]]; then

export TPN=128

Expand All @@ -321,6 +321,26 @@ elif [[ ${MACHINE_ID} = gaea ]]; then
export WPG_cpl_atmw_gdas=24
export WAV_tasks_atmw_gdas=264

elif [[ ${MACHINE_ID} = gaeac6 ]]; then

export TPN=192

export INPES_dflt=3
export JNPES_dflt=8
export INPES_thrd=3
export JNPES_thrd=4
export INPES_c384=6
export JNPES_c384=8
export THRD_c384=1
export INPES_c768=8
export JNPES_c768=16
export THRD_c768=2

export THRD_cpl_atmw_gdas=3
export INPES_cpl_atmw_gdas=6
export JNPES_cpl_atmw_gdas=8
export WPG_cpl_atmw_gdas=24
export WAV_tasks_atmw_gdas=264
elif [[ ${MACHINE_ID} = derecho ]]; then

export TPN=128
Expand Down
15 changes: 10 additions & 5 deletions tests/detect_machine.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ case $(hostname -f) in
dlogin0[1-9].dogwood.wcoss2.ncep.noaa.gov) MACHINE_ID=wcoss2 ;; ### dogwood01-9
dlogin10.dogwood.wcoss2.ncep.noaa.gov) MACHINE_ID=wcoss2 ;; ### dogwood10

gaea5[1-8]) MACHINE_ID=gaea ;; ### gaea51-58
gaea5[1-8].ncrc.gov) MACHINE_ID=gaea ;; ### gaea51-58
gaea5[1-8]) MACHINE_ID=gaeac5 ;; ### gaea51-58
gaea5[1-8].ncrc.gov) MACHINE_ID=gaeac5 ;; ### gaea51-58
gaea6[1-8]) MACHINE_ID=gaeac6 ;; ### gaea61-68
gaea6[1-8].ncrc.gov) MACHINE_ID=gaeac6 ;; ### gaea61-68

hfe0[1-9]) MACHINE_ID=hera ;; ### hera01-09
hfe1[0-2]) MACHINE_ID=hera ;; ### hera10-12
Expand Down Expand Up @@ -94,9 +96,12 @@ elif [[ -d /work ]]; then
else
MACHINE_ID=orion
fi
elif [[ -d /gpfs && -d /ncrc ]]; then
# We are on GAEA.
MACHINE_ID=gaea
elif [[ -d /gpfs/f5 && -d /ncrc ]]; then
# We are on GAEA C5.
MACHINE_ID=gaeac5
elif [[ -d /gpfs/f6 && -d /ncrc ]]; then
# We are on GAEA C6.
MACHINE_ID=gaeac6
elif [[ -d /data/prod ]]; then
# We are on SSEC's S4
MACHINE_ID=s4
Expand Down
File renamed without changes.
22 changes: 22 additions & 0 deletions tests/fv3_conf/compile_slurm.IN_gaeac6
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash -l
#SBATCH -e err
#SBATCH -o out
#SBATCH --account=@[ACCNR]
##SBATCH --qos=@[QUEUE]
#SBATCH --clusters=es
#SBATCH --partition=eslogin_c6
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=8
#SBATCH --mem-per-cpu=4G
#SBATCH --time=180
#SBATCH --job-name="@[JBNME]"

set -eux

echo -n " $( date +%s )," > job_timestamp.txt
echo "Compile started: " `date`

@[PATHRT]/compile.sh @[MACHINE_ID] "@[MAKE_OPT]" @[COMPILE_ID] @[RT_COMPILER]

echo "Compile ended: " `date`
echo -n " $( date +%s )," >> job_timestamp.txt
File renamed without changes.
49 changes: 49 additions & 0 deletions tests/fv3_conf/fv3_slurm.IN_gaeac6
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#!/bin/bash -l
#SBATCH -e err
#SBATCH -o out
#SBATCH --job-name="@[JBNME]"
#SBATCH --account=@[ACCNR]
#SBATCH --qos=@[QUEUE]
#SBATCH --clusters=c6
#SBATCH --partition=batch
#SBATCH --nodes=@[NODES]
#SBATCH --ntasks-per-node=@[TPN]
#SBATCH --time=@[WLCLK]

set -eux
echo -n " $( date +%s )," > job_timestamp.txt

set +x
MACHINE_ID=gaeac6
source ./module-setup.sh
module use --prepend $PWD/modulefiles
module load modules.fv3
module list
set -x

echo "Model started: " `date`

export OMP_NUM_THREADS=@[THRD]
export OMP_STACKSIZE=1024M
export NC_BLKSZ=1M
export ESMF_RUNTIME_PROFILE=ON
export ESMF_RUNTIME_PROFILE_OUTPUT="SUMMARY"
export FI_VERBS_PREFER_XRC=0
export FI_CXI_RX_MATCH_MODE=hybrid
export COMEX_EAGER_THRESHOLD=65536
export FI_CXI_RDZV_THRESHOLD=65536

# Avoid job errors because of filesystem synchronization delays
sync && sleep 1

# This "if" block is part of the rt.sh self-tests in error-test.conf. It emulates the model failing to run.
if [ "${JOB_SHOULD_FAIL:-NO}" = WHEN_RUNNING ] ; then
echo "The job should abort now, with exit status 1." 1>&2
echo "If error checking is working, the metascheduler should mark the job as failed." 1>&2
false
fi

srun --label -n @[TASKS] ./fv3.exe

echo "Model ended: " `date`
echo -n " $( date +%s )," >> job_timestamp.txt
Loading

0 comments on commit e119370

Please sign in to comment.