From b9070cae1d28834799b7977f0f914c4bb83ac1e9 Mon Sep 17 00:00:00 2001 From: <> Date: Fri, 27 Oct 2023 20:20:30 +0000 Subject: [PATCH] Deployed 56b13d1 with MkDocs version: 1.5.3 --- .nojekyll | 0 404.html | 4413 ++++++++ Documentation/Accounts/index.html | 4428 ++++++++ .../Applications/Matlab/batch/index.html | 4622 +++++++++ Documentation/Applications/Matlab/index.html | 4558 +++++++++ .../Matlab/interactive/index.html | 4616 +++++++++ .../Applications/Matlab/parallel/index.html | 4830 +++++++++ .../Applications/Matlab/versions/index.html | 4602 +++++++++ Documentation/Applications/Plexos/index.html | 4573 +++++++++ .../Applications/Plexos/run_plexos/index.html | 4774 +++++++++ .../Plexos/setup_plexos/index.html | 4776 +++++++++ .../applications_template/index.html | 4606 +++++++++ .../Templates/berkeleygw_example/index.html | 4691 +++++++++ Documentation/Applications/ansys/index.html | 4681 +++++++++ Documentation/Applications/comsol/index.html | 4690 +++++++++ Documentation/Applications/fenics/index.html | 4700 +++++++++ Documentation/Applications/gams/index.html | 4734 +++++++++ .../Applications/gaussian/index.html | 4734 +++++++++ Documentation/Applications/gurobi/index.html | 4605 +++++++++ Documentation/Applications/lammps/index.html | 4611 +++++++++ Documentation/Applications/namd/index.html | 4428 ++++++++ Documentation/Applications/qchem/index.html | 4607 +++++++++ Documentation/Applications/starccm/index.html | 4565 +++++++++ Documentation/Applications/vasp/index.html | 5310 ++++++++++ Documentation/Applications/wrf/index.html | 5270 ++++++++++ .../File-Permissions/index.html | 4679 +++++++++ .../Development/Build_Tools/cmake/index.html | 4576 +++++++++ .../Development/Build_Tools/git/index.html | 4798 +++++++++ .../Development/Build_Tools/spack/index.html | 5254 ++++++++++ .../Development/Compilers/aocc/index.html | 4428 ++++++++ .../Development/Compilers/cray/index.html | 4428 ++++++++ .../Development/Compilers/gnu/index.html | 4428 ++++++++ .../Development/Compilers/intel/index.html | 4428 ++++++++ .../Development/Compilers/nvhpc/index.html | 4428 ++++++++ .../Compilers/rosetta_stone/index.html | 6872 +++++++++++++ .../Containers/apptainer/index.html | 5013 +++++++++ .../Development/Containers/index.html | 4571 +++++++++ .../Containers/registries/index.html | 4875 +++++++++ .../Containers/singularity/index.html | 4640 +++++++++ .../Debug_Tools/ARM/ddt/index.html | 4428 ++++++++ .../Development/Debug_Tools/ARM/index.html | 4433 ++++++++ .../Development/Debug_Tools/gdb/index.html | 4625 +++++++++ .../Development/Debug_Tools/index.html | 4433 ++++++++ Documentation/Development/Jupyter/index.html | 4969 +++++++++ .../Development/Jupyter/jupyterhub/index.html | 4610 +++++++++ .../Languages/Fortran/f90_advanced/index.html | 8981 +++++++++++++++++ .../Development/Languages/Fortran/index.html | 4738 +++++++++ .../Development/Languages/Julia/index.html | 4611 +++++++++ .../Julia/julia_ccall_pycall/index.html | 4776 +++++++++ .../Languages/Julia/julia_install/index.html | 4794 +++++++++ .../Languages/Julia/julia_parallel/index.html | 5351 ++++++++++ .../Languages/Julia/julia_tour/index.html | 5690 +++++++++++ .../Languages/Python/dask/index.html | 4765 +++++++++ .../Development/Languages/Python/index.html | 4535 +++++++++ .../Development/Languages/bash/index.html | 4725 +++++++++ .../Development/Languages/c++/index.html | 4667 +++++++++ .../Development/Languages/idl/index.html | 4428 ++++++++ .../Development/Languages/r/index.html | 5000 +++++++++ 
.../Development/Libraries/fftw/index.html | 4509 +++++++++ .../Development/Libraries/hdf5/index.html | 4510 +++++++++ .../Development/Libraries/howto/index.html | 4823 +++++++++ .../Development/Libraries/hsl/index.html | 4890 +++++++++ .../Development/Libraries/index.html | 4510 +++++++++ .../Development/Libraries/ipopt/index.html | 4702 +++++++++ .../Development/Libraries/lapack/index.html | 4512 +++++++++ .../Development/Libraries/libsci/index.html | 4521 +++++++++ .../Development/Libraries/mkl/index.html | 4611 +++++++++ .../Development/Libraries/netcdf/index.html | 4428 ++++++++ .../Development/Libraries/petsc/index.html | 4522 +++++++++ .../Performance_Tools/Intel/index.html | 4614 +++++++++ .../Performance_Tools/Intel/trace/index.html | 4428 ++++++++ .../Performance_Tools/Intel/vtune/index.html | 4428 ++++++++ .../Performance_Tools/Linaro-Forge/index.html | 4520 +++++++++ .../Linaro-Forge/map/index.html | 4683 +++++++++ .../Linaro-Forge/performance_rep/index.html | 4602 +++++++++ .../Performance_Tools/craypat/index.html | 4428 ++++++++ .../Performance_Tools/hpctoolkit/index.html | 4428 ++++++++ .../Development/Performance_Tools/index.html | 4428 ++++++++ .../Development/Programming_Models/index.html | 4433 ++++++++ .../Programming_Models/mpi/index.html | 5088 ++++++++++ .../Programming_Models/openacc/index.html | 4428 ++++++++ .../Programming_Models/openmp/index.html | 4428 ++++++++ .../Building_Packages/acquire/index.html | 4534 +++++++++ .../config_make_install/index.html | 4570 +++++++++ .../Environment/Building_Packages/index.html | 4554 +++++++++ .../Building_Packages/modules/index.html | 4560 +++++++++ .../Customization/conda/index.html | 4961 +++++++++ .../Customization/containers/index.html | 4428 ++++++++ .../Customization/images/conda_logo.png | Bin 0 -> 127639 bytes .../Customization/spack/index.html | 4428 ++++++++ Documentation/Environment/lmod/index.html | 4428 ++++++++ Documentation/Environment/shell/index.html | 4999 +++++++++ .../Containerized_TensorFlow/index.html | 4613 +++++++++ .../images/ppo_rew_comparison.png | Bin 0 -> 542518 bytes .../images/tensorboard-initpag-2.png | Bin 0 -> 188903 bytes .../images/tensorboard-initpage.png | Bin 0 -> 211351 bytes .../Reinforcement_Learning/index.html | 5140 ++++++++++ .../Machine_Learning/TensorBoard/index.html | 4550 +++++++++ Documentation/Machine_Learning/index.html | 4601 +++++++++ .../Transferring_Files/FileZilla/index.html | 4603 +++++++++ .../file-transfers/index.html | 4542 +++++++++ .../Transferring_Files/globus/index.html | 4736 +++++++++ .../Transferring_Files/index.html | 4622 +++++++++ .../Transferring_Files/winscp/index.html | 4601 +++++++++ .../Managing_Data/file_permissions/index.html | 4876 +++++++++ Documentation/Managing_Data/mss/index.html | 4644 +++++++++ Documentation/Slurm/batch_jobs/index.html | 4987 +++++++++ Documentation/Slurm/index.html | 4529 +++++++++ .../Slurm/interactive_jobs/index.html | 4675 +++++++++ .../Slurm/monitor_and_control/index.html | 4854 +++++++++ .../Slurm/multiple_sub_jobs/index.html | 4701 +++++++++ .../Running/debugging_at_scale/index.html | 4555 +++++++++ .../Running/eagle_job_priorities/index.html | 4660 +++++++++ .../Eagle/Running/eagle_partitions/index.html | 4644 +++++++++ .../Eagle/Running/example_sbatch/index.html | 4598 +++++++++ .../Systems/Eagle/Running/index.html | 4554 +++++++++ .../Running/monitor_and_control/index.html | 4626 +++++++++ Documentation/Systems/Eagle/index.html | 4561 +++++++++ .../Eagle/lustrebestpractices/index.html | 4747 +++++++++ 
.../Systems/Eagle/modules/index.html | 4621 +++++++++ .../Environments/Toolchains/intel/index.html | 4946 +++++++++ .../Systems/Kestrel/Environments/index.html | 4916 +++++++++ .../Kestrel/Environments/tutorial/index.html | 4971 +++++++++ .../Systems/Kestrel/filesystems/index.html | 4708 +++++++++ .../getting_started_kestrel/index.html | 4910 +++++++++ Documentation/Systems/Kestrel/index.html | 4587 +++++++++ .../Systems/Kestrel/modules/index.html | 4428 ++++++++ .../Systems/Kestrel/running/index.html | 4691 +++++++++ .../Systems/Swift/applications/index.html | 4468 ++++++++ .../Systems/Swift/filesystems/index.html | 4677 +++++++++ Documentation/Systems/Swift/index.html | 4570 +++++++++ Documentation/Systems/Swift/known/index.html | 4428 ++++++++ .../Systems/Swift/modules/index.html | 4578 +++++++++ .../Systems/Swift/running/index.html | 5280 ++++++++++ .../Systems/Vermilion/applications/index.html | 4441 ++++++++ .../Systems/Vermilion/filesystems/index.html | 4500 +++++++++ .../Systems/Vermilion/help/index.html | 4467 ++++++++ Documentation/Systems/Vermilion/index.html | 4576 +++++++++ .../Systems/Vermilion/known/index.html | 4428 ++++++++ .../Systems/Vermilion/modules/index.html | 4633 +++++++++ .../Systems/Vermilion/running/index.html | 5098 ++++++++++ Documentation/Systems/index.html | 4592 +++++++++ Documentation/Viz_Analytics/avizo/index.html | 4634 +++++++++ Documentation/Viz_Analytics/index.html | 4578 +++++++++ .../Viz_Analytics/paraview/index.html | 4928 +++++++++ .../Viz_Analytics/virtualgl_fastx/index.html | 4852 +++++++++ Documentation/Viz_Analytics/visit/index.html | 4600 +++++++++ Documentation/getting_started/index.html | 4679 +++++++++ Documentation/help/index.html | 4617 +++++++++ Documentation/index.html | 4544 +++++++++ Friendly_User/Applications/vasp/index.html | 4543 +++++++++ .../Environments/Toolchains/intel/index.html | 4701 +++++++++ Friendly_User/Environments/index.html | 4850 +++++++++ .../Environments/tutorial/index.html | 4797 +++++++++ Friendly_User/Getting_Started/index.html | 4505 +++++++++ .../Getting_Started/known_issues/index.html | 4428 ++++++++ _includes/analytics.html | 8 + _includes/calendar.html | 1 + ...placement-mate-interface-step5-offsite.png | Bin 0 -> 17405 bytes ...agle-dav-ssh-login-fastx-cleaned-step3.png | Bin 0 -> 22170 bytes .../images/FastX/eagle-dav-step4-offsite.png | Bin 0 -> 12212 bytes .../images/FastX/fastx-installer-image-1.png | Bin 0 -> 26585 bytes .../FastX/xfce-interface-cleaned-step5.png | Bin 0 -> 12285 bytes .../Julia-Calling-Python-C-Tutorial_29_0.svg | 314 + .../Julia-Calling-Python-C-Tutorial_30_0.svg | 202 + assets/images/Profiling/MAP-1.png | Bin 0 -> 530377 bytes assets/images/Profiling/MAP-2.png | Bin 0 -> 842781 bytes assets/images/Profiling/MAP-3.png | Bin 0 -> 505364 bytes assets/images/Profiling/MAP-4.png | Bin 0 -> 762741 bytes assets/images/Profiling/MAP-5.png | Bin 0 -> 771547 bytes assets/images/Profiling/MAP-6.png | Bin 0 -> 863017 bytes assets/images/Profiling/MAP-7.png | Bin 0 -> 530377 bytes assets/images/Profiling/PR-1.png | Bin 0 -> 260423 bytes assets/images/Profiling/PR-2.png | Bin 0 -> 486877 bytes assets/images/VisIT/eagle-14.png | Bin 0 -> 21237 bytes assets/images/VisIT/eagle-5a.png | Bin 0 -> 70766 bytes assets/images/VisIT/eagle-5b.png | Bin 0 -> 42197 bytes assets/images/VisIT/eagle-6.png | Bin 0 -> 25501 bytes assets/images/VisIT/eagle-8.png | Bin 0 -> 94448 bytes assets/images/VisIT/eagle-9.png | Bin 0 -> 26881 bytes .../VisIT/eagle-software-visit-step7.png | Bin 0 -> 38646 bytes 
assets/images/favicon.png | Bin 0 -> 15102 bytes assets/images/gpu_ai_benchmark.png | Bin 0 -> 249972 bytes assets/images/output_4_0.png | Bin 0 -> 14781 bytes assets/javascripts/bundle.aecac24b.min.js | 29 + assets/javascripts/bundle.aecac24b.min.js.map | 7 + assets/javascripts/lunr/min/lunr.ar.min.js | 1 + assets/javascripts/lunr/min/lunr.da.min.js | 18 + assets/javascripts/lunr/min/lunr.de.min.js | 18 + assets/javascripts/lunr/min/lunr.du.min.js | 18 + assets/javascripts/lunr/min/lunr.el.min.js | 1 + assets/javascripts/lunr/min/lunr.es.min.js | 18 + assets/javascripts/lunr/min/lunr.fi.min.js | 18 + assets/javascripts/lunr/min/lunr.fr.min.js | 18 + assets/javascripts/lunr/min/lunr.he.min.js | 1 + assets/javascripts/lunr/min/lunr.hi.min.js | 1 + assets/javascripts/lunr/min/lunr.hu.min.js | 18 + assets/javascripts/lunr/min/lunr.hy.min.js | 1 + assets/javascripts/lunr/min/lunr.it.min.js | 18 + assets/javascripts/lunr/min/lunr.ja.min.js | 1 + assets/javascripts/lunr/min/lunr.jp.min.js | 1 + assets/javascripts/lunr/min/lunr.kn.min.js | 1 + assets/javascripts/lunr/min/lunr.ko.min.js | 1 + assets/javascripts/lunr/min/lunr.multi.min.js | 1 + assets/javascripts/lunr/min/lunr.nl.min.js | 18 + assets/javascripts/lunr/min/lunr.no.min.js | 18 + assets/javascripts/lunr/min/lunr.pt.min.js | 18 + assets/javascripts/lunr/min/lunr.ro.min.js | 18 + assets/javascripts/lunr/min/lunr.ru.min.js | 18 + assets/javascripts/lunr/min/lunr.sa.min.js | 1 + .../lunr/min/lunr.stemmer.support.min.js | 1 + assets/javascripts/lunr/min/lunr.sv.min.js | 18 + assets/javascripts/lunr/min/lunr.ta.min.js | 1 + assets/javascripts/lunr/min/lunr.te.min.js | 1 + assets/javascripts/lunr/min/lunr.th.min.js | 1 + assets/javascripts/lunr/min/lunr.tr.min.js | 18 + assets/javascripts/lunr/min/lunr.vi.min.js | 1 + assets/javascripts/lunr/min/lunr.zh.min.js | 1 + assets/javascripts/lunr/tinyseg.js | 206 + assets/javascripts/lunr/wordcut.js | 6708 ++++++++++++ .../workers/search.f886a092.min.js | 42 + .../workers/search.f886a092.min.js.map | 7 + assets/stylesheets/main.4b4a2bd9.min.css | 1 + assets/stylesheets/main.4b4a2bd9.min.css.map | 1 + assets/stylesheets/palette.356b1318.min.css | 1 + .../stylesheets/palette.356b1318.min.css.map | 1 + blog/2020-12-01-numba/index.html | 4506 +++++++++ blog/2021-05-06-tf/index.html | 4507 +++++++++ blog/2021-06-18-srun/index.html | 5163 ++++++++++ .../index.html | 4512 +++++++++ .../index.html | 4519 +++++++++ blog/2022-10-04-python2to3/index.html | 4681 +++++++++ blog/2022-12-19-windows_ssh/index.html | 4503 +++++++++ .../index.html | 4522 +++++++++ index.html | 4612 +++++++++ search/search_index.json | 1 + sitemap.xml | 3 + sitemap.xml.gz | Bin 0 -> 127 bytes stylesheets/extra.css | 36 + 239 files changed, 759706 insertions(+) create mode 100644 .nojekyll create mode 100644 404.html create mode 100644 Documentation/Accounts/index.html create mode 100644 Documentation/Applications/Matlab/batch/index.html create mode 100644 Documentation/Applications/Matlab/index.html create mode 100644 Documentation/Applications/Matlab/interactive/index.html create mode 100644 Documentation/Applications/Matlab/parallel/index.html create mode 100644 Documentation/Applications/Matlab/versions/index.html create mode 100644 Documentation/Applications/Plexos/index.html create mode 100644 Documentation/Applications/Plexos/run_plexos/index.html create mode 100644 Documentation/Applications/Plexos/setup_plexos/index.html create mode 100644 Documentation/Applications/Templates/applications_template/index.html create mode 100644 
Documentation/Applications/Templates/berkeleygw_example/index.html create mode 100644 Documentation/Applications/ansys/index.html create mode 100644 Documentation/Applications/comsol/index.html create mode 100644 Documentation/Applications/fenics/index.html create mode 100644 Documentation/Applications/gams/index.html create mode 100644 Documentation/Applications/gaussian/index.html create mode 100644 Documentation/Applications/gurobi/index.html create mode 100644 Documentation/Applications/lammps/index.html create mode 100644 Documentation/Applications/namd/index.html create mode 100644 Documentation/Applications/qchem/index.html create mode 100644 Documentation/Applications/starccm/index.html create mode 100644 Documentation/Applications/vasp/index.html create mode 100644 Documentation/Applications/wrf/index.html create mode 100644 Documentation/Data-and-File-Systems/File-Permissions/index.html create mode 100644 Documentation/Development/Build_Tools/cmake/index.html create mode 100644 Documentation/Development/Build_Tools/git/index.html create mode 100644 Documentation/Development/Build_Tools/spack/index.html create mode 100644 Documentation/Development/Compilers/aocc/index.html create mode 100644 Documentation/Development/Compilers/cray/index.html create mode 100644 Documentation/Development/Compilers/gnu/index.html create mode 100644 Documentation/Development/Compilers/intel/index.html create mode 100644 Documentation/Development/Compilers/nvhpc/index.html create mode 100644 Documentation/Development/Compilers/rosetta_stone/index.html create mode 100644 Documentation/Development/Containers/apptainer/index.html create mode 100644 Documentation/Development/Containers/index.html create mode 100644 Documentation/Development/Containers/registries/index.html create mode 100644 Documentation/Development/Containers/singularity/index.html create mode 100644 Documentation/Development/Debug_Tools/ARM/ddt/index.html create mode 100644 Documentation/Development/Debug_Tools/ARM/index.html create mode 100644 Documentation/Development/Debug_Tools/gdb/index.html create mode 100644 Documentation/Development/Debug_Tools/index.html create mode 100644 Documentation/Development/Jupyter/index.html create mode 100644 Documentation/Development/Jupyter/jupyterhub/index.html create mode 100644 Documentation/Development/Languages/Fortran/f90_advanced/index.html create mode 100644 Documentation/Development/Languages/Fortran/index.html create mode 100644 Documentation/Development/Languages/Julia/index.html create mode 100644 Documentation/Development/Languages/Julia/julia_ccall_pycall/index.html create mode 100644 Documentation/Development/Languages/Julia/julia_install/index.html create mode 100644 Documentation/Development/Languages/Julia/julia_parallel/index.html create mode 100644 Documentation/Development/Languages/Julia/julia_tour/index.html create mode 100644 Documentation/Development/Languages/Python/dask/index.html create mode 100644 Documentation/Development/Languages/Python/index.html create mode 100644 Documentation/Development/Languages/bash/index.html create mode 100644 Documentation/Development/Languages/c++/index.html create mode 100644 Documentation/Development/Languages/idl/index.html create mode 100644 Documentation/Development/Languages/r/index.html create mode 100644 Documentation/Development/Libraries/fftw/index.html create mode 100644 Documentation/Development/Libraries/hdf5/index.html create mode 100644 Documentation/Development/Libraries/howto/index.html create mode 100644 
Documentation/Development/Libraries/hsl/index.html create mode 100644 Documentation/Development/Libraries/index.html create mode 100644 Documentation/Development/Libraries/ipopt/index.html create mode 100644 Documentation/Development/Libraries/lapack/index.html create mode 100644 Documentation/Development/Libraries/libsci/index.html create mode 100644 Documentation/Development/Libraries/mkl/index.html create mode 100644 Documentation/Development/Libraries/netcdf/index.html create mode 100644 Documentation/Development/Libraries/petsc/index.html create mode 100644 Documentation/Development/Performance_Tools/Intel/index.html create mode 100644 Documentation/Development/Performance_Tools/Intel/trace/index.html create mode 100644 Documentation/Development/Performance_Tools/Intel/vtune/index.html create mode 100644 Documentation/Development/Performance_Tools/Linaro-Forge/index.html create mode 100644 Documentation/Development/Performance_Tools/Linaro-Forge/map/index.html create mode 100644 Documentation/Development/Performance_Tools/Linaro-Forge/performance_rep/index.html create mode 100644 Documentation/Development/Performance_Tools/craypat/index.html create mode 100644 Documentation/Development/Performance_Tools/hpctoolkit/index.html create mode 100644 Documentation/Development/Performance_Tools/index.html create mode 100644 Documentation/Development/Programming_Models/index.html create mode 100644 Documentation/Development/Programming_Models/mpi/index.html create mode 100644 Documentation/Development/Programming_Models/openacc/index.html create mode 100644 Documentation/Development/Programming_Models/openmp/index.html create mode 100644 Documentation/Environment/Building_Packages/acquire/index.html create mode 100644 Documentation/Environment/Building_Packages/config_make_install/index.html create mode 100644 Documentation/Environment/Building_Packages/index.html create mode 100644 Documentation/Environment/Building_Packages/modules/index.html create mode 100644 Documentation/Environment/Customization/conda/index.html create mode 100644 Documentation/Environment/Customization/containers/index.html create mode 100644 Documentation/Environment/Customization/images/conda_logo.png create mode 100644 Documentation/Environment/Customization/spack/index.html create mode 100644 Documentation/Environment/lmod/index.html create mode 100644 Documentation/Environment/shell/index.html create mode 100644 Documentation/Machine_Learning/Containerized_TensorFlow/index.html create mode 100644 Documentation/Machine_Learning/Reinforcement_Learning/images/ppo_rew_comparison.png create mode 100644 Documentation/Machine_Learning/Reinforcement_Learning/images/tensorboard-initpag-2.png create mode 100644 Documentation/Machine_Learning/Reinforcement_Learning/images/tensorboard-initpage.png create mode 100644 Documentation/Machine_Learning/Reinforcement_Learning/index.html create mode 100644 Documentation/Machine_Learning/TensorBoard/index.html create mode 100644 Documentation/Machine_Learning/index.html create mode 100644 Documentation/Managing_Data/Transferring_Files/FileZilla/index.html create mode 100644 Documentation/Managing_Data/Transferring_Files/file-transfers/index.html create mode 100644 Documentation/Managing_Data/Transferring_Files/globus/index.html create mode 100644 Documentation/Managing_Data/Transferring_Files/index.html create mode 100644 Documentation/Managing_Data/Transferring_Files/winscp/index.html create mode 100644 Documentation/Managing_Data/file_permissions/index.html create mode 100644 
Documentation/Managing_Data/mss/index.html create mode 100644 Documentation/Slurm/batch_jobs/index.html create mode 100644 Documentation/Slurm/index.html create mode 100644 Documentation/Slurm/interactive_jobs/index.html create mode 100644 Documentation/Slurm/monitor_and_control/index.html create mode 100644 Documentation/Slurm/multiple_sub_jobs/index.html create mode 100644 Documentation/Systems/Eagle/Running/debugging_at_scale/index.html create mode 100644 Documentation/Systems/Eagle/Running/eagle_job_priorities/index.html create mode 100644 Documentation/Systems/Eagle/Running/eagle_partitions/index.html create mode 100644 Documentation/Systems/Eagle/Running/example_sbatch/index.html create mode 100644 Documentation/Systems/Eagle/Running/index.html create mode 100644 Documentation/Systems/Eagle/Running/monitor_and_control/index.html create mode 100644 Documentation/Systems/Eagle/index.html create mode 100644 Documentation/Systems/Eagle/lustrebestpractices/index.html create mode 100644 Documentation/Systems/Eagle/modules/index.html create mode 100644 Documentation/Systems/Kestrel/Environments/Toolchains/intel/index.html create mode 100644 Documentation/Systems/Kestrel/Environments/index.html create mode 100644 Documentation/Systems/Kestrel/Environments/tutorial/index.html create mode 100644 Documentation/Systems/Kestrel/filesystems/index.html create mode 100644 Documentation/Systems/Kestrel/getting_started_kestrel/index.html create mode 100644 Documentation/Systems/Kestrel/index.html create mode 100644 Documentation/Systems/Kestrel/modules/index.html create mode 100644 Documentation/Systems/Kestrel/running/index.html create mode 100644 Documentation/Systems/Swift/applications/index.html create mode 100644 Documentation/Systems/Swift/filesystems/index.html create mode 100644 Documentation/Systems/Swift/index.html create mode 100644 Documentation/Systems/Swift/known/index.html create mode 100644 Documentation/Systems/Swift/modules/index.html create mode 100644 Documentation/Systems/Swift/running/index.html create mode 100644 Documentation/Systems/Vermilion/applications/index.html create mode 100644 Documentation/Systems/Vermilion/filesystems/index.html create mode 100644 Documentation/Systems/Vermilion/help/index.html create mode 100644 Documentation/Systems/Vermilion/index.html create mode 100644 Documentation/Systems/Vermilion/known/index.html create mode 100644 Documentation/Systems/Vermilion/modules/index.html create mode 100644 Documentation/Systems/Vermilion/running/index.html create mode 100644 Documentation/Systems/index.html create mode 100644 Documentation/Viz_Analytics/avizo/index.html create mode 100644 Documentation/Viz_Analytics/index.html create mode 100644 Documentation/Viz_Analytics/paraview/index.html create mode 100644 Documentation/Viz_Analytics/virtualgl_fastx/index.html create mode 100644 Documentation/Viz_Analytics/visit/index.html create mode 100644 Documentation/getting_started/index.html create mode 100644 Documentation/help/index.html create mode 100644 Documentation/index.html create mode 100644 Friendly_User/Applications/vasp/index.html create mode 100644 Friendly_User/Environments/Toolchains/intel/index.html create mode 100644 Friendly_User/Environments/index.html create mode 100644 Friendly_User/Environments/tutorial/index.html create mode 100644 Friendly_User/Getting_Started/index.html create mode 100644 Friendly_User/Getting_Started/known_issues/index.html create mode 100644 _includes/analytics.html create mode 100644 _includes/calendar.html create mode 
100644 assets/images/FastX/eagle-dav-replacement-mate-interface-step5-offsite.png create mode 100644 assets/images/FastX/eagle-dav-ssh-login-fastx-cleaned-step3.png create mode 100644 assets/images/FastX/eagle-dav-step4-offsite.png create mode 100644 assets/images/FastX/fastx-installer-image-1.png create mode 100644 assets/images/FastX/xfce-interface-cleaned-step5.png create mode 100644 assets/images/Julia/Julia-Calling-Python-C-Tutorial_29_0.svg create mode 100644 assets/images/Julia/Julia-Calling-Python-C-Tutorial_30_0.svg create mode 100644 assets/images/Profiling/MAP-1.png create mode 100644 assets/images/Profiling/MAP-2.png create mode 100644 assets/images/Profiling/MAP-3.png create mode 100644 assets/images/Profiling/MAP-4.png create mode 100644 assets/images/Profiling/MAP-5.png create mode 100644 assets/images/Profiling/MAP-6.png create mode 100644 assets/images/Profiling/MAP-7.png create mode 100644 assets/images/Profiling/PR-1.png create mode 100644 assets/images/Profiling/PR-2.png create mode 100644 assets/images/VisIT/eagle-14.png create mode 100644 assets/images/VisIT/eagle-5a.png create mode 100644 assets/images/VisIT/eagle-5b.png create mode 100644 assets/images/VisIT/eagle-6.png create mode 100644 assets/images/VisIT/eagle-8.png create mode 100644 assets/images/VisIT/eagle-9.png create mode 100644 assets/images/VisIT/eagle-software-visit-step7.png create mode 100644 assets/images/favicon.png create mode 100644 assets/images/gpu_ai_benchmark.png create mode 100644 assets/images/output_4_0.png create mode 100644 assets/javascripts/bundle.aecac24b.min.js create mode 100644 assets/javascripts/bundle.aecac24b.min.js.map create mode 100644 assets/javascripts/lunr/min/lunr.ar.min.js create mode 100644 assets/javascripts/lunr/min/lunr.da.min.js create mode 100644 assets/javascripts/lunr/min/lunr.de.min.js create mode 100644 assets/javascripts/lunr/min/lunr.du.min.js create mode 100644 assets/javascripts/lunr/min/lunr.el.min.js create mode 100644 assets/javascripts/lunr/min/lunr.es.min.js create mode 100644 assets/javascripts/lunr/min/lunr.fi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.fr.min.js create mode 100644 assets/javascripts/lunr/min/lunr.he.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hu.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hy.min.js create mode 100644 assets/javascripts/lunr/min/lunr.it.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ja.min.js create mode 100644 assets/javascripts/lunr/min/lunr.jp.min.js create mode 100644 assets/javascripts/lunr/min/lunr.kn.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ko.min.js create mode 100644 assets/javascripts/lunr/min/lunr.multi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.nl.min.js create mode 100644 assets/javascripts/lunr/min/lunr.no.min.js create mode 100644 assets/javascripts/lunr/min/lunr.pt.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ro.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ru.min.js create mode 100644 assets/javascripts/lunr/min/lunr.sa.min.js create mode 100644 assets/javascripts/lunr/min/lunr.stemmer.support.min.js create mode 100644 assets/javascripts/lunr/min/lunr.sv.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ta.min.js create mode 100644 assets/javascripts/lunr/min/lunr.te.min.js create mode 100644 assets/javascripts/lunr/min/lunr.th.min.js create mode 100644 assets/javascripts/lunr/min/lunr.tr.min.js create mode 100644 
assets/javascripts/lunr/min/lunr.vi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.zh.min.js create mode 100644 assets/javascripts/lunr/tinyseg.js create mode 100644 assets/javascripts/lunr/wordcut.js create mode 100644 assets/javascripts/workers/search.f886a092.min.js create mode 100644 assets/javascripts/workers/search.f886a092.min.js.map create mode 100644 assets/stylesheets/main.4b4a2bd9.min.css create mode 100644 assets/stylesheets/main.4b4a2bd9.min.css.map create mode 100644 assets/stylesheets/palette.356b1318.min.css create mode 100644 assets/stylesheets/palette.356b1318.min.css.map create mode 100644 blog/2020-12-01-numba/index.html create mode 100644 blog/2021-05-06-tf/index.html create mode 100644 blog/2021-06-18-srun/index.html create mode 100644 blog/2022-02-02-Changes_to_Slurm_srun_for_interactive_jobs/index.html create mode 100644 blog/2022-03-05-local-io-performance/index.html create mode 100644 blog/2022-10-04-python2to3/index.html create mode 100644 blog/2022-12-19-windows_ssh/index.html create mode 100644 blog/2023-01-10-using_specific_module_versions_on_hpc/index.html create mode 100644 index.html create mode 100644 search/search_index.json create mode 100644 sitemap.xml create mode 100644 sitemap.xml.gz create mode 100644 stylesheets/extra.css diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 000000000..e69de29bb diff --git a/404.html b/404.html new file mode 100644 index 000000000..0e8a829eb --- /dev/null +++ b/404.html @@ -0,0 +1,4413 @@ + + + +
Learn how to run MATLAB software in batch mode
+Below is an example MATLAB script, matlabTest.m, that creates and populates a +vector using a simple for-loop and writes the result to a binary file, +x.dat. The shell script matlabTest.sb can be passed to the scheduler to run the +job in batch (non-interactive) mode.
+To try the example out, create both matlabTest.sb and matlabTest.m files in an
+appropriate directory, cd
to that directory, and call sbatch:
$ sbatch matlabTest.sb
+
Note
+MATLAB comprises many independently licensed components, and in your work it might be necessary to wait for multiple components to become available. Currently, the scheduler does not handle this automatically. For this reason, we strongly recommend using compiled MATLAB code for batch processing.
+Calling squeue
should show that your job is queued:
JOBID PARTITION NAME USER ST TIME NODES NODELIST(REASON)
+<JobID> <partition> matlabTe username PD 0:00 1 (<reason>)
+
Once the job has finished, the standard output is saved in a file called
+slurm-<JobID>.out
, standard error is written to the same file, and the output file x.dat contains the result of the MATLAB script.
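For example, once the job has left the queue you can inspect both files from a login shell; the commands below are only a minimal sketch of that check (replace <JobID> with the ID reported by sbatch):
$ squeue -u $USER          # the finished job should no longer be listed
$ cat slurm-<JobID>.out    # combined standard output and standard error
$ head x.dat               # values written by matlabTest.m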
The --account=<account_string> flag must include a valid account string or the job will encounter a permanent hold (it will appear in the queue but will never run). For more information, see user accounts.
$SLURM_SUBMIT_DIR is set by the scheduler to the directory from which the sbatch command was executed, e.g., /scratch/$USER. In this example, it is also the directory into which MATLAB will write the output file x.dat.
matlabTest.sb
+#!/bin/bash --login
+#SBATCH --time=05:00 # Maximum time requested for job (5 min.)
+#SBATCH --nodes=1 # Number of nodes
+#SBATCH --job-name=matlabTest # Name of job
+#SBATCH --account=<account_string> # Program-based WCID (account string associated with job)
+
+module purge
+module load matlab
+
+# execute code
+cd $SLURM_SUBMIT_DIR # Change directories (output will save here)
+matlab -nodisplay -r matlabTest # Run the MATLAB script
+
matlabTest.m
+format long
+xmin = 2;
+xmax = 10;
+x = zeros(xmax-xmin+1,1);
+for i = xmin:xmax
+ display(i);
+ x(i-xmin+1) = i
+end
+savefile = 'x.dat';
+save(savefile,'x','-ASCII')
+exit
+
Learn how to use MATLAB software on the NREL HPC systems.
+Details on how to run MATLAB scripts in batch mode. Steps are illustrated by a +simple example.
+How to run interactively using either a terminal or FastX.
+Toolbox used to run parallel MATLAB code on a single, multi-core compute +node. Use of the toolbox is demonstrated via a parallel "hello world" example +and a Monte Carlo example that leverages MATLAB's parfor command.
+Learn about the MATLAB software versions and licenses available for use.
+If you're an NREL user, on GitHub view MATLAB presentations and code +examples.
+For all users, see a summary PowerPoint deck on the MATLAB Compiler, MATLAB +Coder, and MATLAB Engine for +Python.
+ + + + + + + + +Learn how to run MATLAB software interactively on NREL HPC systems.
+To run MATLAB interactively there are two ways to proceed: you can choose to start an +interactive job and use a basic MATLAB terminal (no GUI), or you can use the GUI +with a FastX session on a DAV +node. For information on how to connect to NREL HPC systems, see System +Connection.
+After connecting to the login node, the next step is to start an interactive job. For example, the following command gets a user-selected number of nodes for interactive use, taking as input tasks per node, job duration, and account.
+$ salloc --nodes=<number of nodes> --ntasks-per-node=<tasks per node> --account=<your account here> --time=<desired time>
+
When your job starts, you will have a shell on a compute node.
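For example, a single node with four tasks per node for one hour could be requested as follows; keep the account placeholder until you substitute a valid project handle:
$ salloc --nodes=1 --ntasks-per-node=4 --account=<your account here> --time=01:00:00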
+Note
Use the --account=<handle> flag and include a valid project allocation handle. For more information, see User Accounts.
+$ module load matlab
+
and starting a simple MATLAB terminal (no GUI),
+$ matlab -nodisplay
+
For instructions on starting a FastX session on a DAV node, see the FastX +page. Once you have +started a FastX session and have access to a terminal, load the MATLAB module to +set up your user environment, which includes setting the location of the license +server,
+$ module load matlab
+
and start the MATLAB GUI,
+$ matlab &
+
With FastX, this will enable you to use the GUI as if MATLAB was running +directly on your laptop. The ampersand "&" lets MATLAB run as a background job +so the terminal is freed up for other uses.
+ + + + + + + + +Learn how to use the Parallel Computing Toolbox (PCT) with MATLAB software on the NREL HPC systems.
+Note
+Due to an issue with the scheduler and software licenses, we strongly recommend +the use of compiled MATLAB code for batch processing. Using the PCT with MATLAB +in batch mode may lead to failed jobs due to unavailability of licenses.
+PCT provides the simplest way for users to run parallel MATLAB code on a single, +multi-core compute node. Here, we describe how to configure your local MATLAB +settings to utilize the PCT and provide some basic examples of running parallel +code on NREL HPC systems.
+For more extensive examples of PCT usage and code examples, see the MathWorks +documentation.
+Configuration of the PCT is done most easily through the interactive +GUI. However, the opening of parallel pools can be significantly slower in +interactive mode than in non-interactive (batch) mode. For this reason, the +interactive GUI will only be used to set up your local configuration. Runtime +examples will include batch scripts that submit jobs directly to the scheduler.
+To configure your local parallel settings, start an interactive MATLAB session +with X11 forwarding (see Running Interactive Jobs on +Kestrel and Environment +Modules on the Kestrel System). Open MATLAB +R2023a and do the following:
+For various reasons, you might not have 104 workers available at runtime. In this +case, MATLAB will just use the largest number available.
+Note
+Specifying the number of tasks for an interactive job (i.e., using salloc
+--ntasks-per-node=<n>
to start your interactive job) will interfere with parallel
+computing toolbox. We recommend not specifying the number of tasks.
Here we demonstrate how to use the PCT on a single compute node on NREL HPC systems. Learn +how to open a local parallel pool with some examples of how to use it for +parallel computations. Because the opening of parallel pools can be extremely +slow in interactive sessions, the examples here will be restricted to +non-interactive (batch) job submission.
+Note
+Each example below will check out one "MATLAB" and one +"Distrib_Computing_Toolbox" license at runtime.
+In this example, a parallel pool is opened and each worker identifies itself via
+spmd
("single program
+multiple data"). Create the MATLAB script helloWorld.m:
% open the local cluster profile
+p = parcluster('Processes');
+
+% open the parallel pool, recording the time it takes
+tic;
+parpool(p); % open the pool
+fprintf('Opening the parallel pool took %g seconds.\n', toc)
+
+% "single program multiple data"
+spmd
+ fprintf('Worker %d says Hello World!\n', labindex)
+end
+
+delete(gcp); % close the parallel pool
+exit
+
To run the script on a compute node, create the file helloWorld.sb:
+#!/bin/bash
+#SBATCH --time=05:00
+#SBATCH --nodes=1
+#SBATCH --job-name=helloWorld
+#SBATCH --account=<account_string>
+
+# load modules
+module purge
+module load matlab/R2023a
+
+# define an environment variable for the MATLAB script and output
+BASE_MFILE_NAME=helloWorld
+MATLAB_OUTPUT=${BASE_MFILE_NAME}.out
+
+# execute code
+cd $SLURM_SUBMIT_DIR
+matlab -nodisplay -r $BASE_MFILE_NAME > $MATLAB_OUTPUT
+
where, again, the fields in < >
must be properly specified. Finally, at the
+terminal prompt, submit the job to the scheduler:
$ sbatch helloWorld.sb
+
The output file helloWorld.out should contain messages about the parallel pool +and a "Hello World" message from each of the available workers.
+MATLAB's parfor
+("parallel for-loop") can be used to parallelize tasks that require no
+communication between workers. In this example, the aim is to solve a stiff,
+one-parameter system of ordinary differential equations (ODE) for different
+(randomly sampled) values of the parameter and to compare the compute time when
+using serial and parfor loops. This is a quintessential example of Monte Carlo
+simulation that is suitable for parfor: the solution for each value of the
+parameter is time-consuming to compute but can be computed independently of the
+other values.
First, create a MATLAB function stiffODEfun.m that defines the right-hand side +of the ODE system:
+function dy = stiffODEfun(t,y,c)
+ % This is a modified example from MATLAB's documentation at:
+ % http://www.mathworks.com/help/matlab/ref/ode15s.html
+ % The difference here is that the coefficient c is passed as an argument.
+ dy = zeros(2,1);
+ dy(1) = y(2);
+ dy(2) = c*(1 - y(1)^2)*y(2) - y(1);
+end
+
Second, create a driver file stiffODE.m that samples the input parameter and +solves the ODE using the ode15s function.
+%{
+ This script samples a parameter of a stiff ODE and solves it both in
+ serial and parallel (via parfor), comparing both the run times and the
+ max absolute values of the computed solutions. The code -- especially the
+ serial part -- will take several minutes to run on Eagle.
+%}
+
+% open the local cluster profile
+p = parcluster('Processes');
+
+% open the parallel pool, recording the time it takes
+time_pool = tic;
+parpool(p);
+time_pool = toc(time_pool);
+fprintf('Opening the parallel pool took %g seconds.\n', time_pool)
+
+% create vector of random coefficients on the interval [975,1050]
+nsamples = 10000; % number of samples
+coef = 975 + 50*rand(nsamples,1); % randomly generated coefficients
+
+% compute solutions within serial loop
+time_ser = tic;
+y_ser = cell(nsamples,1); % cell to save the serial solutions
+for i = 1:nsamples
+ if mod(i,10)==0
+ fprintf('Serial for loop, i = %d\n', i);
+ end
+ [~,y_ser{i}] = ode15s(@(t,y) stiffODEfun(t,y,coef(i)) ,[0 10000],[2 0]);
+end
+time_ser = toc(time_ser);
+
+% compute solutions within parfor
+time_parfor = tic;
+y_par = cell(nsamples,1); % cell to save the parallel solutions
+err = zeros(nsamples,1); % vector of errors between serial and parallel solutions
+parfor i = 1:nsamples
+ if mod(i,10)==0
+ fprintf('Parfor loop, i = %d\n', i);
+ end
+ [~,y_par{i}] = ode15s(@(t,y) stiffODEfun(t,y,coef(i)) ,[0 10000],[2 0]);
+ err(i) = norm(y_par{i}-y_ser{i}); % error between serial and parallel solutions
+end
+time_parfor = toc(time_parfor);
+time_par = time_parfor + time_pool;
+
+% print results
+fprintf('RESULTS\n\n')
+fprintf('Serial time : %g\n', time_ser)
+fprintf('Parfor time : %g\n', time_par)
+fprintf('Speedup : %g\n\n', time_ser/time_par)
+fprintf('Max error between serial and parallel solutions = %e\n', max(abs(err)))
+
+% close the parallel pool
+delete(gcp)
+exit
+
Finally, create the batch script stiffODE.sb:
+#!/bin/bash
+#SBATCH --time=20:00
+#SBATCH --nodes=1
+#SBATCH --job-name=stiffODE
+#SBATCH --account=<account_string>
+
+# load modules
+module purge
+module load matlab/R2023a
+
+# define environment variables for MATLAB script and output
+BASE_MFILE_NAME=stiffODE
+MATLAB_OUTPUT=${BASE_MFILE_NAME}.out
+
+# execute code
+cd $SLURM_SUBMIT_DIR
+matlab -nodisplay -r $BASE_MFILE_NAME > MATLAB_OUTPUT
+
Next, submit the job (which will take several minutes to complete):
+$ sbatch stiffODE.sb
+
If the code executed correctly, the end of the text file stiffODE.out should +contain the times needed to compute the solutions in serial and parallel as well +as the error between the serial and parallel solutions (which should be +0!). There should be a significant speed-up — how much depends on the runtime +environment — for the parallelized computation.
+ + + + + + + + +Learn about the MATLAB software versions and licenses available for the NREL HPC systems.
+The latest version available on NREL HPC systems is R2023a.
+MATLAB is proprietary software. As such, users have access to a limited number +of licenses both for the base MATLAB software as well as some specialized +toolboxes.
+To see which toolboxes are available, regardless of how they are licensed, start +an interactive MATLAB session and run:
+>> ver
+
For a comprehensive list of available MATLAB-related licenses (including those not under active maintenance, such as the Database Toolbox), as +well as their current availability, run the following terminal command:
+$ lmstat.matlab
+
Among other things, you should see the following:
+Feature usage info:
+
+Users of MATLAB: (Total of 6 licenses issued; Total of ... licenses in use)
+
+Users of Compiler: (Total of 1 license issued; Total of ... licenses in use)
+
+Users of Distrib_Computing_Toolbox: (Total of 4 licenses issued; Total of ... licenses in use)
+
+Users of MATLAB_Distrib_Comp_Engine: (Total of 16 licenses issued; Total of ... licenses in use)
+
This documentation only covers the base MATLAB package and the Parallel +Computing Toolbox, which check out the "MATLAB" and "Distrib_Computing_Toolbox" +licenses, respectively.
+ + + + + + + + +PLEXOS is a simulation software for modeling electric, gas, and water systems for optimizing energy markets.
+Users can run PLEXOS models on NREL's computing clusters. However, users need to build the PLEXOS models on a Windows system as there is no GUI available on the clusters and on Linux in general
+Kestrel | +Eagle | +Swift | +Vermilion | +
---|---|---|---|
+ | plexos/8.300R09 | ++ | + |
+ | plexos/9.000R07 | ++ | + |
plexos/9.000R09 | +plexos/9.000R09 | +plexos/9.000R09 | ++ |
+ | plexos/9.200R05 | ++ | + |
plexos/9.200R06 | ++ | + | + |
Info
+A user can only run PLEXOS with Gurobi solvers at this time. Please set up your model accordingly.
+Please follow the setup instructions before running the examples. Example scripts for new users are available within the master branch.
+Note
+Sometimes newer modules may be availabe in a test
directory which is hidden by default from the general user base. This obscured release is done to iron out any bugs that may arise during the installation and use of the module while avoiding breaking users existing jobs and workflows. You can use these test modules by running
module use /nopt/nrel/apps/modules/test/modulefiles
+module avail
+
This should display all of the test modules available in addition to the defaults. We encourage you to reach out to us at HPC-Help@nrel.gov for access if you would like access to these modules.
+We will load the requisite modules for running PLEXOS 9.2R06 for this example. Please see the module compatibility chart for loading the correct modules
+module load gurobi/10.0.2
+module load plexos/9.200R06
+
Recall that we can only use the Gurobi solver while running the PLEXOS on the NREL cluster. Now that we have the modules loaded, PLEXOS can be called as follows
+$PLEXOS/PLEXOS64 -n 5_bus_system_v2.xml -m 2024_yr_15percPV_MT_Gurobi
+
The command above assumes that we are running the model 2024_yr_15percPV_MT_Gurobi
from file 5_bus_system_v2.xml
. PLEXOS 9.0RX requires validating user-credentials for a local
+PLEXOS account for each run. Therefore, if we ran the above command in an interactive session, we would need to enter the following username and password
username : nrelplexos
+password : Nr3lplex0s
+
Fortunately, we can bypass the prompt for a local PLEXOS account username and password (useful for slurm batch jobs) by passing them as command line arguments as follows.
+$PLEXOS/PLEXOS64 -n 5_bus_system_v2.xml -m 2024_yr_15percPV_MT_Gurobi -cu nrelplexos -cp Nr3lplex0s
+
Warning
+Not providing the username and password in batch jobs WILL cause your jobs to fail.
+The example scripts are available here. Please clone the repository to run those examples.
+Note
+The slurm output files generated by PLEXOS may not load correctly because of special characters that PLEXOS output introduces. To remove thoses special characters, open the slurm output file and run the following command
+# On PC
+:%s/<CTRL-2>//g
+# On Mac
+%s/<CTRL-SHIFT-2>//g
+
<CTRL-2>
or <CTRL-SHIFT-2>
should generate the symbol ^@
that is messing up the output. Please refer to this stack exchange post for further information
The basic functionality test is the same as the example run in the section above. We will
+# Request an interactive session on the cluster
+salloc -N 1 --account=<your_hpc_allocation_name> --time=1:00:00 --partition=debug
+
+# Go to the working directory that contains the 5_bus_system_v2.xml example
+cd /to/you/XML/file/
+
+# Load the requisite modules
+module load gurobi/10.0.2
+module load plexos/9.200R06
+
+# Finally run the PLEXOS executable
+$PLEXOS/PLEXOS64 -n 5_bus_system_v2.xml -m 2024_yr_15percPV_MT_Gurobi -cu nrelplexos -cp Nr3lplex0s
+
We will run the same example by submitting the job to the SLURM queue. This example uses the batch file submit_simple.sh
. In order to run this example as is, run the following commands
# SSH into Kestrel or your cluster of choice
+ssh $USER@kestrel.hpc.nrel.gov
+
+# Clone the HPC master branch in your scratch folder
+cd /scratch/${USER}/
+git clone git@github.com:NREL/HPC.git
+
+# Go to the appropriate folder and submit the job on the HPC
+cd HPC/applications/plexos/RunFiles
+sbatch -A account_name --mail-user=your.email@nrel.gov submit_simple.sh
+
This builds upon the previous example where it tries to run the same model as before, but adds redundancy where the job doesn't fail if a license is not found. The submission script submit_enhanced.sh
attempts to re-run the job after waiting 120 seconds for each attempt.
# Skip this if you already have the repo cloned in your scratch directory
+ssh $USER@kestrel.hpc.nrel.gov
+cd /scratch/${USER}/
+git clone git@github.com:NREL/HPC.git
+
+# Go into the appropriate directory
+cd /scratch/${USER}/HPC/applications/plexos/RunFiles
+sbatch -A account_name --mail-user=your.email@nrel.gov submit_enhanced.sh
+
This example demonstrates how to submit multiple PLEXOS jobs. The model names are present in a file called models.txt
. submit_multiple.sh
is simply a wrapper that calls the batch file submit_plexos.sh
.
# Skip this if you already have the repo cloned in your scratch directory
+ssh $USER@kestrel.hpc.nrel.gov
+cd /scratch/${USER}/
+git clone git@github.com:NREL/HPC.git
+
+# Go into the appropriate directory
+cd /scratch/${USER}/HPC/applications/plexos/RunFiles
+./submit_multiple.sh 5_bus_system_v2.xml models.txt
+
This example demonstrates the use of SLURM job arrays to run multiple PLEXOS jobs using the script submit_job_array.sh
# Skip this if you already have the repo cloned in your scratch directory
+ssh $USER@kestrel.hpc.nrel.gov
+cd /scratch/${USER}/
+git clone git@github.com:NREL/HPC.git
+
+# Go into the appropriate directory
+cd /scratch/${USER}/HPC/applications/plexos/RunFiles
+export filename=5_bus_system_v2 # Export the XML dataset name
+export models.txt # Export the file that contains the models names within the XML dataset
+sbatch -A account_name -t 5 --mail-user=your.email@nrel.gov --array=1-4 submit_job_array.sh
+
Info
+A user can only run PLEXOS with Gurobi solvers on the clusters at this time. Please set up your model accordingly.
+PLEXOS XML model files can only run with the Gurobi solver specified while creating the models. The most common combinations you may encounter are
+PLEXOS Module | +Gurobi Module | +
---|---|
plexos/9.000R09 | +gurobi/9.5.1 | +
plexos/9.200R05 | +gurobi/10.0.1 | +
plexos/9.200R06 | +gurobi/10.0.2 | +
Please contact us if you encounter any issues or require a newer version.
+Before we can run PLEXOS, we need to create a license file on the cluster. For this, run the following commands with some minor modifications
+mkdir -p ~/.config/PLEXOS
+echo '<?xml version="1.0"?>
+<XmlRegistryRoot>
+ <comms>
+ <licServer_IP val="10.60.3.188" />
+ <licServer_CommsPort val="399" />
+ <licServer_IP_Secondary />
+ <connect>
+ <PrimaryServer_Port />
+ <SecondaryServer_Port />
+ </connect>
+ <licServer_CommsPort_Secondary />
+ <LastLicTypeUsed val="server" />
+ </comms>
+ <server>
+ <licServer_LogFolder val="/tmp/" />
+ <licServer_LogEvents val="true" />
+ </server>
+ <proxy_cred>
+ <proxy_ip val="" />
+ <proxy_port val="" />
+ <proxy_uname val="" />
+ <proxy_pass val="" />
+ </proxy_cred>
+ <BannedList>
+ <BanListedMachines val="true" />
+ </BannedList>
+ <ProductUpdates>
+ <LastUpdateDate val="10/10/2021 13:11:10" />
+ </ProductUpdates>
+ <UserName />
+ <Company />
+ <UserEmail />
+ <CompanyCode />
+ <LicenseServerRequestCount />
+</XmlRegistryRoot>' > ~/.config/PLEXOS/EE_reg.xml
+
module purge
+module load conda
+conda create -n plex1 r-essentials
+
module purge
+module load comp-intel intel-mpi mkl conda
+conda activate plex1
+
Install additional R libraries using conda +
conda install r-doParallel
+conda install r-RSQLite
+conda install r-testthat
+conda install r-covr
+
Note
+Most of the R libraries should be added as part of the initial install, but keep an eye out for the following packages.
+Info
+See below if you wish to use your own version of R and Python for PLEXOS.
+We need to install one, rplexos
library from source. To do this, execute the following commands
+
mkdir /home/$USER/temporary
+cd /home/$USER/temporary
+git clone https://github.com/NREL/rplexos.git
+cd rplexos
+CXX=`which icpc` R CMD INSTALL .
+
Note
+rplexos
needs to be built using an Intel compiler and R always wishes to build libraries using the same compilers that was used in its creation. If setting CXX=which icpc
shown above does not work, we need to fool R by renaming the intel C++ compiler using a symbolic link. This is a hack and should only be used if the above way of installation fails. In order for the hack run the following after replacing username in the 3rd line with your own username.
+
ln -s `which icpc` x86_64-conda_cos6-linux-gnu-c++
+export PATH=`pwd`:$PATH
+Rscript -e "install.packages('/home/username/temporary/rplexos/',repos=NULL,type='source')"
+rm x86_64-conda_cos6-linux-gnu-c++
+
For some PLEXOS examples, we need to install an additional package called plexos-coad
. For this run the following
+
cd /scratch/$USER
+git clone https://github.com/Computational-Energy/plexos-coad.git
+cd plexos-coad
+
+#patch for python 3.9
+tofix=`grep -lr getchild`
+for f in $tofix ; do sed -i3x "s/for el_data in elem.getchildren()/for el_data in list\(elem\)/" $f ; done
+pip install Cython
+python setup.py install
+
Finally make sure we have numpy and pandas in the plex1
conda environment.
+
pip install numpy pandas
+
If you have successfully followed all the instructions in the previous subsection and installed PLEXOS, you can simply load the following modules and activate the conda environment
+module purge
+module load comp-intel intel-mpi mkl conda
+conda activate plex1
+
This section is in regards to Point 3 in setting up the PLEXOS environment. +The following R libraries will need to be installed manually in this case.
+install.packages("data.table")
+install.packages("DBI")
+install.packages("dbplyr")
+install.packages("doParallel")
+install.packages("dplyr")
+install.packages("foreach")
+install.packages("lubridate")
+install.packages("magrittr")
+install.packages("parallel")
+install.packages("Rcpp")
+install.packages("RSQLite")
+install.packages("stringi")
+install.packages("tidyr")
+install.packages("knitr")
+install.packages("testthat")
+install.packages("ggplot2")
+install.packages("covr")
+install.packages("tidyverse")
+
After installing the above, follow the remainder of the installation starting with +point 4.
+ + + + + + + + +Documentation: link to documentation
+Write a brief description of the program here. Keep the italics.
+This section provides the minimum amount of information necessary to successfully run a basic job on an NREL Cluster. +This information should be as complete and self-contained as possible.
+Instructions should be step-by-step and include copy-and-pastable commands where applicable.
+For example, describe how the user can load the program module with module avail
and module load
:
module avail program
+ program/2.0.0 program/1.0.0
+
module load program/2.0.0
+
Include a section on how to run the job, e.g., with job script examples or commands for an interactive session.
+#!/bin/bash
+
+# In a comment summarize the hardware requested, e.g. number of nodes,
+ # number of tasks per node, and number of threads per task
+
+#SBATCH --time=
+#SBATCH --nodes=
+#SBATCH --ntasks-per-node=
+#SBATCH --cpus-per-task=
+#SBATCH --partition=
+#SBATCH --account=
+
+# include a section of relevant export and module load commands, e.g.:
+
+module load gcc/8.4.0
+
+export OMP_NUM_THREADS=
+
+# include a sample srun command or similar
+srun program.x
+
If the submit script for Vermillion differs from Kestrel, then include a Vermillion example script here.
+If the submit script does not differ, then remove this section (starting from the ??? example "Vermillion"
line)
If the submit script for Swift differs from Kestrel, then include a Swift example script here.
+If the submit script does not differ, then remove this section (starting from the ??? example "Swift"
line)
Here's a template of a collapsible example.
+You can include blocked sections
+
And unblocked sections.
+Note
+You can use a note to draw attention to information.
+Include instructions on how to submit the job script
+Kestrel | +Swift | +Vermillion | +
---|---|---|
0.0.0 | +0.0.0 | +0.0.0 | +
Include advanced user information about the code here (see BerkeleyGW page for some examples)
+One common "advanced case" might be that users want to build their own version of the code.
+Here, give detailed and step-by-step instructions on how to build the code, if this step is necessary. Include detailed instructions for how to do it on each applicable HPC system. Be explicit in your instructions. Ideally a user reading one of the build sections can follow along step-by-step +and have a functioning build by the end.
+If building from source is not something anyone would reasonably want to do, remove this section.
+Be sure to include where the user can download the source code
+Include here, for example, a Kestrel-specific makefile (see berkeleygw example page). This template assumes that we build the code with only one toolchain, which may not be the case. If someone might reasonably want to build with multiple toolchains, use the "Multiple toolchain instructions on Kestrel" template instead.
+Include relevant commands in blocks.
+
blocks
+Be sure to state how to set-up the necessary environment, e.g.:
+module load gcc/8.4.0
+module load openmpi/3.1.6/gcc-8.4.0
+module load hdf5/1.10.6/gcc-ompi
+
+Give instructions on compile commands. E.g., to view the available make targets, type make. To compile all program executables, type:
make cleanall
+make all
+
+information on how to build on Vermilion
+information on how to build on Swift
+Include known problems and workarounds here, if applicable
+Documentation: BerkeleyGW
+BerkeleyGW is a massively parallel many-body perturbation theory code capable of performing RPA, GW, and GW-BSE calculations, which can be used to investigate properties of materials with high accuracy.
+This section provides the minimum amount of information needed to run a BerkeleyGW job on an NREL cluster.
+First, see which versions of BerkeleyGW are available with module avail and load your preferred version with module load:
module avail berkeleygw
+ berkeleygw/3.0.1-cpu berkeleygw/3.0.1-gpu
+
+The module avail berkeleygw command shows that two BerkeleyGW modules are available. To select the GPU-enabled version of BerkeleyGW, for example, we use the module load command:
+module load berkeleygw/3.0.1-gpu
+
Next, create a job script. Below are example job scripts for the available NREL systems. Continuing the above example, we would select the "Kestrel GPU" example script.
+#!/bin/bash
+
+# This job requests 72 MPI tasks across 2 nodes (36 tasks/node) and no threading
+
+#SBATCH --time=01:00:00
+#SBATCH --nodes=2
+#SBATCH --ntasks-per-node=36
+#SBATCH --partition=standard
+#SBATCH --account=
+
+module load berkeleygw/3.0.1-cpu
+
+srun epsilon.cplx.x
+
Put job example here
+Put job example here
+Put job example here
+Save the submit file as bgw.in, and submit with the command:
+sbatch bgw.in
+| Kestrel | Swift | Vermilion |
+|---|---|---|
+| 2.0, 3.0 | 0.0.0 | 0.0.0 |
+For large systems, the wavefunction binary file format yields significantly slower read-in times relative to an HDF5-format wavefunction file. The BerkeleyGW code includes utilities, hdf2wfn.x and wfn2hdf.x, to convert wavefunction files between binary and HDF5 formats (see documentation). It is recommended to use HDF5-formatted wavefunction files where possible.
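+As a minimal sketch (module name taken from the example above; check the BerkeleyGW documentation for the exact argument order of wfn2hdf.x in your version), converting an existing binary WFN file to HDF5 looks like:
+module load berkeleygw/3.0.1-cpu
+wfn2hdf.x BIN WFN WFN.h5
+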
+BerkeleyGW supports wavefunction files in HDF5 format and binary format. Wavefunction inputs to BerkeleyGW can become large depending on the system under investigation. Large (TODO: define large for Kestrel. Probably > 10 GB) HDF5 wavefunction files benefit from Lustre file striping, and the BerkeleyGW code can see major runtime speed-ups when using this feature.
+Tip
+Binary format wavefunction files do not benefit from Lustre file striping
+For more on Lustre file striping, see (TODO: documentation section on Lustre file striping?)
+Because multiple executables in BerkeleyGW require the WFN input files (WFN and WFNq), we can streamline the file linking inside a submission script. We can also include the Lustre file striping step in our submission script. The below example script shows how this can be done for the BerkeleyGW epsilon executable.
+This script assumes you built your own version of BerkeleyGW. If not, remove the BGW=/path/to/where/you/built/BerkeleyGW/bin and ln -s $BGW/epsilon.cplx.x . lines.
+Be sure to load the proper modules (see Getting Started if not building your own version).
+#!/bin/bash
+#SBATCH -t 00:20:00
+#SBATCH -N 8
+#SBATCH --gpus-per-node=4
+#SBATCH -C gpu
+#SBATCH -o BGW_EPSILON_%j.out
+#SBATCH --account=
+
+BGW=/path/to/where/you/built/BerkeleyGW/bin
+WFN_folder=/path/to/folder/that/contains/WFN/and/WFNq
+
+mkdir BGW_EPSILON_$SLURM_JOBID
+lfs setstripe -c 60 BGW_EPSILON_$SLURM_JOBID
+cd BGW_EPSILON_$SLURM_JOBID
+ln -s $BGW/epsilon.cplx.x .
+ln -s ../epsilon.inp .
+ln -sfn ${WFN_folder}/WFNq.h5 .
+ln -sfn ${WFN_folder}/WFN.h5 ./WFN.h5
+
+ulimit -s unlimited
+export OMP_PROC_BIND=true
+export OMP_PLACES=threads
+export BGW_WFN_HDF5_INDEPENDENT=1
+
+export OMP_NUM_THREADS=16
+srun -n 32 -c 32 --cpu-bind=cores epsilon.cplx.x
+
+This script will create a directory "BGW_EPSILON_$SLURM_JOBID" (where $SLURM_JOBID will be a numeric ID), stripe the directory with a stripe count of 60, link the epsilon executable, WFNq, and WFN files to the directory, and run BerkeleyGW with 32 GPUs.
First, download BerkeleyGW.
+Then, follow the build instructions in the "building" drop-downs below for the cluster you will be running on.
+The following arch.mk file was used to build BerkeleyGW-3.0 on Kestrel on (date). Copy this arch.mk file into your BerkeleyGW directory.
COMPFLAG = -DGNU
+PARAFLAG = -DMPI -DOMP
+MATHFLAG = -DUSESCALAPACK -DUNPACKED -DUSEFFTW3 -DHDF5
+
+FCPP = /usr/bin/cpp -C
+F90free = mpifort -ffree-form -ffree-line-length-none -fopenmp -fno-second-underscore -cpp
+LINK = mpifort -fopenmp
+# FHJ: -funsafe-math-optimizations breaks Haydock and doesn't give any significant speedup
+FOPTS = -O3 -funroll-loops
+FNOOPTS = $(FOPTS)
+MOD_OPT = -J
+INCFLAG = -I
+
+C_PARAFLAG = -DPARA
+CC_COMP = mpiCC
+C_COMP = mpicc
+C_LINK = mpicc
+C_OPTS = -O3 -ffast-math
+C_DEBUGFLAG =
+
+REMOVE = /bin/rm -f
+
+# Math Libraries
+FFTWPATH = /projects/scatter/mylibraries_CentOS77/
+#/nopt/nrel/apps/fftw/3.3.3-impi-intel/
+#FFTWLIB = $(FFTWPATH)/lib/libfftw3.a
+FFTWLIB = $(FFTWPATH)/lib/libfftw3_omp.a $(FFTWPATH)/lib/libfftw3.a
+FFTWINCLUDE = $(FFTWPATH)/include
+
+LAPACKLIB = /projects/scatter/mylibraries_CentOS77/lib/libopenblas.a
+
+SCALAPACKLIB = /projects/scatter/mylibraries_CentOS77/lib/libscalapack.a
+
+HDF5PATH = /nopt/nrel/apps/base/2020-05-12/spack/opt/spack/linux-centos7-x86_64/gcc-8.4.0/hdf5-1.10.6-dj4jq2ffttkdxksimqe47245ryklau4a
+HDF5LIB = ${HDF5PATH}/lib/libhdf5hl_fortran.a \
+ ${HDF5PATH}/lib/libhdf5_hl.a \
+ ${HDF5PATH}/lib/libhdf5_fortran.a \
+ ${HDF5PATH}/lib/libhdf5.a /home/ohull/.conda-envs/bgw/lib/libsz.a -lz -ldl
+HDF5INCLUDE = ${HDF5PATH}/include
+
+PERFORMANCE =
+
+TESTSCRIPT =
+
Then, load the following modules:
+module load gcc/8.4.0
+module load openmpi/3.1.6/gcc-8.4.0
+module load hdf5/1.10.6/gcc-ompi
+
Choose whether to use the real or complex flavor of BerkeleyGW by copying the corresponding file to flavor.mk. For example, for the complex version:
+cp flavor_cplx.mk flavor.mk
+Finally, compile the code. To view the available make targets, type make. To compile all BerkeleyGW executables, type:
+
make cleanall
+make all
+
TODO: add Swift build instructions
+Include known problems and workarounds here, if applicable
+The NREL Computational Science Center (CSC) maintains an Ansys Fluent computational fluid dynamics (CFD) license pool for general use, including two seats of CFD (cfd_base) and four Ansys HPC Packs (anshpc_pack) to support running a model on many cores/parallel solves.
The main workflow that we support has two stages. The first is interactive graphical usage, e.g., for interactively building meshes or visualizing boundary geometry. For this, Ansys should be run on a FastX desktop. The second stage is batch (i.e., non-interactive) parallel processing, which should be run on compute nodes via a Slurm job script. Of course, if you have Ansys input from another location ready to run in batch mode, the first stage is not needed. We unfortunately cannot support running parallel jobs on the DAV nodes, nor launching parallel jobs from interactive sessions on compute nodes.
+Shared License Etiquette
+License usage can be checked on Eagle with the command lmstat.ansys. Network floating licenses are a shared resource. Whenever you open an Ansys Fluent window, a license is pulled from the pool and becomes unavailable to other Eagle users. Please do not keep idle windows open; if you are not actively using the application, close it and return the associated licenses to the pool. Excessive retention of software licenses falls under the inappropriate use policy.
GUI access is provided through FastX desktops. Open a terminal, load, and launch the Ansys Fluent environment with:
+module load ansys/<version>
+vglrun runwb2
+
+where <version> will be replaced with an Ansys version/release, e.g., 2021R2. Press tab to auto-suggest all available versions. Because FastX desktop sessions are supported from DAV nodes shared between multiple HPC users, limits are placed on how much memory and compute resources can be consumed by a single user/job. For this reason, it is recommended that the GUI be primarily used to define the problem and run small-scale tests to validate its operation before moving the model to a compute node for larger-scale runs.
To launch Ansys Fluent jobs in parallel batch mode, you can build on the batch script presented below.
+#!/bin/bash
+...
+#SBATCH --nodes=2
+#SBATCH --ntasks-per-node=36
+
+cd $SLURM_SUBMIT_DIR
+module purge # purge everything else
+module load ansys/<version>
+module load intel-mpi/2018.0.3
+...
+unset I_MPI_PMI_LIBRARY
+srun hostname -s | sort -V > myhosts.txt
+...
+fluent 2ddp -g -t $SLURM_NTASKS -cnf=myhosts.txt -mpi=intel -pinfiniband -i input_file.jou
+
+Once this script file (assumed to be named ansys-job.slurm) is saved, it can be submitted to the job scheduler with
[user@el3 ~]$ sbatch ansys-job.slurm
+
+In this example batch script, 2ddp can be replaced with the version of FLUENT your job requires (2d, 3d, 2ddp, or 3ddp), -g specifies that the job should run without the GUI, -t specifies the number of processors to use (in this example, 2 x 36 processors), -cnf specifies the hosts file (the list of nodes allocated to this job), -mpi and -p<...> specify the MPI implementation and interconnect, respectively, and -i is used to specify the job input file. Note that, generally speaking, the generation of the hostname file, myhosts.txt, must be repeated at the beginning of each job since the allocated nodes will likely change for each run.
A Note on Licenses and Job Scaling
+HPC Pack licenses are used to distribute Ansys batch jobs to run in parallel across many compute cores. The HPC Pack model is designed to enable exponentially more computational resources per each additional license, roughly 2x4^(num_hpc_packs). A table summarizing this relationship is shown below.
+| HPC Pack Licenses Used | Total Cores Enabled |
+|---|---|
+| 0 | 4 (0 hpc_pack + 4 solver) |
+| 1 | 12 (8 hpc_pack + 4 solver) |
+| 2 | 36 (32 hpc_pack + 4 solver) |
+| 3 | 132 (128 hpc_pack + 4 solver) |
+| 4 | 516 (512 hpc_pack + 4 solver) |
+Additionally, Fluent allows you to use up to four cores without consuming any of the HPC Pack licenses. When scaling these jobs to more than four cores, the four cores are added to the total amount made available by the HPC Pack licenses. For example, a batch job designed to completely fill a node with 36 cores requires one cfd_base license and two HPC Pack licenses (32 + 4 cores enabled).
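+As a quick sanity check of the scaling formula, the total enabled core count for a given number of HPC Pack licenses can be computed directly in the shell (2 x 4^packs from the HPC Packs, plus the four free solver cores):
+packs=2
+echo $(( 2 * 4**packs + 4 ))   # prints 36, matching the table above
+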
For information about accessing licenses beyond CSC's base capability, please contact Emily Cousineau.
+COMSOL Multiphysics is a versatile finite element analysis and simulation package. The COMSOL graphical user interface (GUI) environment is supported primarily for building and solving small models, while operation in batch mode allows users to scale their models to larger, higher-fidelity studies.
+Currently, we host three floating network licenses and a number of additional modules. Issue the command lmstat.comsol to see current license status and COMSOL modules available.
Extensive documentation is available in the menu: Help > Documentation. For beginners, it is highly recommended to follow the steps in Introduction to COMSOL Multiphysics found in Help > Documentation.
+For instructional videos, see the COMSOL website Video Gallery.
+Before beginning, it is good practice to check the license status with:
+[user@el3 ~]$ lmstat.comsol
+
When licenses are available, COMSOL can be used by starting the COMSOL GUI which allows you to build models, run the COMSOL computational engine, and analyze results. The COMSOL GUI can be accessed through a FastX desktop by opening a terminal and running the following commands:
+[user@ed3 ~]$ module purge
+[user@ed3 ~]$ module load comsol/6.0
+[user@ed3 ~]$ vglrun comsol
+
+Because FastX desktop sessions are supported from DAV nodes shared between multiple HPC users, limits are placed on how much memory and compute resources can be consumed by a single user/job. For this reason, it is recommended that the GUI be primarily used to define the problem and run small-scale tests to validate its operation before moving the model to a compute node for larger-scale runs. For jobs that require both large-scale compute resources and GUI interactivity simultaneously, there is partial support for running the GUI from an X-enabled shell (ssh -Y ...) on a compute node by replacing the vglrun comsol command with:
[user@r1i7n24 ~]$ comsol -3drend sw
+
However, the performance may be slow and certain display features may behave unexpectedly.
+You can save your model built in FastX+GUI mode into a file such as myinputfile.mph. Once that's available, the following job script shows how to run a single-process, multithreaded job in batch mode:
#!/bin/bash
+#SBATCH --job-name=comsol-batch-1proc
+#SBATCH --time=00:20:00
+#SBATCH --nodes=1
+#SBATCH --account=<your-allocation-id>
+#SBATCH --output=comsol-%j.out
+#SBATCH --error=comsol-%j.err
+
+# This helps ensure your job runs from the directory
+# from which you ran the sbatch command
+cd $SLURM_SUBMIT_DIR
+
+# Set up environment, and list to stdout for verification
+module purge
+module load comsol/6.0
+echo " "
+module list
+echo " "
+
+inputfile=$SLURM_SUBMIT_DIR/myinputfile.mph
+outputfile=$SLURM_SUBMIT_DIR/myoutputfilename
+logfile=$SLURM_SUBMIT_DIR/mylogfilename
+
+# Run a COMSOL job with 36 threads.
+# -np = number of threads per rank
+
+comsol batch -np 36 -inputfile $inputfile -outputfile $outputfile -batchlog $logfile
+
+Once this script file (assumed to be named comsol-job.slurm) is saved, it can be submitted to the job scheduler with
[user@el3 ~]$ sbatch comsol-job.slurm
+
To configure a COMSOL job with multiple MPI ranks, required for any job where the number of nodes >1, you can build on the following template:
+#!/bin/bash
+#SBATCH --job-name=comsol-batch-4proc
+#SBATCH --time=00:20:00
+#SBATCH --nodes=2
+#SBATCH --ntasks-per-node=2
+#SBATCH --account=<your-allocation-id>
+#SBATCH --output=comsol-%j.out
+#SBATCH --error=comsol-%j.err
+
+# This helps ensure your job runs from the directory
+# from which you ran the sbatch command
+cd $SLURM_SUBMIT_DIR
+
+# Set up environment, and list to stdout for verification
+module purge
+module load comsol/6.0
+echo " "
+module list
+echo " "
+
+inputfile=$SLURM_SUBMIT_DIR/myinputfile.mph
+outputfile=$SLURM_SUBMIT_DIR/myoutputfilename
+logfile=$SLURM_SUBMIT_DIR/mylogfilename
+
+# Run a 2-node, 4-rank parallel COMSOL job with 18 threads for each rank.
+# -nn = total number of MPI ranks
+# -nnhost = number of MPI ranks per "host", here equivalent to node
+# -np = number of threads per rank
+
+comsol -nn 4 -nnhost 2 batch -np 18 -inputfile $inputfile -outputfile $outputfile -batchlog $logfile
+
+The job script is submitted to the scheduler just the same as above for the single-process example. For jobs that require more than one node, this approach, which uses multiple MPI ranks, must be used. Note that in this case we choose 4 MPI ranks, 2 per node, each using 18 threads, to demonstrate the available submission options, not as an optimal performance recommendation. A different arrangement, e.g., -nn 2 -nnhost 1 batch -np 36, which translates to 2 MPI ranks, 1 per node, each using 36 threads, may perform better for your application. The optimal configuration depends on your particular problem and choice of solver, so some experimentation may be required.
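+For reference, that alternative arrangement uses the same batch syntax as the script above, only with different rank and thread counts:
+comsol -nn 2 -nnhost 1 batch -np 36 -inputfile $inputfile -outputfile $outputfile -batchlog $logfile
+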
Documentation: FEniCS 2019.1.0, FEniCSx
+FEniCS is a collection of open-source software components designed to enable the automated solution of differential equations by the finite element method.
+Note
+There are two versions of FEniCS. The original FEniCS ended in 2019 with version 2019.1.0, and development began on a complete refactor known as FEniCSx. FEniCS 2019.1.0 is still actively used and is the main focus of this documentation. Since FEniCSx is in pre-release, HPC support is a work in progress.
+FEniCS is organized as a collection of interoperable components that together form the FEniCS Project. These components include the problem-solving environment DOLFIN, the form compiler FFC, the finite element tabulator FIAT, the just-in-time compiler Instant, the form language UFL, and a range of additional components.
+FEniCS can be programmed both in C++ and Python, but Python programming is the simplest approach to exploring FEniCS and can give high performance.
+Currently, FEniCS is supported through Anaconda. Users are required to build their own FEniCS environment with the following commands after loading the conda/anaconda module (see Example Job Scripts):
+module load conda
+conda create -n myfenics -c conda-forge fenics matplotlib scipy jupyter
+
+The packages matplotlib, scipy, and jupyter are not required, but they are very handy to have.
+These commands will create a new environment named myfenics which contains all necessary packages as well as some commonly-used packages for programming FEniCS simulations. By default, this Conda environment will be installed in the directory /home/<username>/.conda-envs/myfenics. It will take roughly 3 GB of storage. Please make sure you have enough storage quota in the home directory before installation by running the du -hs ~ command (which will take a minute or two to complete).
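+Once the environment is created, a quick way to confirm it works is to activate it and import the FEniCS Python module (a minimal check; dolfin is the module provided by FEniCS 2019.1.0):
+module load conda
+conda activate myfenics
+python -c "import dolfin; print(dolfin.__version__)"
+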
+FEniCSx can also be installed via conda using:
conda create -n myfenics -c conda-forge fenics-dolfinx
+
#!/bin/bash
+
+# This test file is designed to run the Poisson demo on one node with 4 cores
+
+#SBATCH --time=01:00:00
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=4
+#SBATCH --partition=standard
+#SBATCH --account=
+
+module purge
+module load conda
+
+# This is to prevent FEniCS from unnecessarily attempting to multi-thread
+export OMP_NUM_THREADS=1
+
+cd /scratch/USERNAME/poisson_demo/
+srun -n 4 python poisson_demo.py
+
#!/bin/bash
+
+# This test file is designed to run the Poisson demo on one node with 4 cores
+
+#SBATCH --time=01:00:00
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=4
+#SBATCH --partition=standard
+#SBATCH --account=
+
+module purge
+module load anaconda3
+
+# This is to prevent FEniCS from unnecessarily attempting to multi-thread
+export OMP_NUM_THREADS=1
+
+cd /scratch/USERNAME/poisson_demo/
+srun -n 4 python poisson_demo.py
+
#!/bin/bash
+
+# This test file is designed to run the Poisson demo on one node with 4 cores
+
+#SBATCH --time=01:00:00
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=4
+#SBATCH --partition=standard
+#SBATCH --account=
+
+module purge
+module load conda
+
+# This is to prevent FEniCS from unnecessarily attempting to multi-thread
+export OMP_NUM_THREADS=1
+
+cd /home/USERNAME/poisson_demo/
+srun -n 4 python poisson_demo.py
+
+To run this script, first download the Poisson demo here and place it in a folder titled "poisson_demo" in your scratch directory (home for Swift). Next, replace "USERNAME" in the script with your username. Then save the script as "demo_script.sh" and submit it with sbatch demo_script.sh. This demo is only supported by FEniCS 2019.1.0 and not FEniCSx.
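+For example, following the Kestrel/Eagle scripts above (use your home directory instead of scratch on Swift, as noted):
+mkdir -p /scratch/$USER/poisson_demo
+# place the downloaded poisson_demo.py in that folder, then submit:
+sbatch demo_script.sh
+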
+| Kestrel | Vermilion | Swift |
+|---|---|---|
+| 2019.1.0 | 2019.1.0 | 2019.1.0 |
The General Algebraic Modeling System (GAMS) is a commercial high-level modeling system for mathematical programming and optimization. It is licensed software.
+GAMS includes a DSL compiler and also a stable of integrated high-performance solvers. GAMS is able to solve complex, large-scale modeling problems. For documentation, forums, and FAQs, see the GAMS website.
+A 60-user license of GAMS is made available to NREL users. This GAMS license requires users to be a member of the "gams workgroup." If you need the GAMS software package or a specific solver from GAMS, or if you have trouble running GAMS, please contact us.
+To initialize your environment to use GAMS, simply type module load gams/<version> (see the module avail gams output for available versions). GAMS is run with the command format gams <input filename>. A file <input filename>.lst will be created as the output file.
For a test run, in your home directory, type the following:
+module load gams/<version>
+cp /nopt/nrel/apps/gams/example/trnsport.gms .
+gams trnsport
+
+A result of 153.675 should be found in the screen output. More detailed output is in the file trnsport.lst.
The available solvers for different procedures are shown in the following with the default solver being the first one:
+By typing gams <input_filename> on the command line, the default procedure LP and its default solver Gurobi will be used. To override the default and use, e.g., Soplex, you can try one of the following two methods:
+1. Use the option statement in your GAMS input file. For example, if your model uses the LP procedure and you want to solve it with the Soplex solver, just add option lp=soplex to your input file.
+2. Specify the solver on the command line, e.g., gams <input_filename> lp=soplex.
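+For instance, to solve the trnsport example from above with Soplex instead of the default solver:
+gams trnsport lp=soplex
+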
A sample script for batch submission is provided here:
+Sample Submission Script
+#!/bin/bash --login
+#SBATCH --job-name=gams_run
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=36
+#SBATCH --time=00:05:00
+#SBATCH --account=<allocation-id>
+#SBATCH --error=gams-%j.err
+#SBATCH --output=gams-%j.out
+
+# Ensure script location
+cd $SLURM_SUBMIT_DIR
+
+# Create runtime environment
+module purge
+module load gams/<version>
+
+# Run GAMS
+gams trnsport lp=gurobi
+
+For a given solver, control parameters for the algorithm, such as convergence criteria, can be loaded from an option file named <solver_name>.opt placed in the directory from which you run GAMS. For example, for the Gurobi solver, the option file would be "gurobi.opt". For details on how to set those parameters, please see the GAMS Solver Manuals.
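+As a sketch, a solver option file is a plain-text list of parameter-value pairs. The parameter names below are illustrative only (consult the GAMS Solver Manuals for the exact names), and note that GAMS typically reads a solver option file only when optfile=1 is set:
+cat > gurobi.opt << EOF
+threads 4
+mipgap 0.01
+EOF
+gams trnsport lp=gurobi optfile=1
+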
When using the Gurobi solver in GAMS, the user should NOT try to load the Gurobi module. Simply using "module load gams" will automatically load the Gurobi solver.
+In order to use the GAMS Python API, the environment variable $PYTHONPATH should include these two directories:
+$GAMS_PYTHON_API_FILES/gams
+$GAMS_PYTHON_API_FILES/api_[version-of-python]
+where version-of-python = 27, 36, 37, or 38 for Python version 2.7, 3.6, 3.7, or 3.8, respectively. The Python version can be obtained with the command python --version.
+For example, for Python 3.7 and the bash shell, $PYTHONPATH can be set using the following script:
module purge
+module load gams/31.1.0
+if [ -z ${PYTHONPATH+x} ]
+then
+ export PYTHONPATH=$GAMS_PYTHON_API_FILES/api_37:$GAMS_PYTHON_API_FILES/gams
+else
+ export PYTHONPATH=$GAMS_PYTHON_API_FILES/api_37:$GAMS_PYTHON_API_FILES/gams:$PYTHONPATH
+fi
+
+The GAMS API can also be installed using Anaconda and pip. Please follow the instructions on the GAMS website. Currently GAMS supports Python versions 3.7 through 3.11. In general, it can be installed using the following command:
+pip install gams[your choice of sub-module] --find-links $GAMS_PYTHON_API_FILES
+
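+After installation, a quick import check can confirm the package is visible to your Python environment (this assumes the control API sub-module was installed; GamsWorkspace is its entry point):
+python -c "from gams import GamsWorkspace; print('GAMS Python API found')"
+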
Learn about the Gaussian16 electronic structure program and how to run Gaussian16 jobs at NREL.
+Important
+To run Gaussian16, users must be a member of the Gaussian user group. To be added to the group, contact HPC-Help. In your email message, include your username and copy the following text agreeing not to compete with Gaussian, Inc.:
I am not actively developing applications for a competing software program, or for a project in
+collaboration with someone who is actively developing for a competing software program. I agree
+that Gaussian output cannot be provided to anyone actively developing for a competing software program.
+
+I agree to this statement.
+
+NREL currently has Gaussian16 Revision C.01 installed, and the user manual can be found at the Gaussian website. Gaussian 16 C.01 also has a GPU version; for instructions on how to run Gaussian 16 on GPU nodes, see GitHub.
+Users coming from Gaussian 09 may sometimes find that Gaussian 16 runs slower. That is because G16 changed the default accuracy to Int=Acc2E=12 Grid=Ultrafine, which means that individual SCF iterations will take longer with G16 than with G09.
+Gaussian jobs typically write large amounts of information to temporary scratch files. When many Gaussian jobs are running, this can put a large traffic load on the Lustre parallel filesystem. To reduce this load, we recommend putting the first 5 GB or so of scratch files into a local (on-node) in-memory filesystem called /dev/shm.
+This scratch space is set automatically by the example script below. The Gaussian input file needs two directives (%RWF and %NoSave) to tell the program to put read-write files first in /dev/shm (up to 5 GB below), and to put data that exceeds 5 GB into files in a directory on the /scratch file system. An example script for batch submission is given below:
+Gaussian may be configured on Eagle to run on one or more physical nodes, with or without shared-memory parallelism. Distributed-memory parallel setup is taken care of automatically based on settings in the SLURM script example below.
+#!/bin/bash
+#SBATCH --time=48:00:00
+#SBATCH --nodes=3
+#SBATCH --job-name=G16test
+#SBATCH --output=std.out
+#SBATCH --error=std.err
+#SBATCH --account=hpcapps
+
+# Load Gaussian module to set environment
+module load gaussian
+cd $SLURM_SUBMIT_DIR
+
+# Set script variables
+INPUT_BASENAME=G16_test
+INPUT_FILE=$INPUT_BASENAME.com
+GAUSSIAN_EXEC=g16
+MEMSIZE=5GB
+SCRATCH=/tmp/scratch/$SLURM_JOB_ID
+SCRATCH2=/dev/shm
+#
+# Check on editing input file. If scratch directories
+# are listed then file is used un-changed, if 3-line
+# header not present, then script prepends these lines
+# to the input file to be used in execution line
+#
+NUMRWFLINES=`grep "RWF" $INPUT_FILE | wc -l`
+if [ $NUMRWFLINES -eq 1 ]; then
+ echo "standard file found"
+ cp $INPUT_FILE infile
+else
+ echo "prepending lines to input file"
+ echo "%RWF=$SCRATCH2/,$MEMSIZE,$SCRATCH/,-1" > infile
+ echo "%NoSave" >> infile
+ echo " " >> infile
+ cat $INPUT_FILE >> infile
+fi
+
+#
+# Run gaussian NREL script (performs much of the Gaussian setup)
+g16_nrel
+
+#
+# Set required Gaussian environment variables
+#
+if [ $SLURM_JOB_NUM_NODES -gt 1 ]; then
+ export GAUSS_LFLAGS='-vv -opt "Tsnet.Node.lindarsharg: ssh"'
+ export GAUSS_EXEDIR=$g16root/g16/linda-exe:$GAUSS_EXEDIR
+fi
+export GAUSS_SCRDIR=$SCRATCH2
+#
+# Gaussian needs scratch directories
+# Note: sometimes files may have been left behind in
+# on-node memory by other jobs that terminated incorrectly
+# so clean these to make sure there is enough space.
+#
+
+mkdir -p $SCRATCH
+rm $SCRATCH2/*
+
+# Run Gaussian job
+$GAUSSIAN_EXEC < infile >& $INPUT_BASENAME.log
+rm infile
+
+rm $SCRATCH/*
+rmdir $SCRATCH
+
+This script and sample Gaussian input are located at /nopt/nrel/apps/gaussian/examples. The gaussian module is loaded by the script automatically, so the user does not need to have loaded the module before submitting the job. The g16_nrel script called in the example above edits the Default.Route file based on the SLURM environment set when the script is submitted to the queue. The user also must supply the name of the input file (INPUT_BASENAME).
+The scratch space is set to a directory in the user's scratch space, with a name containing the job ID so different jobs will not overwrite each other's files. The script sets the directories for scratch files and environment variables needed by Gaussian (e.g., GAUSS_SCRDIR).
+Please note that if a template input file is provided without the header lines containing the %RWF and %NoSave directives, the script will prepend these lines to the input file based on variables set in the script above.
Eagle currently has 50 computing nodes with dual NVIDIA Tesla V100 GPUs and Gaussian G16 C.01 has the capability to run on those nodes using GPUs. For detailed instructions on how to run Gaussian on GPU nodes, see GitHub.
+To submit a job with the example script, named g16.slurm, one would type:
+sbatch g16.slurm
+The only difference when running on Swift is the scratch directory, because Swift has no /tmp/scratch directory. Please change SCRATCH=/tmp/scratch/$SLURM_JOB_ID in the above Eagle script to SCRATCH=/scratch/$USER/$SLURM_JOB_ID.
Gurobi Optimizer is a suite of solvers for mathematical programming.
+For documentation, forums, and FAQs, see the Gurobi website.
+Gurobi includes a linear programming solver (LP), quadratic programming solver (QP), quadratically constrained programming solver (QCP), mixed-integer linear programming solver (MILP), mixed-integer quadratic programming solver (MIQP), and a mixed-integer quadratically constrained programming solver (MIQCP).
+Gurobi is available on the Eagle system, which hosts 6 general use (including commercial) and 18 academic/government standalone Gurobi licenses. After logging onto Eagle, load the Gurobi module using module load gurobi. The Gurobi interactive shell is run by typing gurobi.sh. Gurobi can also be interfaced with C/C++/Java/MATLAB/R codes by linking with the Gurobi libraries.
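+For example, to start the interactive shell from a login shell on Eagle:
+module load gurobi
+gurobi.sh
+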
+For details on Gurobi programming, see the Gurobi Resource Center and Gurobi documentation.
+To use the Gurobi solver with MATLAB, make sure you have the Gurobi and MATLAB environment modules loaded, then issue the following two commands from the MATLAB prompt or your script:
+>> grb = getenv('GRB_MATLAB_PATH')
+>> path(path,grb)
+
+The General Algebraic Modeling System (GAMS) is a high-level modeling system for mathematical programming and optimization. The GAMS package installed at NREL includes Gurobi solvers. For more information, see using GAMS.
+Note that the Gurobi license for this interface is separate from the standalone Gurobi license, and supports far more instances.
+Important
+When using the Gurobi solver in GAMS, the user should NOT load the Gurobi module. Simply using "module load gams" will be enough to load the required Gurobi components and access rights.
+Learn how to use LAMMPS software — an open-source, classical molecular dynamics program designed for massively parallel systems. It is distributed by Sandia National Laboratories.
+LAMMPS has numerous built-in potentials for simulations of solid-state, soft matter, and coarse-grained systems. It can be run on a single processor or in parallel using MPI. To learn more, see the LAMMPS website.
+The most recent version of LAMMPS on Eagle and Swift at the time this page was published is the 23Jun22 version. The following packages have been installed in this version: asphere, body, bocs, class2, colloid, dielectric, diffraction, dipole, dpd-basic, drude, eff, electrode, extra-fix, extra-pair, fep, granular, h5md, intel, interlayer, kspace, manifold, manybody, mc, meam, misc, molecule, mpiio, openmp, opt, python, phonon, qep, qmmm, reaction, reaxff, replica, rigid, shock, spin, voronoi.
+A sample Slurm script for LAMMPS is given below:
+#!/bin/bash
+#SBATCH --time=48:00:00
+#SBATCH --nodes=4
+#SBATCH --job-name=lammps_test
+#SBATCH --output=std.out
+#SBATCH --error=std.err
+
+module purge
+module load lammps/20220623
+cd $SLURM_SUBMIT_DIR
+
+srun -n 144 lmp -in lmp.in -l lmp.out
+
+where lmp.in is the input and lmp.out is the output. This runs LAMMPS using four nodes with 144 cores.
For instructions on running LAMMPS with OpenMP, see the HPC Github code repository.
+If you need other packages, please contact us.
+Q-Chem is a comprehensive ab initio quantum chemistry package with special strengths in excited state methods, non-adiabatic coupling, solvation models, explicitly correlated wave-function methods, and cutting-edge density functional theory (DFT).
+The q-chem module should be loaded to set up the necessary environment. The module help output can provide more detail. In particular, the modulefile does not set the needed environment variable QCSCRATCH, as this is likely unique for each run. QCLOCALSCR is set by default to /tmp/scratch, but one may wish to point to a more persistent location if files written to local scratch need to be accessed after the job completes. Users can easily do this in their Slurm scripts or at the command line via export (Bash) or setenv (csh).
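+For example, in a Bash job script (the QCSCRATCH path below is only illustrative; choose a location appropriate for your job):
+export QCSCRATCH=/scratch/$USER/my_qchem_run
+export QCLOCALSCR=/tmp/scratch
+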
+The simplest means of starting a Q-Chem job is via the supplied qchem wrapper. The general syntax is:
qchem -slurm <-nt number_of_OpenMP_threads> <input file> <output file> <savename>
For example, to run a job with 36 threads:
+qchem -slurm -nt 36 example.in
Note
+The Q-Chem input file must be in the same directory in which you issue the qchem command. In other words, qchem ... SOMEPATH/<input file> won't work.
For a full list of which types of calculation are parallelized and the types of parallelism, see the Q-Chem User's Manual.
+To save certain intermediate files for, e.g., restart, a directory name needs to be provided. If not provided, all scratch files will be automatically deleted at the job's end by default. If provided, a directory $QCSCRATCH/savename will be created and will hold saved files. In order to save all intermediate files, you can add the -save option.
A template Slurm script to run Q-Chem with 36 threads is:
+#!/bin/bash
+#SBATCH --job-name=my_qchem_job
+#SBATCH --account=my_allocation_ID
+#SBATCH --ntasks=36
+#SBATCH --time=01:00:00
+#SBATCH --mail-type=BEGIN,END,FAIL
+#SBATCH --mail-user=your_email@domain.name
+#SBATCH --output=std-%j.out
+#SBATCH --error=std-%j.err
+
+# Load the Q-Chem environment
+module load q-chem
+
+# Go to the location of job files, presumably from where this file was submitted
+cd $SLURM_SUBMIT_DIR
+
+# Set up scratch space
+SCRATCHY=/scratch/$USER/${SLURM_JOB_NAME:?}
+if [ -d $SCRATCHY ]
+then
+ rm -r $SCRATCHY
+fi
+mkdir -p $SCRATCHY
+export QCSCRATCH=$SCRATCHY
+
+# Move files over
+cp * $SCRATCHY/.
+cd $SCRATCHY
+
+# Start run. Keep restart files without intermediate temp files in directory called "my_save"
+qchem -nt 36 job.in job.out my_save
+
To run this script on Swift, the number of threads can be changed to 64.
+A large number of example Q-Chem inputs are available in /nopt/nrel/apps/q-chem/<version>/samples.
+For information about the software's features, see the STAR-CCM+ website.
+STAR-CCM+ can be run interactively on Eagle using X windows. The network licenses are checked out from the license server running on wind-lic.nrel.gov.
+STAR-CCM+ is not supported on Vermilion or Swift.
+First, build your simulation <your_simulation.sim> on your workstation and copy it to your /scratch/$USER/<sim_dir> directory on Eagle:
ls /scratch/$USER/sim_dir
+your_simulation.sim
+
+Create a Slurm script <your_scriptfile> as shown below:
#!/bin/bash -l
+#SBATCH --time=2:00:00 # walltime limit of 2 hours
+#SBATCH --nodes=2 # number of nodes
+#SBATCH --ntasks-per-node=36 # number of tasks per node
+#SBATCH --ntasks=72 # total number of tasks
+#SBATCH --job-name=your_simulation # name of job
+#SBATCH --account=<allocation-id> # name of project allocation
+
+export TMPDIR="/scratch/$USER/<sim_dir>"
+scontrol show hostnames > nodelist
+module load starccm
+
+# Run Job
+
+echo "------ Running Starccm+ ------"
+
+date
+starccm+ -rsh "ssh -oStrictHostKeyChecking=no" -machinefile nodelist -np $SLURM_NTASKS -batch /scratch/$USER/<sim_dir>/your_simulation.sim >> simulation.log
+rm nodelist
+date
+
+echo "------ End of the job ------"
+
+Note that you must give the full path of your input file in the script. The simulation may be tested in an interactive job before being submitted to the batch queue. After the interactive job is allocated, type the commands from the Slurm script and make sure the job runs:
+module load starccm
+export TMPDIR="/scratch/$USER/<sim_dir>"
+...
+echo $SLURM_JOB_NODELIST > nodelist
+...
+starccm+ -power -rsh "ssh -oStrictHostKeyChecking=no" -machinefile nodelist -np $SLURM_NTASKS -batch /scratch/$USER/<sim_dir>/your_simulation.sim >> simulation.log
+
If this succeeds, submit your job with:
+sbatch <your_scriptfile>
+
+When the job completes, the output files are stored in the <sim_dir> directory with the your_simulation.sim file:
ls /scratch/$USER/<sim_dir>
+your_simulation.sim simulation.log slurm-12345.out
+
The Vienna Ab initio Simulation Package (VASP) is an application for atomic scale materials modelling from first principles. VASP computes an approximate solution to the many-body Schrödinger equation, either within density functional theory or within the Hartree-Fock approximation using pseudopotentials and plane wave basis sets. VASP can carry out a range of electronic structure and quantum-mechanical molecular dynamics calculations and has many features including hybrid functionals, Green's functions methods (GW quasiparticles, and ACFDT-RPA) and many-body perturbation theory (2nd-order Møller-Plesset). For a full list of capabilities, please see the About VASP page and for further details, documentation, forums, and FAQs, visit the VASP website.
+Important
+The VASP license requires users to be a member of a "workgroup" defined by the University of Vienna or Materials Design. If you are receiving "Permission denied" errors when trying to use VASP, you must be made part of the "vasp" Linux group first. To join, please contact HPC Help with the following information:
+- Your name
+- The workgroup PI
+- Whether you are licensed through Vienna (academic) or Materials Design, Inc. (commercial)
+- If licensed through Vienna:
+ - The e-mail address under which you are registered with Vienna as a workgroup member (this may not be the e-mail address you used to get an HPC account)
+ - Your VASP license ID
+- If licensed through Materials Design:
+ - Proof of current licensed status
+
+VASP is available through modules on all HPC systems. Use the command module avail vasp to view the versions of VASP available on each cluster, and module load vasp/<version> to load a specific version. If no version is specified, the default module (marked with "(D)") will be loaded. In the following sections, we will give sample input scripts and recommendations for the different builds. To run VASP, the following 4 input files are needed: POSCAR, POTCAR, INCAR, KPOINTS. For more information about VASP input files, see the VASP wiki.
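+Before submitting, a VASP run directory therefore typically looks like the following (submit_vasp.sh is only a placeholder name for one of the job scripts shown below):
+ls
+INCAR  KPOINTS  POSCAR  POTCAR  submit_vasp.sh
+sbatch submit_vasp.sh
+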
Attention
+If you would like to build your own VASP on Kestrel, please read our section Building VASP on Kestrel carefully before compiling on Kestrel's cray architecture.
+NREL offers modules for VASP 5 and VASP 6 on CPUs as well as GPUs on certain systems. See table below for current availability, as well as system specific documentation for more details on running different builds.
+|  | Kestrel | Eagle | Swift | Vermilion |
+|---|---|---|---|---|
+| VASP 5 | X | X |  | X |
+| VASP 6 | X | X | X | X |
+| VASP 6 GPU |  | X |  | X |
+Each VASP module provides three executables, where the correct one should be chosen for the type of job:
+- vasp_std is for general k-point meshes with collinear spins
+- vasp_ncl is for general k-point meshes with non-collinear spins
+- vasp_gam is for Gamma-point-only calculations
NREL also offers support for additional functionalities such as transition state theory tools from University of Texas-Austin, implicit solvation models from the University of Florida, and BEEF-vdw functionals. Please contact HPC-Help if a functionality you need is not present in one of our builds.
+There are modules for CPU builds of VASP 5 and VASP 6, each with solvation, transition state tools, and BEEF-vdW functionals. These modules can be loaded with module load vasp/<version>. A sample job script is shown below.
#!/bin/bash
+#SBATCH --nodes=2
+#SBATCH --tasks-per-node=104
+#SBATCH --time=2:00:00
+#SBATCH --account=<your-account-name>
+#SBATCH --job-name=<your-job-name>
+
+module load vasp/<version>
+
+srun vasp_std |& tee out
+
Important
+On Kestrel, any modules you have loaded on the login node will be copied to a compute node, and there are many loaded by default for the cray programming environment. Make sure you are using what you intend to. Please see the Kestrel Environments for more details on programming environments.
+We recommend building vasp with a full intel toolchain and launching with the cray-mpich-abi at runtime. Additionally, you should build on a compute node so that you have the same architecture as at runtime:
+salloc -N 1 -t <time> -A <account>
+
module purge
+module load craype-x86-spr #specifies sapphire rapids architecture
+module load intel-oneapi-compilers
+module load intel-oneapi-mpi
+module load intel-oneapi-mkl
+
Sample makefiles for vasp5 and vasp6 on Kestrel can be found in our Kestrel Repo under the vasp folder.
+Important
+We have found that it is optimal to run an Intel toolchain build of VASP using cray-mpich-abi at runtime. Cray-mpich-abi has several dependencies on cray network modules, so the easiest way to load it is to first load PrgEnv-intel and then swap the default cray-mpich module for the cray-mpich-abi module with module swap cray-mpich cray-mpich-abi. You must then load your intel compilers and math libraries, and unload cray's libsci. A sample script showing all of this is in the dropdown below.
#!/bin/bash
+#SBATCH --nodes=2
+#SBATCH --tasks-per-node=104
+#SBATCH --time=2:00:00
+#SBATCH --account=<your-account-name>
+#SBATCH --job-name=<your-job-name>
+
+# Load cray-mpich-abi and its dependencies within PrgEnv-intel, intel compilers, mkl, and unload cray's libsci
+module purge
+module load PrgEnv-intel
+module load craype-x86-spr
+module swap cray-mpich cray-mpich-abi
+module unload cray-libsci
+module load intel-oneapi-compilers
+module load intel-oneapi-mkl
+
+export VASP_PATH=/PATH/TO/YOUR/vasp_exe
+
+srun ${VASP_PATH}/vasp_std |& tee out
+
#!/bin/bash
+#SBATCH --nodes=1
+#SBATCH --time=4:00:00
+#SBATCH --account=<your-account-name>
+#SBATCH --job-name=<your-job-name>
+
+module purge
+
+#Load module
+ml vasp/<version>
+
+srun -n 36 vasp_std &> out
+
+The Intel MPI builds are recommended over the Open MPI builds as they exhibit the fastest performance.
+If using the openmpi builds, you may see the following warning in the vasp output that can be ignored:
Note: The following floating-point exceptions are signalling: IEEE_UNDERFLOW_FLAG IEEE_DENORMAL
+Note: The following floating-point exceptions are signalling: IEEE_UNDERFLOW_FLAG
+
#!/bin/bash
+#SBATCH --job-name=vasp_gpu
+#SBATCH --time=1:00:00
+#SBATCH --error=std.err
+#SBATCH --output=std.out
+#SBATCH --nodes=1
+#SBATCH --gpus-per-node=2
+#SBATCH --gpu-bind=map_gpu:0,1
+#SBATCH --account=myaccount
+
+#To run on multiple nodes, change the last two SBATCH lines:
+##SBATCH --nodes=4
+##SBATCH --gpu-bind=map_gpu:0,1,0,1,0,1,0,1 #one set of "0,1" per node
+
+module purge
+
+#Load the OpenACC build of VASP
+ml vasp/6.3.1-nvhpc_acc
+
+#Load some additional modules
+module use /nopt/nrel/apps/220511a/modules/lmod/linux-centos7-x86_64/gcc/12.1.0
+ml fftw nvhpc
+
+mpirun -npernode 2 vasp_std &> out
+
Warning: ieee_invalid is signaling
+Warning: ieee_divide_by_zero is signaling
+Warning: ieee_underflow is signaling
+Warning: ieee_inexact is signaling
+FORTRAN STOP
+
+To run the Cuda build of VASP on Eagle's GPUs, we can call the vasp_gpu executable in a module for a build of VASP older than 6.3.0. To use both GPUs per node, make sure to set #SBATCH --gpus-per-node=2 and #SBATCH --ntasks-per-node=2.
#!/bin/bash
+#SBATCH --job-name="benchmark"
+#SBATCH --account=myaccount
+#SBATCH --time=4:00:00
+#SBATCH --nodes=1
+#SBATCH --gpus-per-node=2
+#SBATCH --ntasks-per-node=2
+
+module purge
+
+#Load Intel MPI VASP build
+ml vasp/6.1.2
+
+srun -n 2 vasp_gpu &> out
+
The OpenACC build shows significant performance improvement compared to the Cuda build, but is more susceptible to running out of memory. The OpenACC GPU-port of VASP was released with VASP 6.2.0, and the Cuda GPU-port of VASP was dropped in VASP 6.3.0.
+#!/bin/bash
+#SBATCH --job-name="benchmark"
+#SBATCH --account=myaccount
+#SBATCH --time=4:00:00
+#SBATCH --ntasks-per-node=64
+#SBATCH --nodes=1
+
+#Set --exclusive if you would like to prevent any other jobs from running on the same nodes (including your own)
+#You will be charged for the full node regardless of the fraction of CPUs/node used
+#SBATCH --exclusive
+
+module purge
+
+#Load Intel MPI VASP build and necessary modules
+ml vaspintel
+ml slurm/21-08-1-1-o2xw5ti
+ml gcc/9.4.0-v7mri5d
+ml intel-oneapi-compilers/2021.3.0-piz2usr
+ml intel-oneapi-mpi/2021.3.0-hcp2lkf
+ml intel-oneapi-mkl/2021.3.0-giz47h4
+
+srun -n 64 vasp_std &> out
+
#!/bin/bash
+#SBATCH --job-name="benchmark"
+#SBATCH --account=myaccount
+#SBATCH --time=4:00:00
+#SBATCH --ntasks-per-node=64
+#SBATCH --nodes=1
+
+#Set --exclusive if you would like to prevent any other jobs from running on the same nodes (including your own)
+#You will be charged for the full node regardless of the fraction of CPUs/node used
+#SBATCH --exclusive
+
+module purge
+
+#Load OpenMPI VASP build and necessary modules
+ml vasp
+ml slurm/21-08-1-1-o2xw5ti
+ml openmpi/4.1.1-6vr2flz
+
+srun -n 64 vasp_std &> out
+
+The following script launches two instances of srun vasp_std on the same node using an array job. Each job will be restricted to 32 cores on the node.
+
#!/bin/bash
+#SBATCH --job-name="benchmark"
+#SBATCH --account=myaccount
+#SBATCH --time=4:00:00
+#SBATCH --ntasks-per-node=32
+#SBATCH --nodes=1
+
+#Set --exclusive=user if you would like to prevent anyone else from running on the same nodes as you
+#You will be charged for the full node regardless of the fraction of CPUs/node used
+#SBATCH --exclusive=user
+
+#Set how many jobs you would like to run at the same time as an array job
+#In this example, an array of 2 jobs will be run at the same time. This script will be run once for each job.
+#SBATCH --array=1-2
+
+#The SLURM_ARRAY_TASK_ID variable can be used to modify the parameters of the distinct jobs in the array.
+#In the case of array=1-2, the first job will have SLURM_ARRAY_TASK_ID=1, and the second will have SLURM_ARRAY_TASK_ID=2.
+#For example, you could assign different input files to runs 1 and 2 by storing them in directories input_1 and input_2 and using the following code:
+
+mkdir run_${SLURM_ARRAY_TASK_ID}
+cd run_${SLURM_ARRAY_TASK_ID}
+cp ../input_${SLURM_ARRAY_TASK_ID}/POSCAR .
+cp ../input_${SLURM_ARRAY_TASK_ID}/POTCAR .
+cp ../input_${SLURM_ARRAY_TASK_ID}/INCAR .
+cp ../input_${SLURM_ARRAY_TASK_ID}/KPOINTS .
+
+#Now load vasp and run the job...
+
+module purge
+
+#Load Intel MPI VASP build and necessary modules
+ml vaspintel
+ml slurm/21-08-1-1-o2xw5ti
+ml gcc/9.4.0-v7mri5d
+ml intel-oneapi-compilers/2021.3.0-piz2usr
+ml intel-oneapi-mpi/2021.3.0-hcp2lkf
+ml intel-oneapi-mkl/2021.3.0-giz47h4
+
+srun -n 32 vasp_std &> out
+
+The following script launches srun vasp_std on only 32 cores of a single node. The other 32 cores remain open for other users to use. You will only be charged for half of the node hours.
#!/bin/bash
+#SBATCH --job-name="benchmark"
+#SBATCH --account=myaccount
+#SBATCH --time=4:00:00
+#SBATCH --ntasks-per-node=32
+#SBATCH --nodes=1
+
+#To make sure that you are only being charged for the CPUs your job is using, set mem=2GB*CPUs/node
+#--mem sets the memory used per node
+#SBATCH --mem=64G
+
+module purge
+
+#Load Intel MPI VASP build and necessary modules
+ml vaspintel
+ml slurm/21-08-1-1-o2xw5ti
+ml gcc/9.4.0-v7mri5d
+ml intel-oneapi-compilers/2021.3.0-piz2usr
+ml intel-oneapi-mpi/2021.3.0-hcp2lkf
+ml intel-oneapi-mkl/2021.3.0-giz47h4
+
+srun -n 32 vasp_std &> out
+
+The Intel MPI builds are recommended over the Open MPI builds as they exhibit the fastest performance.
+Use at most 64 cores/node. On Swift, each node has 64 physical cores, and each core is subdivided into two virtual cores in a process that is identical to hyperthreading. Because of this, up to 128 cores can be requested from a single Swift node, but each core will only represent half of a physical core.
+On Swift, VASP is most efficiently run on partially full nodes.
+Unlike on Eagle, multiple jobs can run on the same nodes on Swift. If you are only using a fraction of a node, other users' jobs could be assigned to the rest of the node, which might deteriorate the performance. Setting "#SBATCH --exclusive" in your run script prevents other users from using the same node as you, but you will be charged the full 5AUs/node, regardless of the number of CPUs/node you are using.
+#!/bin/bash
+#SBATCH --job-name=vasp
+#SBATCH --nodes=1
+#SBATCH --time=8:00:00
+#SBATCH --error=std.err
+#SBATCH --output=std.out
+#SBATCH --partition=lg
+#SBATCH --exclusive
+#SBATCH --account=myaccount
+
+module purge
+ml vasp/6.3.1
+
+source /nopt/nrel/apps/220525b/myenv.2110041605
+ml intel-oneapi-compilers/2022.1.0-k4dysra
+ml intel-oneapi-mkl/2022.1.0-akthm3n
+ml intel-oneapi-mpi/2021.6.0-ghyk7n2
+
+# some extra lines that have been shown to improve VASP reliability on Vermilion
+ulimit -s unlimited
+export UCX_TLS=tcp,self
+export OMP_NUM_THREADS=1
+ml ucx
+
+srun --mpi=pmi2 -n 60 vasp_std
+
+# If the multi-node calculations are breaking, replace the srun line with this line
+# I_MPI_OFI_PROVIDER=tcp mpirun -iface ens7 -np 60 vasp_std
+
#!/bin/bash
+#SBATCH --job-name=vasp
+#SBATCH --nodes=1
+#SBATCH --time=8:00:00
+#SBATCH --error=std.err
+#SBATCH --output=std.out
+#SBATCH --partition=lg
+#SBATCH --exclusive
+#SBATCH --account=myaccount
+
+module purge
+ml gcc
+ml vasp/6.1.1-openmpi
+
+# some extra lines that have been shown to improve VASP reliability on Vermilion
+ulimit -s unlimited
+export UCX_TLS=tcp,self
+export OMP_NUM_THREADS=1
+ml ucx
+
+# lines to set "ens7" as the interconnect network
+module use /nopt/nrel/apps/220525b/level01/modules/lmod/linux-rocky8-x86_64/gcc/12.1.0
+module load openmpi
+OMPI_MCA_param="btl_tcp_if_include ens7"
+
+srun --mpi=pmi2 -n 60 vasp_std
+
#!/bin/bash
+#SBATCH --job-name=vasp
+#SBATCH --nodes=1
+#SBATCH --time=8:00:00
+##SBATCH --error=std.err
+##SBATCH --output=std.out
+#SBATCH --partition=lg
+#SBATCH --exclusive
+#SBATCH --account=myaccount
+
+module purge
+
+ml vasp/5.4.4
+
+source /nopt/nrel/apps/220525b/myenv.2110041605
+ml intel-oneapi-compilers/2022.1.0-k4dysra
+ml intel-oneapi-mkl/2022.1.0-akthm3n
+ml intel-oneapi-mpi/2021.6.0-ghyk7n2
+
+# some extra lines that have been shown to improve VASP reliability on Vermilion
+ulimit -s unlimited
+export UCX_TLS=tcp,self
+export OMP_NUM_THREADS=1
+ml ucx
+
+srun --mpi=pmi2 -n 60 vasp_std
+
+# If the multi-node calculations are breaking, replace the srun line with this line
+# I_MPI_OFI_PROVIDER=tcp mpirun -iface ens7 -np 60 vasp_std
+
+On Vermilion, VASP runs more performantly on a single node. Many issues have been reported for running VASP on multiple nodes, especially when requesting all available cores on each node. In order for MPI to work reliably on Vermilion, it is necessary to specify the interconnect network that Vermilion should use to communicate between nodes. If many cores are needed for your VASP calculation, it is recommended to run VASP on a single node in the lg partition (60 cores/node), which provides the largest number of cores per node. If you do need multiple nodes, use the following settings, which have been shown to work well for multi-node jobs on 2 nodes. The Open MPI multi-node jobs are more reliable on Vermilion, but Intel MPI VASP jobs show better runtime performance, as usual.
+If your multi-node Intel MPI VASP job is crashing on Vermilion, try replacing your srun line with the following mpirun line. -iface ens7 sets ens7 as the interconnect.
+
I_MPI_OFI_PROVIDER=tcp mpirun -iface ens7 -np 16 vasp_std
+
If your multi-node Open MPI VASP job is crashing on Vermilion, replace a call to load an openmpi module with the following lines. The OMPI_MCA_param variable sets ens7 as the interconnect.
+module use /nopt/nrel/apps/220525b/level01/modules/lmod/linux-rocky8-x86_64/gcc/12.1.0
+module load openmpi
+OMPI_MCA_param="btl_tcp_if_include ens7"
+
#!/bin/bash
+#SBATCH --job-name=vasp
+#SBATCH --nodes=2
+#SBATCH --time=1:00:00
+##SBATCH --error=std.err
+##SBATCH --output=std.out
+#SBATCH --partition=gpu
+#SBATCH --gpu-bind=map_gpu:0,1,0,1
+#SBATCH --exclusive
+#SBATCH --account=myaccount
+
+# Load the OpenACC build of VASP
+ml vasp/6.3.1-nvhpc_acc
+
+# Load some additional modules
+module use /nopt/nrel/apps/220421a/modules/lmod/linux-rocky8-x86_64/gcc/11.3.0/
+ml nvhpc
+ml fftw
+
+mpirun -npernode 1 vasp_std > vasp.$SLURM_JOB_ID
+
The OpenACC build shows significant performance improvement compared to the Cuda build, but is more susceptible to running out of memory. The OpenACC GPU-port of VASP was released with VASP 6.2.0, and the Cuda GPU-port of VASP was dropped in VASP 6.3.0.
+Documentation: Weather Research and Forecasting (WRF) Model
+The WRF model is a state-of-the-art mesoscale numerical weather prediction system designed for both atmospheric research and operational forecasting applications.
+This section provides the minimum amount of information necessary to successfully run a WRF job on the NREL Kestrel cluster. First, we show how to use WRF given that we may have different versions of WRF in different toolchains already built and available as modules.
+% module avail wrf
+ wrf/4.2.2-cray (D)
+
+The module avail wrf command shows which WRF module(s) are available for the different versions of WRF built with different toolchains at a given time. At this time, version 4.2.2 built with the Cray toolchain is available. In the future, multiple versions built with different toolchains will be available, and users are free to choose any of the module versions available for use. Currently there are no modules to run WRF on GPUs, but an effort is underway to make that available on future systems.
Next, we look at how to use the WRF module. Below is an example job script:
+#!/bin/bash
+
+# This job requests 4 nodes with 96 MPI tasks per node. This may need to be adjusted based on system hardware.
+
+#SBATCH --time=12:00:00
+#SBATCH --nodes=4
+#SBATCH --ntasks-per-node=96
+#SBATCH --partition=<partition-name>
+#SBATCH --exclusive=user
+#SBATCH --account=<account-name>
+#SBATCH --export=ALL
+#SBATCH --job-name=<job-name>
+#SBATCH --output=out_%j
+
+module load wrf/4.2.2-cray
+
+# Note that builds with different toolchains may require different modules and environments to be loaded
+
+export OMP_NUM_THREADS=1
+
+srun wrf.exe
+
+To submit the above WRF jobscript named submit_wrf.sh, do sbatch submit_wrf.sh
+| Kestrel |
+|---|
+| 4.2.2 |
+Any WRF version can be downloaded here. Pursuant to building WRF, appropriate versions of netcdf, hdf5, and pnetcdf (if parallel netcdf is required) must also be built, since WRF depends on them. These supporting software packages may already be offered as modules. If not, then users need to build them first; instructions for that are not provided here.
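+For example, you can check whether suitable modules already exist before building anything yourself:
+module avail netcdf
+module avail hdf5
+module avail pnetcdf
+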
+The following configure.wrf file was used to build WRF v4.2.2 on Kestrel (to date). Copy this configure.wrf into your WRF build dir WRF-4.x.y, and do % cd WRF-4.x.y.
+
# configure.wrf
+#
+# Original configure options used:
+# ./configure
+# Compiler choice: 47
+# Nesting option: 1
+#
+# This file was automatically generated by the configure script in the
+# top level directory. You may make changes to the settings in this
+# file but be aware they will be overwritten each time you run configure.
+# Ordinarily, it is necessary to run configure once, when the code is
+# first installed.
+#
+# To permanently change options, change the settings for your platform
+# in the file arch/configure.defaults then rerun configure.
+#
+SHELL = /bin/sh
+DEVTOP = `pwd`
+LIBINCLUDE = .
+.SUFFIXES: .F .i .o .f90 .c
+
+
+#### Get core settings from environment (set in compile script)
+#### Note to add a core, this has to be added to.
+
+COREDEFS = -DEM_CORE=$(WRF_EM_CORE) \
+-DNMM_CORE=$(WRF_NMM_CORE) -DNMM_MAX_DIM=2600 \
+-DDA_CORE=$(WRF_DA_CORE) \
+-DWRFPLUS=$(WRF_PLUS_CORE)
+
+#### Single location for defining total number of domains. You need
+#### at least 1 + 2*(number of total nests). For example, 1 coarse
+#### grid + three fine grids = 1 + 2(3) = 7, so MAX_DOMAINS=7.
+
+MAX_DOMAINS = 21
+
+#### DM buffer length for the configuration flags.
+
+CONFIG_BUF_LEN = 65536
+
+#### Size of bitmasks (in 4byte integers) of stream masks for WRF I/O
+
+MAX_HISTORY = 25
+
+IWORDSIZE = 4
+DWORDSIZE = 8
+LWORDSIZE = 4
+
+##############################################################################
+#### The settings in this section are defaults that may be overridden by the
+#### architecture-specific settings in the next section.
+##############################################################################
+
+##############################################################################
+#### NOTE: Do not modify these default values here. To override these
+#### values, make changes after "Architecture specific settings".
+##############################################################################
+
+#### Native size (in bytes) of Fortran REAL data type on this architecture ####
+#### Note: to change real wordsize (for example, to promote REALs from
+#### 4-byte to 8-byte), modify the definition of RWORDSIZE in the
+#### section following "Architecture specific settings". Do not
+#### change NATIVE_RWORDSIZE as is it architecture-specific.
+NATIVE_RWORDSIZE = 4
+
+#### Default sed command and script for Fortran source files ####
+#SED_FTN = sed -f $(WRF_SRC_ROOT_DIR)/arch/standard.sed
+SED_FTN = $(WRF_SRC_ROOT_DIR)/tools/standard.exe
+
+# Hack to work around $(PWD) not changing during OSF1 build.
+# $(IO_GRIB_SHARE_DIR) is reset during the OSF1 build only.
+IO_GRIB_SHARE_DIR =
+
+#### ESMF switches ####
+#### These are set up by Config.pl ####
+# switch to use separately installed ESMF library for coupling: 1==true
+ESMF_COUPLING = 0
+# select dependences on module_utility.o
+ESMF_MOD_DEPENDENCE = $(WRF_SRC_ROOT_DIR)/external/esmf_time_f90/module_utility.o
+# select -I options for external/io_esmf vs. external/esmf_time_f90
+ESMF_IO_INC = -I$(WRF_SRC_ROOT_DIR)/external/esmf_time_f90
+# select -I options for separately installed ESMF library, if present
+ESMF_MOD_INC = $(ESMF_IO_INC)
+# select cpp token for external/io_esmf vs. external/esmf_time_f90
+ESMF_IO_DEFS =
+# select build target for external/io_esmf vs. external/esmf_time_f90
+ESMF_TARGET = esmf_time
+
+# ESMFINCLUDEGOESHERE
+
+
+#### NETCDF4 pieces
+
+NETCDF4_IO_OPTS = -DUSE_NETCDF4_FEATURES -DWRFIO_NCD_LARGE_FILE_SUPPORT
+GPFS =
+CURL =
+HDF5 =
+ZLIB =
+DEP_LIB_PATH =
+NETCDF4_DEP_LIB = $(DEP_LIB_PATH) $(HDF5) $(ZLIB) $(GPFS) $(CURL)
+
+# NETCDF4INCLUDEGOESHERE
+
+
+##############################################################################
+
+LIBWRFLIB = libwrflib.a
+
+ LIB_BUNDLED = \
+ $(WRF_SRC_ROOT_DIR)/external/fftpack/fftpack5/libfftpack.a \
+ $(WRF_SRC_ROOT_DIR)/external/io_grib1/libio_grib1.a \
+ $(WRF_SRC_ROOT_DIR)/external/io_grib_share/libio_grib_share.a \
+ $(WRF_SRC_ROOT_DIR)/external/io_int/libwrfio_int.a \
+ $(ESMF_IO_LIB) \
+ $(WRF_SRC_ROOT_DIR)/external/RSL_LITE/librsl_lite.a \
+ $(WRF_SRC_ROOT_DIR)/frame/module_internal_header_util.o \
+ $(WRF_SRC_ROOT_DIR)/frame/pack_utils.o
+
+ LIB_EXTERNAL = \
+ -L$(WRF_SRC_ROOT_DIR)/external/io_netcdf -lwrfio_nf -L$(NETCDFPATH)/lib -lnetcdff -lnetcdf -L$(WRF_SRC_ROOT_DIR)/external/io_pnetcdf -lwrfio_pnf -L$(PNETCDFPATH)/lib -lpnetcdf -L$(HDF5PATH)/lib -lhdf5hl_fortran -lhdf5_hl -lhdf5_fortran -lhdf5 -lm -lz
+
+
+#### Architecture specific settings ####
+
+# Settings for Cray XE and XC CLE/Linux x86_64, Cray CCE compiler (dm+sm)
+# Use this for both XE6 systems with AMD Opteron and XC with Intel x86_64
+
+DESCRIPTION = CRAY CCE ($SFC/$SCC): Cray XE and XC
+# OpenMP is enabled by default for Cray CCE compiler
+# This turns it off
+NOOMP = -hnoomp
+DMPARALLEL = 1
+OMPCPP = -D_OPENMP
+OMP = -homp
+OMPCC = -homp
+SFC = ftn $(NOOMP)
+SCC = cc
+CCOMP = cc
+DM_FC = ftn $(NOOMP)
+DM_CC = cc
+FC = $(DM_FC)
+CC = $(DM_CC) -DFSEEKO64_OK
+LD = $(FC)
+RWORDSIZE = $(NATIVE_RWORDSIZE)
+PROMOTION = -s integer32 -s real`expr 8 \* $(RWORDSIZE)`
+ARCH_LOCAL = -DNONSTANDARD_SYSTEM_SUBR -DWRF_USE_CLM $(NETCDF4_IO_OPTS)
+CFLAGS_LOCAL = -O3 # -DRSL0_ONLY
+LDFLAGS_LOCAL =
+# uncomment this for wrfda build
+#LIB_LOCAL = -L$(WRF_SRC_ROOT_DIR)/external/fftpack/fftpack5 -lfftpack \
+# -L$(WRF_SRC_ROOT_DIR)/external/RSL_LITE -lrsl_lite
+CPLUSPLUSLIB =
+ESMF_LDFLAG = $(CPLUSPLUSLIB)
+FCOPTIM = # -Ofp3
+FCREDUCEDOPT = $(FCOPTIM)
+FCNOOPT = -O1 -Ofp1 -Oipa0 -Onomodinline
+FCDEBUG = # -g -O0 # -K trap=fp -R bc
+FORMAT_FIXED = -f fixed
+FORMAT_FREE = -f free
+FCSUFFIX =
+BYTESWAPIO = -h byteswapio
+FCBASEOPTS_NO_G = -N1023 $(FORMAT_FREE) $(BYTESWAPIO) #-ra
+FCBASEOPTS = $(FCBASEOPTS_NO_G) $(FCDEBUG)
+MODULE_SRCH_FLAG =
+TRADFLAG = -traditional-cpp $(NETCDF4_IO_OPTS)
+CPP = /lib/cpp -P -nostdinc
+AR = ar
+ARFLAGS = ru
+M4 = m4
+RANLIB = ranlib
+RLFLAGS =
+CC_TOOLS = gcc
+
+###########################################################
+######################
+# POSTAMBLE
+
+FGREP = fgrep -iq
+
+ARCHFLAGS = $(COREDEFS) -DIWORDSIZE=$(IWORDSIZE) -DDWORDSIZE=$(DWORDSIZE) -DRWORDSIZE=$(RWORDSIZE) -DLWORDSIZE=$(LWORDSIZE) \
+ $(ARCH_LOCAL) \
+ $(DA_ARCHFLAGS) \
+ -DDM_PARALLEL \
+ \
+ -DNETCDF \
+ -DPNETCDF \
+ -DPNETCDF_QUILT \
+ \
+ \
+ \
+ \
+ -DHDF5 \
+ -DLANDREAD_STUB=1 \
+ \
+ \
+ -DUSE_ALLOCATABLES \
+ -Dwrfmodel \
+ -DGRIB1 \
+ -DINTIO \
+ -DKEEP_INT_AROUND \
+ -DLIMIT_ARGS \
+ -DBUILD_RRTMG_FAST=0 \
+ -DBUILD_RRTMK=0 \
+ -DBUILD_SBM_FAST=1 \
+ -DSHOW_ALL_VARS_USED=0 \
+ -DCONFIG_BUF_LEN=$(CONFIG_BUF_LEN) \
+ -DMAX_DOMAINS_F=$(MAX_DOMAINS) \
+ -DMAX_HISTORY=$(MAX_HISTORY) \
+ -DNMM_NEST=$(WRF_NMM_NEST)
+CFLAGS = $(CFLAGS_LOCAL) -DDM_PARALLEL \
+ -DLANDREAD_STUB=1 \
+ -DMAX_HISTORY=$(MAX_HISTORY) -DNMM_CORE=$(WRF_NMM_CORE)
+FCFLAGS = $(FCOPTIM) $(FCBASEOPTS)
+ESMF_LIB_FLAGS =
+# ESMF 5 -- these are defined in esmf.mk, included above
+ ESMF_IO_LIB = -L$(WRF_SRC_ROOT_DIR)/external/esmf_time_f90 -lesmf_time
+ESMF_IO_LIB_EXT = -L$(WRF_SRC_ROOT_DIR)/external/esmf_time_f90 -lesmf_time
+INCLUDE_MODULES = $(MODULE_SRCH_FLAG) \
+ $(ESMF_MOD_INC) $(ESMF_LIB_FLAGS) \
+ -I$(WRF_SRC_ROOT_DIR)/main \
+ -I$(WRF_SRC_ROOT_DIR)/external/io_netcdf \
+ -I$(WRF_SRC_ROOT_DIR)/external/io_int \
+ -I$(WRF_SRC_ROOT_DIR)/frame \
+ -I$(WRF_SRC_ROOT_DIR)/share \
+ -I$(WRF_SRC_ROOT_DIR)/phys \
+ -I$(WRF_SRC_ROOT_DIR)/wrftladj \
+ -I$(WRF_SRC_ROOT_DIR)/chem -I$(WRF_SRC_ROOT_DIR)/inc \
+ -I$(NETCDFPATH)/include \
+
+REGISTRY = Registry
+CC_TOOLS_CFLAGS = -DNMM_CORE=$(WRF_NMM_CORE)
+
+LIB = $(LIB_BUNDLED) $(LIB_EXTERNAL) $(LIB_LOCAL) $(LIB_WRF_HYDRO) $(NETCDF4_DEP_LIB)
+LDFLAGS = $(OMP) $(FCFLAGS) $(LDFLAGS_LOCAL)
+ENVCOMPDEFS =
+WRF_CHEM = 0
+CPPFLAGS = $(ARCHFLAGS) $(ENVCOMPDEFS) -I$(LIBINCLUDE) $(TRADFLAG)
+NETCDFPATH = /opt/cray/pe/netcdf/default/CRAYCLANG/14.0
+HDF5PATH = /opt/cray/pe/hdf5/1.12.2.1/CRAYCLANG/14.0
+WRFPLUSPATH =
+RTTOVPATH =
+PNETCDFPATH = /nopt/nrel/apps/software/wrf/cray/installs/pnetcdf
+
+bundled: io_only
+external: io_only $(WRF_SRC_ROOT_DIR)/external/RSL_LITE/librsl_lite.a gen_comms_rsllite module_dm_rsllite $(ESMF_TARGET)
+io_only: esmf_time wrfio_nf wrfio_pnf \
+ wrf_ioapi_includes wrfio_grib_share wrfio_grib1 wrfio_int fftpack
+
+
+######################
+externals: io_only bundled external
+
+gen_comms_serial :
+ ( /bin/rm -f $(WRF_SRC_ROOT_DIR)/tools/gen_comms.c )
+
+module_dm_serial :
+ ( if [ ! -e module_dm.F ] ; then /bin/cp module_dm_warning module_dm.F ; cat module_dm_stubs.F >> module_dm.F ; fi )
+
+gen_comms_rsllite :
+ ( if [ ! -e $(WRF_SRC_ROOT_DIR)/tools/gen_comms.c ] ; then \
+ /bin/cp $(WRF_SRC_ROOT_DIR)/tools/gen_comms_warning $(WRF_SRC_ROOT_DIR)/tools/gen_comms.c ; \
+ cat $(WRF_SRC_ROOT_DIR)/external/RSL_LITE/gen_comms.c >> $(WRF_SRC_ROOT_DIR)/tools/gen_comms.c ; fi )
+
+module_dm_rsllite :
+ ( if [ ! -e module_dm.F ] ; then /bin/cp module_dm_warning module_dm.F ; \
+ cat $(WRF_SRC_ROOT_DIR)/external/RSL_LITE/module_dm.F >> module_dm.F ; fi )
+
+wrfio_nf :
+ ( cd $(WRF_SRC_ROOT_DIR)/external/io_netcdf ; \
+ make $(J) NETCDFPATH="$(NETCDFPATH)" RANLIB="$(RANLIB)" CPP="$(CPP)" \
+ CC="$(SCC)" CFLAGS="$(CFLAGS)" \
+ FC="$(SFC) $(PROMOTION) $(OMP) $(FCFLAGS)" TRADFLAG="$(TRADFLAG)" AR="$(AR)" ARFLAGS="$(ARFLAGS)" )
+
+wrfio_pnf :
+ ( cd $(WRF_SRC_ROOT_DIR)/external/io_pnetcdf ; \
+ make $(J) NETCDFPATH="$(PNETCDFPATH)" RANLIB="$(RANLIB)" CPP="$(CPP) $(ARCHFLAGS)" \
+ FC="$(FC) $(PROMOTION) $(OMP) $(FCFLAGS)" TRADFLAG="$(TRADFLAG)" AR="$(AR)" ARFLAGS="$(ARFLAGS)" )
+
+wrfio_grib_share :
+ ( cd $(WRF_SRC_ROOT_DIR)/external/io_grib_share ; \
+ make $(J) CC="$(SCC)" CFLAGS="$(CFLAGS)" RM="$(RM)" RANLIB="$(RANLIB)" CPP="$(CPP)" \
+ FC="$(SFC) $(PROMOTION) -I. $(FCDEBUG) $(FCBASEOPTS) $(FCSUFFIX)" TRADFLAG="$(TRADFLAG)" AR="$(AR)" ARFLAGS="$(ARFLAGS)" archive)
+
+wrfio_grib1 :
+ ( cd $(WRF_SRC_ROOT_DIR)/external/io_grib1 ; \
+ make $(J) CC="$(SCC)" CFLAGS="$(CFLAGS)" RM="$(RM)" RANLIB="$(RANLIB)" CPP="$(CPP)" \
+ FC="$(SFC) $(PROMOTION) -I. $(FCDEBUG) $(FCBASEOPTS) $(FCSUFFIX)" TRADFLAG="$(TRADFLAG)" AR="$(AR)" ARFLAGS="$(ARFLAGS)" archive)
+wrfio_grib2 :
+ ( cd $(WRF_SRC_ROOT_DIR)/external/io_grib2 ; \
+ make $(J) CC="$(SCC)" CFLAGS="$(CFLAGS) " RM="$(RM)" RANLIB="$(RANLIB)" \
+ CPP="$(CPP)" \
+ FC="$(SFC) $(PROMOTION) -I. $(FCDEBUG) $(FCBASEOPTS) $(FCSUFFIX)" TRADFLAG="-traditional" AR="$(AR)" ARFLAGS="$(ARFLAGS)" \
+ FIXED="$(FORMAT_FIXED)" archive)
+
+wrfio_int :
+ ( cd $(WRF_SRC_ROOT_DIR)/external/io_int ; \
+ make $(J) CC="$(CC)" CFLAGS_LOCAL="$(CFLAGS_LOCAL)" RM="$(RM)" RANLIB="$(RANLIB)" CPP="$(CPP)" \
+ FC="$(FC) $(PROMOTION) $(FCDEBUG) $(FCBASEOPTS) $(OMP)" FGREP="$(FGREP)" \
+ TRADFLAG="$(TRADFLAG)" AR="$(AR)" ARFLAGS="$(ARFLAGS)" ARCHFLAGS="$(ARCHFLAGS)" all )
+
+esmf_time :
+ ( cd $(WRF_SRC_ROOT_DIR)/external/esmf_time_f90 ; \
+ make $(J) FC="$(SFC) $(PROMOTION) $(FCDEBUG) $(FCBASEOPTS)" RANLIB="$(RANLIB)" \
+ CPP="$(CPP) -I$(WRF_SRC_ROOT_DIR)/inc -I. $(ARCHFLAGS) $(TRADFLAG)" AR="$(AR)" ARFLAGS="$(ARFLAGS)" )
+
+fftpack :
+ ( cd $(WRF_SRC_ROOT_DIR)/external/fftpack/fftpack5 ; \
+ make $(J) FC="$(SFC)" FFLAGS="$(PROMOTION) $(FCDEBUG) $(FCBASEOPTS)" RANLIB="$(RANLIB)" AR="$(AR)" \
+ ARFLAGS="$(ARFLAGS)" CPP="$(CPP)" CPPFLAGS="$(CPPFLAGS)" RM="$(RM)" )
+
+atm_ocn :
+ ( cd $(WRF_SRC_ROOT_DIR)/external/atm_ocn ; \
+ make $(J) CC="$(SCC)" CFLAGS="$(CFLAGS) " RM="$(RM)" RANLIB="$(RANLIB)" \
+ CPP="$(CPP)" CPPFLAGS="$(CPPFLAGS)" \
+ FC="$(DM_FC) $(PROMOTION) -I. $(FCDEBUG) $(FCBASEOPTS) $(FCSUFFIX)" TRADFLAG="-traditional" AR="$(AR)" ARFLAGS="$(ARFLAGS)" \
+ FIXED="$(FORMAT_FIXED)" )
+
+$(WRF_SRC_ROOT_DIR)/external/RSL_LITE/librsl_lite.a :
+ ( cd $(WRF_SRC_ROOT_DIR)/external/RSL_LITE ; make $(J) CC="$(CC) $(CFLAGS)" \
+ FC="$(FC) $(FCFLAGS) $(OMP) $(PROMOTION) $(BYTESWAPIO)" \
+ CPP="$(CPP) -I. $(ARCHFLAGS) $(OMPCPP) $(TRADFLAG)" AR="$(AR)" ARFLAGS="$(ARFLAGS)" ;\
+ $(RANLIB) $(WRF_SRC_ROOT_DIR)/external/RSL_LITE/librsl_lite.a )
+
+######################
+# Macros, these should be generic for all machines
+
+LN = ln -sf
+MAKE = make -i -r
+RM = rm -f
+
+
+# These sub-directory builds are identical across all architectures
+
+wrf_ioapi_includes :
+ ( cd $(WRF_SRC_ROOT_DIR)/external/ioapi_share ; \
+ $(MAKE) NATIVE_RWORDSIZE="$(NATIVE_RWORDSIZE)" RWORDSIZE="$(RWORDSIZE)" AR="$(AR)" ARFLAGS="$(ARFLAGS)" )
+
+wrfio_esmf :
+ ( cd $(WRF_SRC_ROOT_DIR)/external/io_esmf ; \
+ make FC="$(FC) $(PROMOTION) $(FCDEBUG) $(FCBASEOPTS) $(ESMF_MOD_INC)" \
+ RANLIB="$(RANLIB)" CPP="$(CPP) $(POUND_DEF) " AR="$(AR)" ARFLAGS="$(ARFLAGS)" )
+
+# There is probably no reason to modify these rules
+
+.F.i:
+ $(RM) $@
+ sed -e "s/^\!.*'.*//" -e "s/^ *\!.*'.*//" $*.F > $*.G
+ $(CPP) -I$(WRF_SRC_ROOT_DIR)/inc $(CPPFLAGS) $*.G > $*.i
+ mv $*.i $(DEVTOP)/pick/$*.f90
+ cp $*.F $(DEVTOP)/pick
+
+.F.o:
+ $(RM) $@
+ sed -e "s/^\!.*'.*//" -e "s/^ *\!.*'.*//" $*.F > $*.G
+ $(CPP) -I$(WRF_SRC_ROOT_DIR)/inc $(CPPFLAGS) $(OMPCPP) $*.G > $*.bb
+ $(SED_FTN) $*.bb | $(CPP) $(TRADFLAG) > $*.f90
+ $(RM) $*.G $*.bb
+ @ if echo $(ARCHFLAGS) | $(FGREP) 'DVAR4D'; then \
+ echo COMPILING $*.F for 4DVAR ; \
+ $(WRF_SRC_ROOT_DIR)/var/build/da_name_space.pl $*.f90 > $*.f90.tmp ; \
+ mv $*.f90.tmp $*.f90 ; \
+ fi
+ $(FC) -o $@ -c $(FCFLAGS) $(OMP) $(MODULE_DIRS) $(PROMOTION) $(FCSUFFIX) $*.f90
+
+
+.F.f90:
+ $(RM) $@
+ sed -e "s/^\!.*'.*//" -e "s/^ *\!.*'.*//" $*.F > $*.G
+ $(SED_FTN) $*.G > $*.H
+ $(CPP) -I$(WRF_SRC_ROOT_DIR)/inc $(CPPFLAGS) $*.H > $@
+ $(RM) $*.G $*.H
+
+.f90.o:
+ $(RM) $@
+ $(FC) -o $@ -c $(FCFLAGS) $(PROMOTION) $(FCSUFFIX) $*.f90
+
+setfeenv.o : setfeenv.c
+ $(RM) $@
+ $(CCOMP) -o $@ -c $(CFLAGS) $(OMPCC) $*.c
+
+.c.o:
+ $(RM) $@
+ $(CC) -o $@ -c $(CFLAGS) $*.c
+
+# A little more adventurous. Allow full opt on
+# mediation_integrate.o \
+# shift_domain_em.o \
+# solve_em.o <-- gets a little kick from SOLVE_EM_SPECIAL too, if defined
+# mediation_feedback_domain.o : mediation_feedback_domain.F
+# mediation_force_domain.o : mediation_force_domain.F
+# mediation_interp_domain.o : mediation_interp_domain.F
+
+# compile these without high optimization to speed compile
+track_driver.o : track_driver.F
+convert_nmm.o : convert_nmm.F
+init_modules_em.o : init_modules_em.F
+input_wrf.o : input_wrf.F
+module_io.o : module_io.F
+module_comm_dm.o : module_comm_dm.F
+module_comm_dm_0.o : module_comm_dm_0.F
+module_comm_dm_1.o : module_comm_dm_1.F
+module_comm_dm_2.o : module_comm_dm_2.F
+module_comm_dm_3.o : module_comm_dm_3.F
+module_comm_nesting_dm.o : module_comm_nesting_dm.F
+module_configure.o : module_configure.F
+module_domain.o : module_domain.F
+module_domain_type.o : module_domain_type.F
+module_alloc_space_0.o : module_alloc_space_0.F
+module_alloc_space_1.o : module_alloc_space_1.F
+module_alloc_space_2.o : module_alloc_space_2.F
+module_alloc_space_3.o : module_alloc_space_3.F
+module_alloc_space_4.o : module_alloc_space_4.F
+module_alloc_space_5.o : module_alloc_space_5.F
+module_alloc_space_6.o : module_alloc_space_6.F
+module_alloc_space_7.o : module_alloc_space_7.F
+module_alloc_space_8.o : module_alloc_space_8.F
+module_alloc_space_9.o : module_alloc_space_9.F
+module_tiles.o : module_tiles.F
+module_initialize.o : module_initialize.F
+module_physics_init.o : module_physics_init.F
+module_initialize_squall2d_x.o : module_initialize_squall2d_x.F
+module_initialize_squall2d_y.o : module_initialize_squall2d_y.F
+module_initialize_scm_xy.o : module_initialize_scm_xy.F
+module_integrate.o : module_integrate.F
+module_io_mm5.o : module_io_mm5.F
+module_io_wrf.o : module_io_wrf.F
+module_si_io.o : module_si_io.F
+module_wps_io_arw.o : module_wps_io_arw.F
+module_state_description.o : module_state_description.F
+output_wrf.o : output_wrf.F
+solve_interface.o : solve_interface.F
+start_domain.o : start_domain.F
+wrf_bdyin.o : wrf_bdyin.F
+wrf_bdyout.o : wrf_bdyout.F
+wrf_ext_read_field.o : wrf_ext_read_field.F
+wrf_ext_write_field.o : wrf_ext_write_field.F
+wrf_fddaobs_in.o : wrf_fddaobs_in.F
+wrf_histin.o : wrf_histin.F
+wrf_histout.o : wrf_histout.F
+wrf_inputin.o : wrf_inputin.F
+wrf_inputout.o : wrf_inputout.F
+wrf_restartin.o : wrf_restartin.F
+wrf_restartout.o : wrf_restartout.F
+wrf_tsin.o : wrf_tsin.F
+nl_get_0_routines.o : nl_get_0_routines.F
+nl_get_1_routines.o : nl_get_1_routines.F
+nl_set_0_routines.o : nl_set_0_routines.F
+nl_set_1_routines.o : nl_set_1_routines.F
+
+track_driver.o \
+convert_nmm.o \
+init_modules_em.o \
+module_initialize.o \
+module_initialize_squall2d_x.o \
+module_initialize_squall2d_y.o \
+module_initialize_scm_xy.o \
+module_integrate.o \
+module_io_mm5.o \
+module_io_wrf.o \
+module_si_io.o \
+module_wps_io_arw.o \
+module_tiles.o \
+output_wrf.o \
+solve_interface.o \
+start_domain.o \
+wrf_fddaobs_in.o \
+wrf_tsin.o :
+ $(RM) $@
+ $(SED_FTN) $*.F > $*.b
+ $(CPP) -I$(WRF_SRC_ROOT_DIR)/inc $(CPPFLAGS) $(OMPCPP) $*.b > $*.f90
+ $(RM) $*.b
+ @ if echo $(ARCHFLAGS) | $(FGREP) 'DVAR4D'; then \
+ echo COMPILING $*.F for 4DVAR ; \
+ $(WRF_SRC_ROOT_DIR)/var/build/da_name_space.pl $*.f90 > $*.f90.tmp ; \
+ mv $*.f90.tmp $*.f90 ; \
+ fi
+ if $(FGREP) '!$$OMP' $*.f90 ; then \
+ if [ -n "$(OMP)" ] ; then echo COMPILING $*.F WITH OMP ; fi ; \
+ $(FC) -c $(PROMOTION) $(FCNOOPT) $(FCBASEOPTS) $(MODULE_DIRS) $(FCSUFFIX) $(OMP) $*.f90 ; \
+ else \
+ if [ -n "$(OMP)" ] ; then echo COMPILING $*.F WITHOUT OMP ; fi ; \
+ $(FC) -c $(PROMOTION) $(FCNOOPT) $(FCBASEOPTS) $(MODULE_DIRS) $(FCSUFFIX) $*.f90 ; \
+ fi
+
+#solve_em.o :
+# $(RM) $@
+# $(SED_FTN) $*.F > $*.b
+# $(CPP) -I$(WRF_SRC_ROOT_DIR)/inc $(CPPFLAGS) $*.b > $*.f90
+# $(RM) $*.b
+# $(FC) -o $@ -c $(FCFLAGS) $(MODULE_DIRS) $(PROMOTION) $(FCSUFFIX) $(SOLVE_EM_SPECIAL) $(OMP) $*.f90
+
+module_sf_ruclsm.o : module_sf_ruclsm.F
+
+module_sf_ruclsm.o :
+ $(RM) $@
+ $(SED_FTN) $*.F > $*.b
+ $(CPP) -I$(WRF_SRC_ROOT_DIR)/inc $(CPPFLAGS) $(OMPCPP) $*.b > $*.f90
+ $(RM) $*.b
+ if $(FGREP) '!$$OMP' $*.f90 ; then \
+ echo COMPILING $*.F WITH OMP ; \
+ if [ -n "$(OMP)" ] ; then echo COMPILING $*.F WITH OMP ; fi ; \
+ $(FC) -c $(PROMOTION) $(FCREDUCEDOPT) $(FCBASEOPTS) $(MODULE_DIRS) $(FCSUFFIX) $(OMP) $*.f90 ; \
+ else \
+ if [ -n "$(OMP)" ] ; then echo COMPILING $*.F WITHOUT OMP ; fi ; \
+ $(FC) -c $(PROMOTION) $(FCREDUCEDOPT) $(FCBASEOPTS) $(MODULE_DIRS) $(FCSUFFIX) $*.f90 ; \
+ fi
+
+# compile without OMP
+input_wrf.o \
+module_domain.o \
+module_domain_type.o \
+module_physics_init.o \
+module_io.o \
+wrf_bdyin.o \
+wrf_bdyout.o \
+wrf_ext_read_field.o \
+wrf_ext_write_field.o \
+wrf_histin.o \
+wrf_histout.o \
+wrf_inputin.o \
+wrf_inputout.o \
+wrf_restartin.o \
+wrf_restartout.o \
+module_state_description.o \
+module_alloc_space.o \
+module_alloc_space_0.o \
+module_alloc_space_1.o \
+module_alloc_space_2.o \
+module_alloc_space_3.o \
+module_alloc_space_4.o \
+module_alloc_space_5.o \
+module_alloc_space_6.o \
+module_alloc_space_7.o \
+module_alloc_space_8.o \
+module_alloc_space_9.o \
+module_comm_dm.o \
+module_comm_dm_0.o \
+module_comm_dm_1.o \
+ module_comm_dm_2.o \
+ module_comm_dm_3.o \
+module_comm_nesting_dm.o \
+module_configure.o :
+ $(RM) $@
+ $(CPP) -I$(WRF_SRC_ROOT_DIR)/inc $(CPPFLAGS) $(OMPCPP) $*.F > $*.bb
+ $(SED_FTN) $*.bb | $(CPP) $(TRADFLAG) > $*.f90
+ @ if echo $(ARCHFLAGS) | $(FGREP) 'DVAR4D'; then \
+ echo COMPILING $*.F for 4DVAR ; \
+ $(WRF_SRC_ROOT_DIR)/var/build/da_name_space.pl $*.f90 > $*.f90.tmp ; \
+ mv $*.f90.tmp $*.f90 ; \
+ fi
+ $(RM) $*.b $*.bb
+ $(FC) -c $(PROMOTION) $(FCSUFFIX) $(FCNOOPT) $(FCBASEOPTS) $(MODULE_DIRS) $*.f90
+
Alternatively, the configure.wrf can also be generated by loading the appropriate modules, setting the environment variables shown below, executing % ./configure, and choosing the appropriate architecture option.
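+For reference, the header of the configure.wrf above records that it was generated with compiler choice 47 (the Cray CCE dm+sm option) and nesting option 1; the exact menu numbers may differ between WRF versions:
+% ./configure
+# when prompted, select the Cray CCE (dm+sm) architecture option and basic nesting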
After loading the modules for the appropriate toolchain and the supporting netcdf, hdf5, and pnetcdf libraries, and setting the netcdf environment variables, compile the WRF software:
+```
+% export NETCDF=/opt/cray/pe/netcdf/default/CRAYCLANG/14.0
+% ./compile em_real
+```
+
Linux uses standard POSIX-style permissions to control who has the ability to read, write, or execute a file or a directory.
+Under this standard, every file and directory has three types of permission that can be granted: read, write, and execute.
+Files are also "owned" by both an individual user and a user group. This ownership is used to provide varying levels of access to different cohorts of users on the system.
+The cohorts of users to which file permissions may be assigned are the User that owns the file, the Group that owns the file, and Other (all other users on the system).
+On most systems, every user is a member of their own personal group that has the same name as their username, and only that user has access to their own personal group. Whenever a new file is created, the default is that it will be created with the user and group ownership of the user that created the file. However, this may not always be the case, and the permissions of the directory in which the file is created can have an effect on the group ownership. This will be discussed in a later section.
+The ls -l
command will show the ownership and permissions of a file, a list of files, or all files in a directory. Here's an example output with two files, two directories, and a symbolic link to another directory. The user in the example is "alice".
+
[alice@el1 ~]$ ls -l
+-rwxrwx---. 1 alice alice 49 Oct 13 2020 testjob.sh
+-rw-rw----. 1 alice alice 35 Mar 9 16:45 test.txt
+drwxrwx---. 3 alice alice 1536 Mar 31 2021 myscripts
+drwxrws---. 3 alice csc000 4096 Dec 14 2020 shared-data
+lrwxrwxrwx. 1 alice alice 16 Jan 30 2023 my_proj -> /projects/csc000
+
The first field of ls -l
output for each file consists of ten characters. These represent the permission bits for the file.
The first character is reserved to describe the type of file. The three most common file types are:
+- a dash (-) indicates a regular file
+- d indicates that this is a directory (a type of "file" that stores a list of other files)
+- l indicates a symbolic link to another file/directory
+The next nine characters describe the file permissions that are set. These are always in the order of read, write, and execute.
+A letter indicates that this permission is granted; a dash (-) indicates that the permission is not granted.
This "rwx" order repeats three times: the first triplet is for User permissions, the second triplet is for Group permissions, and the third triplet is for Other permissions.
+In the example above, testjob.sh
has the permissions -rwxrwx---
. This means that the User and Group owners have read, write, and execute permission. The last three characters are -
, which indicates that "Other" users do not have permissions to this file.
There also may be a dot (.
) or other character at the end of the permissions list, depending on the variety of Linux that is installed. The dot indicates that no further access controls are in place. A +
indicates that ACLs (Access Control Lists) are in place that provide additional permissions. ACLs are an extension of the file permission system that is present on some, but not all, NREL HPC systems, and may be used to provide more fine-grained access control on a per-user basis. If the system you are using supports ACLs, you may see man getfacl
and man setfacl
for more help on ACLs.
After the permissions flags is a number indicating the number of hard links to the file. It has no bearing on permissions and can be ignored.
+The next two fields are the User and Group with access rights to the file. A file may only be owned by one User and one Group at a time.
+An s
in the e(x)ecute bit field has a special meaning, depending on whether it's in the User or Group permissions. A t
in the "Others"
+e(x)ecute also has a special meaning.
In the Group permission bits, an s
for the eXecute bit indicates that SETGID
is enabled. This can be set for an individual file or for a directory, but
+is most common on a directory. When setgid is enabled on a directory, any files created in the directory will have a group ownership that corresponds to the
+group ownership of the directory itself, instead of the default group of the user who created the file. This is very useful when an entire directory is
+intended to be used for collaboration between members of a group, when combined with appropriate group read, write, and/or execute bits.
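+For example, to turn on setgid for a shared directory (using the shared-data directory from the listing above):
+chmod g+s shared-data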
In the User permission bits, an s
for the eXecute bit indicates that SETUID
is enabled. This is only used for executable files, and means that
+regardless of the user who runs the program, the owner of the process that starts up will be changed to the owner of the file. This is very
+rarely used by regular users and can pose a considerable security risk, because a process that belongs to a user also has access to that user's
+files as though it had been run by that user. Setuid should almost never be used.
In the Other permission bits, a t
for the eXecute bit indicates that a "sticky bit" has been set. This is only used on directories. With the sticky bit
+set, files in that directory may only be deleted by the owner of the file or the owner of the directory. This is commonly used for directories that
+are globally writeable, such as /tmp or /tmp/scratch and will be set by a system administrator. It is otherwise rarely used by regular users.
Only the User that owns a file may change ownership or permissions.
+The chgrp
command is used to change the Group ownership of a file or directory.
The chmod
command is used to change the permissions of a file or directory.
The chown
command is used to change the User owner and/or Group owner of a file, but only system administrators may change the User owner, so this command will not be covered in this document. Please see man chown
for more information.
The chgrp
command is used to change the group ownership of a file. You must be a member of the group the file currently belongs to, as well as a
+member of the destination group.
chgrp -c group filename
The -c flag is recommended, as it explicitly shows any changes that are made to ownership.
+Filename can be a file, a list of files, a wildcard (e.g. *.txt
), or a directory.
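+For instance, to change a file's group to the csc000 project group shown in the listing above and report the change:
+chgrp -c csc000 test.txt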
Please see man chgrp
for more detailed information on this command.
The chmod command is used to change the permissions (also called file mode bits) of a file or directory. Using an alphabetic shorthand ("symbolic mode"), permissions can be changed for a file or directory, in the general format:
+chmod -c ugoa+-rwxst file
The cohort to which permissions should be applied is first: (u)ser, (g)roup, (o)ther, or (a)ll.
+The +
or -
following the cohort denotes whether the permissions should be added or removed, respectively.
After the +/- is the list of permissions to change: (r)ead, (w)rite, e(x)ecute are the primary attributes. (s)etuid or (s)etgid depend on the cohort chosen: u+s is for setuid, g+s is for setgid. The s(t)icky bit may also be set.
+To add eXecute permission for the User owner of a file:
+chmod u+x myscript.sh
To add read, write, and execute for the Group owner of a directory:
+chmod g+rwx mydirectory
To remove write and execute from other users:
+chmod o-wx myscript.sh
You can also combine arguments, for example:
+chmod g+rwx,o-rwx myscript.sh
chmod ug+rwx,o+r,o-w myscript.sh
Please avoid setting global read, write, and execute permissions, as it is a security risk:
+chmod a+rwx myscript.sh
Chmod can also accept numeric arguments for permissions, instead of the symbolic permissions. This is called "octal" mode, as it uses base 8 (numbers 0 through 7) for binary encoding. Symbolic permissions are now generally preferred for clarity, but octal is sometimes used as a shorthand way of accomplishing the same thing.
+In octal mode, a three or sometimes four digit number is used to represent the permission bits. The octal equivalent to "ug+rwx" is:
+chmod 770 myscript.sh
The first position is User, the second is Group, and the last is Other.
+The following table describes the value of the bit and the corresponding permission.
+| bit | permission |
+|---|---|
+| 0 | none |
+| 1 | execute |
+| 2 | write |
+| 4 | read |
The permission is set by the sum of the bits, from 0 to 7, with 0 being "no permissions" and 7 being "read, write, and execute."
+760 and 770 are the most common for data shared by a group of users. 700 is common for protected files that should only be viewed or edited by the User who owns the file.
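+For example, 750 grants read, write, and execute to the User (4+2+1=7), read and execute to the Group (4+1=5), and nothing to Other (0):
+chmod 750 myscript.sh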
+Occasionally there may be a fourth leading digit. This is used for setuid, setgid, or a sticky bit setting.
+The command chmod 777
is the equivalent of chmod a+rwx
, which grants read, write, and execute permission to ALL users on the system for the file(s) specified. Use of this command should be EXTREMELY rare, and any suggestions that it be applied should be examined closely, as it poses a major security risk to your files and data. Use your best judgement.
All of the commands listed have manual pages available at the command line. See man <command>
for more information, or man man
for help with the manual page system itself.
Further documentation regarding file permissions and other Linux fundamentals is widely available online in text or video format, and many paper books are available.
+We do not endorse any particular source, site, or vendor. The following links may be helpful:
+When first created, all /projects directories will be owned by the allocation's HPC Lead User and the project's shared Group. The default permissions will typically be ug+rwx (chmod 770) or ug+rwx,o+rx (chmod 775), depending on the system. The setgid bit will also be set on the directory, so that all files created in the /projects directory will have a Group ownership of the project's group.
+The NREL HPC Support Team relies on allocation owners and users to be responsible for file permissions and ownership as a part of managing the allocation and its data, but the PI or HPC Leads of a project may request assistance in changing permissions or ownership of files that belong to the allocation by opening a support ticket with hpc-help@nrel.gov.
+Documentation: https://cmake.org/documentation/
+CMake is a cross-platform build tool that is used to manage software compilation and testing. From the CMake web site:
+CMake is an open-source, cross-platform family of tools designed to build, test and package software. CMake is used to control the software compilation process using simple platform and compiler independent configuration files, and generate native makefiles and workspaces that can be used in the compiler environment of your choice.
+
On the NREL HPC systems, CMake is available through:
+module load cmake
+
New users are encouraged to refer to the documentation linked above, in particular the CMake tutorial. To build software that includes a CMakeLists.txt
file, the steps often follow a pattern similar to:
mkdir build
+cd build
+# Reference the path to the CMakeLists.txt file:
+CC=<c_compiler> CXX=<c++_compiler> cmake ..
+make
+
Here the CC
and CXX
environment variables are used to explicitly specify the C and C++ compiler that CMake should use. If not specified, CMake will determine a default compiler to use.
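+As a concrete sketch (the module names, compiler choice, and install prefix are assumptions, not site-specific guidance), a CMake project might be configured, built, and installed into a user-writable location as follows:
+module load cmake gcc
+mkdir build && cd build
+# CMAKE_INSTALL_PREFIX controls where "make install" places the results
+CC=gcc CXX=g++ cmake -DCMAKE_INSTALL_PREFIX=$HOME/software ..
+make -j 4
+make install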
Learn how to set up and use the Git software tool for development on the HPC systems
+Git is used locally to track incremental development and modifications to a collection of files. GitHub is a git-repository hosting web-service, which serves as a synchronized, common access point for the file collections. GitHub also has social aspects, like tracking who changed what and why. There are other git hosting services like GitLab which are similar to GitHub but offer slightly different features.
+NREL has a Github Enterprise server (github.nrel.gov) for internally-managed repos. Please note that github.nrel.gov is only available internally using the NREL network or VPN. NREL's git server uses SAML/SSO for logging into GitHub Enterprise. To get help accessing the server or creating a repository, please contact NREL ITS.
+The git software tool is already installed on the HPC systems.
+Git needs to know your user name and an email address at a minimum:
+$ git config --global user.name "Your name"
+$ git config --global user.email "your.name@nrel.gov"
+
Github does not accept account passwords for authenticated Git operations. Instead, token-based authentication (PAT or SSH key) is required.
+Users already have SSH keys created on the HPC systems. To set up Github SSH authorization, you can add the existing SSH (secure shell) key(s) to your Github profile. You will also need to change any remote repo URL to use SSH instead of HTTPS.
+git remote set-url origin <git@github.nrel.gov:username/my-projectname.git>.
+
Warning
+Please do not alter or delete the key pair that exists on the HPC systems in ~/.ssh/. You can copy the public key to Github.
+A git repository is an independent grouping of files to be tracked. A git repo has a "root" which is the directory that it sits in, and tracks further directory nesting from that. A single repo is often thought of as a complete project or application, though it's not uncommon to nest modules of an application as child repositories to isolate the development history of those submodules.
+A commit, or "revision", is an individual change to a file (or set of files). It's like when you save a file, except with Git, every time you save it creates a unique ID (a.k.a. the "SHA" or "hash") that allows you to keep record of what changes were made when and by who. Commits usually contain a commit message which is a brief description of what changes were made.
+A fork is a personal copy of another user's repository that lives on your account. Forks allow you to freely make changes to a project without affecting the original. Forks remain attached to the original, allowing you to submit a pull request to the original's author to update with your changes. You can also keep your fork up to date by pulling in updates from the original.
+Pull refers to when you are fetching in changes and merging them. For instance, if someone has edited the remote file you're both working on, you'll want to pull in those changes to your local copy so that it's up to date.
+Pull requests are proposed changes to a repository submitted by a user and accepted or rejected by a repository's collaborators. Like issues, pull requests each have their own discussion forum.
+Pushing refers to sending your committed changes to a remote repository, such as a repository hosted on GitHub. For instance, if you change something locally, you'd want to then push those changes so that others may access them.
+A branch is a new/separate version of the repository. Use branches when you want to work on a new feature, but don't want to mess-up the main branch while testing your ideas.
+For example, you could create a local working copy of the "test_repo" repo (puts it in a folder in your current directory): +
cd /some/project/dir
+git clone <git@github.nrel.gov:username/test_repo.git>
+
After making your changes, check what has changed with git status, then stage, commit, and push:
git add <filename(s)-you-changed>
+git commit -m "A comment about the changes you just made."
+git push
+
If you collaborate with others in this repo, you'll want to pull their changes into your copy of the repo. You may want to do this first-thing when you sit down to work on something to minimize the number of merges you'll need to handle:
+git pull
mkdir my.projectname
+cd my.projectname
+git init
+touch README.txt
+git add README.txt
+git commit -m 'first commit'
+# Push the repo to Github
+git remote add origin git@github.nrel.gov:hpc/my.projectname.git
+git push origin main
+
You can use git revert
to remove unwanted changes.
+Find the hash of the commit that you need to undo:
+git log
+Once you have the hash:
+git revert <hash of commit to undo>
+The git revert
command will undo only the changes associated with the chosen commit, even if it is not the most recent commit. The reverted commit will still be stored in the history of changes, so it can still be accessed or reviewed in the future.
Create a local branch called "experimental" based on the current master branch: +
git checkout master #Switch to the master branch
+git branch experimental
+
Use Your Branch (start working on that experimental branch....):
git checkout experimental
+# If this branch exists on the remote repo, pull in new changes:
+git pull origin experimental
+# work, work, work, commit....:
+
Send local branch to the repo:
+git push origin experimental
Get the remote repo and its branches:
+git fetch origin
Merge the branch into the master branch:
+
git checkout master
+git merge experimental
+
+Use git diff to review the differences, then git add and git commit your changes.
+Once you've merged a branch and you are done with it, you can delete it:
+
git branch --delete <branchName> # deletes branchName from your local repo
+git push origin --delete <branchName> # deletes the remote branch if you pushed it to the remote server
+
You can use git log
to see when the commits happened, and then git diff
has some options that can help identify changes.
+What changed between two commits (hopefully back to back commits):
+git diff 57357fd9..4f890708 > my.patch
+Just the files that changed:
+git diff --name-only 57357fd9 4f890708
You can tag a set of code in git, and use a specific tagged version.
+List tags:
+git tag -l
+Set a tag:
+git tag -a "2.2" -m "Tagging current rev at 2.2"
+Push your tag:
+git push --tags
+Use tag tagname:
+git checkout tagname
To revert your file back to your last commit and discard current changes, use the output from git status
to easily un-modify it.
+
$ git status
+# Changes not staged for commit:
+# (use "git add <file>..." to update what will be committed)
+# (use "git restore <file>..." to discard changes in working directory)
+ # modified: modified_code.py
+
+# Run the command in the above output to discard changes:
+$ git restore modified_code.py
+
If you run git status again, you will see that the changes have been reverted. Just be sure that you want to revert the file before doing so, because the discarded changes will not be recoverable.
+For example, you may need to do this if you were working on code from a repo that was checked-out from Github.com, and you want to check that code into a repository on NREL's github server. Once you've requested a new NREL git repo from ITS and it's configured, you can:
+
git remote set-url origin git@github.nrel.gov:hpc/my.<newprojectname>.git
+
See git help remote for more details, or you can just edit .git/config and change the URLs there.
+This shouldn't cause any lost repo history, but if you want to be sure, you can make a copy of your repo until the url change is confirmed.
+You can export a copy of your code to your $HOME directory using the following command:
+git archive master --prefix=my.projectname/ --output=~/my.projectname.tgz
Spack is an HPC-centric package manager for acquiring, building, and managing HPC applications as well as all their dependencies, down to the compilers themselves. Like frameworks such as Anaconda, it is associated with a repository of both source-code and binary packages. Builds are fully configurable through a DSL at the command line as well as in YAML files. Maintaining many build-time permutations of packages is simple through an automatic and user-transparent hashing mechanism. The Spack system also automatically creates (customizable) environment modulefiles for each built package.
+Multiple installations of Spack can easily be kept, and each is separate from the others by virtue of the environment variable SPACK_ROOT
.
+All package, build, and modulefile content is kept inside the SPACK_ROOT
path, so working with different package collections is as simple as setting SPACK_ROOT
to the appropriate location.
+The only exception to this orthogonality is the set of YAML files in $HOME/.spack/<platform>.
+Installing a Spack instance is as easy as
git clone https://github.com/spack/spack.git
Once the initial Spack instance is set up, it is easy to create new ones from it through
+spack clone <new_path>
SPACK_ROOT
will need to point to <new_path>
in order to be consistent.
Spack environment setup can be done by sourcing $SPACK_ROOT/share/spack/setup-env.sh
, or by simply adding $SPACK_ROOT/bin
to your PATH.
source $SPACK_ROOT/share/spack/setup-env.sh
+or
+export PATH=$SPACK_ROOT/bin:$PATH
Spack is able to find certain compilers on its own, and will add them to your environment as it does.
+In order to obtain the list of available compilers on Eagle, the user can run module avail; the compiler of interest can then be loaded with module load <compiler>.
+To see which compilers your Spack collections know about, type
spack compilers
To add an existing compiler installation to your collection, point Spack to its location through
+spack compiler add <path to Spack-installed compiler directory with hash in name>
The command will add to $HOME/.spack/linux/compilers.yaml
.
+To configure more generally, move changes to one of the lower-precedence compilers.yaml
files (paths described below in Configuration section).
+Spack has enough facility with standard compilers (e.g., GCC, Intel, PGI, Clang) that this should be all that’s required to use the added compiler successfully.
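+For example, a compiler provided by a module can be registered and then verified (the module name here is an assumption):
+module load gcc
+spack compiler find
+spack compilers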
+| Command | Description |
+|---|---|
+| spack list | List all available packages by name. This dumps the repo content, so if a local repo is in use, it lists the local packages. |
+| spack list <pattern> | List all available packages that have <pattern> somewhere in their name. <pattern> is a simple pattern, not a regex. |
+| spack info <package_name> | Show available versions classified as safe, preferred, or variants, as well as dependencies. Variants are important for selecting certain build features, e.g., with/without InfiniBand support. |
+| spack versions <package_name> | See which versions are available. |
+| Command | Description |
+|---|---|
+| spack find | List all locally installed packages |
+| spack find --deps <package> | List dependencies of <package> |
+| spack find --explicit | List packages that were explicitly requested via spack install |
+| spack find --implicit | List packages that were installed as a dependency of an explicitly installed package |
+| spack find --long | Include the partial hash in the package listing. Useful to see distinct builds |
+| spack find --paths | Show installation paths |
Finding how an installed package was built does not seem as straightforward as it should be.
+Probably the best way is to examine <install_path>/.spack/build.env
, where <install_path>
is the Spack-created directory with the hash for the package being queried.
+The environment variable SPACK_SHORT_SPEC
in build.env
contains the Spack command that can be used to recreate the package (including any implicitly defined variables, e.g., arch).
+The 7-character short hash is also included, and should be excluded from any spack install command.
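+For instance, to recover the spec used for a particular installation (the path is a placeholder):
+grep SPACK_SHORT_SPEC <install_path>/.spack/build.env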
+| Symbol | Description |
+|---|---|
+| @ | Package version. Can use the range operator ":", e.g., X@1.2:1.4. The range is inclusive and open-ended, e.g., "X@1.4:" matches any version of package X at 1.4 or higher. |
+| % | Compiler spec. Can include versioning, e.g., X%gcc@4.8.5 |
+| +, -, ~ | Build options: +opt, -opt; "~" is equivalent to "-" |
+| name=value | Build options for non-Boolean flags. Special names are cflags, cxxflags, fflags, cppflags, ldflags, and ldlibs |
+| target=value | For defined CPU architectures, e.g., target=haswell |
+| os=value | For defined operating systems |
+| ^ | Dependency specification, using the above specs as appropriate |
+| ^/<hash> | Specify a dependency where <hash> is of sufficient length to resolve uniquely |
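+As an illustration of the syntax (all names and versions here are placeholders, not recommended specs):
+spack install <package>@1.2:1.4 +<variant> %gcc@7.3.0 target=haswell ^<dependency>@2.0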
Sometimes dependencies are expected to be resolved through a package that is installed as part of the host system, or otherwise outside of the Spack database. One example is Slurm integration into MPI builds. If you were to try to add a dependency on one of the listed Slurms in the Spack database, you might see, e.g.,
+[$user@el2 ~]$ spack spec openmpi@3.1.3%gcc@7.3.0 ^slurm@19-05-3-2
+Input spec
+--------------------------------
+openmpi@3.1.3%gcc@7.3.0
+ ^slurm@19-05-3-2
+
+Concretized
+--------------------------------
+==> Error: The spec 'slurm' is configured as not buildable, and no matching external installs were found
+
Given that something like Slurm is integrated deeply into the runtime infrastructure of our local environment, we really want to point to the local installation.
+The way to do that is with a packages.yaml
file, which can reside in the standard Spack locations (see Configuration below).
+See the Spack docs on external packages for more detail.
+In the above example at time of writing, we would like to build OpenMPI against our installed Slurm 19.05.2
.
+So, you can create file ~/.spack/linux/packages.yaml
with the contents
packages:
+ slurm:
+ paths:
+ slurm@18-08-0-3: /nopt/slurm/18.08.3
+ slurm@19-05-0-2: /nopt/slurm/19.05.2
+
that will enable builds against both installed Slurm versions. Then you should see
+[$user@el2 ~]$ spack spec openmpi@3.1.3%gcc@7.3.0 ^slurm@19-05-0-2
+Input spec
+--------------------------------
+openmpi@3.1.3%gcc@7.3.0
+ ^slurm@19-05-0-2
+
+Concretized
+--------------------------------
+openmpi@3.1.3%gcc@7.3.0 cflags="-O2 -march=skylake-avx512 -mtune=skylake-avx512" cxxflags="-O2 -march=skylake-avx512 -mtune=skylake-avx512" fflags="-O2 -march=skylake-avx512 -mtune=skylake-avx512" +cuda+cxx_exceptions fabrics=verbs ~java~legacylaunchers~memchecker+pmi schedulers=slurm ~sqlite3~thread_multiple+vt arch=linux-centos7-x86_64
+-
+ ^slurm@19-05-0-2%gcc@7.3.0 cflags="-O2 -march=skylake-avx512 -mtune=skylake-avx512" cxxflags="-O2 -march=skylake-avx512 -mtune=skylake-avx512" fflags="-O2 -march=skylake-avx512 -mtune=skylake-avx512" ~gtk~hdf5~hwloc~mariadb+readline arch=linux-centos7-x86_64
+
where the Slurm dependency will be satisfied with the installed Slurm (cflags, cxxflags, and arch are coming from site-wide configuration in /nopt/nrel/apps/base/2018-12-02/spack/etc/spack/compilers.yaml
; the variants string is likely coming from the configuration in the Spack database, and should be ignored).
It is possible to specify some packages for which multiple options are available at a higher level.
+For example, mpi
is a virtual package specifier that can resolve to mpich, openmpi, Intel MPI, etc.
+If a package's dependencies are spec'd in terms of a virtual package, Spack will choose a specific package at build time according to site preferences.
+Choices can be constrained by spec, e.g.,
spack install X ^mpich@3
would satisfy package X’s mpi dependency with some version 3 of MPICH. You can see available providers of a virtual package with
+spack providers <vpackage>
In many cases, frameworks have sub-package installations in standard locations within their own installations.
+A familiar example of this is Python and its usual module location in lib(64)/python<version>/site-packages
, and pointed to via the environment variable PYTHONPATH
.
To find available extensions
+spack extensions <package>
Extensions are just packages, but they are not enabled for use out of the box. To do so (e.g., so that you could load the Python module after installing), you can either load the extension package’s environment module, or
+spack use <extension package>
This only lasts for the current session, and is not of general interest. A more persistent option is to activate the extension:
+spack activate <extension package>
This takes care of dependencies as well. The inverse operation is deactivation.
+| Command | Description |
+|---|---|
+| spack deactivate <extension package> | Deactivates the extension alone. Will not deactivate if dependents exist |
+| spack deactivate --force <extension package> | Deactivates regardless of dependents |
+| spack deactivate --all <extension package> | Deactivates the extension and all of its dependencies |
+| spack deactivate --all <parent> | Deactivates all extensions of the parent (e.g., python) |
Spack can auto-create environment modulefiles for the packages that it builds, both in Tcl for “environment modules” per se, and in Lua for Lmod.
+Auto-creation includes each dependency and option permutation, which can lead to excessive quantities of modulefiles.
+Spack also uses the package hash as part of the modulefile name, which can be somewhat disconcerting to users.
+These default behaviors can be adjusted in the active modules.yaml file, as well as through the support practices described below.
+Tcl modulefiles are created in $SPACK_ROOT/share/spack/modules
by default, and the equivalent Lmod location is $SPACK_ROOT/share/spack/lmod
.
+Only Tcl modules are created by default.
+You can modify the active modules.yaml file in the following ways to affect some example behaviors:
modules:
+ enable:
+ - tcl
+ - lmod
+
modules:
+ tcl:
+ naming_scheme: '{name}/{version}/{compiler.name}-{compiler.version}'
+
would achieve the Eagle naming scheme.
+modules:
+ tcl:
+ all:
+ filter:
+ environment_blacklist: ['CPATH']
+
Note that this would affect Tcl modulefiles only; if Spack also creates Lmod files, those would still contain default CPATH modification behavior.
+modules:
+ tcl:
+ whitelist: ['gcc']
+ blacklist: ['%gcc@4.8.5']
+
This would create modules for all versions of GCC built using the system compiler, but not for the system compiler itself. There are a great many further behaviors that can be changed; see https://spack.readthedocs.io/en/latest/module_file_support.html#modules for more.
+For general user support, it is not a bad idea to keep the modules that are publicly visible separate from the collection that Spack auto-generates. This involves some manual copying, but is generally not onerous as all rpaths are included in Spack-built binaries (i.e., you don’t have to worry about satisfying library dependencies for Spack applications with an auto-built module, since library paths are hard-coded into the application binaries). This separation also frees one from accepting Spack’s verbose coding formats within modulefiles, should you decide to maintain certain modulefiles another way.
Spack uses hierarchical customization files. Every package is a Python class, and inherits from the top-level class Package. Depending on the degree of site customization, you may want to fork the Spack repo to create your own customized Spack package. There are 4 levels of configuration. In order of increasing precedence,
+$SPACK_ROOT/etc/spack/defaults
/etc/spack
$SPACK_ROOT/etc/spack
$HOME/.spack
Spack configuration uses YAML files, a subset of JSON native to Python. +There are 5 main configuration files.
+compilers.yaml
. Customizations to the Spack-known compilers for all builds
i. Use full path to compilers
+ii. Additional rpaths beyond the Spack repo
+iii. Additional modules necessary when invoking compilers
+iv. Mixing toolchains
+v. Optimization flags
+vi. Environment modifications
+config.yaml
. Base functionality of Spack itself
i. install_tree: where to install packages
+ii. build_stage: where to do compiles. For performance, can specify a local SSD or a RAMFS.
+iii. module_roots: where to install modulefiles
+modules.yaml
. How to create modulefiles
i. whitelist/blacklist packages from having their own modulefiles created
+ii. adjust hierarchies
+packages.yaml
. Specific optimizations, such as multiple hardware targets.
i. dependencies, e.g., don’t build OpenSSL (usually want sysadmins to handle updates, etc.)
+ii. mark specific packages as non-buildable, e.g., vendor MPIs
+iii. preferences, e.g., BLAS -> MKL, LAPACK -> MKL
+repos.yaml
i. Directory-housed, not remote
+ii. Specify other package locations
+iii. Can then spec build in other configs (e.g., binary, don’t build)
+iv. Precedence in YAML file order, but follows Spack precedence order (user > site > system > default)
+spack edit …
-- opens Python file for package, can easily write new variants
spack providers
-- virtual packages, e.g., blas, mpi, etc. Standards, not implementations. Abstraction of an implementation (blas/mkl, mpi/mpich, etc.)
MirrorTopLevel
+ package_a
+ package_a-version1.tar.gz
+ package_a-version2.tar.gz
+ package_b
+ ⋮
+
Use spack mirror to manage mirrors.
packages
+ repo.yaml
+ alpha
+ hotfix-patch-ABC.patch
+ package.py
+ package.pyc
+ beta
+ theta
+
In order to add the HPE-installed compilers on Kestrel, we can edit the compilers.yaml file as discussed earlier.
+We can add the three programming environments of choice (PrgEnv-cray, PrgEnv-intel, PrgEnv-gnu) using the following lines:
- compiler:
+ spec: intel@=2023.2.0
+ modules:
+ - PrgEnv-intel
+ - intel/2023.2.0
+ paths:
+ cc: cc
+ cxx: CC
+ f77: ftn
+ fc: ftn
+ flags: {}
+ operating_system: rhel8
+ target: x86_64
+ environment: {}
+ extra_rpaths: []
+- compiler:
+ spec: cce@=14.0.4
+ modules:
+ - PrgEnv-cray
+ - cce/14.0.4
+ paths:
+ cc: cc
+ cxx: CC
+ f77: ftn
+ fc: ftn
+ flags: {}
+ operating_system: rhel8
+ target: x86_64
+ environment: {}
+ extra_rpaths: []
+- compiler:
+ spec: gcc@=12.1.0
+ modules:
+ - PrgEnv-gnu
+ - gcc/12.1.0
+ paths:
+ cc: cc
+ cxx: CC
+ f77: ftn
+ fc: ftn
+ flags: {}
+ operating_system: rhel8
+ target: x86_64
+ environment: {}
+ extra_rpaths: []
+
Similarly, we can add the HPE provided MPIs (Cray-MPICH) by editing the packages.yaml
file and adding the following:
cray-mpich:
+ externals:
+ - spec: "cray-mpich@8.1.23%intel@2023.2.0"
+ modules:
+ - intel/2023.2.0
+ - cray-dsmml/0.2.2
+ - craype-network-ofi
+ - cray-libsci/22.12.1.1
+ - craype-x86-spr
+ - craype/2.7.19
+ - libfabric/1.15.2.0
+ - cray-mpich/8.1.23
+ - PrgEnv-intel/8.3.3
+ prefix: /opt/cray/pe/mpich/8.1.23/ofi/intel/19.0
+ - spec: "cray-mpich@8.1.23%gcc@12.1.0"
+ modules:
+ - cray-dsmml/0.2.2
+ - craype-network-ofi
+ - cray-libsci/22.12.1.1
+ - craype-x86-spr
+ - craype/2.7.19
+ - libfabric/1.15.2.0
+ - cray-mpich/8.1.23
+ - PrgEnv-gnu/8.3.3
+ - cray-mpich/8.1.23
+ prefix: /opt/cray/pe/mpich/8.1.23/ofi/gnu/9.1
+ - spec: "cray-mpich@8.1.23%cce@14.0.4"
+ modules:
+ - cray-dsmml/0.2.2
+ - craype-network-ofi
+ - cray-libsci/22.12.1.1
+ - craype-x86-spr
+ - craype/2.7.19
+ - libfabric/1.15.2.0
+ - cray-mpich/8.1.23
+ - PrgEnv-cray/8.3.3
+ - cray-mpich/8.1.23
+ prefix: /opt/cray/pe/mpich/8.1.23/ofi/cray/10.0
+ buildable: False
+
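+With these entries in place, a build that uses one of the HPE toolchains and the external Cray MPICH might look like the following (the package name is a placeholder):
+spack install <package> %cce@14.0.4 ^cray-mpich@8.1.23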
This document describes some of the important command line options for various compilers, including the GNU (gcc, g++, gfortran), Intel, and Cray compilers for C, C++, and Fortran. The information contained herein is not complete, but only a small subset of what is available in the man pages and full documentation. For example, the man page for gcc is over 21,000 lines long.
+The topics covered include:
+This discussion is for version 12.x. Most options are supported for recent versions of the compilers. Also, most command line options for gcc and g++ are supported for each compiler. It is recommended that C++ programs be compiled with g++ and C programs with gcc.
+# Compile and link a program with the executable sent to the indicated
+ file
+gcc mycode.c -o myexec
+g++ mycode.C -o myexec
+
+# Compile a file but don't link
+gcc -c mycode.c
+g++ -c mycode.C
+
The default optimization level is -O0 on most systems. It is possible that a compiler might be configured to have a different default. One easy way to determine the default is to build a simple application without specifying an optimization level and compare its size to a version compiled with optimization on.
+-O1 Optimize. Optimizing compilation takes somewhat more time, and a
+ lot more memory for a large function.
+
+-O2 Optimize even more. GCC performs nearly all supported
+ optimizations that do not involve a space-speed tradeoff.
+
+-O3 Optimize yet more.
+
+-Ofast Disregard strict standards compliance. -Ofast enables all -O3
+ optimizations. It also enables optimizations that are not valid
+ for all standard-compliant programs.
+
You can discover which optimizations are at various levels of optimization as shown below. The last command will show all potential optimization flags, over 250.
+gcc -c -Q -O3 --help=optimizers > /tmp/O3-opts
+gcc -c -Q -O2 --help=optimizers > /tmp/O2-opts
+diff /tmp/O2-opts /tmp/O3-opts | grep enabled
+
+gcc -Q --help=optimizers
+
-Og Optimize debugging experience. Use instead of -O0. Does some
+ optimization but maintains debug information
+
+-g Produce debugging information
+
+gcc -Og -g myprog.c
+
+-p,-pg Generate extra code to write profile information suitable for
+ the analysis program prof (for -p) or gprof
+
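+For example, a typical profiling workflow with gprof (file names are hypothetical):
+gcc -pg myprog.c -o myprog
+./myprog              # writes gmon.out in the current directory
+gprof ./myprog gmon.out > profile.txt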
file.c
+ C source code that must be preprocessed.
+
+file.i
+ C source code that should not be preprocessed.
+
+file.ii
+ C++ source code that should not be preprocessed.
+
+file.cc
+file.cp
+file.cxx
+file.cpp
+file.CPP
+file.c++
+file.C
+ C++ source code that must be preprocessed.
+
You can specify explicitly the language for a file, independent of the extension, using the -x option. For example, gcc -x c file.cc will compile the program as C instead of C++.
+-ansi This is equivalent to -std=c90. In C++ mode, it is equivalent to -std=c++98.
+
+
+-std=
+
+c90
+ Support all ISO C90 programs
+
+iso9899:199409
+ ISO C90 as modified in amendment 1.
+
+c99
+ ISO C99.
+
+c11
+ ISO C11, the 2011 revision of the ISO C standard.
+
+c18
+ ISO C17, the 2017 revision of the ISO C standard
+ (published in 2018).
+
+c2x The next version of the ISO C standard, still under
+ development. The support for this version is
+ experimental and incomplete.
+
+
+c++98 The 1998 ISO C++ standard plus the 2003 technical
+ corrigendum and some additional defect reports. Same as
+ -ansi for C++ code.
+
+c++11
+ The 2011 ISO C++ standard plus amendments.
+
+c++14
+ The 2014 ISO C++ standard plus amendments.
+
+c++17
+ The 2017 ISO C++ standard plus amendments.
+
This is a subset of all of the options. There are "gnu"-specific versions of many of these which give slight variations. Also, some of these can be specified via various deprecated flags. The dialects available for the compilers are highly version dependent. Older versions of the compilers will not support newer dialects.
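+For example, to select a specific dialect at compile time (source file names are hypothetical):
+gcc -std=c11 -c mycode.c
+g++ -std=c++17 mycode.C -o myexec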
Unless explicitly disabled by the file extension as described above, files are preprocessed. If you pass the -E option, the file will only be preprocessed and will not be compiled. The output is sent to standard output.
+-fopenmp
+ Enable handling of OpenMP directives
+-fopenmp-simd
+ Enable handling of OpenMP's SIMD directives
+-mgomp
+ Generate code for use in OpenMP offloading
+
Offloading will not work on all platforms and may require additional options.
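+As an illustration, an OpenMP program can be built and run with eight threads as follows (the source file name is hypothetical):
+gcc -fopenmp omp_hello.c -o omp_hello
+OMP_NUM_THREADS=8 ./omp_hello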
+ -fopenacc
+ Enable handling of OpenACC directives
+
+ -fopenacc-dim=geom
+ Specify default compute dimensions for parallel offload
+ regions that do not explicitly specify
+
+Offloading will not work on all platforms and may require additional options.
+
-Wall   This enables all the warnings about constructions that some
+        users consider questionable, and that are easy to avoid (or
+        modify to prevent the warning)
+
+-Wextra This enables some extra warning flags that are not enabled by
+        -Wall.
+This discussion is for version 12.x. Most options are supported for recent versions of the compilers. Also, most command line options for gcc and g++ are supported for gfortran.
+# Compile and link a program with the executable sent to the indicated
+ file
+gfortran mycode.f90 -o myexec
+
+# Compile a file but don't link
+gfortran -c mycode.f90
+
The default optimization level is -O0 on most systems. It is possible that a compiler might be configured to have a different default. One easy way to determine the default is to build a simple application without specifying an optimization level and compare its size to a version compiled with optimization on.
+-O1 Optimize. Optimizing compilation takes somewhat more time, and a
+ lot more memory for a large function.
+
+-O2 Optimize even more. GCC performs nearly all supported
+ optimizations that do not involve a space-speed tradeoff.
+
+-O3 Optimize yet more.
+
+-Ofast Disregard strict standards compliance. -Ofast enables all -O3
+ optimizations. It also enables optimizations that are not valid
+ for all standard-compliant programs.
+
You can discover which optimizations are enabled at the various levels of optimization as shown below. The last command will show all potential optimization flags, over 250.
+gfortran -c -Q -O3 --help=optimizers > /tmp/O3-opts
+gfortran -c -Q -O2 --help=optimizers > /tmp/O2-opts
+diff /tmp/O2-opts /tmp/O3-opts | grep enabled
+
+gfortran -Q --help=optimizers
+
-Og Optimize debugging experience. Use instead of -O0. Does some
+ optimization but maintains debug information
+
+-g Produce debugging information
+
+-fbacktrace Try to print a back trace on error
+
+-fcheck=<all|array-temps|bits|bounds|do|mem|pointer|recursion>.
+ Perform various runtime checks. This will slow your program
+ down.
+
+gfortran -Og -g -fbacktrace -fcheck=all myprog.f90
+
+-fcheck=<all|array-temps|bits|bounds|do|mem|pointer|recursion>
+ Perform various runtime checks
+
+-p,-pg Generate extra code to write profile information suitable for
+ the analysis program prof (for -p) or gprof
+
.F, .FOR, .FTN, .fpp, .FPP, .F90, .F95, .F03
+ preprocessor is run automatically
+
+.f, .for, .ftn, .f90, .f95, .f03
+ preprocessor is not run automatically
+
f95, f2003, f2008, f2018 Specify strict conformance to the various
+ standards
+
+gnu 2018 with gnu extensions
+
+legacy Older codes
+
+-ffree-form / -ffixed-form The source is in Free / Fixed form
+
The Fortran 90 standard does not indicate the status of variables that leave scope. That is, in general, a variable defined in a subroutine may or may not be defined when the subroutine is reentered. There are exceptions for variables in common blocks and those defined in modules.
+For Fortran 95 and later local allocatable variables are automatically deallocated upon exit from a subroutine.
+The flags -fautomatic and -fno-automatic change this behavior.
+-fautomatic Automatically deallocate variables on exit independent of
+ standard setting
+
+-fno-automatic Do not automatically deallocate variables on exit
+ independent of standard setting
+
+-fmax-stack-var-size With this value set to some small value, say 1,
+ it appears that variables are not deallocated. A program
+ compiled with this option would in general be nonconformant.
+
The above applies to allocatable arrays. It is not clear what happens to scalars.
+Some code contains calls to external procedures with mismatches
+between the calls and the procedure definition, or with
+mismatches between different calls. Such code is non-conforming,
+and will usually be flagged with an error. The
+-fallow-argument-mismatch option degrades the error to a warning,
+which can only be disabled by disabling all warnings via -w. Only
+a single occurrence per argument is flagged by this warning.
+-fallow-argument-mismatch is implied by -std=legacy.
+It is recommended that source code be modified to have interfaces for routines that are called with various types of arguments. Fortran 2018 allows a generic type for such interfaces. For example, here is an interface for MPI_Bcast:
+module bcast
+interface
+ subroutine MPI_BCAST(BUF, COUNT, DATATYPE, DEST, COMM, IERROR)
+ type(*),intent(inout) :: BUF
+ !type(*), dimension(..), intent(in) :: BUF
+ integer, intent(in) :: COUNT, DATATYPE, DEST, COMM
+ integer, intent(out) :: IERROR
+ end subroutine
+end interface
+end module
+
Gfortran does not produce listings.
+Automatic preprocessing is determined by the file name extension as discussed above. You can manually turn it on/off via the options
+-cpp - Preprocess
+-nocpp - Don't preprocess
+-cpp -E - Preprocess and send output to standard out. Don't compile
+
-fopenmp Enable handling of OpenMP directives
+-fopenmp-simd Enable handling of OpenMP's SIMD directives
+-mgomp Generate code for use in OpenMP offloading
+
Offloading will not work on all platforms and may require additional options.
+ -fopenacc Enable handling of OpenACC directives
+
+ -fopenacc-dim=geom Specify default compute dimensions for parallel offload
+ regions that do not explicitly specify
+
-fimplicit-none
+ Produce an error message if there are implicitly typed variables.
+
+-fdefault-real-8
+ Set the default real type to an 8 byte wide type. This option also affects the kind of non-double real constants like 1.0.
+
+-pedantic
+ Issue warnings for uses of extensions to Fortran.
+
+ -fall-intrinsics
+ This option causes all intrinsic procedures (including the GNU-specific extensions) to be accepted. This can
+ be useful with -std= to force standard-compliance but get access to the full range of intrinsics available
+ with gfortran.
+
This discussion is for version 2021.6.0. Icc and icpc will be replaced with clang-based alternatives in the near future, icx and icpx. In the Cray environment, if PrgEnv-intel is loaded, "cc" maps to icc.
# Compile and link a program with the executable sent to the indicated
+ file
+icc mycode.c -o myexec
+icpc mycode.C -o myexec
+
+# Compile a file but don't link
+icc -c mycode.c
+icpc -c mycode.C
+
NOTE: The icpc command uses the same compiler options as the icc command. Invoking the compiler using icpc compiles .c and .i files as C++. Invoking the compiler using icc compiles .c and .i files as C. Using icpc always links in C++ libraries. Using icc only links in C++ libraries if C++ source is provided on the command line.
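A sketch illustrating that behavior (main.cpp and util.c are placeholder files):
```
icpc main.cpp util.c -o app    # util.c is compiled as C++ and C++ libraries are linked
icc util.c -o util             # util.c is compiled as C; C++ libraries are not linked
```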
+The default optimization level is -O2.
+-O0 Disables all optimizations.
+
+-O1 Enables optimizations for speed.
+
+-O2 Optimize even more.
+
+-O Same as -O2
+
+-O3 Optimize yet more.
+
+-Ofast Enables -O3, -no-prec-div, and -fp-model fast=2
+
+-no-prec-div enables optimizations that give slightly less precise
+ results than full IEEE division
+
+-fp-model Permits a slight decrease in the accuracy of math library functions
+
+-opt_report Generate an optimization report
+
You can learn more about which optimizations are enabled at the various optimization levels as shown below.
+icc -V -help opt
+
-g[n]
+ 0 Disables generation of symbolic debug information.
+ 1 Produces minimal debug information for performing stack traces.
+ 2 Produces complete debug information. This is the same as specifying -g with no n.
+ 3 Produces extra information that may be useful for some tools.
+
+-p,-pg Generate extra code to write profile information suitable for
+ the analysis program gprof
+
file.c
+ C source code that must be preprocessed.
+
+file.i
+ C source code that should not be preprocessed.
+
+file.ii
+ C++ source code that should not be preprocessed.
+
+file.cc
+file.cp
+file.cxx
+file.cpp
+file.CPP
+file.c++
+file.C
+ C++ source code that must be preprocessed.
+
You can explicitly specify the language for a file independent of the extension using the -x option. For example, icc -x c file.cc will compile the program as C instead of C++.
#### Language standard settings (Dialect)
+
c99          conforms to ISO/IEC 9899:1999 standard for C programs
+c11          conforms to ISO/IEC 9899:2011 standard for C programs
+c17          conforms to ISO/IEC 9899:2017 standard for C programs
+c18          conforms to ISO/IEC 9899:2018 standard for C programs
+c++11        enables C++11 support for C++ programs
+c++14        enables C++14 support for C++ programs
+c++17        enables C++17 support for C++ programs
+c++20        enables C++20 support for C++ programs
+c89          conforms to ISO/IEC 9899:1990 standard for C programs
+gnu89        conforms to ISO C90 plus GNU extensions
+gnu99        conforms to ISO C99 plus GNU extensions
+gnu++98      conforms to 1998 ISO C++ standard plus GNU extensions
+gnu++11      conforms to 2011 ISO C++ standard plus GNU extensions
+gnu++14      conforms to 2014 ISO C++ standard plus GNU extensions
+gnu++17      conforms to 2017 ISO C++ standard plus GNU extensions
+gnu++20      conforms to 2020 ISO C++ standard plus GNU extensions
+-strict-ansi Implement a strict ANSI conformance dialect
+```
+
Unless explicitly disabled by the file extension as described above, files are preprocessed. If you pass the -E option the file will be preprocessed only and will not be compiled. The output is sent to the standard output.
+-fopenmp
+ Enable handling of OpenMP directives
+-qopenmp-stubs
+ Compile OpenMP programs in sequential mode
+-parallel
+ Auto parallelize
+
Not supported
+
Offloading will not work on all platforms and may require additional options.
+-Wall
+ This enables all the warnings about constructions that some
+ users consider questionable, and that are easy to avoid (or
+ modify to prevent the warning)
+
+-Wextra
+ This enables some extra warning flags that are not enabled by
+ -Wall.
+
+-help [category] print full or category help message
+
+Valid categories include
+ advanced - Advanced Optimizations
+ codegen - Code Generation
+ compatibility - Compatibility
+ component - Component Control
+ data - Data
+ deprecated - Deprecated Options
+ diagnostics - Compiler Diagnostics
+ float - Floating Point
+ help - Help
+ inline - Inlining
+ ipo - Interprocedural Optimization (IPO)
+ language - Language
+ link - Linking/Linker
+ misc - Miscellaneous
+ opt - Optimization
+ output - Output
+ pgo - Profile Guided Optimization (PGO)
+ preproc - Preprocessor
+ reports - Optimization Reports
+
+ openmp - OpenMP and Parallel Processing
+
This discussion is for version 2021.6.0. Ifort will be replaced with a clang-based alternative in the near future, ifx. Ifx will have most of the same options as ifort with some clang additions. In the Cray environment, if PrgEnv-intel is loaded, "ftn" maps to ifort.
+# Compile and link a program with the executable sent to the indicated
+ file
+ifort mycode.f90 -o myexec
+
+# Compile a file but don't link
+ifort -c mycode.f90
+
The default optimization level is -O2.
+-O1 optimize for maximum speed, but disable some optimizations which
+ increase code size for a small speed benefit
+
+-O2 optimize for maximum speed (DEFAULT)
+
+-O3 optimize for maximum speed and enable more aggressive
+ optimizations that may not improve performance on some programs
+
+-O same as -O2
+
+-Os enable speed optimizations, but disable some optimizations which
+ increase code size for small speed benefit
+
+-O0 disable optimizations
+
+-Ofast enable -O3 -no-prec-div -fp-model fast=2 optimizations
+
+-fno-alias assume no aliasing in program
+
+-fno-fnalias assume no aliasing within functions, but assume
+ aliasing across calls
+
+-fast enable -xHOST -ipo -no-prec-div -O3 -static -fp-model=fast=2
+ optimizations
+
+-opt_report Generate an optimization report
+
You can learn more about which optimizations are enabled at the various optimization levels as shown below.
+ifort -V -help opt
+
-g[n]
+ 0 Disables generation of symbolic debug information.
+ 1 Produces minimal debug information for performing stack traces.
+ 2 Produces complete debug information. This is the same as specifying -g with no n.
+ 3 Produces extra information that may be useful for some tools.
+
+none Disables all check options.
+
+arg_temp_created Determines whether checking occurs for actual
+ arguments copied into temporary storage before routine calls.
+
+assume Determines whether checking occurs to test that the
+ scalar-Boolean-expression in the ASSUME directive is true, or
+ that the addresses in the ASSUME_ALIGNED directive are aligned
+ on the specified byte boundaries.
+
+bounds Determines whether checking occurs for array subscript and
+ character substring expressions.
+
+contiguous Determines whether the compiler checks pointer
+ contiguity at pointer-assignment time.
+
+format Determines whether checking occurs for the data type of an
+ item being formatted for output.
+
+output_conversion Determines whether checking occurs for the fit
+ of data items within a designated format descriptor field.
+
+pointers Determines whether checking occurs for certain
+ disassociated or uninitialized pointers or unallocated
+ allocatable objects.
+
+shape Determines whether array conformance checking is performed.
+
+stack Determines whether checking occurs on the stack frame.
+
+teams Determines whether the run-time system diagnoses
+ non-standard coarray team usage.
+
+udio_iostat Determines whether conformance checking occurs when
+ user-defined derived type input/output routines are executed.
+
+uninit Determines whether checking occurs for uninitialized
+ variables.
+
+ all Enables all check options.
+
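A hedged example of a debugging build that combines some of the checks listed above (mycode.f90 is a placeholder):
```
ifort -g -O0 -check bounds,uninit,pointers mycode.f90 -o mycode
```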
+-p,-pg Generate extra code to write profile information suitable for
+ the analysis program gprof
+
Filenames with the suffix .f90 are interpreted as free-form Fortran
+ 95/90 source files.
+
+Filenames with the suffix .f, .for, or .ftn are interpreted as
+ fixed-form Fortran source files.
+
+Filenames with the suffix .fpp, .F, .FOR, .FTN, or .FPP are
+ interpreted as fixed-form Fortran source files, which must be
+ preprocessed by the fpp preprocessor before being compiled.
+
+Filenames with the suffix .F90 are interpreted as free-form Fortran
+ source files, which must be pre-processed by the fpp preprocessor
+ before being compiled.
+
You can explicitly specify the language for a file independent of the extension using the -x option. For example, icc -x c file.cc will compile the program as C instead of C++.
+-stand
+
+none Tells the compiler to issue no messages for nonstandard
+ language elements. This is the same as specifying nostand.
+
+f90 Tells the compiler to issue messages for language elements
+ that are not standard in Fortran 90.
+
+f95 Tells the compiler to issue messages for language elements
+ that are not standard in Fortran 95.
+
+f03 Tells the compiler to issue messages for language elements
+ that are not standard in Fortran 2003.
+
+f08 Tells the compiler to issue messages for language elements
+ that are not standard in Fortran 2008.
+
+f18 Tells the compiler to issue messages for language elements
+ that are not standard in Fortran 2018. This option is set if you
+ specify -warn stderrors.
+
-list Produce a listing of the source file.
+
Unless explicitly enabled by the file extension as described above, files are not preprocessed. If you pass the -E option the file will be preprocessed only and will not be compiled. The output is sent to the standard output. The option **-fpp** will force running the preprocessor.
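For example, a sketch of forcing preprocessing on a lower-case extension (mycode.f90 is a placeholder):
```
ifort -fpp -E mycode.f90 > mycode_pp.f90   # preprocess only and capture the output
```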
+-fopenmp
+ Enable handling of OpenMP directives
+-qopenmp-stubs
+ Compile OpenMP programs in sequential mode
+-parallel
+ Auto parallelize
+
Not supported
+
-coarray[=keyword] Enables the coarray feature where keyword
+ Specifies the memory system where the coarrays will be
+ implemented. Possible values are:
+
+shared Indicates a shared memory system. This is the default.
+
+distributed Indicates a distributed memory system.
+
+single Indicates a configuration where the image does not
+ contain self-replication code. This results in an executable with
+ a single running image. This configuration can be useful for
+ debugging purposes, even though there are no inter-image
+ interactions.
+
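A minimal sketch of building and running a coarray program for shared memory (hello_caf.f90 is a placeholder):
```
ifort -coarray=shared hello_caf.f90 -o hello_caf
./hello_caf
```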
-save Causes variables to be placed in static memory.
+
+
+Default: This option saves all variables in static allocation
+ except local variables within a recursive routine and variables
+ declared as AUTOMATIC.
+
+-auto-scalar Scalar variables of intrinsic types INTEGER, REAL,
+ COMPLEX, and LOGICAL are allocated to the run-time stack unless
+ the routine is recursive or OpenMP is enabled. For Fortran 95 and
+ later, variables are not saved by default and allocatable arrays
+ are deallocated. This appears to be true for ifort even if the
+ standard is set to f90. However, it is poor practice to rely on
+ this behavior.
+
+
+-Wall This enables all the warnings about constructions that some
+ users consider questionable, and that are easy to avoid (or
+ modify to prevent the warning)
+
+-warn declarations Generate warnings for variables that are not
+ explicitly typed.
+
+-Wextra This enables some extra warning flags that are not
+ enabled by -Wall.
+
+
+-help [category] print full or category help message
+
+Valid categories include
+ advanced - Advanced Optimizations
+ codegen - Code Generation
+ compatibility - Compatibility
+ component - Component Control
+ data - Data
+ deprecated - Deprecated Options
+ diagnostics - Compiler Diagnostics
+ float - Floating Point
+ help - Help
+ inline - Inlining
+ ipo - Interprocedural Optimization (IPO)
+ language - Language
+ link - Linking/Linker
+ misc - Miscellaneous
+ opt - Optimization
+ output - Output
+ pgo - Profile Guided Optimization (PGO)
+ preproc - Preprocessor
+ reports - Optimization Reports
+
+ openmp - OpenMP and Parallel Processing
+
In the Cray environment, cc is a generic call for several different compilers. The compiler actually called is determined by the modules loaded. Here we discuss Cray C : Version 14.0.4. cc will detect if the program being compiled calls MPI routines; if so, it will build the program as an MPI program. Cray C : Version 14.0.4 is clang based with Cray enhancements.
+# Compile and link a program with the executable sent to the indicated
+ file
+cc mycode.c -o myexec
+
+# Compile a file but don't link
+cc -c mycode.c
+
The default optimization level is -O0.
+-O0, -O1, -O2, -O3, -Ofast, -Os, -Oz, -Og, -O, -O4 Specify which
+ optimization level to use:
+
+-O0 Means "no optimization": this
+ level compiles the fastest and generates the most debuggable
+ code.
+
+-O1 Somewhere between -O0 and -O2.
+
+-O2 Moderate level of optimization which enables most
+ optimizations.
+
+-O3 Like -O2, except that it enables optimizations that take
+ longer to perform or that may generate larger code (in an attempt
+ to make the program run faster).
+
+-Ofast Enables all the optimizations from -O3 along with other
+ aggressive optimizations that may violate strict compliance with
+ language standards.
+
+-Os Like -O2 with extra optimizations to reduce code size.
+
+-Oz Like -Os (and thus -O2), but reduces code size further.
+
+-Og Like -O1. In future versions, this option might disable
+ different optimizations in order to improve debuggability.
+
+-O Equivalent to -O1.
+
+-O4 and higher Currently equivalent to -O3
+
Option Run Time (sec)
+-O0 10.30
+-O1 3.19
+-O2 2.99
+-O3 2.04
+-Ofast 1.88
+-Ofast -flto 1.49
+-Os 3.19
+-Oz 3.31
+-Og 3.19
+-O 3.20
+
-fstandalone-debug
+ Turn off the stripping of some debug information that might be useful to some debuggers
+
+-feliminate-unused-debug-types
+ By default, Clang does not emit type information for types that are defined but not
+ used in a program. To retain the debug info for these unused types, the negation
+ -fno-eliminate-unused-debug-types can be used.
+
+-fexceptions
+ Enable generation of unwind information. This allows exceptions to be thrown through
+ Clang compiled stack frames. This is on by default in x86-64.
+
+-ftrapv
+ Generate code to catch integer overflow errors. Signed integer overflow is undefined
+ in C. With this flag, extra code is generated to detect this and abort when it happens.
+
file.c
+ C source code that must be preprocessed.
+
+file.i
+ C source code that should not be preprocessed.
+
+file.ii
+ C++ source code that should not be preprocessed.
+
+file.cc
+file.cp
+file.cxx
+file.cpp
+file.CPP
+file.c++
+file.C
+ C++ source code that must be preprocessed.
+file.upc
+ UPC
+
Standards are determined by the file extension as given above. Some additional checks can be performed.
+ -std=<standard>
+
Specify the language standard to compile for.
+ISO C 1999 with GNU extensions
+iso9899:1990
+ISO C 2011
+ISO C 2011 with GNU extensions
+ISO C 2017
+ISO C 2017 with GNU extensions
+The default C language standard is gnu17
+ISO C++ 1998 with amendments
+ISO C++ 1998 with amendments and GNU extensions
+ISO C++ 2011 with amendments
+ISO C++ 2011 with amendments and GNU extensions
+ISO C++ 2014 with amendments
+ISO C++ 2014 with amendments and GNU extensions
+ISO C++ 2017 with amendments
+ISO C++ 2017 with amendments and GNU extensions
+Working draft for ISO C++ 2020
+Working draft for ISO C++ 2020 with GNU extensions
+The default OpenCL language standard is cl1.0.
+-fsave-loopmark Generate a loopmark listing file (.lst) that shows which optimizations
+ were applied to which parts of the source code.
+
+-floopmark-style=<style> Specifies the style of the loopmark listing file.
+
+ Valid values for <style> are:
+ ''grouped'' Places all messages at the end of the listing.
+ ''interspersed'' Places each message after the relevant source code line.
+
Automatic preprocessing is determined by the file name extension as discussed above. You can manually turn it on/off via the options
+ -E with output going to standard out
+
The compiler predefines the macro cray in addition to all of the usual Clang predefined macros.
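One way to inspect those predefined macros, assuming the clang-style -dM preprocessor option is available through the cc wrapper:
```
cc -E -dM -x c /dev/null | grep -i cray    # dump predefined macros and filter for cray
```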
+-fopenmp Enables OpenMP and links in OpenMP libraries
+
Not supported
+
Unified Parallel C (UPC) Options
+-hupc, -hdefault -hupc Configures the compiler driver to expect
+ UPC source code. Source files with a .upc extension are
+ automatically treated as UPC code, but this option permits a file
+ with any other extension (typically .c) to be understood as UPC
+ code. -hdefault cancels this behavior; if both -hupc and
+ -hdefault appear in a command line, whichever appears last takes
+ precedence and applies to all source files in the command line.
+
+-fupc-auto-amo, -fno-upc-auto-amo Automatically use network
+ atomics for remote updates to reduce latency. For example, x +=
+ 1 can be performed as a remote atomic add. If an update is
+ recognized as local to the current thread, then no atomic is
+ used. These atomics are intended as a performance optimization
+ only and shall not be relied upon to prevent race conditions.
+ Enabled at -O1 and above.
+
+-fupc-buffered-async, -fno-upc-buffered-async Set aside memory in
+ the UPC runtime library for aggregating random remote accesses
+ designated with "#pragma pgas buffered_async". Disabled by
+ default.
+
+-fupc-pattern, -fno-upc-pattern Identify simple communication
+ loops and aggregate the remote accesses into a single function
+ call which replaces the loop. Enabled at -O1 and above.
+
+-fupc-threads=<N> Set the number of threads for a static THREADS
+ translation. This option causes __UPC_STATIC_THREADS__ to be
+ defined instead of __UPC_DYNAMIC_THREADS__ and replaces all uses
+ of the UPC keyword THREADS with the value N.
+
In the Cray environment, ftn is a generic call for several different compilers. The compiler actually called is determined by the modules loaded. Here we discuss Cray Fortran : Version 14.0.4. Ftn will detect if the program being compiled calls MPI routines; if so, it will build the program as an MPI program.
+# Compile and link a program with the executable sent to the indicated
+ file
+ftn mycode.f90 -o myexec
+
+# Compile a file but don't link
+ftn -c mycode.f90
+
The default optimization level is -O 2.
+-O
+
+0 Specifies no automatic cache management; all memory
+ references are allocated to cache. Both automatic cache blocking
+ and manual cache blocking (by use of the BLOCKABLE directive) are
+ shut off. Characteristics include low compile time. This option
+ is compatible with all optimization levels.
+
+1 Specifies conservative automatic cache management.
+ Characteristics include moderate compile time. Symbols are
+ placed in the cache when the possibility of cache reuse exists
+ and the predicted cache footprint of the symbol in isolation is
+ small enough to experience reuse.
+
+2 Specifies moderately aggressive automatic cache management.
+ Characteristics include moderate compile time. Symbols are
+ placed in the cache when the possibility of cache reuse exists
+ and the predicted state of the cache model is such that the
+ symbol will be reused. (Default)
+
+3 Specifies aggressive automatic cache management.
+ Characteristics include potentially high compile time. Symbols
+ are placed in the cache when the possibility of cache reuse
+ exists and the allocation of the symbol to the cache is
+ predicted to increase the number of cache hits.
+
+fast Same as 3.
+
-G (level)
+
+ 0 Full information is available for debugging, but at the cost
+ of a slower and larger executable. Breakpoints can be set at
+ each line. Most optimizations are disabled.
+
+ 1 Most information is available with partial optimization. Some
+ optimizations make tracebacks and limited breakpoints available
+ in the debugger. Some scalar optimizations and all loop nest
+ restructuring is disabled, but the source code will be
+ visible and most symbols will be available.
+
+ 2 Partial information. Most optimizations, tracebacks and very
+ limited breakpoints are available in the debugger. The source
+ code will be visible and some symbols will be available.
+
+
+-R runchk Specifies any of a group of runtime checks for your
+ program. To specify more than one type of checking, specify
+ consecutive runchk arguments, as follows: -R bs.
+
+
+ b Enables checking of array bounds. Bounds checking is not
+ performed on arrays dimensioned as (1). Enables -Ooverindex.
+
+ c Enables conformance checking of array operands in array
+ expressions.
+
+ d Enables a run time check for the !dir$ collapse directive and
+ checks the validity of the loop_info count information.
+
+ p Generates run time code to check the association or allocation
+ status of referenced POINTER variables, ALLOCATABLE arrays, or
+ assumed-shape arrays.
+
+ s Enables checking of character substring bounds.
+
The default is fixed for source files that have .f, .F, .for, or .FOR
+The default is free for source files that have .f90, .F90, .f95, .F95, .f03, .F03, .f08, .F08, .f18, .F18, .ftn, or .FTN
+The upper-case file extensions, .F, .FOR, .F90, .F95, .F03, .F08, .F18, or .FTN, will enable source preprocessing by default.
+Standards are determined by the file extension as given above. Some additional checks can be performed.
+-e enable
+
+ b If enabled, issue a warning message rather than an error
+ message when the compiler detects a call to a procedure
+ with one or more dummy arguments having the TARGET,
+ VOLATILE or ASYNCHRONOUS attribute and there is not an
+ explicit interface definition.
+
+
+ c Interface checking: use Cray system modules to check
+ library calls in a compilation. If you have a procedure
+ with the same name as one in the library, you will get
+ errors, as the compiler does not skip user-specified
+ procedures when performing checks.
+
+
+ C Enable/disable some types of standard call site
+ checking. The current Fortran standard requires that the
+ number and types of arguments must agree between the caller
+ and callee. These constraints are enforced in cases where
+ the compiler can detect them, however, specifying -dC
+ disables some of this error-checking, which may be
+ necessary in order to get some older Fortran codes to
+ compile.
+
+-f source_form free or fixed
+
-e v Allocate variables to static storage. These variables
+ are treated as if they had appeared in a SAVE statement. Variables
+ that are explicitly or implicitly defined as automatic variables are
+ not allocated to static storage. The following types of variables are
+ not allocated to static storage: automatic variables (explicitly or
+ implicitly stated), variables declared with the AUTOMATIC attribute,
+ variables allocated in an ALLOCATE statement, and local
+ variables in explicit recursive procedures. Variables with the
+ ALLOCATABLE attribute remain allocated upon procedure exit, unless
+ explicitly deallocated, but they are not allocated in static memory.
+ Variables in explicit recursive procedures consist of those in
+ functions, in subroutines, and in internal procedures within
+ functions and subroutines that have been defined with the RECURSIVE
+ attribute. The STACK compiler directive overrides this option.
+
-h list=a
+Automatic preprocessing is determined by the file name extension as discussed above. You can manually turn it on/off via the options
+ -E Preprocess and compile
+ -eZ Preprocess and compile
+ -eP Preprocess, don't compile
+
gfortran -cpp -E file.F90 > file.f90
+ftn file.f90
+
-homp Enables OpenMP and links in OpenMP libraries when possible
+ using CCE-Classic.
+
+-hnoomp Disables OpenMP and links in non-OpenMP libraries when
+ using CCE-classic.
+
+THE FOLLOWING APPLIES IF THE BACKEND COMPILER IS NOT CRAY FORTRAN.
+
+-fopenmp Enables OpenMP and links in OpenMP libraries when possible
+ using CCE, AOCC, and GNU.
+
+-openmp Enables OpenMP and links in OpenMP libraries when
+ possible.
+
+-noopenmp Disables OpenMP.
+
+-mp Enables OpenMP and links in OpenMP libraries when
+ possible using PGI.
+
+-Mnoopenmp Disables OpenMP and links in non-OpenMP libraries when
+ using PGI.
+
+-qopenmp Enables OpenMP and links in OpenMP libraries when
+ possible when using Intel.
+
+-qno-openmp Disables OpenMP and links in non-OpenMP libraries
+ when possible when using Intel.
+
-h acc
+
The -h pgas_runtime option directs the compiler driver to link with the runtime libraries required when linking programs that use UPC or coarrays. In general, a resource manager job launcher such as aprun or srun must be used to launch the resulting executable.
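A hedged sketch of building a UPC program with these options and launching it with srun (myupc.c and the process count are placeholders):
```
cc -hupc -h pgas_runtime myupc.c -o myupc
srun -n 8 ./myupc
```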
+-e I Treat all variables as if an IMPLICIT NONE statement had been specified.
+
Singularity has been deprecated in favor of a new container application called Apptainer. Apptainer is a descendant of Singularity. Apptainer will run Singularity containers and it supports Singularity commands. On Kestrel, singularity is an alias for apptainer, and the two commands can be used interchangeably in most instances. However, since Singularity is deprecated, it is advised to use Apptainer.
More information about Apptainer can be found at https://apptainer.org.
+Apptainer is installed on compute nodes and is accessed via a module named apptainer.
The directory /nopt/nrel/apps/software/apptainer/1.1.9/examples
+holds a number of containers and an example script that shows how to run containers hosting MPI programs across multiple nodes.
Before we get to more complicated examples, we'll first look at downloading and working with a simple remote image.
+Input commands are preceded by a $
.
$ ssh <username>@kestrel.hpc.nrel.gov
+[$kuser@kl1 ~]$ salloc --exclusive --mem=0 --tasks-per-node=104 --nodes=1 --time=01:00:00 --account=MYACCOUNT --partition=debug
+[$kuser@x1000c0s0b0n0 ~]$ cat /etc/redhat-release
+Red Hat Enterprise Linux release 8.6 (Ootpa)
+
[$kuser@x1000c0s0b0n0 ~]$ module purge
+[$kuser@x1000c0s0b0n0 ~]$ module load apptainer
+
[$kuser@x1000c0s0b0n0 ~]$ cd /scratch/$USER
+[$kuser@x1000c0s0b0n0 $kuser]$ mkdir -p apptainer-images
+[$kuser@x1000c0s0b0n0 $kuser]$ cd apptainer-images
+[$kuser@x1000c0s0b0n0 apptainer-images]$ apptainer pull --name hello-world.simg shub://vsoch/hello-world
+Progress |===================================| 100.0%
+
[$kuser@x1000c0s0b0n0 apptainer-images]$ apptainer inspect hello-world.simg # Shows labels
+{
+ "org.label-schema.usage.apptainer.deffile.bootstrap": "docker",
+ "MAINTAINER": "vanessasaur",
+ "org.label-schema.usage.apptainer.deffile": "apptainer",
+ "org.label-schema.schema-version": "1.0",
+ "WHATAMI": "dinosaur",
+ "org.label-schema.usage.apptainer.deffile.from": "ubuntu:14.04",
+ "org.label-schema.build-date": "2017-10-15T12:52:56+00:00",
+ "org.label-schema.usage.apptainer.version": "2.4-feature-squashbuild-secbuild.g780c84d",
+ "org.label-schema.build-size": "333MB"
+}
+[$kuser@x1000c0s0b0n0 apptainer-images]$ apptainer inspect -r hello-world.simg # Shows the script run
+#!/bin/sh
+
+exec /bin/bash /rawr.sh
+
[$kuser@x1000c0s0b0n0 apptainer-images]$ apptainer run hello-world.simg
+RaawwWWWWWRRRR!! Avocado!
+
As mentioned above, there is a script in the apptainer directory that shows how MPI applications built inside a container image can be run on multiple nodes. We'll look at 5 containers with different versions of MPI. Each container has two MPI programs installed, a glorified Hello World (phostone) and PingPong (ppong). The 5 versions of MPI are: openmpi, intelmpi, mpich_ch4, mpich_ch4b, and mpich_ch3.
+"ch*" can be thought of as a "lower level" communications protocol. An MPICH container might be built with either ch3 or ch4, but we have found that ch4 is considerably faster on Kestrel.
+The script can be found at /nopt/nrel/apps/software/apptainer/1.1.9/examples/script and at https://github.com/NREL/HPC/blob/master/kestrel/apptainer/script
+Here is a copy:
+#!/bin/bash
+#SBATCH --job-name="apptainer"
+#SBATCH --nodes=2
+#SBATCH --ntasks-per-node=2
+#SBATCH --exclusive
+#SBATCH --export=ALL
+#SBATCH --time=02:00:00
+#SBATCH --output=apptainer.log
+#SBATCH --mem=0
+
+export STARTDIR=`pwd`
+export CDIR=/nopt/nrel/apps/software/apptainer/1.1.9/examples
+mkdir $SLURM_JOB_ID
+cd $SLURM_JOB_ID
+
+cat $0 > script
+printenv > env
+
+touch warnings
+touch output
+
+module load apptainer
+which apptainer >> output
+
+echo "hostname" >> output
+hostname >> output
+
+echo "from alpine.sif" >> output
+ apptainer exec $CDIR/alpine.sif hostname >> output
+echo "from alpine.sif with srun" >> output
+srun -n 1 --nodes=1 apptainer exec $CDIR/alpine.sif cat /etc/os-release >> output
+
+
+export OMP_NUM_THREADS=2
+
+$CDIR/tymer times starting
+
+MPI=pmix
+for v in openmpi intel mpich_ch4 mpich_ch4b mpich_ch3; do
+ srun --mpi=$MPI apptainer exec $CDIR/$v.sif /opt/examples/affinity/tds/phostone -F > phost.$v 2>>warnings
+ $CDIR/tymer times $v
+ MPI=pmi2
+ unset PMIX_MCA_gds
+done
+
+MPI=pmix
+#skip mpich_ch3 because it is very slow
+for v in openmpi intel mpich_ch4 mpich_ch4b ; do
+ srun --mpi=$MPI apptainer exec $CDIR/$v.sif /opt/examples/affinity/tds/ppong> ppong.$v 2>>warnings
+ $CDIR/tymer times $v
+ MPI=pmi2
+ unset PMIX_MCA_gds
+done
+
+$CDIR/tymer times finished
+
+mv $STARTDIR/apptainer.log .
+
We set the variable CDIR
which points to the directory from which we will get our containers.
We next create a directory for our run and go there. The cat
and printenv
commands give us a copy of our script and the environment in which we are running. This is useful for debugging.
Before we run the MPI containers, we run the command hostname
from inside a very simple container alpine.sif
. We show that containers can be run both without and with srun
. In the second instance we cat /etc/os-release
to show we are running a different OS.
Then we get into the MPI containers. This is done in a loop over containers containing the MPI versions: openmpi, intelmpi, mpich_ch4, mpich_ch4b, and mpich_ch3.
+The application tymer is a simple wall clock timer.
+The --mpi=
option on the srun line instructs Slurm how to launch jobs. The normal option is --mpi=pmi2
. However, containers using OpenMPI might need to use the option --mpi=pmix
as we do here.
The first loop just runs a quick "hello world" example. The second loop runs a pingpong test. We skip the mpich_ch3 pingpong test because it runs very slowly.
+You can see example output from this script in the directory:
+/nopt/nrel/apps/software/apptainer/1.1.9/examples/output/
+
The directory /nopt/nrel/apps/software/apptainer/1.1.9/examples/defs
contains the recipes for the containers. The containers apptainer.sif
and intel.sif
were built in two steps using app_base.def - apptainer.def and mods_intel.def - intel.def. They can also be found in the HPC code examples repository.
The script sif2def
can be used to generate a recipe from a container. It has not been extensively tested, so it may not work for all containers and is provided here "as is."
Images can be generated from a recipe.
+This example shows how to create an Ubuntu Singularity image with OpenMPI installed. The recipe is shown in pieces to make it easier to describe what each section does. The complete recipe can be found in the defs
directory. Building containers normally requires root/admin privileges, so the build process must be run on a user's computer with Apptainer installed. After creation, the image can be copied to Kestrel and run.
Bootstrap: docker
+from: ubuntu:latest
+
%environment
+ export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
+ export PMIX_MCA_gds=^ds12
+
%post
+ echo "Installing basic development packages..."
+ export DEBIAN_FRONTEND=noninteractive
+ apt-get update
+ apt-get install -y bash gcc g++ gfortran make curl python3
+
echo "Installing OPENMPI..."
+ curl https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.5.tar.gz --output openmpi-4.1.5.tar.gz
+ mkdir -p /opt/openmpi/src
+ tar -xzf openmpi-4.1.5.tar.gz -C /opt/openmpi/src
+ cd /opt/openmpi/src/*
+ ./configure
+ make install
+
echo "Build OPENMPI example..."
+ export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
+ cd /opt/openmpi/src/*/examples
+ mpicc ring_c.c -o /usr/bin/ring
+
/usr/bin/ring
+
sudo $(type -p apptainer) build small.sif ubuntu-mpi.def
+
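Once built, the image can be copied to Kestrel, for example with scp (the destination path is a placeholder):
```
scp small.sif <username>@kestrel.hpc.nrel.gov:/scratch/<username>/
```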
[kuser@kl1 ~]$ salloc --exclusive --mem=0 --tasks-per-node=104 --nodes=2 --time=01:00:00 --account=MYACCOUNT --partition=debug
+salloc: Granted job allocation 90367
+salloc: Waiting for resource configuration
+salloc: Nodes x3000c0s25b0n0,x3000c0s27b0n0 are ready for job
+[kuser@x3000c0s25b0n0 ~]$ module load apptainer
+[kuser@x3000c0s25b0n0 ~]$ srun -n 8 --tasks-per-node=4 --mpi=pmix apptainer run small.sif
+Process 2 exiting
+Process 3 exiting
+Process 0 sending 10 to 1, tag 201 (8 processes in ring)
+Process 0 sent to 1
+Process 0 decremented value: 9
+Process 0 decremented value: 8
+Process 0 decremented value: 7
+Process 0 decremented value: 6
+Process 0 decremented value: 5
+Process 0 decremented value: 4
+Process 0 decremented value: 3
+Process 0 decremented value: 2
+Process 0 decremented value: 1
+Process 0 decremented value: 0
+Process 0 exiting
+Process 1 exiting
+Process 5 exiting
+Process 6 exiting
+Process 7 exiting
+Process 4 exiting
+[kuser@x3000c0s25b0n0 ~]$
+
Containers provide a method of packaging your code so that it can be run anywhere you have a container runtime. This enables you to create a container on your local laptop and then run it on an HPC system or other computing resource. Containers provide an alternative way of isolating and packaging your code from solutions such as Conda environments.
+The most common container runtime outside of HPC is Docker. Docker is not suited for HPC environments and is therefore not available on the system currently. Singularity is an alternative container tool which is provided.
+Singularity is able to run most Docker images, but Docker is unable to run Singularity images. A key consideration when deciding to containerize an application is which container engine to build with. A suggested best practice is to build images with Docker when possible, as this provides more flexibility. Sometimes this is not possible though, and you may have to build with Singularity or maintain separate builds for each container engine.
+Both Singularity and Docker provide the ability to use hardware-based features on the HPC systems such as GPUs. A common usage for containers is packaging of GPU-enabled tools such as TensorFlow. Singularity enables access to the GPU and driver on the host. In principle, the MPI installations can also be accessed from correctly configured containers, but care is needed to ensure compatibility between the libraries on the host and container.
+Containers are built from a container specification file: a Dockerfile for Docker or a definition file for Singularity. These files specify the steps necessary to create the desired package and the additional software packages to install and configure in this environment.
FROM ubuntu:20.04
+
+RUN apt-get -y update && apt-get install -y python3
+
The above Dockerfile illustrates the build steps to create a simple image. Images are normally built from a base image indicated by FROM
, in this case Ubuntu. The ability to use a different base image provides a way to use packages which may work more easily on one Linux distribution. For example, the Linux distribution on Eagle is CentOS; building the above image would allow the user to install packages from Ubuntu repositories.
The RUN
portion of the above Dockerfile indicates the command to run, in this example it installs the Python 3 package. Additional commands such as COPY
, ENV
, and others enable the customization of your image to suit your compute environment requirements.
Singularity definition files have a similar format, as described in the documentation.
+Note that building Docker or Singularity images requires root/admin privileges and cannot be done on the HPC systems. Docker is available on most platforms, and users with admin privileges on a local machine can build Docker images locally. The Docker image file can then be pushed to a registry and pulled on the HPC system using Singularity as described here, or a tool such as Docker2Singularity may be used to convert the image to a Singularity format. Alternatively, users with admin privileges on a Linux system can run Singularity locally to build images. Another option is to use Singularity Container Service, which provides free accounts with a limited amount of container build time.
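A hedged sketch of that workflow, with placeholder image and registry names:
```
# On a machine where you have admin privileges
docker build -t <registry>/<repo>/mytool:latest .
docker push <registry>/<repo>/mytool:latest

# On the HPC system
module load singularity-container
singularity pull docker://<registry>/<repo>/mytool:latest
```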
+Container registries enable users to store container images. An overview of the steps to use each of the main container registries available to NREL users is provided below. Registries can enable reproducibility by storing tagged versions of containers, and also facilitate transferring images easily between different computational resources.
+Docker is not supported on NREL's HPC systems including Eagle. Instead Singularity is the container engine provided as a module. Singularity is able to pull Docker images and convert them to Singularity images. Although not always possible, we suggest creating Docker images when possible to ensure portability between compute resources and using Singularity to convert the image if it is to be run on an HPC system.
Registry | Eagle Access | AWS Access | Docker Support | Singularity Support
---|---|---|---|---
Harbor | Yes | No | Yes | Yes
AWS ECR | Yes | Yes | Yes | No*
DockerHub | Yes | Yes | Yes | No*

*For DockerHub and AWS ECR it may be possible to push images using ORAS, but this was not found to be a streamlined process in testing.
AWS ECR can be utilized by projects with a cloud allocation to host containers. ECR primarily can be used with Docker containers, although Singularity should also be possible.
+NREL's Harbor is a registry hosted by ITS that supports both Docker and Singularity containers.
+**NREL ITS is currently evaluating a replacement for internally hosted Harbor (likely moving to Enterprise DockerHub).**
+On your local machine, push a container to the registry as follows:
docker login harbor.nrel.gov
+
docker tag SOURCE_IMAGE[:TAG] harbor.nrel.gov/REPO/IMAGE[:TAG]
+
docker push harbor.nrel.gov/REPO/IMAGE[:TAG]
+
Pull and convert the container to Singularity on Eagle.
+Note: --nohttps
is not ideal, but NREL certificates would otherwise need to be added to avoid a certificate error.
+
singularity pull --nohttps --docker-login docker://harbor.nrel.gov/REPO/IMAGE[:TAG]
+
The container should now be downloaded and usable as usual
Under your User Profile in Harbor, obtain and export the following information:
export SINGULARITY_DOCKER_USERNAME=<harbor username>
+export SINGULARITY_DOCKER_PASSWORD=<harbor CLI secret>
+
singularity push <image>.sif oras://harbor.nrel.gov/<PROJECT>/<IMAGE>:<TAG>
+
singularity pull oras://harbor.nrel.gov/<PROJECT>/<IMAGE>:<TAG>
+
Currently under testing, and not generally available
+To get the needed credentials for NREL Dockerhub, select your username in the top right -> Account -> Security -> Create a new access token.
+The dialog box will describe how to use the security token with docker login
to enable pulling and pushing containers.
As discussed in Intro to Containers, Singularity is a platform designed specifically for running containers on HPC systems. Images can be built locally and copied to the HPC system or pulled from an online registry. For more information about building containers, see here.
+The table below shows the appropriate commands for loading Singularity on each system:
System | Module command
---|---
Eagle | module load singularity-container
Swift | module load singularity
Vermilion | module load singularity
Kestrel | module load apptainer
+
Note
+Singularity has been deprecated in favor of a new container application called Apptainer. For more information about Apptainer and using it on Kestrel, see Apptainer.
The following example shows how to download and run a simple "hello-world" container based on Ubuntu. The example is written for Eagle but can be adapted to other systems by using the appropriate module command.
+Note
+Input commands in the following examples are preceded by a $
.
Step 1: Log into compute node, checking it is running CentOS 7
+$ ssh eagle.hpc.nrel.gov
+[$USER@el1 ~]$ srun -A MYALLOCATION -t 60 -N 1 --pty $SHELL
+[$USER@r1i3n18 ~]$ cat /etc/redhat-release
+CentOS Linux release 7.7.1908 (Core)
+
Step 2: Load the singularity-container
module
[$USER@r1i3n18 ~]$ module purge
+[$USER@r1i3n18 ~]$ module load singularity-container
+
Step 3: Retrieve hello-world
image. Be sure to use /scratch
, as images are typically large
[$USER@r1i3n18 ~]$ cd /scratch/$USER
+[$USER@r1i3n18 $USER]$ mkdir -p singularity-images
+[$USER@r1i3n18 $USER]$ cd singularity-images
+[$USER@r1i3n18 singularity-images]$ singularity pull --name hello-world.simg shub://vsoch/hello-world
+Progress |===================================| 100.0%
+Done. Container is at: /lustre/eaglefs/scratch/$USER/singularity-images/hello-world.simg
+
Step 4: Run image default script
+[$USER@r1i3n18 singularity-images]$ singularity run hello-world.simg
+RaawwWWWWWRRRR!! Avocado.
+
Note
Running the image may produce errors such as:
+ERROR: ld.so: object '/nopt/xalt/xalt/lib64/libxalt_init.so' from LD_PRELOAD cannot be preloaded (cannot open shared object file): ignored.
+
This can be resolved by unsetting LD_PRELOAD
:
$ unset LD_PRELOAD
+
Step 5: Run in singularity bash shell
+[$USER@r1i3n18 singularity-images]$ cat /etc/redhat-release
+CentOS Linux release 7.7.1908 (Core)
+[$USER@r1i3n18 singularity-images]$ cat /etc/lsb-release
+cat: /etc/lsb-release: No such file or directory
+
+[$USER@r1i3n18 singularity-images]$ singularity shell hello-world.simg
+Singularity: Invoking an interactive shell within container...
+
+Singularity hello-world.simg:~> cat /etc/lsb-release
+DISTRIB_ID=Ubuntu
+DISTRIB_RELEASE=14.04
+DISTRIB_CODENAME=trusty
+DISTRIB_DESCRIPTION="Ubuntu 14.04.5 LTS"
+Singularity hello-world.simg:~> cat /etc/redhat-release
+cat: /etc/redhat-release: No such file or directory
+
Documentation: GDB
+GDB is GNU's command line interface debugging tool.
+GDB is available on NREL machines and supports a number of languages, including C, C++, and Fortran.
+When using GDB, make sure the program you are attempting to debug has been compiled with the -g
debug flag and with the -O0
optimization flag to achieve the best results.
Run GDB with the following command: gdb --args my_executable arg1 arg2 arg3
+This will launch gdb running my_executable, passing arguments arg1, arg2, and arg3 to my_executable.
For links to in-depth tutorials and walkthroughs of GDB features, please see Resources.
Eagle | Swift | Vermilion
---|---|---
gdb/7.6.1* | gdb/8.2* | gdb/12.1, gdb/8.2*
* Located in /usr/bin; module load is not needed.
A web app for interactive Python in a browser
+import chart_studio.plotly as py
+import plotly.figure_factory as ff
+import pandas as pd
+import numpy as np
+import matplotlib
+import matplotlib.pyplot as plt
+x = np.linspace(0, 5, 10)
+y = x ** 2
+n = np.array([0,1,2,3,4,5])
+xx = np.linspace(-0.75, 1., 100)
+
+fig, axes = plt.subplots(1, 4, figsize=(12,3))
+
+axes[0].scatter(xx, xx + 1.25*np.random.randn(len(xx)))
+#axes[0].scatter(xx, xx + 0.25*np.random.randn(len(xx)))
+axes[0].set_title("scatter")
+
+axes[1].step(n, n**2.0, lw=2)
+axes[1].set_title("step")
+
+axes[2].bar(n, n**2, align="center", width=0.5, alpha=0.5)
+axes[2].set_title("bar")
+
+axes[3].fill_between(x, x**2.5, x**3, color="green", alpha=0.5);
+axes[3].set_title("fill_between");
+
* Multi-user "backend" server
+* Controls launching the single-user Jupyter server
+* NREL's "Europa" runs Jupyterhub
+
(In general, don't worry about JupyterHub--unless you're a sysadmin)
+* The single-user server/web interface
+* Create/save/load .ipynb notebook files
+* What users generally interact with
+
* An individual .ipynb file
+* Contains your Python code and visualizations
+* Sharable/downloadable
+
* A "nicer" web interface for Jupyter - "notebooks 2.0"
+* Preferred by some
+* Lacking some features of "classic" notebooks
+
* The Python environment used by a notebook
+* More on kernels later
+
We run a JupyterHub server, Europa, that is available for use.
+* Fast and easy access
+* Use regular Eagle credentials
+* Great for light to moderate processing/debugging/testing
+
* Limited resource: 8 cores/128GB RAM per user before automatic throttling
+* Compete with other users for CPU/RAM on a single machine
+* No custom environments (for now)
+
- Visit Europa at (https://europa.hpc.nrel.gov/)
+
+- Log in using your HPC credentials
+
+- Opens a standard "notebooks" interface
+
+- Change url end /tree to /lab for Lab interface
+
* Custom environments
+* 36 cores and up to ~750GB RAM
+* No competing with other users for cores
+
* Compete with other users for nodes
+* Costs AU
+
Yes... please see our advanced Jupyter documentation.
+ssh eagle.hpc.nrel.gov
[user@el1:]$ srun -A <account> -t 02:00:00 --pty /bin/bash
[user@r2i7n35]$ module load conda
source activate myjupenv
jupyter-notebook --no-browser --ip=$(hostname -s)
note the node name (r2i7n35 in this example)
+and the url, e.g. http://127.0.0.1:8888/?token=<alphabet soup>
[user@laptop]$ ssh -N -L 8888:<nodename>:8888 username@eagle.hpc.nrel.gov
copy full url from jupyter startup into your web browser. e.g.:
+http://127.0.0.1:8888/?token=<alphabet soup>
Automation makes life better!
+Full directions included in the Jupyter repo.
+Download sbatch_jupyter.sh and auto_launch_jupyter.sh
+Edit sbatch_jupyter.sh to change:
+--account=*yourallocation*
--time=*timelimit*
Run auto_launch_jupyter.sh and follow directions
+That's it!
+Yes, you can run jupyter directly on a login node.
+Should you run jupyter directly on a login node?
+* Heavy lifting should be done via Europa or compute nodes
+* Using a highly shared resource (login nodes)
+ * Competition for cycles
+ * arbiter2 will throttle moderate to heavy usage
+
conda create -n myjupyter -c conda-forge jupyter ipykernel
source activate myjupyter
conda install -c conda-forge scipy numpy matplotlib
python -m ipykernel install --user --name=myjupyter
Restart your jupyter server
+New kernel will appear in drop-down as an option
+jupyter kernelspec list
jupyter kernelspec remove myoldjupyter
Magic commands are "meta commands" that add extra functionality.
+Magic commands begin with % or %%.
+* %lsmagic - list all magic commands
+* %run _file.py_ - run an external python script
+* %%time - placed at top of cell, prints execution time
+* %who - list all defined variables in notebook
+
%lsmagic
+
Available line magics:
+%alias %alias_magic %autoawait %autocall %automagic %autosave %bookmark %cat %cd %clear %colors %conda %config %connect_info %cp %debug %dhist %dirs %doctest_mode %ed %edit %env %gui %hist %history %killbgscripts %ldir %less %lf %lk %ll %load %load_ext %loadpy %logoff %logon %logstart %logstate %logstop %ls %lsmagic %lx %macro %magic %man %matplotlib %mkdir %more %mv %notebook %page %pastebin %pdb %pdef %pdoc %pfile %pinfo %pinfo2 %pip %popd %pprint %precision %prun %psearch %psource %pushd %pwd %pycat %pylab %qtconsole %quickref %recall %rehashx %reload_ext %rep %rerun %reset %reset_selective %rm %rmdir %run %save %sc %set_env %store %sx %system %tb %time %timeit %unalias %unload_ext %who %who_ls %whos %xdel %xmode
+
+Available cell magics:
+%%! %%HTML %%SVG %%bash %%capture %%debug %%file %%html %%javascript %%js %%latex %%markdown %%perl %%prun %%pypy %%python %%python2 %%python3 %%ruby %%script %%sh %%svg %%sx %%system %%time %%timeit %%writefile
+
+Automagic is ON, % prefix IS NOT needed for line magics.
+
You can also run shell commands inside a cell. For example:
+!conda list
- see the packages installed in the environment you're using
!pwd
+!ls
+
/home/tthatche/jup
+auto_launch_jupyter.sh Jupyter Presentation.ipynb slurm-6445885.out
+geojsondemo.ipynb old sshot1.png
+Interesting Graphs.ipynb sbatch_jupyter.sh sshot2.png
+jup-logo.png slurm
+
Prior to using JupyterHub, you must have logged into Eagle via the command line at least once.
+Given that, to start using Jupyterhub on Eagle, go to Europa in your local machine's browser, and log in with your Eagle username and password. +You should land in your home directory, and see everything there via the standard Jupyter file listing.
+From the "New" pulldown on the right hand side, you can start a notebook, open a terminal, or create a file or folder. +The default installation is Python version 3, and a variety of Conda modules are installed already. You can start a +Python3 notebook right away, and access the Python modules that are already present. To see what's installed, from a notebook +you can use the following command:
+!conda list
+
Alternatively, you can start a Terminal, and use the usual conda commands from the shell.
+Start a Terminal session, and follow the instructions on the HPC website +to create an environment. Now, to make this environment visible from your future notebooks, run the following command:
+source activate <myenv>
+python -m ipykernel install --user --name <myenv> --display-name "How-you-want-your-custom-kernel-to-appear-in-the-notebook-pulldown (<myenv>)"
+
where <myenv>
is the argument to -n
you used in your conda create
command.
After running this command, when you open a new notebook, you should see as an option your new environment, and once loaded +be able to access all Python modules therein.
To use JupyterHub from inside Eagle, the server is available on the internal network at https://europa-int/.
+JupyterHub provides the ability to use custom kernels including ones for other popular programming languages such as Julia and R. NREL's custom kernels documentation provides more information on how to setup JupyterHub with other languages.
+This document is derived from an HTML page written at the San Diego Supercomputer Center many years ago. Its purpose is to introduce Fortran 90 concepts to Fortran 77 programmers. It does this by presenting an example program and introducing concepts as various routines of the program are presented. The original web page has been used over the years and has been translated into several languages.
+Note: See our Fortran Overview page for basic getting started instructions and compiler/toolchain information.
+Famous Quote: "I don't know what the technical characteristics of + the standard language for scientific and engineering + computation in the year 2000 will be... but I know it + will be called Fortran." John Backus.
+Note: He claimed that he never said this.
+Start of real Fortran 90 discussion
+ function ran1(idum)
+ real ran1
+ integer idum
+ real r(97)
+ parameter ( m1=259200,ia1=7141,ic1=54773)
+ parameter ( m2=134456,ia2=8121,ic2=28411)
+ parameter ( m3=243000,ia3=4561,ic3=51349)
+ integer j
+ integer iff,ix1,ix2,ix3
+ data iff /0/
+ if (idum.lt.0.or.iff.eq.0)then
+ rm1=1.0/m1
+ rm2=1.0/m2
+ iff=1
+ ix1=mod(ic1-idum,m1)
+ ix1=mod(ia1*ix1+ic1,m1)
+ ix2=mod(ix1,m2)
+ ix1=mod(ia1*ix1+ic1,m1)
+ ix3=mod(ix1,m3)
+ do 11 j=1,97
+ ix1=mod(ia1*ix1+ic1,m1)
+ ix2=mod(ia2*ix2+ic2,m2)
+ r(j)=(real(ix1)+real(ix2)*rm2)*rm1
+ 11 continue
+ idum=1
+ endif
+ ix1=mod(ia1*ix1+ic1,m1)
+ ix2=mod(ia2*ix2+ic2,m2)
+ ix3=mod(ia3*ix3+ic3,m3)
+ j=1+(97*ix3)/m3
+ if(j.gt.97.or.j.lt.1)then
+ write(*,*)' error in ran1 j=',j
+ stop
+ endif
+ ran1=r(j)
+ r(j)=(real(ix1)+real(ix2)*rm2)*rm1
+ return
+ end
+
module ran_mod
+contains
+ function ran1(idum)
+ use numz
+ implicit none !note after use statement
+ real (b8) ran1
+ integer , intent(inout), optional :: idum
+ real (b8) r(97),rm1,rm2
+ integer , parameter :: m1=259200,ia1=7141,ic1=54773
+ integer , parameter :: m2=134456,ia2=8121,ic2=28411
+ integer , parameter :: m3=243000,ia3=4561,ic3=51349
+ integer j
+ integer iff,ix1,ix2,ix3
+ data iff /0/
+ save ! corrects a bug in the original routine
+ if(present(idum))then
+ if (idum.lt.0.or.iff.eq.0)then
+ rm1=1.0_b8/m1
+ rm2=1.0_b8/m2
+ iff=1
+ ix1=mod(ic1-idum,m1)
+ ix1=mod(ia1*ix1+ic1,m1)
+ ix2=mod(ix1,m2)
+ ix1=mod(ia1*ix1+ic1,m1)
+ ix3=mod(ix1,m3)
+ do j=1,97
+ ix1=mod(ia1*ix1+ic1,m1)
+ ix2=mod(ia2*ix2+ic2,m2)
+ r(j)=(real(ix1,b8)+real(ix2,b8)*rm2)*rm1
+ enddo
+ idum=1
+ endif
+ endif
+ ix1=mod(ia1*ix1+ic1,m1)
+ ix2=mod(ia2*ix2+ic2,m2)
+ ix3=mod(ia3*ix3+ic3,m3)
+ j=1+(97*ix3)/m3
+ if(j.gt.97.or.j.lt.1)then
+ write(*,*)' error in ran1 j=',j
+ stop
+ endif
+ ran1=r(j)
+ r(j)=(real(ix1,b8)+real(ix2,b8)*rm2)*rm1
+ return
+ end function ran1
+
The following features are available in Fortran 90. At the same time, Fortran 90 introduces the concept of "obsolescence": some older constructs are marked obsolescent and may be removed from a future standard.
+program darwin
+ real a(10), b(10), c(10), d(10), e(10), x, y
+ integer odd(5),even(5)
+! this line is continued by using "&"
+ write(*,*)"starting ",&
+ "darwin" ! this line in a continued from above
+! multiple statement per line --rarely a good idea
+ x=1; y=2; write(*,*)x,y
+ do i=1,10 ! statement label is not required for do
+ e(i)=i
+ enddo
+ odd= (/ 1,3,5,7,9 /) ! array assignment
+ even=(/ 2,4,6,8,10 /) ! array assignment
+ a=1 ! array assignment, every element of a = 1
+ b=2
+ c=a+b+e ! element by element assignment
+ c(odd)=c(even)-1 ! can use arrays of indices on both sides
+ d=sin(c) ! element by element application of intrinsics
+ write(*,*)d
+ write(*,*)abs(d) ! many intrinsic functions are generic
+ a_do_loop : do i=1,10
+ write(*,*)i,c(i),d(i)
+ enddo a_do_loop
+ do
+ if(c(10) .lt. 0.0 ) exit
+ c(10)=c(10)-1
+ enddo
+ write(*,*)c(10)
+ do while (c(9) .gt. 0)
+ c(9)=c(9)-1
+ enddo
+ write(*,*)c(9)
+end program
+
Motivation
+One variable can have several attributes
+integer,parameter :: in2 = 14
+ real, parameter :: pi = 3.141592653589793239
+ real, save, dimension(10) :: cpu_times,wall_times
+!**** the old way of doing the same ****!
+!**** real cpu_times(10),wall_times(10) ****!
+!**** save cpu_times, wall_times ****!
+
Machine | Real | Double Precision
---|---|---
IBM (SP) | 6 | 15
Cray (T90) | 15 | 33
Cray (T3E) | 15 | 15
program darwin
+! e has at least 4 significant digits
+ real(selected_real_kind(4))e
+! b8 will be used to define reals with 14 digits
+ integer, parameter:: b8 = selected_real_kind(14)
+ real(b8), parameter :: pi = 3.141592653589793239_b8 ! note usage of _b8
+! with a constant
+! to force precision
+ e= 2.71828182845904523536
+ write(*,*)"starting ",& ! this line is continued by using "&"
+ "darwin" ! this line in a continued from above
+ write(*,*)"pi has ",precision(pi)," digits precision ",pi
+ write(*,*)"e has ",precision(e)," digits precision ",e
+end program
+
sp001 % darwin
+ starting darwin
+ pi has 15 digits precision 3.14159265358979312
+ e has 6 digits precision 2.718281746
+sp001 %
+
Motivation:
+Modules can contain data declarations, derived type definitions, interfaces, and (after a contains statement) functions and subroutines.
+You "include" a module with a "use" statement.
+module numz
+ integer,parameter:: b8 = selected_real_kind(14)
+ real(b8),parameter :: pi = 3.141592653589793239_b8
+ integer gene_size
+end module
+ program darwin
+ use numz
+ implicit none ! now part of the standard, put it after the use statements
+ write(*,*)"pi has ",precision(pi),"
+digits precision ",pi
+ call set_size()
+ write(*,*)"gene_size=",gene_size
+ end program
+subroutine set_size
+ use numz
+ gene_size=10
+end subroutine
+
pi has 15 digits precision 3.14159265358979312
+ gene_size=10
+
Motivation:
+An example is a random number package:
module ran_mod
+! module contains three functions
+! ran1 returns a uniform random number between 0-1
+! spread returns random number between min - max
+! normal returns a normal distribution
+contains
+ function ran1() !returns random number between 0 - 1
+ use numz
+ implicit none
+ real(b8) ran1,x
+ call random_number(x) ! built in fortran 90 random number function
+ ran1=x
+ end function ran1
+ function spread(min,max) !returns random # between min/max
+ use numz
+ implicit none
+ real(b8) spread
+ real(b8) min,max
+ spread=(max - min) * ran1() + min
+ end function spread
+ function normal(mean,sigma) !returns a normal distribution
+ use numz
+ implicit none
+ real(b8) normal,tmp
+ real(b8) mean,sigma
+ integer flag
+ real(b8) fac,gsave,rsq,r1,r2
+ save flag,gsave
+ data flag /0/
+ if (flag.eq.0) then
+ rsq=2.0_b8
+ do while(rsq.ge.1.0_b8.or.rsq.eq.0.0_b8) ! new form of do
+ r1=2.0_b8*ran1()-1.0_b8
+ r2=2.0_b8*ran1()-1.0_b8
+ rsq=r1*r1+r2*r2
+ enddo
+ fac=sqrt(-2.0_b8*log(rsq)/rsq)
+ gsave=r1*fac
+ tmp=r2*fac
+ flag=1
+ else
+ tmp=gsave
+ flag=0
+ endif
+ normal=tmp*sigma+mean
+ return
+ end function normal
+end module ran_mod
+
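As a quick illustration of how these module functions might be called, here is a minimal sketch (not part of the original tutorial) assuming the numz and ran_mod modules above are compiled together:
+program try_ran_mod
+ use numz
+ use ran_mod ! provides ran1, spread and normal
+ implicit none
+ real(b8) u,s,g
+ u=ran1() ! uniform random number in [0,1)
+ s=spread(10.0_b8,20.0_b8) ! uniform random number between 10 and 20
+ g=normal(0.0_b8,1.0_b8) ! sample from a normal distribution, mean 0, sigma 1
+ write(*,*)u,s,g
+end program try_ran_mod
+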
Exercise 1: Write a program that returns 10 uniform random numbers.
+Motivation:
+Allocatable arrays allow us to set the size at run time
+module numz
+ integer, parameter:: b8 = selected_real_kind(14)
+ integer gene_size,num_genes
+ integer,allocatable :: a_gene(:),many_genes(:,:)
+end module
+program darwin
+ use numz
+ implicit none
+ integer ierr
+ call set_size()
+ allocate(a_gene(gene_size),stat=ierr) !stat= allows for an error code return
+ if(ierr /= 0)write(*,*)"allocation error" ! /= is .ne.
+ allocate(many_genes(gene_size,num_genes),stat=ierr) !2d array
+ if(ierr /= 0)write(*,*)"allocation error"
+ write(*,*)lbound(a_gene),ubound(a_gene) ! get lower and upper bound
+ ! for the array
+ write(*,*)size(many_genes),size(many_genes,1) !get total size and size
+ !along 1st dimension
+ deallocate(many_genes) ! free the space for the array and matrix
+ deallocate(a_gene)
+ allocate(a_gene(0:gene_size)) ! now allocate starting at 0 instead of 1
+ write(*,*)allocated(many_genes),allocated(a_gene) ! shows if allocated
+ write(*,*)lbound(a_gene),ubound(a_gene)
+end program
+ subroutine set_size
+ use numz
+ write(*,*)'enter gene size:'
+ read(*,*)gene_size
+ write(*,*)'enter number of genes:'
+ read(*,*)num_genes
+end subroutine set_size
+
enter gene size:
+10
+ enter number of genes:
+20
+ 1 10
+ 200 10
+ F T
+ 0 10
+
subroutine arrays(an_explicit_shape_array,&
+ i ,& !note we pass all bounds except the last
+ an_assumed_size_array ,&
+ an_assumed_shape_array)
+! Explicit shape
+ integer, dimension(8,8)::an_explicit_shape_array
+! Assumed size
+ integer, dimension(i,*)::an_assumed_size_array
+! Assumed Shape
+ integer, dimension(:,:)::an_assumed_shape_array
+ write(*,*)sum(an_explicit_shape_array)
+ write(*,*)lbound(an_assumed_size_array) ! why does sum not work here?
+ write(*,*)sum(an_assumed_shape_array)
+end subroutine
+
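A hedged sketch of how such a routine might be called (not from the original tutorial): the assumed-shape argument requires an explicit interface in the caller, and sum cannot be applied to the assumed-size argument because its extent in the last dimension is unknown inside the routine.
+program call_arrays
+ implicit none
+ interface
+ subroutine arrays(an_explicit_shape_array,i,an_assumed_size_array,an_assumed_shape_array)
+ integer i
+ integer, dimension(8,8)::an_explicit_shape_array
+ integer, dimension(i,*)::an_assumed_size_array
+ integer, dimension(:,:)::an_assumed_shape_array
+ end subroutine
+ end interface
+ integer a(8,8)
+ a=1
+ call arrays(a,8,a,a) ! the same 8x8 array passed in three different ways
+end program call_arrays
+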
module numz
+ integer, parameter:: b8 = selected_real_kind(14)
+ integer,allocatable :: a_gene(:),many_genes(:,:)
+end module
+module face
+ interface fitness
+ function fitness(vector)
+ use numz
+ implicit none
+ real(b8) fitness
+ integer, dimension(:) :: vector
+ end function fitness
+ end interface
+end module
+program darwin
+ use numz
+ use face
+ implicit none
+ integer i
+ integer vect(10) ! just a regular array
+ allocate(a_gene(10));allocate(many_genes(3,10))
+ a_gene=1 !sets every element of a_gene to 1
+ write(*,*)fitness(a_gene)
+ vect=8
+ write(*,*)fitness(vect) ! also works with regular arrays
+ many_genes=3 !sets every element to 3
+ many_genes(1,:)=a_gene !sets column 1 to a_gene
+ many_genes(2,:)=2*many_genes(1,:)
+ do i=1,3
+ write(*,*)fitness(many_genes(i,:))
+ enddo
+ write(*,*)fitness(many_genes(:,1)) !go along other dimension
+!!!!write(*,*)fitness(many_genes)!!!!does not work
+end program
+function fitness(vector)
+ use numz
+ implicit none
+ real(b8) fitness
+ integer, dimension(:):: vector ! must match interface
+ fitness=sum(vector)
+end function
+
Exercise 2: Run this program with the "does not work" line uncommented. Why does it fail? Can you use intrinsic functions to make it work?
+Exercise 3: Prove that Fortran 90 does not "pass by address".
+integer :: my_seed
+
integer, optional :: my_seed
+
Used like this:
+! ran1 returns a uniform random number between 0-1
+! the seed is optional and used to reset the generator
+contains
+ function ran1(my_seed)
+ use numz
+ implicit none
+ real(b8) ran1,r
+ integer, optional ,intent(in) :: my_seed ! optional argument not changed in the routine
+ integer,allocatable :: seed(:)
+ integer the_size,j
+ if(present(my_seed))then ! use the seed if present
+ call random_seed(size=the_size) ! how big is the intrinsic seed?
+ allocate(seed(the_size)) ! allocate space for seed
+ do j=1,the_size ! create the seed
+ seed(j)=abs(my_seed)+(j-1) ! abs is generic
+ enddo
+ call random_seed(put=seed) ! assign the seed
+ deallocate(seed) ! deallocate space
+ endif
+ call random_number(r)
+ ran1=r
+ end function ran1
+end module
+program darwin
+ use numz
+ use ran_mod ! interface required if we have
+ ! optional or intent arguments
+ real(b8) x,y
+ x=ran1(my_seed=12345) ! we can specify the name of the argument
+ y=ran1()
+ write(*,*)x,y
+ x=ran1(12345) ! with only one optional argument we don't need to
+ y=ran1()
+ write(*,*)x,y
+end program
+
Motivation:
+Example
+module galapagos
+ use numz
+ type thefit !the name of the type
+ sequence ! sequence forces the data elements
+ ! to be next to each other in memory
+ ! where might this be useful?
+ real(b8) val ! our result from the fitness function
+ integer index ! the index into our collection of genes
+ end type thefit
+end module
+
program darwin
+ use numz
+ use galapagos ! the module that contains the type definition
+ use face ! contains various interfaces
+ implicit none
+! define an allocatable array of the data type
+! that contains an index and a real value
+ type (thefit),allocatable ,target :: results(:)
+! create a single instance of the data type
+ type (thefit) best
+ integer,allocatable :: genes(:,:) ! our genes for the genetic algorithm
+ integer j
+ integer num_genes,gene_size
+ num_genes=10
+ gene_size=10
+ allocate(results(num_genes)) ! allocate the data type
+ ! to hold fitness and index
+ allocate(genes(num_genes,gene_size)) ! allocate our collection of genes
+ call init_genes(genes) ! starting data
+ write(*,'("input")' ) ! we can put format in write statement
+ do j=1,num_genes
+ results(j)%index =j
+ results(j)%val =fitness(genes(j,:)) ! just a dummy routine for now
+ write(*,"(f10.8,i4)")results(j)%val,results(j)%index
+ enddo
+end program
+
Motivation
+Example:
+Defining an operator is a two-step process: declare an interface for the operator, then provide the module procedure that implements it.
+module sort_mod
+!defining the interfaces
+ interface operator (.lt.) ! overloads standard .lt.
+ module procedure theless ! the function that does it
+ end interface
+ interface operator (.gt.) ! overloads standard .gt.
+ module procedure thegreat ! the function that does it
+ end interface
+ interface operator (.ge.) ! overloads standard .ge.
+ module procedure thetest ! the function that does it
+ end interface
+ interface operator (.converged.) ! new operator
+ module procedure index_test ! the function that does it
+ end interface
+ contains ! our module will contain
+ ! the required functions
+ function theless(a,b) ! overloads .lt. for the type (thefit)
+ use galapagos
+ implicit none
+ type(thefit), intent (in) :: a,b
+ logical theless ! what we return
+ if(a%val .lt. b%val)then ! this is where we do the test
+ theless=.true.
+ else
+ theless=.false.
+ endif
+ return
+ end function theless
+ function thegreat(a,b) ! overloads .gt. for the type (thefit)
+ use galapagos
+ implicit none
+ type(thefit), intent (in) :: a,b
+ logical thegreat
+ if(a%val .gt. b%val)then
+ thegreat=.true.
+ else
+ thegreat=.false.
+ endif
+ return
+ end function thegreat
+ function thetest(a,b) ! overloads .ge. for the type (thefit)
+ use galapagos
+ implicit none
+ type(thefit), intent (in) :: a,b
+ logical thetest
+ if(a%val >= b%val)then
+ thetest=.true.
+ else
+ thetest=.false.
+ endif
+ return
+end function thetest
+ function index_test(a,b) ! defines a new operation for the type (thefit)
+ use galapagos
+ implicit none
+ type(thefit), intent (in) :: a,b
+ logical index_test
+ if(a%index .gt. b%index)then ! check the index value for a difference
+ index_test=.true.
+ else
+ index_test=.false.
+ endif
+ return
+end function index_test
+
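Once the module is complete (its closing end module sort_mod and the sorting routines appear later), the overloaded operators can be applied directly to type(thefit) values. A minimal sketch, not from the original tutorial:
+program try_operators
+ use numz
+ use galapagos
+ use sort_mod
+ implicit none
+ type(thefit) a,b
+ a=thefit(1.0_b8,1)
+ b=thefit(2.0_b8,2)
+ write(*,*)(a .lt. b) ! true: compares the %val components via theless
+ write(*,*)(a .converged. b) ! false: compares the %index components via index_test
+end program try_operators
+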
Notes
+Motivation
+ function findmin(array)
+ is size of array 1?
+ min in the array is first element
+ else
+ find minimum in left half of array using findmin function
+ find minimum in right half of array using findmin function
+ global minimum is min of left and right half
+ end function
+
recursive function realmin(ain) result (themin)
+! recursive and result are required for recursive functions
+ use numz
+ implicit none
+ real(b8) themin,t1,t2
+ integer n,right
+ real(b8) ,dimension(:) :: ain
+ n=size(ain)
+ if(n == 1)then
+ themin=ain(1) ! if the size is 1 return value
+ return
+ else
+ right=n/2
+ t1=realmin(ain(1:right)) ! find min in left half
+ t2=realmin(ain(right+1:n)) ! find min in right half
+ themin=min(t1,t2) ! find min of the two sides
+ endif
+end function
+
!this routine works with the data structure thefit not reals
+recursive function typemin(ain) result (themin)
+ use numz
+ use sort_mod
+ use galapagos
+ implicit none
+ real(b8) themin,t1,t2
+ integer n,right
+ type (thefit) ,dimension(:) :: ain ! this line is different
+ n=size(ain)
+ if(n == 1)then
+ themin=ain(1)%val ! this line is different
+ return
+ else
+ right=n/2
+ t1=typemin(ain(1:right))
+ t2=typemin(ain(right+1:n))
+ themin=min(t1,t2)
+ endif
+end function
+
Motivation
+Notes
+First pointer example
+recursive function pntmin(ain) result (themin) ! return a pointer
+ use numz
+ use galapagos
+ use sort_mod ! contains the .lt. operator for thefit type
+ implicit none
+ type (thefit),pointer:: themin,t1,t2
+ integer n,right
+ type (thefit) ,dimension(:),target :: ain
+ n=size(ain)
+ if(n == 1)then
+ themin=>ain(1) !this is how we do pointer assignment
+ return
+ else
+ right=n/2
+ t1=>pntmin(ain(1:right))
+ t2=>pntmin(ain(right+1:n))
+ if(t1 .lt. t2)then; themin=>t1; else; themin=>t2; endif
+ endif
+end function
+
Exercise 4: Carefully write a recursive N! program.
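For reference, the bare recursive-function skeleton such a program builds on looks like the following minimal sketch; careful handling of negative input and integer overflow is left to the exercise:
+recursive function fact(n) result(f)
+ implicit none
+ integer, intent(in) :: n
+ integer f
+ if(n <= 1)then
+ f=1
+ else
+ f=n*fact(n-1)
+ endif
+end function fact
+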
+Motivation
+Notes:
 recursive function realmin(ain) result (themin)
+ real(b8) ,dimension(:) :: ain
+ recursive function typemin(ain) result (themin)
+ type (thefit) ,dimension(:) :: ain
+
! note we have two functions within the same interface
+! this is how we indicate function overloading
+! both functions are called "findmin" in the main program
+interface findmin
+! the first is called with an array of reals as input
+ recursive function realmin(ain) result (themin)
+ use numz
+ real(b8) themin
+ real(b8) ,dimension(:) :: ain
+ end function
+! the second is called with an array of data structures as input
+ recursive function typemin(ain) result (themin)
+ use numz
+ use galapagos
+ real(b8) themin
+ type (thefit) ,dimension(:) :: ain
+ end function
+ end interface
+
program darwin
+ use numz
+ use ran_mod
+ use galapagos ! the module that contains the type definition
+ use face ! contains various interfaces
+ use sort_mod ! more about this later it
+ ! contains our sorting routine
+ ! and a few other tricks
+ implicit none
+! create an allocatable array of the data type
+! that contains an index and a real value
+ type (thefit),allocatable ,target :: results(:)
+! create a single instance of the data type
+ type (thefit) best
+! pointers to our type
+ type (thefit) ,pointer :: worst,tmp
+ integer,allocatable :: genes(:,:) ! our genes for the ga
+ integer j
+ integer num_genes,gene_size
+ real(b8) x
+ real(b8),allocatable :: z(:)
+ real(b8),pointer :: xyz(:) ! we'll talk about this next
+ num_genes=10
+ gene_size=10
+ allocate(results(num_genes)) ! allocate the data type to
+ allocate(genes(num_genes,gene_size)) ! hold our collection of genes
+ call init_genes(genes) ! starting data
+ write(*,'("input")')
+ do j=1,num_genes
+ results(j)%index=j
+ results(j)%val=fitness(genes(j,:)) ! just a dummy routine
+ write(*,"(f10.8,i4)")results(j)%val,results(j)%index
+ enddo
+ allocate(z(size(results)))
+ z=results(:)%val ! copy our results to a real array
+! use a recursive subroutine operating on the real array
+ write(*,*)"the lowest fitness: ",findmin(z)
+! use a recursive subroutine operating on the data structure
+ write(*,*)"the lowest fitness: ",findmin(results)
+end program
+
! we show two other methods of getting the minimum fitness
+! use the built in f90 routines on a real array
+ write(*,*)"the lowest fitness: ",minval(z),minloc(z)
+
! use a recursive subroutine operating on the data
+! structure and returning a pointer to the result
+ worst=>pntmin(results) ! note pointer assignment
+! what will this line write?
+ write(*,*)"the lowest fitness: ",worst
+
Motivation
+Usage
+! This code will print "true" when we find a match,
+! that is the pointers point to the same object
+ do j=1,num_genes
+ tmp=>results(j)
+ write(*,"(f10.8,i4,l3)")results(j)%val, &
+ results(j)%index, &
+ associated(tmp,worst)
+ enddo
+ nullify(tmp)
+
module Merge_mod_types
+ use galapagos
+ type(thefit),allocatable :: work(:) ! a "global" work array
+ type(thefit), pointer:: a_pntr(:) ! this will be the pointer to our input array
+end module Merge_mod_types
+ subroutine Sort(ain, n)
+ use Merge_mod_types
+ implicit none
+ integer n
+ type(thefit), target:: ain(n)
+ allocate(work(n))
+ nullify(a_pntr)
+ a_pntr=>ain ! we assign the pointer to our array
+ ! in RecMergeSort we reference it just like an array
+ call RecMergeSort(1,n) ! very similar to the findmin functions
+ deallocate(work)
+ return
+end subroutine Sort
+
! our sort routine is also recursive but
+! also shows a new usage for pointers
+ call sort(results,num_genes)
+ do j=1,num_genes
+ write(*,"(f10.8,i4)")results(j)%val, &
+ results(j)%index
+ enddo
+
! we can copy a whole structure
+! with a single assignment
+ best=results(1)
+ write(*,*)"best result ",best
+
! using the user defined operator to see if best is worst
+! recall that the operator .converged. checks to see if %index matches
+ worst=>pntmin(results)
+ write(*,*)"worst result ",worst
+ write(*,*)"converged=",(best .converged. worst)
+
Motivation
+Default lower bound within a subroutine is 1
+May want to use a different lower bound
+ if(allocated(z))deallocate(z)
+ allocate(z(-10:10)) ! a 21 element array
+ do j=-10,10
+ z(j)=j
+ enddo ! pass z and its lower bound
+! in this routine we give the array a specific lower
+! bound and show how to use a pointer to reference
+! different parts of an array using different indices
+ call boink1(z,lbound(z,1)) ! why not just lbound(z) instead of lbound(z,1)?
+ ! lbound(z) returns a rank 1 array
+ subroutine boink1(a,n)
+ use numz
+ implicit none
+ integer,intent(in) :: n
+ real(b8),dimension(n:):: a ! this is how we set lower bounds in a subroutine
+ write(*,*)lbound(a),ubound(a)
+ end subroutine
+
call boink2(z,lbound(z,1))
+
+subroutine boink2(a,n)
+use numz
+implicit none
+integer,intent(in) :: n
+real(b8),dimension(n:),target:: a
+real(b8),dimension(:),pointer::b
+b=>a(n:) ! b(1) "points" to a(-10)
+write(*,*)"a(-10) =",a(-10),"b(1) =",b(1)
+b=>a(0:) ! b(1) "points" to a(0)
+write(*,*)"a(-6) =",a(-6),"b(-5) =",b(-5)
+end subroutine
+
module numz
+ integer, parameter:: b8 = selected_real_kind(14)
+end module
+program bla
+ use numz
+ real(b8), dimension(:) ,pointer :: xyz
+ interface boink
+ subroutine boink(a)
+ use numz
+ implicit none
+ real(b8), dimension(:), pointer :: a
+ end subroutine
+ end interface
+ nullify(xyz) ! nullify sets a pointer to null
+ write(*,'(l5)')associated(xyz) ! is a pointer null, should be
+ call boink(xyz)
+ write(*,'(l5)',advance="no")associated(xyz)
+ if(associated(xyz))write(*,'(i5)')size(xyz)
+end program
+subroutine boink(a)
+ use numz
+ implicit none
+ real(b8),dimension(:),pointer:: a
+ if(associated(a))deallocate(a)
+ allocate(a(10))
+end subroutine
+
F
+ T
+10
+
Given a fixed number of colors, M, and a description of a map of a collection of N states:
+Find a coloring of the map such that no two states that share a border have the same coloring.
+22
+ar ok tx la mo xx
+az ca nm ut nv xx
+ca az nv or xx
+co nm ut wy ne ks xx
+ia mo ne sd mn xx
+id wa or nv ut wy mt xx
+ks ne co ok mo xx
+la tx ar xx
+mn ia sd nd xx
+mo ar ok ks ne ia xx
+mt wy id nd xx
+nd mt sd wy xx
+ne sd wy co ks mo ia xx
+nm az co ok tx mn xx
+nv ca or id ut az xx
+ok ks nm tx ar mo xx
+or ca wa id xx
+sd nd wy ne ia mn xx
+tx ok nm la ar xx
+ut nv az co wy id xx
+wa id or mt xx
+wy co mt id ut nd sd ne xx
+
Our fitness function takes a potential coloring, that is, an integer vector of length N, and returns the number of borders whose two states have the same coloring.
+Motivation
+The list of adjoining states will have a different length for different states.
+Solution: linked lists are a good way to handle such situations.
module list_stuff
+ type llist
+ integer index ! data
+ type(llist),pointer::next ! pointer to the next element
+ end type llist
+end module
+
One way to fill a linked list is to use a recursive subroutine:
+recursive subroutine insert (item, root)
+ use list_stuff
+ implicit none
+ type(llist), pointer :: root
+ integer item
+ if (.not. associated(root)) then
+ allocate(root)
+ nullify(root%next)
+ root%index = item
+ else
+ call insert(item,root%next)
+ endif
+end subroutine
+
Our map representation
+- An array of the derived data type states
+ - State is the name of a state
+ - A linked list containing its borders
+
+ type states
+ character(len=2)name
+ type(llist),pointer:: list
+ end type states
+
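Putting these pieces together, the fitness function described earlier might look roughly like the sketch below. This is not the tutorial's actual routine; it assumes the map array has been placed in a module (here given the hypothetical name map_stuff) so it is visible to the function.
+function fitness(vector)
+ use numz
+ use list_stuff
+ use map_stuff ! hypothetical module holding map(:) of type(states)
+ implicit none
+ real(b8) fitness
+ integer, dimension(:) :: vector ! a candidate coloring, one entry per state
+ type(llist), pointer :: ptr
+ integer i
+ fitness=0.0_b8
+ do i=1,size(vector)
+ ptr=>map(i)%list ! walk the linked list of bordering states
+ do while(associated(ptr))
+ if(vector(i) == vector(ptr%index))fitness=fitness+1.0_b8 ! same color across a border
+ ptr=>ptr%next
+ enddo
+ enddo
+end function fitness
+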
Motivation
+You may want to know the date and time at which your program runs, or time sections of it.
+Fortran 90 provides two intrinsic subroutines for this:
+! all arguments are optional
+call date_and_time(date=c_date, & ! character(len=8) ccyymmdd
+ time=c_time, & ! character(len=10) hhmmss.sss
+ zone=c_zone, & ! character(len=10) +/-hhmm (time zone)
+ values=ivalues) ! integer ivalues(8) all of the above
+ call system_clock(count=ic, & ! count of system clock (clicks)
+ count_rate=icr, & ! clicks / second
+ count_max=max_c) ! max value for count
+
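For example, system_clock can be used to time a section of code (a minimal sketch, not from the original tutorial):
+program timing
+ implicit none
+ integer ic1,ic2,icr,i
+ real elapsed,s
+ call system_clock(count=ic1,count_rate=icr) ! clock reading before the work
+ s=0.0
+ do i=1,1000000 ! the section being timed
+ s=s+sqrt(real(i))
+ enddo
+ call system_clock(count=ic2) ! clock reading after the work
+ elapsed=real(ic2-ic1)/real(icr) ! convert clicks to seconds
+ write(*,*)s," computed in ",elapsed," seconds"
+end program timing
+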
Motivation
+We read the states using their two-character identification.
+There is one line per state, and we do not know how many bordering states are listed on each line.
+Note: Our list of states is presorted +
character(len=2) a ! we have a character variable of length 2
+read(12,*)nstates ! read the number of states
+allocate(map(nstates)) ! and allocate our map
+do i=1,nstates
+ read(12,"(a2)",advance="no")map(i)%name ! read the name
+ !write(*,*)"state:",map(i)%name
+ nullify(map(i)%list) ! "zero out" our list
+ do
+ read(12,"(1x,a2)",advance="no")a ! read list of states
+ ! without going to the
+ ! next line
+ if(lge(a,"xx") .and. lle(a,"xx"))then ! if state == xx
+ backspace(12) ! go to the next line
+ read(12,"(1x,a2)",end=1)a ! go to the next line
+ exit
+ endif
+ 1 continue
+ if(llt(a,map(i)%name))then ! we only add a state to
+ ! our list if its name
+ ! is before ours thus we
+ ! only count borders 1 time
+ ! what we want put into our linked list is an index
+ ! into our map where we find the bordering state
+ ! thus we do the search here
+ ! any ideas on a better way of doing this search?
+ found=-1
+ do j=1,i-1
+ if(lge(a,map(j)%name) .and. lle(a,map(j)%name))then
+ !write(*,*)a
+ found=j
+ exit
+ endif
+ enddo
+ if(found == -1)then
+ write(*,*)"error"
+ stop
+ endif
+ ! found the index of the bordering state; insert it into our list
+ ! note we do the insert into the linked list for a particular state
+ call insert(found,map(i)%list)
+ endif
+ enddo
+enddo
+
Motivation
+May need to create strings on the fly
+May need to convert from strings to reals and integers
+Similar to sprintf and sscanf
+How it works
+Create a string
+Do a normal write, except you write to the string instead of a unit number
+Example 1: creating a date and time stamped file name
+character (len=12)tmpstr
+
+write(tmpstr,"(a12)")(c_date(5:8)//c_time(1:4)//".dat") ! // does string concatenation
+write(*,*)"name of file= ",tmpstr
+open(14,file=tmpstr)
+name of file= 03271114.dat
+
! test_vect is an array whose length we do not know until run time
+nstates=9 ! the size of the array
+write(fstr,'("(",i4,"i1,1x,f10.5)")')nstates
+write(*,*)"format= ",fstr
+write(*,fstr)test_vect,fstr
+format= ( 9i1,1x,f10.5)
+
integer hr,minut,sec
+read(c_time,"(3i2)")hr,minut,sec
+
Inquire statement has two forms
+Example: find the size of your real relative to the "standard" real
+inquire(iolength=len_real)1.0
+inquire(iolength=len_b8)1.0_b8
+write(*,*)"len_b8 ",len_b8
+write(*,*)"len_real",len_real
+iratio=len_b8/len_real
+select case (iratio)
+ case (1)
+ my_mpi_type=mpi_real
+ case(2)
+ my_mpi_type=mpi_double_precision
+ case default
+ write(*,*)"type undefined"
+ my_mpi_type=0
+end select
+
len_b8 2
+len_real 1
+
Notes:
+Example: +
integer ncolor
+logical force
+namelist /the_input/ncolor,force
+ncolor=4
+force=.true.
+read(13,the_input)
+write(*,the_input)
+
& THE_INPUT NCOLOR=4,FORCE = F /
+
&THE_INPUT
+NCOLOR = 4,
+FORCE = F
+/
+
Notes
+Example:
+ function add1(vector,max) result (rtn)
+ integer, dimension(:),intent(in) :: vector
+ integer,dimension(size(vector)) :: rtn
+ integer max
+ integer len
+ logical carry
+ len=size(vector)
+ rtn=vector
+ i=0
+ carry=.true.
+ do while(carry) ! just continue until we do not do a carry
+ i=i+1
+ rtn(i)=rtn(i)+1
+ if(rtn(i) .gt. max)then
+ if(i == len)then ! roll over: set everything back to 0
+ rtn=0
+ carry=.false. ! a full rollover: stop carrying
+ else
+ rtn(i)=0
+ endif
+ else
+ carry=.false.
+ endif
+ enddo
+end function
+
test_vect=0
+ do
+ test_vect=add1(test_vect,3)
+ result=fitness(test_vect)
+ if(result .lt. 1.0_b8)then
+ write(*,*)test_vect
+ stop
+ endif
+ enddo
+
Exercise 5: Modify the program to use the random number generator given earlier.
+UNPACK Unpack an array of rank one into an array under a mask
+Examples
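A small sketch of unpack in action (not from the original tutorial):
+program unpack_demo
+ implicit none
+ integer :: field(5) = (/ 0,0,0,0,0 /)
+ integer :: vec(2) = (/ 7,9 /)
+ logical :: mask(5) = (/ .false.,.true.,.false.,.true.,.false. /)
+! successive elements of vec go where mask is true;
+! elsewhere the corresponding element of field is used
+ write(*,*)unpack(vec,mask,field) ! prints 0 7 0 9 0
+end program unpack_demo
+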
+program matrix
+ real w(10),x(10),mat(10,10)
+ call random_number(w)
+ call random_number(mat)
+ x=matmul(w,mat) ! regular matrix multiply USE IT
+ write(*,'("dot(x,x)=",f10.5)'),dot_product(x,x)
+end program
+program allit
+ character(len=10):: f1="(3l1)"
+ character(len=10):: f2="(3i2)"
+ integer b(2,3),c(2,3),one_d(6)
+ logical l(2,3)
+ one_d=(/ 1,3,5 , 2,4,6 /)
+ b=transpose(reshape((/ 1,3,5 , 2,4,6 /),shape=(/3,2/)))
+ C=transpose(reshape((/ 0,3,5 , 7,4,8 /),shape=(/3,2/)))
+ l=(b.ne.c)
+ write(*,f2)((b(i,j),j=1,3),i=1,2)
+ write(*,*)
+ write(*,f2)((c(i,j),j=1,3),i=1,2)
+ write(*,*)
+ write(*,f1)((l(i,j),j=1,3),i=1,2)
+ write(*,*)
+ write(*,f1)all ( b .ne. C ) !is .false.
+ write(*,f1)all ( b .ne. C, DIM=1) !is [.true., .false., .false.]
+ write(*,f1)all ( b .ne. C, DIM=2) !is [.false., .false.]
+end
+
1 3 5
+ 2 4 6
+ 0 3 5
+ 7 4 8
+ TFF
+ TFT
+ F
+ TFF
+ FF
+
.CUF
+-O0, -O1, -O2, -O3, -O4
+-g
+Fortran 90 has features to:
+Fortran 90 has new language elements
+Examples
+ Brought to you by ANSI committee X3J3 and ISO-IEC/JTC1/SC22/WG5 (Fortran)
+ This is neither complete nor precisely accurate, but hopefully, after
+ a small investment of time it is easy to read and very useful.
+
+ This is the free form version of Fortran, no statement numbers,
+ no C in column 1, start in column 1 (not column 7),
+ typically indent 2, 3, or 4 spaces per each structure.
+ The typical extension is .f90 .
+
+ Continue a statement on the next line by ending the previous line with
+ an ampersand & . Start the continuation with & for strings.
+
+ The rest of any line is a comment starting with an exclamation mark ! .
+
+ Put more than one statement per line by separating statements with a
+ semicolon ; . Null statements are OK, so lines can end with semicolons.
+
+ Separate words with space or any form of "white space" or punctuation.
+
<xxx> means fill in something appropriate for xxx and do not type
+ the "<" or ">" .
+
+ ... ellipsis means the usual, fill in something, one or more lines
+
+ [stuff] means supply nothing or at most one copy of "stuff"
+ [stuff1 [stuff2]] means if "stuff1" is included, supply nothing
+ or at most one copy of stuff2.
+
+ "old" means it is in the language, like almost every feature of past
+ Fortran standards, but should not be used to write new programs.
+
program <name> usually file name is <name>.f90
+ use <module_name> bring in any needed modules
+ implicit none good for error detection
+ <declarations>
+ <executable statements> order is important, no more declarations
+ end program <name>
+
+
+ block data <name> old
+ <declarations> common, dimension, equivalence now obsolete
+ end block data <name>
+
+
+ module <name> bring back in with use <name>
+ implicit none good for error detection
+ <declarations> can have private and public and interface
+ end module <name>
+
+ subroutine <name> use: call <name> to execute
+ implicit none good for error detection
+ <declarations>
+ <executable statements>
+ end subroutine <name>
+
+
+ subroutine <name>(par1, par2, ...)
+ use: call <name>(arg1, arg2,... ) to execute
+ implicit none optional, good for error detection
+ <declarations> par1, par2, ... are defined in declarations
+ and can be specified in, inout, pointer, etc.
+ <executable statements>
+ return optional, end causes automatic return
+ entry <name> (par...) old, optional other entries
+ end subroutine <name>
+
+
+ function <name>(par1, par2, ...) result(<rslt>)
+ use: <name>(arg1, arg2, ... argn) as variable
+ implicit none optional, good for error detection
+ <declarations> rslt, par1, ... are defined in declarations
+ <executable statements>
+ <rslt> = <expression> required somewhere in execution
+ [return] optional, end causes automatic return
+ end function <name>
+
+ old
+ <type> function <name>(par1, par2, ...) use: <name>(arg1, arg2, ... argn) as variable
+ <declarations>
+ <executable statements>
+ <name> = <expression> required somewhere in execution
+ [return] optional, end causes automatic return
+ end function <name>
+
<statement> will mean exactly one statement in this section
+
+ a construct is multiple lines
+
+ <label> : <statement> any statement can have a label (a name)
+
+ <variable> = <expression> assignment statement
+
+ <pointer> => <variable> the pointer is now an alias for the variable
+ <pointer1> => <pointer2> pointer1 now points same place as pointer2
+
+ stop can be in any executable statement group,
+ stop <integer> terminates execution of the program,
+ stop <string> can have optional integer or string
+
+ return exit from subroutine or function
+
+ do <variable>=<from>,<to> [,<increment>] optional: <label> : do ...
+ <statements>
+
+ exit \_optional or exit <label>
+ if (<boolean expression>) exit /
+ exit the loop
+ cycle \_optional or cycle <label>
+ if (<boolean expression>) cycle /
+ continue with next loop iteration
+ end do optional: end do <name>
+
+
+ do while (<boolean expression>)
+ ... optional exit and cycle allowed
+ end do
+
+
+ do
+ ... exit required to end the loop
+ optional cycle can be used
+ end do
+
+
+
+ if ( <boolean expression> ) <statement> execute the statement if the
+ boolean expression is true
+
+ if ( <boolean expression1> ) then
+ ... execute if expression1 is true
+ else if ( <boolean expression2> ) then
+ ... execute if expression2 is true
+ else if ( <boolean expression3> ) then
+ ... execute if expression3 is true
+ else
+ ... execute if none above are true
+ end if
+
+
+ select case (<expression>) optional <name> : select case ...
+ case (<value>)
+ <statements> execute if expression == value
+ case (<value1>:<value2>)
+ <statements> execute if value1 ≤ expression ≤ value2
+ ...
+ case default
+ <statements> execute if no values above match
+ end select optional end select <name>
+
+
+ real, dimension(10,12) :: A, R a sample declaration for use with "where"
+ ...
+ where (A /= 0.0) conditional assignment, only assignment allowed
+ R = 1.0/A
+ elsewhere
+ R = 1.0 elements of R set to 1.0 where A == 0.0
+ end where
+
+ go to <statement number> old
+
+ go to (<statement number list>), <expression> old
+
+ for I/O statements, see: section 10.0 Input/Output Statements
+
+ many old forms of statements are not listed
+
There are five (5) basic types: integer, real, complex, character and logical.
+ There may be any number of user derived types. A modern (not old) declaration
+ starts with a type, has attributes, then ::, then variable(s) names
+
+ integer i, pivot, query old
+
+ integer, intent (inout) :: arg1
+
+ integer (selected_int_kind (5)) :: i1, i2
+
+ integer, parameter :: m = 7
+
+ integer, dimension(0:4, -5:5, 10:100) :: A3D
+
+ double precision x old
+
+ real (selected_real_kind(15,300)) :: x
+
+ complex :: z
+
+ logical, parameter :: what_if = .true.
+
+ character, parameter :: me = "Jon Squire"
+
+ type <name> a new user type, derived type
+ declarations
+ end type <name>
+
+ type (<name>) :: stuff declaring stuff to be of derived type <name>
+
+ real, dimension(:,:), allocatable, target :: A
+
+ real, dimension(:,:), pointer :: P
+
+ Attributes may be:
+
+ allocatable no memory used here, allocate later
+ dimension vector or multi dimensional array
+ external will be defined outside this compilation
+ intent argument may be in, inout or out
+ intrinsic declaring function to be an intrinsic
+ optional argument is optional
+ parameter declaring a constant, can not be changed later
+ pointer declaring a pointer
+ private in a module, a private declaration
+ public in a module, a public declaration
+ save keep value from one call to the next, static
+ target can be pointed to by a pointer
+ Note: not all combinations of attributes are legal
+
note: "statement" means key word that starts a statement, one line
+ unless there is a continuation "&"
+ "construct" means multiple lines, usually ending with "end ..."
+ "attribute" means it is used in a statement to further define
+ "old" means it should not be used in new code
+
+ allocatable attribute, no space allocated here, later allocate
+ allocate statement, allocate memory space now for variable
+ assign statement, old, assigned go to
+ assignment attribute, means subroutine is assignment (=)
+ block data construct, old, compilation unit, replaced by module
+ call statement, call a subroutine
+ case statement, used in select case structure
+ character statement, basic type, intrinsic data type
+ common statement, old, allowed overlaying of storage
+ complex statement, basic type, intrinsic data type
+ contains statement, internal subroutines and functions follow
+ continue statement, old, a place to put a statement number
+ cycle statement, continue the next iteration of a do loop
+ data statement, old, initialized variables and arrays
+ deallocate statement, free up storage used by specified variable
+ default statement, in a select case structure, all others
+ do construct, start a do loop
+ double precision statement, old, replaced by selected_real_kind(15,300)
+ else construct, part of if else if else end if
+ else if construct, part of if else if else end if
+ elsewhere construct, part of where elsewhere end where
+ end block data construct, old, ends block data
+ end do construct, ends do
+ end function construct, ends function
+ end if construct, ends if
+ end interface construct, ends interface
+ end module construct, ends module
+ end program construct, ends program
+ end select construct, ends select case
+ end subroutine construct, ends subroutine
+ end type construct, ends type
+ end where construct, ends where
+ entry statement, old, another entry point in a procedure
+ equivalence statement, old, overlaid storage
+ exit statement, continue execution outside of a do loop
+ external attribute, old statement, means defines else where
+ function construct, starts the definition of a function
+ go to statement, old, requires fixed form statement number
+ if statement and construct, if(...) statement
+ implicit statement, "none" is preferred to help find errors
+ in a keyword for intent, the argument is read only
+ inout a keyword for intent, the argument is read/write
+ integer statement, basic type, intrinsic data type
+ intent attribute, intent(in) or intent(out) or intent(inout)
+ interface construct, begins an interface definition
+ intrinsic statement, says that following names are intrinsic
+ kind attribute, sets the kind of the following variables
+ len attribute, sets the length of a character string
+ logical statement, basic type, intrinsic data type
+ module construct, beginning of a module definition
+ namelist statement, defines a namelist of input/output
+ nullify statement, nullify(some_pointer) now points nowhere
+ only attribute, restrict what comes from a module
+ operator attribute, indicates function is an operator, like +
+ optional attribute, a parameter or argument is optional
+ out a keyword for intent, the argument will be written
+ parameter attribute, old statement, makes variable read only
+ pause old, replaced by stop
+ pointer attribute, defined the variable as a pointer alias
+ private statement and attribute, in a module, visible inside
+ program construct, start of a main program
+ public statement and attribute, in a module, visible outside
+ real statement, basic type, intrinsic data type
+ recursive attribute, allows functions and derived type recursion
+ result attribute, allows naming of function result result(Y)
+ return statement, returns from, exits, subroutine or function
+ save attribute, old statement, keep value between calls
+ select case construct, start of a case construct
+ stop statement, terminate execution of the main procedure
+ subroutine construct, start of a subroutine definition
+ target attribute, allows a variable to take a pointer alias
+ then part of if construct
+ type construct, start of user defined type
+ type ( ) statement, declaration of a variable for a users type
+ use statement, brings in a module
+ where construct, conditional assignment
+ while construct, a while form of a do loop
+
backspace statement, back up one record
+ close statement, close a file
+ endfile statement, mark the end of a file
+ format statement, old, defines a format
+ inquire statement, get the status of a unit
+ open statement, open or create a file
+ print statement, performs output to screen
+ read statement, performs input
+ rewind statement, move read or write position to beginning
+ write statement, performs output
+
** exponentiation
+ * multiplication
+ / division
+ + addition
+ - subtraction
+ // concatenation
+ == .eq. equality
+ /= .ne. not equal
+ < .lt. less than
+ > .gt. greater than
+ <= .le. less than or equal
+ >= .ge. greater than or equal
+ .not. complement, negation
+ .and. logical and
+ .or. logical or
+ .eqv. logical equivalence
+ .neqv. logical not equivalence, exclusive or
+
+ .eq. == equality, old
+ .ne. /= not equal. old
+ .lt. < less than, old
+ .gt. > greater than, old
+ .le. <= less than or equal, old
+ .ge. >= greater than or equal, old
+
+
+ Other punctuation:
+
+ / ... / used in data, common, namelist and other statements
+ (/ ... /) array constructor, data is separated by commas
+ 6*1.0 in some contexts, 6 copies of 1.0
+ (i:j:k) in some contexts, a list i, i+k, i+2k, i+3k, ... i+nk≤j
+ (:j) j and all below
+ (i:) i and all above
+ (:) undefined or all in range
+
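A brief sketch of the section notation above (not from the original reference):
+program sections
+ implicit none
+ integer :: v(10) = (/ 1,2,3,4,5,6,7,8,9,10 /)
+ write(*,*)v(2:9:3) ! elements 2, 5, 8
+ write(*,*)v(:3) ! elements 1, 2, 3
+ write(*,*)v(8:) ! elements 8, 9, 10
+end program sections
+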
Logical constants:
+
+ .true. True
+ .false. False
+
+ Integer constants:
+
+ 0 1 -1 123456789
+
+ Real constants:
+
+ 0.0 1.0 -1.0 123.456 7.1E+10 -52.715E-30
+
+ Complex constants:
+
+ (0.0, 0.0) (-123.456E+30, 987.654E-29)
+
+ Character constants:
+
+ "ABC" "a" "123'abc$%#@!" " a quote "" "
+ 'ABC' 'a' '123"abc$%#@!' ' a apostrophe '' '
+
+ Derived type values:
+
+ type name
+ character (len=30) :: last
+ character (len=30) :: first
+ character (len=30) :: middle
+ end type name
+
+ type address
+ character (len=40) :: street
+ character (len=40) :: more
+ character (len=20) :: city
+ character (len=2) :: state
+ integer (selected_int_kind(5)) :: zip_code
+ integer (selected_int_kind(4)) :: route_code
+ end type address
+
+ type person
+ type (name) lfm
+ type (address) snail_mail
+ end type person
+
+ type (person) :: a_person = person( name("Squire","Jon","S."), &
+ address("106 Regency Circle", "", "Linthicum", "MD", 21090, 1936))
+
+ a_person%snail_mail%route_code == 1936
+
open (<unit number>)
+ open (unit=<unit number>, file=<file name>, iostat=<variable>)
+ open (unit=<unit number>, ... many more, see below )
+
+ close (<unit number>)
+ close (unit=<unit number>, iostat=<variable>,
+ err=<statement number>, status="KEEP")
+
+ read (<unit number>) <input list>
+ read (unit=<unit number>, fmt=<format>, iostat=<variable>,
+ end=<statement number>, err=<statement number>) <input list>
+ read (unit=<unit number>, rec=<record number>) <input list>
+
+ write (<unit number>) <output list>
+ write (unit=<unit number>, fmt=<format>, iostat=<variable>,
+ err=<statement number>) <output list>
+ write (unit=<unit number>, rec=<record number>) <output list>
+
+ print *, <output list>
+
+ print "(<your format here, use apostrophe, not quote>)", <output list>
+
+ rewind <unit number>
+ rewind (<unit number>, err=<statement number>)
+
+ backspace <unit number>
+ backspace (<unit number>, iostat=<variable>)
+
+ endfile <unit number>
+ endfile (<unit number>, err=<statement number>, iostat=<variable>)
+
+ inquire ( <unit number>, exists = <variable>)
+ inquire ( file=<"name">, opened = <variable1>, access = <variable2> )
+ inquire ( iolength = <variable> ) x, y, A ! gives "recl" for "open"
+
+ namelist /<name>/ <variable list> defines a name list
+ read(*,nml=<name>) reads some/all variables in namelist
+ write(*,nml=<name>) writes all variables in namelist
+ &<name> <variable>=<value> ... <variable=value> / data for namelist read
+
+ Input / Output specifiers
+
+ access one of "sequential" "direct" "undefined"
+ action one of "read" "write" "readwrite"
+ advance one of "yes" "no"
+ blank one of "null" "zero"
+ delim one of "apostrophe" "quote" "none"
+ end = <integer statement number> old
+ eor = <integer statement number> old
+ err = <integer statement number> old
+ exist = <logical variable>
+ file = <"file name">
+ fmt = <"(format)"> or <character variable> format
+ form one of "formatted" "unformatted" "undefined"
+ iolength = <integer variable, size of unformatted record>
+ iostat = <integer variable> 0==good, negative==eof, positive==bad
+ name = <character variable for file name>
+ named = <logical variable>
+ nml = <namelist name>
+ nextrec = <integer variable> one greater than written
+ number = <integer variable unit number>
+ opened = <logical variable>
+ pad one of "yes" "no"
+ position one of "asis" "rewind" "append"
+ rec = <integer record number>
+ recl = <integer unformatted record size>
+ size = <integer variable> number of characters read before eor
+ status one of "old" "new" "unknown" "replace" "scratch" "keep"
+ unit = <integer unit number>
+
+ Individual questions
+ direct = <character variable> "yes" "no" "unknown"
+ formatted = <character variable> "yes" "no" "unknown"
+ read = <character variable> "yes" "no" "unknown"
+ readwrite = <character variable> "yes" "no" "unknown"
+ sequential = <character variable> "yes" "no" "unknown"
+ unformatted = <character variable> "yes" "no" "unknown"
+ write = <character variable> "yes" "no" "unknown"
+
format an explicit format can replace * in any
+ I/O statement. Include the format in
+ apostrophes or quotes and keep the parenthesis.
+
+ examples:
+ print "(3I5,/(2X,3F7.2/))", <output list>
+ write(6, '(a,E15.6E3/a,G15.2)' ) <output list>
+ read(unit=11, fmt="(i4, 4(f3.0,TR1))" ) <input list>
+
+ A format includes the opening and closing parenthesis.
+ A format consists of format items and format control items separated by comma.
+ A format may contain grouping parenthesis with an optional repeat count.
+
+ Format Items, data edit descriptors:
+
+ key: w is the total width of the field (filled with *** if overflow)
+ m is the least number of digits in the (sub)field (optional)
+ d is the number of decimal digits in the field
+ e is the number of decimal digits in the exponent subfield
+ c is the repeat count for the format item
+ n is number of columns
+
+ cAw data of type character (w is optional)
+ cBw.m data of type integer with binary base
+ cDw.d data of type real -- same as E, old double precision
+ cEw.d or Ew.dEe data of type real
+ cENw.d or ENw.dEe data of type real -- exponent a multiple of 3
+ cESw.d or ESw.dEe data of type real -- first digit non zero
+ cFw.d data of type real -- no exponent printed
+ cGw.d or Gw.dEe data of type real -- auto format to F or E
+ nH n characters follow the H, no list item
+ cIw.m data of type integer
+ cLw data of type logical -- .true. or .false.
+ cOw.m data of type integer with octal base
+ cZw.m data of type integer with hexadecimal base
+ "<string>" literal characters to output, no list item
+ '<string>' literal characters to output, no list item
+
+ Format Control Items, control edit descriptors:
+
+ BN ignore non leading blanks in numeric fields
+ BZ treat nonleading blanks in numeric fields as zeros
+ nP apply scale factor to real format items old
+ S printing of optional plus signs is processor dependent
+ SP print optional plus signs
+ SS do not print optional plus signs
+ Tn tab to specified column
+ TLn tab left n columns
+ TRn tab right n columns
+ nX tab right n columns
+ / end of record (implied / at end of all format statements)
+ : stop format processing if no more list items
+
+ <input list> can be:
+ a variable
+ an array name
+ an implied do ((A(i,j),j=1,n) ,i=1,m) parenthesis and commas as shown
+
+ note: when there are more items in the input list than format items, the
+ repeat rules for formats applies.
+
+ <output list> can be:
+ a constant
+ a variable
+ an expression
+ an array name
+ an implied do ((A(i,j),j=1,n) ,i=1,m) parenthesis and commas as shown
+
+ note: when there are more items in the output list than format items, the
+ repeat rules for formats applies.
+
+ Repeat Rules for Formats:
+
+ Each format item is used with a list item. They are used in order.
+ When there are more list items than format items, then the following
+ rule applies: There is an implied end of record, /, at the closing
+ parenthesis of the format, this is processed. Scan the format backwards
+ to the first left parenthesis. Use the repeat count, if any, in front
+ of this parenthesis, continue to process format items and list items.
+
+ Note: an infinite loop is possible
+ print "(3I5/(1X/))", I, J, K, L may never stop
+
Intrinsic Functions are presented in alphabetical order and then grouped
+ by topic. The function name appears first. The argument(s) and result
+ give an indication of the type(s) of argument(s) and results.
+ [,dim=] indicates an optional argument "dim".
+ "mask" must be logical and usually conformable.
+ "character" and "string" are used interchangeably.
+ A brief description or additional information may appear.
+
abs(integer_real_complex) result(integer_real_complex)
+ achar(integer) result(character) integer to character
+ acos(real) result(real) arccosine |real| ≤ 1.0 0≤result≤Pi
+ adjustl(character) result(character) left adjust, blanks go to back
+ adjustr(character) result(character) right adjust, blanks to front
+ aimag(complex) result(real) imaginary part
+ aint(real [,kind=]) result(real) truncate to integer toward zero
+ all(mask [,dim]) result(logical) true if all elements of mask are true
+ allocated(array) result(logical) true if array is allocated in memory
+ anint(real [,kind=]) result(real) round to nearest integer
+ any(mask [,dim=]) result(logical) true if any elements of mask are true
+ asin(real) result(real) arcsine |real| ≤ 1.0 -Pi/2≤result≤Pi/2
+ associated(pointer [,target=]) result(logical) true if pointing
+ atan(real) result(real) arctangent -Pi/2≤result≤Pi/2
+ atan2(y=real,x=real) result(real) arctangent -Pi≤result≤Pi
+ bit_size(integer) result(integer) size in bits in model of argument
+ btest(i=integer,pos=integer) result(logical) true if pos has a 1, pos=0..
+ ceiling(real) result(real) truncate to integer toward infinity
+ char(integer [,kind=]) result(character) integer to character [of kind]
+ cmplx(x=real [,y=real] [kind=]) result(complex) x+iy
+ conjg(complex) result(complex) reverse the sign of the imaginary part
+ cos(real_complex) result(real_complex) cosine
+ cosh(real) result(real) hyperbolic cosine
+ count(mask [,dim=]) result(integer) count of true entries in mask
+ cshift(array,shift [,dim=]) circular shift elements of array, + is right
+ date_and_time([date=] [,time=] [,zone=] [,values=]) y,m,d,utc,h,m,s,milli
+ dble(integer_real_complex) result(real_kind_double) convert to double
+ digits(integer_real) result(integer) number of bits to represent model
+ dim(x=integer_real,y=integer_real) result(integer_real) proper subtraction
+ dot_product(vector_a,vector_b) result(integer_real_complex) inner product
+ dprod(x=real,y=real) result(x_times_y_double) double precision product
+ eoshift(array,shift [,boundary=] [,dim=]) end-off shift using boundary
+ epsilon(real) result(real) smallest positive number added to 1.0 /= 1.0
+ exp(real_complex) result(real_complex) e raised to a power
+ exponent(real) result(integer) the model exponent of the argument
+ floor(real) result(real) truncate to integer towards negative infinity
+ fraction(real) result(real) the model fractional part of the argument
+ huge(integer_real) result(integer_real) the largest model number
+ iachar(character) result(integer) position of character in ASCII sequence
+ iand(integer,integer) result(integer) bit by bit logical and
+ ibclr(integer,pos) result(integer) argument with pos bit cleared to zero
+ ibits(integer,pos,len) result(integer) extract len bits starting at pos
+ ibset(integer,pos) result(integer) argument with pos bit set to one
+ ichar(character) result(integer) pos in collating sequence of character
+ ieor(integer,integer) result(integer) bit by bit logical exclusive or
+ index(string,substring [,back=]) result(integer) pos of substring
+ int(integer_real_complex) result(integer) convert to integer
+ ior(integer,integer) result(integer) bit by bit logical or
+ ishft(integer,shift) result(integer) shift bits in argument by shift
+ ishftc(integer, shift) result(integer) shift circular bits in argument
+ kind(any_intrinsic_type) result(integer) value of the kind
+ lbound(array,dim) result(integer) smallest subscript of dim in array
+ len(character) result(integer) number of characters that can be in argument
+ len_trim(character) result(integer) length without trailing blanks
+ lge(string_a,string_b) result(logical) string_a ≥ string_b
+ lgt(string_a,string_b) result(logical) string_a > string_b
+ lle(string_a,string_b) result(logical) string_a ≤ string_b
+ llt(string_a,string_b) result(logical) string_a < string_b
+ log(real_complex) result(real_complex) natural logarithm
+ log10(real) result(real) logarithm base 10
+ logical(logical [,kind=]) convert to logical
+ matmul(matrix,matrix) result(vector_matrix) on integer_real_complex_logical
+ max(a1,a2,a3,...) result(integer_real) maximum of list of values
+ maxexponent(real) result(integer) maximum exponent of model type
+ maxloc(array [,mask=]) result(integer_vector) indices in array of maximum
+ maxval(array [,dim=] [,mask=]) result(array_element) maximum value
+ merge(true_source,false_source,mask) result(source_type) choose by mask
+ min(a1,a2,a3,...) result(integer-real) minimum of list of values
+ minexponent(real) result(integer) minimum(negative) exponent of model type
+ minloc(array [,mask=]) result(integer_vector) indices in array of minimum
+ minval(array [,dim=] [,mask=]) result(array_element) minimum value
+ mod(a=integer_real,p) result(integer_real) a modulo p
+ modulo(a=integer_real,p) result(integer_real) a modulo p
+ mvbits(from,frompos,len,to,topos) result(integer) move bits
+ nearest(real,direction) result(real) nearest value toward direction
+ nint(real [,kind=]) result(real) round to nearest integer value
+ not(integer) result(integer) bit by bit logical complement
+ pack(array,mask [,vector=]) result(vector) vector of elements from array
+ present(argument) result(logical) true if optional argument is supplied
+ product(array [,dim=] [,mask=]) result(integer_real_complex) product
+ radix(integer_real) result(integer) radix of integer or real model, 2
+ random_number(harvest=real_out) subroutine, uniform random number 0 to 1
+ random_seed([size=] [,put=] [,get=]) subroutine to set random number seed
+ range(integer_real_complex) result(integer_real) decimal exponent of model
+ real(integer_real_complex [,kind=]) result(real) convert to real
+ repeat(string,ncopies) result(string) concatenate n copies of string
+ reshape(source,shape,pad,order) result(array) reshape source to array
+ rrspacing(real) result(real) reciprocal of relative spacing of model
+ scale(real,integer) result(real) multiply by 2**integer
+ scan(string,set [,back]) result(integer) position of first of set in string
+ selected_int_kind(integer) result(integer) kind number to represent digits
+ selected_real_kind(integer,integer) result(integer) kind of digits, exp
+ set_exponent(real,integer) result(real) put integer as exponent of real
+ shape(array) result(integer_vector) vector of dimension sizes
+ sign(integer_real,integer_real) result(integer_real) sign of second on first
+ sin(real_complex) result(real_complex) sine of angle in radians
+ sinh(real) result(real) hyperbolic sine of argument
+ size(array [,dim=]) result(integer) number of elements in dimension
+ spacing(real) result(real) spacing of model numbers near argument
+ spread(source,dim,ncopies) result(array) expand dimension of source by 1
+ sqrt(real_complex) result(real_complex) square root of argument
+ sum(array [,dim=] [,mask=]) result(integer_real_complex) sum of elements
+ system_clock([count=] [,count_rate=] [,count_max=]) subroutine, all out
+ tan(real) result(real) tangent of angle in radians
+ tanh(real) result(real) hyperbolic tangent of angle in radians
+ tiny(real) result(real) smallest positive model representation
+ transfer(source,mold [,size]) result(mold_type) same bits, new type
+ transpose(matrix) result(matrix) the transpose of a matrix
+ trim(string) result(string) trailing blanks are removed
+ ubound(array,dim) result(integer) largest subscript of dim in array
+ unpack(vector,mask,field) result(v_type,mask_shape) field when not mask
+ verify(string,set [,back]) result(integer) pos in string not in set
+
abs(integer_real_complex) result(integer_real_complex)
+ acos(real) result(real) arccosine |real| ≤ 1.0 0≤result≤Pi
+ aimag(complex) result(real) imaginary part
+ aint(real [,kind=]) result(real) truncate to integer toward zero
+ anint(real [,kind=]) result(real) round to nearest integer
+ asin(real) result(real) arcsine |real| ≤ 1.0 -Pi/2≤result≤Pi/2
+ atan(real) result(real) arctangent -Pi/2≤result≤Pi/2
+ atan2(y=real,x=real) result(real) arctangent -Pi≤result≤Pi
+ ceiling(real) result(real) truncate to integer toward infinity
+ cmplx(x=real [,y=real] [kind=]) result(complex) x+iy
+ conjg(complex) result(complex) reverse the sign of the imaginary part
+ cos(real_complex) result(real_complex) cosine
+ cosh(real) result(real) hyperbolic cosine
+ dble(integer_real_complex) result(real_kind_double) convert to double
+ digits(integer_real) result(integer) number of bits to represent model
+ dim(x=integer_real,y=integer_real) result(integer_real) proper subtraction
+ dot_product(vector_a,vector_b) result(integer_real_complex) inner product
+ dprod(x=real,y=real) result(x_times_y_double) double precision product
+ epsilon(real) result(real) smallest positive number added to 1.0 /= 1.0
+ exp(real_complex) result(real_complex) e raised to a power
+ exponent(real) result(integer) the model exponent of the argument
+ floor(real) result(real) truncate to integer towards negative infinity
+ fraction(real) result(real) the model fractional part of the argument
+ huge(integer_real) result(integer_real) the largest model number
+ int(integer_real_complex) result(integer) convert to integer
+ log(real_complex) result(real_complex) natural logarithm
+ log10(real) result(real) logarithm base 10
+ matmul(matrix,matrix) result(vector_matrix) on integer_real_complex_logical
+ max(a1,a2,a3,...) result(integer_real) maximum of list of values
+ maxexponent(real) result(integer) maximum exponent of model type
+ maxloc(array [,mask=]) result(integer_vector) indices in array of maximum
+ maxval(array [,dim=] [,mask=]) result(array_element) maximum value
+ min(a1,a2,a3,...) result(integer-real) minimum of list of values
+ minexponent(real) result(integer) minimum(negative) exponent of model type
+ minloc(array [,mask=]) result(integer_vector) indices in array of minimum
+ minval(array [,dim=] [,mask=]) result(array_element) minimum value
+ mod(a=integer_real,p) result(integer_real) a modulo p
+ modulo(a=integer_real,p) result(integer_real) a modulo p
+ nearest(real,direction) result(real) nearest value toward direction
+ nint(real [,kind=]) result(integer) round to nearest integer value
+ product(array [,dim=] [,mask=]) result(integer_real_complex) product
+ radix(integer_real) result(integer) radix of integer or real model, 2
+ random_number(harvest=real_out) subroutine, uniform random number 0 to 1
+ random_seed([size=] [,put=] [,get=]) subroutine to set random number seed
+ range(integer_real_complex) result(integer_real) decimal exponent of model
+ real(integer_real_complex [,kind=]) result(real) convert to real
+ rrspacing(real) result(real) reciprocal of relative spacing of model
+ scale(real,integer) result(real) multiply by 2**integer
+ set_exponent(real,integer) result(real) put integer as exponent of real
+ sign(integer_real,integer_real) result(integer_real) sign of second on first
+ sin(real_complex) result(real_complex) sine of angle in radians
+ sinh(real) result(real) hyperbolic sine of argument
+ spacing(real) result(real) spacing of model numbers near argument
+ sqrt(real_complex) result(real_complex) square root of argument
+ sum(array [,dim=] [,mask=]) result(integer_real_complex) sum of elements
+ tan(real) result(real) tangent of angle in radians
+ tanh(real) result(real) hyperbolic tangent of angle in radians
+ tiny(real) result(real) smallest positive model representation
+ transpose(matrix) result(matrix) the transpose of a matrix
+
all(mask [,dim]) result(logical) true if all elements of mask are true
+ any(mask [,dim=]) result(logical) true if any elements of mask are true
+ bit_size(integer) result(integer) size in bits in model of argument
+ btest(i=integer,pos=integer) result(logical) true if pos has a 1, pos=0..
+ count(mask [,dim=]) result(integer) count of true entries in mask
+ iand(integer,integer) result(integer) bit by bit logical and
+ ibclr(integer,pos) result(integer) argument with pos bit cleared to zero
+ ibits(integer,pos,len) result(integer) extract len bits starting at pos
+ ibset(integer,pos) result(integer) argument with pos bit set to one
+ ieor(integer,integer) result(integer) bit by bit logical exclusive or
+ ior(integer,integer) result(integer) bit by bit logical or
+ ishft(integer,shift) result(integer) shift bits in argument by shift
+ ishftc(integer, shift) result(integer) shift circular bits in argument
+ logical(logical [,kind=]) convert to logical
+ matmul(matrix,matrix) result(vector_matrix) on integer_real_complex_logical
+ merge(true_source,false_source,mask) result(source_type) choose by mask
+ mvbits(from,frompos,len,to,topos) result(integer) move bits
+ not(integer) result(integer) bit by bit logical complement
+ transfer(source,mold [,size]) result(mold_type) same bits, new type
+
achar(integer) result(character) integer to character
+ adjustl(character) result(character) left adjust, blanks go to back
+ adjustr(character) result(character) right adjust, blanks to front
+ char(integer [,kind=]) result(character) integer to character [of kind]
+ iachar(character) result(integer) position of character in ASCII sequence
+ ichar(character) result(integer) pos in collating sequence of character
+ index(string,substring [,back=]) result(integer) pos of substring
+ len(character) result(integer) number of characters that can be in argument
+ len_trim(character) result(integer) length without trailing blanks
+ lge(string_a,string_b) result(logical) string_a ≥ string_b
+ lgt(string_a,string_b) result(logical) string_a > string_b
+ lle(string_a,string_b) result(logical) string_a ≤ string_b
+ llt(string_a,string_b) result(logical) string_a < string_b
+ repeat(string,ncopies) result(string) concatenate n copies of string
+ scan(string,set [,back]) result(integer) position of first of set in string
+ trim(string) result(string) trailing blanks are removed
+ verify(string,set [,back]) result(integer) pos in string not in set
+
Despite its age, Fortran is still a common language in scientific computing on account of its speed and ease of use in writing numerical computing-centric code.
+This section walks through how to compile and run a basic Fortran code, and then a basic Fortran MPI code, adapted from here. See Compilers and Toolchains for compiler and programming environment information on NREL HPC systems. For an extensive guide to Fortran 90, see our page on Advanced Fortran. See External Resources for general Fortran language tutorials and Fortran-MPI tutorials.
+Create a file named hello.f90, and save the following text to the file:
+PROGRAM hello
+
+write(*,*) "Hello World"
+
+END PROGRAM hello
+
Now, we must choose the compiler with which to compile our program. We can choose between the GNU, Intel, Nvidia, and Cray compilers, depending on which system we're on (see Compilers and Toolchains).
To see available versions of a chosen compiler, use module avail. For this example, we'll use gfortran, which is part of GNU's gcc package:
module avail gcc
+ gcc/10.3.0 gcc/11.2.0 gcc/12.1.0(default)
+
We'll use gcc/12.1.0:
+module load gcc/12.1.0
+
Now, we can compile the program with the following command:
+gfortran hello.f90 -o hello
This creates an executable named hello. Execute it by typing the following into your terminal:
./hello
It should return the following output:
+Hello World
The purpose of Fortran today is to run large scale computations fast. For the "large scale" part, we use MPI. Now that we have a working Hello World program, let's modify it to run on multiple MPI tasks.
+On Kestrel, there are multiple implementations of MPI available. We can choose between OpenMPI, Intel MPI, MPICH, and Cray MPICH. These MPI implementations are associated with an underlying Fortran compiler. For example, if we type:
+module avail openmpi
we find that both openmpi/4.1.4-gcc and openmpi/4.1.4-intel are available.
Let's choose the openmpi/gcc combination:
+module load openmpi/4.1.4-gcc
Now, create a new file named hello_mpi.f90 and save the following contents to the file:
PROGRAM hello_mpi
+include 'mpif.h'
+
+integer :: ierr, my_rank, number_of_ranks
+
+call MPI_INIT(ierr)
+call MPI_COMM_SIZE(MPI_COMM_WORLD, number_of_ranks, ierr)
+call MPI_COMM_RANK(MPI_COMM_WORLD, my_rank, ierr)
+
+write(*,*) "Hello World from MPI task: ", my_rank, "out of ", number_of_ranks
+
+call MPI_FINALIZE(ierr)
+
+END PROGRAM hello_mpi
+
To compile this program, type:
+mpif90 hello_mpi.f90 -o hello_mpi
To run this code on the login node, type:
+mpirun -n 4 ./hello_mpi
You should receive a similar output to the following (the rank ordering may differ):
+ Hello World from MPI task: 1 out of 4
+ Hello World from MPI task: 2 out of 4
+ Hello World from MPI task: 3 out of 4
+ Hello World from MPI task: 0 out of 4
+
Generally, we don't want to run MPI programs on the login node! Let's submit this as a job to the scheduler. Create a file named job.in and modify the file to contain the following:
#!/bin/bash
+
+#SBATCH --time=00:01:00
+#SBATCH --nodes=1
+#SBATCH --ntasks-per-node=4
+#SBATCH --partition=standard
+#SBATCH --account=<your account here>
+
+module load openmpi/4.1.4-gcc
+
+srun -n 4 ./hello_mpi &> hello.out
+
Replace <your account here> with your account name.
+Submit the job:
+sbatch job.in
When the job is done, the file hello.out should contain the same output as you found before (the ordering of ranks may differ).
| Compiler | Compiler Executable | Module Avail | Systems available on |
|---|---|---|---|
| gcc | gfortran | gcc | Kestrel (Eagle), Swift, Vermilion |
| intel | ifort | intel-oneapi | Kestrel (Eagle), Swift, Vermilion |
| intel | ifort | intel-classic | Kestrel |

| Compiler | MPI | Compiler Executable | Module Avail | Systems available on |
|---|---|---|---|---|
| gcc | openmpi | mpifort | openmpi | Kestrel (Eagle), Swift, Vermilion |
| intel | openmpi | mpifort | openmpi/4.1.x-intel | Kestrel (Eagle) |
| intel | intel | mpiifort | intel-oneapi-mpi | Kestrel, Swift, Vermilion |
| gcc | MPICH | mpifort | mpich | Kestrel, Swift, Vermilion |
| intel | MPICH | mpifort | mpich/4.0.2-intel | Kestrel only |
| cray | MPICH | ftn | cray-mpich | Kestrel only |
Julia is a dynamic programming language that offers high performance while being easy to learn and develop code in.
+This section contains demos (in the form of scripts and notebooks) and how-to guides for doing various things with Julia on NREL HPC environments.
| Eagle | Swift | Vermilion | Kestrel |
|---|---|---|---|
|  | julia/1.6.2-ocsfign |  |  |
| julia/1.7.2 | julia/1.7.2-gdp7a25 | julia/1.7.2 | julia/1.7.2 |
|  |  |  | julia/1.8.5-generic-linux |
| julia/1.9.0 |  |  |  |
Julia 1.9.x does not work well on Sapphire Rapids
+We advise against installing and using Julia 1.9.x on Kestrel as packages can fail to precompile and result in a segmentation fault. This is a known issue with Julia 1.9.x on Sapphire Rapids processors, possibly due to an LLVM issue. Julia 1.10 will be installed as a module once a stable release is available. Until then, please use Julia 1.7 or 1.8.
The following scripts and notebooks are available on the master branch of NREL/HPC to download and run, including PyJulia_Demo.ipynb and a demo using mpi4py and MPI.jl.
Running the demos requires the python modules mpi4py and julia. For details on installing these modules, see the 'Environment Setup' section of the README found in the demos/scripts directory.
For more information on mpi4py, see the mpi4py documentation.
For more information on PyJulia, see the PyJulia documentation.
The following sections describe Julia packages and native function calls that can be used to call Python, C, and FORTRAN libraries.
+We can use the PyCall.jl
package to call Python code from Julia.
using PyCall
+
# The following makes it so that print statements in python will appear in this notebook
+# This is not necessary when using PyCall in a terminal based Julia instance
+pyimport("sys")."stdout" = PyTextIO(stdout)
+pyimport("sys")."stderr" = PyTextIO(stderr);
+
We can execute arbitrary Python code with the special Julia strings py"..."
and py"""..."""
.
py"""
+import math
+class Point:
+ def __init__(self, x,y):
+ self.x = x
+ self.y = y
+ def distance(self, p):
+ return math.sqrt((self.x - p.x)**2 + (self.y - p.y)**2)
+"""
+
+p = py"Point(1.0, 2.0)"
+
PyObject <__main__.Point object at 0x7fa3d66bd340>
+
We can even use Julia's string interpolation to give values to the Python code:
+x = rand()
+q = py"Point($(x), $rand())"
+
PyObject <__main__.Point object at 0x7fa3d66bdb80>
+
Attributes are directly accessible through the standard dot syntax:
+@show p.x
+@show p.distance(q);
+
p.x = 1.0
+p.distance(q) = 1.7581695820873517
+
But say we have a module in Python that we want to call from Julia. We can do that too (otherwise this wouldn't be much use, would it?). The pyimport function returns an object that gives us access to that module's functions:
np = pyimport("numpy")
+A = rand(3,3)
+b = rand(3)
+x = np.linalg.solve(A, b)
+@show maximum(abs.(A * x - b));
+
maximum(abs.(A * x - b)) = 1.1102230246251565e-16
+
In the previous example, A and b are created by Julia while x is created by Python, but we are using them interchangeably. We can do this because PyCall handles most type conversions automatically.
for x in [5.0, 2, ["a", "b"], Dict("a"=>rand(), "b"=>rand()), A]
+ @show typeof(x)
+ py"""print(type($x))"""
+end
+
typeof(x) = Float64
+<class 'float'>
+typeof(x) = Int64
+<class 'int'>
+typeof(x) = Vector{String}
+<class 'list'>
+typeof(x) = Dict{String, Float64}
+<class 'dict'>
+typeof(x) = Matrix{Float64}
+<class 'numpy.ndarray'>
+
Note that the matrix is converted to a numpy
array if numpy
is installed.
The same is true going from Python to Julia.
+py"""
+objs = [{'a':1,'b':2}, [1, 'a', 3.0], 2.0+3j]
+for k in range(len(objs)):
+ $println($typeof(objs[k]))
+ print(type(objs[k]))
+"""
+
Dict{Any, Any}
+<class 'dict'>
+Vector{Any}
+<class 'list'>
+ComplexF64
+<class 'complex'>
+
We do need to be a little careful with some of Julia's less common types, especially if we pass them to Python and bring them back:
+a = Int32(5)
+@show typeof(a)
+@show typeof(py"$a");
+
typeof(a) = Int32
+typeof(py"$a") = Int64
+
In these cases, we may want to handle the conversion ourselves. One option is getting the raw PyObject
back by using the py"..."o
syntax and then calling an appropriate convert
function:
@show typeof(a)
+@show typeof(py"$a"o)
+@show typeof(convert(Int32, py"$a"o));
+
typeof(a) = Int32
+typeof(py"$a"o) = PyObject
+typeof(convert(Int32, py"$a"o)) = Int32
+
Another way of handling (or preventing) type conversions is to use the pycall
function.
pycall(np.random.normal, PyObject, size=3)
+
PyObject array([ 1.27173788, -0.55905635, -1.81371862])
+
Here we specified that the result should be left as a raw PyObject (i.e. no type conversion at all).
+We can also give it a Julia type to convert to
+pycall(np.random.normal, Vector{ComplexF32}, size=3)
+
3-element Vector{ComplexF32}:
+ 0.82824904f0 + 0.0f0im
+ -1.8152742f0 + 0.0f0im
+ 0.6555549f0 + 0.0f0im
+
Here we forced the type conversion to complex numbers with 32-bit precision for the real and imaginary parts.
+But what if we need to call a Python function that requires a callback? Not a problem. PyCall will automatically convert Julia functions to Python callable objects!
+si = pyimport("scipy.integrate")
+tk = 0.0:1e-2:10.0
+function my_ode(t::Float64, y::Vector{Float64})::Vector{Float64}
+ dy = zeros(length(y))
+ dy[1] = 5.0*y[1] - 5.0*y[1]*y[2]
+ dy[2] = y[1]*y[2] - y[2]
+ return dy
+end
+soln = si.solve_ivp(my_ode, (0.0, 10.0), [5.0, 1.0], t_eval=tk);
+
using Plots
+plot(soln["t"], soln["y"]')
+
plot(soln["y"][1,:], soln["y"][2,:])
+
For more details, see the PyCall github repo.
+Here we will discuss how to call a C library function from within Julia.
+Calling a FORTRAN library function is the same except that FORTRAN compilers "mangle" the function names. This means that they are not precisely the same names as in the source code and you need to know what compiler was used to compile the FORTRAN library so you can determine the mangling scheme.
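For instance, here is a hedged sketch of what such a call might look like (the library libmyfort and the routine FILL_ONES are hypothetical, used only to illustrate the common gfortran convention of lowercasing names and appending an underscore; Fortran also passes all arguments by reference):

n = Ref{Cint}(4)                   # Fortran expects even scalars by reference
v = Vector{Float64}(undef, 4)
ccall((:fill_ones_, "libmyfort"),  # gfortran mangles FILL_ONES to fill_ones_
      Cvoid,
      (Ptr{Float64}, Ref{Cint}),
      v, n)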
+Note that the library we are calling must be compiled as a shared library.
+As an example we will use the "silly" library that was written just for this.
+Here are the functions available in the silly library: +
void fill_zeros(double *to_fill, int size);
+void fill_value(double *to_fill, int size, double value);
+void fill_cb(double *to_fill, int size, double (*func)(int));
+
To call one of these functions, we will use the builtin Julia function ccall
:
N = 4
+my_vector = Vector{Float64}(undef, N)
+@show my_vector
+ccall((:fill_zeros,"fake-lib/libsilly"), # function and library
+ Cvoid, # return type
+ (Ref{Float64}, Cint), # argument types
+ my_vector, N # arguments
+)
+@show my_vector
+ccall((:fill_value,"fake-lib/libsilly"),
+ Cvoid,
+ (Ref{Float64}, Cint, Cdouble),
+ my_vector, N, pi
+)
+@show my_vector;
+
my_vector = [2.257468188e-314, 0.0, 2.257517705e-314, 2.257468188e-314]
+my_vector = [0.0, 0.0, 0.0, 0.0]
+my_vector = [3.141592653589793, 3.141592653589793, 3.141592653589793, 3.141592653589793]
+
What if we want to use a function that requires a callback (so one of its arguments is a function pointer)? We can create a pointer to a Julia function with the @cfunction
macro.
function my_filler(index::Int)::Float64
+ return index / 10.0
+end
+cfunc = @cfunction(my_filler, Float64, (Int,))
+
Ptr{Nothing} @0x000000017ee10ec0
+
Now we call the C function with ccall
as before. The type of the function pointer is Ptr{Cvoid}
.
ccall((:fill_cb, "fake-lib/libsilly"),
+ Cvoid,
+ (Ref{Float64}, Cint, Ptr{Cvoid}),
+ my_vector, N, cfunc)
+@show my_vector;
+
my_vector = [0.0, 0.1, 0.2, 0.3]
+
For more details, see the Calling C and FORTRAN Code section of the Julia documentation. (If the link does not work, just google "julia call c library".)
+A more complex example is provided by Ipopt.jl. You may also wish to look at the Ipopt library C API. The easiest way to do this is actually to just look at the header file at src/Interfaces/IpStdCInterface.h
which is viewable at the Ipopt github repo.
Interested in calling a function/library written in something other than Python, C or FORTRAN? Checkout the Julia Interop group on GitHub. Interfaces already exist for C++, MATLAB, Mathematica and R to name a few.
Julia modules exist on NREL HPC systems. Access simply with
+module load julia
+
To see all available Julia modules on the system, use the command
+module spider julia
+
However, if you need a version of Julia for which a module does not exist or want your own personal Julia build, there are several options described in the rest of this document. Below is a general guide for what approach to use:
Older versions of Julia are available from the conda-forge channel:
conda create -n julia-env
+source activate julia-env
+conda install -c conda-forge julia
+
You will need a working version of Spack. For detailed instructions on getting Spack set up, see the GitHub repository. Briefly, this can be done with the following:
+git clone https://github.com/spack/spack.git
+cd spack
+git checkout releases/v0.15 # Change to desired release
+. share/spack/setup-env.sh # Activate spack shell support
+
Info
+Steps 1 and 2 may be skipped when using the develop branch or any release branch after v0.15.
Open var/spack/repos/builtin/packages/julia/package.py in your favorite editor and locate the following if statement:
if spec.target.family == 'x86_64' or spec.target.family == 'x86':
+
else:
+ target_str = str(spec.target).replace('_','-')
+ options += [
+ 'MARCH={0}'.format(target_str),
+ 'JULIA_CPU_TARGET={0}'.format(target_str)
+ ]
+
spack install julia
+
All the required build tools and libraries are available on the clusters either by default or through modules. The needed modules are covered in the instructions.
JULIA_HOME is the base directory of the Julia source code (initially called julia after git clone).
When compiling Julia, you can choose to compile against Intel's MKL libraries or OpenBLAS for Julia's linear algebra operations. If you are going to be doing significant matrix-vector operations directly in Julia, then you will want to compile it with MKL. If most of the matrix-vector operations are being done in a subprogram or library (e.g. Ipopt), then it makes no difference what you compile Julia with. In this latter case, it is recommended that you compile with OpenBLAS, since that is significantly easier. Instructions for both choices are given below.
+Note
+When compiling Julia with MKL, Julia uses the single dynamic library
option for linking. Any dynamic libraries (e.g. Ipopt or CoinHSL) loaded by Julia also need to be linked to MKL with this approach. Failing to do so will result in unusual behavior, e.g. getting garbage values passed to the MKL function calls.
Info
+We are using Eagle as an example for this demonstration. The process remains the same for other clusters.
+git clone https://github.com/JuliaLang/julia.git
cd julia
git checkout <julia_version>
In the file Make.user (you will need to create the file if it doesn't exist) in JULIA_HOME, put the following:
MARCH=skylake-avx512 -- tell the compiler to optimize floating point instructions for Eagle's Skylake processors
USE_INTEL_MKL=1 -- use the Intel versions of BLAS and LAPACK (this is why we loaded the mkl module)
USE_BLAS64=0 -- use the 64-bit library with the 32-bit integer interface. This will necessitate changes in Make.inc; the reasons for this are discussed in step 7.
+I found it useful to create the file Make.user
in another location (e.g. home directory) and drop a link into the Julia build directory as I used git clean -x -f -d
to make sure everything is completely clean
USER
variable that leads to problems with xalt/ld
(a script that invokes ld). To fix this do the following:make -C deps/ extract-suitesparse
JULIA_HOME/deps/scratch/SuiteSparse-5.4.0/UMFPACK/Lib/Makefile
Makefile
, do a global replace on USER
--i.e. change all occurrences of the variable USER
to something else like MUSER
Make.inc
in your favorite editor and make the following changeMKLLIB
is set (there will be an if-else statement depending on the value of USE_BLAS64
)MKLLIB := $(MKLROOT)/lib/intel64
make -j4
-- -j4
allows make
to use 4 processes to build and can speed up compilation (additional speed ups may be possible by increasing the number of processes)We will make use of the following basic Monte Carlo integration function throughout this presentation
+using Statistics
+using BenchmarkTools # for the `@btime` macro
+
+function mc_integrate(f::Function, a::Real=0, b::Real=1, n::Int=100000)
+ ihat = 0.0
+ for k in 1:n
+ x = (b - a)*rand() + a
+ ihat += (f(x) - ihat) / k
+ end
+ return ihat
+end
+
+function intense_computation(t::Real)
+ sleep(t)
+ return rand()
+end;
+
Tasks are execution streams that do not depend on each other and can be done in any order. They can be executed asynchronously but they are not executed in parallel. That is, only one task is running at a given time but the order of execution is not predetermined.
+Tasks are also known as coroutines.
Running a task is done in 3 steps: creating the task, scheduling it, and fetching its result.
+Creating a task can be done directly with the Task
object:
my_task = Task(()->mc_integrate(sin, -pi, pi))
+
Task (runnable) @0x000000011ecc0ab0
+
Note the Task
constructor takes a function with no arguments.
We can always define a zero-argument anonymous function to pass to the Task
constructor. The @task
macro exists for this purpose:
my_task = @task mc_integrate(sin, -pi, pi)
+
Task (runnable) @0x0000000136384cd0
+
Next we schedule the task to run using the schedule
function
schedule(my_task)
+
Task (done) @0x0000000136384cd0
+
Many times we want to create and schedule a task immediately. We can do this with the @async
macro:
my_task = @async mc_integrate(sin, -pi, pi)
+
Task (done) @0x000000011d14edc0
+
We can collect the results of the task once it has completed with the fetch
function
fetch(my_task)
+
0.0020294747408654656
+
There are a few helpful details to know about fetch:
* If the task has not completed when fetch is called, the call to fetch will block until the task has completed.
* If the task fails with an exception, fetch will raise a TaskFailedException which wraps the original exception (a short sketch of both behaviors follows below).
Remember that tasks are not inherently parallel, just asynchronous execution streams.
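As a quick aside, here is a minimal sketch of the two fetch behaviors just described (the task names are made up for illustration):

t_ok = @async (sleep(0.1); 42)
fetch(t_ok)                      # blocks for about 0.1 s, then returns 42

t_bad = @async error("boom")
try
    fetch(t_bad)                 # the task failed, so fetch throws
catch err
    err isa TaskFailedException  # true; the original error is wrapped inside
end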
+function run_mci()
+ N = 10
+ result = zeros(N)
+ for k in 1:N
+ result[k] = mc_integrate(sin, -pi, pi)
+ end
+ return mean(result)
+end
+
+function run_mci_task()
+ N = 10
+ task_res = zeros(N)
+ @sync for k in 1:N
+ @async(task_res[k] = mc_integrate(sin, -pi, pi))
+ end
+ return mean(task_res)
+end;
+
@btime run_mci()
+@btime run_mci_task();
+
22.094 ms (1 allocation: 160 bytes)
+ 24.318 ms (75 allocations: 4.78 KiB)
+
Note
+The @sync
macro will block at the end of the code block until all enclosed @async
statements have completed execution.
Sometimes we need to communicate between tasks. An easy way to accomplish this is to use Julia's Channel
type. We can think of a Channel
like a pipe or a queue: objects are put in at one end and taken off at the other.
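As a standalone sketch of this pipe/queue behavior (the channel here is purely illustrative):

ch = Channel{Int}(2)   # a buffered channel that can hold 2 items
put!(ch, 1)
put!(ch, 2)
take!(ch)              # returns 1 -- items come off in the order they went in
take!(ch)              # returns 2
close(ch)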
Let's rewrite run_mci_task
to use channels by dividing the run_mci
workflow into two functions.
The first function will perform small Monte-Carlo integrations and put the results on a channel with the put!
function. When it has finished the requested number of computations it will close the channel with close
and return.
function integrator(output::Channel{Float64}, N::Int)
+ for k in 1:N
+ result = mc_integrate(sin, -pi, pi)
+ put!(output, result)
+ end
+ close(output)
+ return
+end;
+
Note
+If the channel is full, put!
will block until space opens up.
The second function will take the results off the channel using the take!
function and accumulate them into an average. We keep pulling results from the channel as long as there is a result or the channel is open. We can check the former with isready
and the latter with isopen
.
function accumulator(input::Channel{Float64})
+ mean_val = 0.0
+ k = 0
+ while isready(input) || isopen(input)
+ value = take!(input)
+ k += 1
+ mean_val += (value - mean_val) / k
+ end
+ return mean_val
+end;
+
Note
+If the channel is empty, the take!
function will block until there is an item available.
Now we create a channel which can hold 10 results, create and schedule both tasks, and finally fetch the result.
+function run_mci_chan()
+ comm_ch = Channel{Float64}(10)
+ atask = @async accumulator(comm_ch)
+ @async integrator(comm_ch, 10)
+ result = fetch(atask)
+ return result
+end;
+
@btime run_mci_chan();
+
22.097 ms (25 allocations: 1.45 KiB)
+
If tasks aren't parallel, why are we talking about them in a parallel computing tutorial?
Remember that tasks are discrete computation units. They naturally define boundaries between computational tasks. Julia's native parallel capabilities are ways of scheduling tasks on other processors.
+Julia (v1.3 or greater) has multithreading built into the language. By default, Julia starts with a single thread. To start Julia with multiple threads either
* set the environment variable JULIA_NUM_THREADS to some value > 1, or
* start Julia with the --threads or -t option (Julia v1.5 or greater)
Once started, we can see how many threads are running with the function Threads.nthreads
Threads.nthreads()
+
2
+
The @threads Macro
Many computations take the form of looping over an array where the result of the computation is put into an element of the array and these computations do not interact. In this case, we can make use of the Threads.@threads macro.
Let's apply this to our Monte-Carlo integration.
+function run_mci_mt()
+ N = 10
+ mt_res = zeros(N)
+ Threads.@threads for k in 1:N
+ mt_res[k] = mc_integrate(sin, -pi, pi)
+ end
+ return mean(mt_res)
+end;
+
@btime run_mci_mt();
+
11.118 ms (12 allocations: 1.00 KiB)
+
The @spawn Macro
Some applications require dispatching individual tasks on different threads. We can do this using the Threads.@spawn macro. This is like the @async macro but will schedule the task on an available thread. That is, it creates a Task and schedules it, but on an available thread.
function run_mci_mt2()
+ N = 10
+ mt_res = Vector{Float64}(undef, N)
+ @sync for k in 1:N
+ @async(mt_res[k] = fetch(Threads.@spawn mc_integrate(sin, -pi, pi)))
+ end
+ return mean(mt_res)
+end;
+
@btime run_mci_mt2();
+
11.385 ms (126 allocations: 8.80 KiB)
+
There are a couple of oddities about Julia's multi-threading capability to remember:
+The combination of these two behaviors can lead to load imbalances amongst threads when there are blocking operations within a thread's tasks.
+Just as before, we can use a Channel
to communicate between tasks in a multi-threaded environment. The only difference is that we replace @async
with Threads.@spawn
.
function run_mci_mt3()
+ comm_ch = Channel{Float64}(10)
+ itask = Threads.@spawn integrator(comm_ch, 10)
+ atask = Threads.@spawn accumulator(comm_ch)
+ result = fetch(atask)
+ return result
+end;
+
@btime run_mci_mt3();
+
22.183 ms (35 allocations: 1.61 KiB)
+
Note
+We can see from the timing results this is not the best way to distribute the work since the integrator
function has much more computational work than the accumulator
function.
Communication patterns are one-sided, so users only manage one process. Communication itself takes the form of function or macro calls rather than explicit send and receive calls.
+Distributed.jl is built on two basic types: remote calls and remote references. A remote call is a directive to execute a particular function on a particular process. A remote reference is a reference to a variable stored on a particular process.
+There is a strong resemblance to the way Julia handles tasks: Function calls (wrapped in appropriate types) are scheduled on worker processes through remote calls which return remote references. The results of these calls are then retrieved by fetching the values using the remote references.
+We can launch more Julia processes on the same or other machines with the addprocs
function. Here we launch 2 worker processes on the local machine:
using Distributed
+addprocs(2);
+
Each Julia process is identified by a (64-bit) integer. We can get a list of all active processes with procs
:
@show procs();
+
procs() = [1, 2, 3]
+
There is a distinction between the original Julia process and those we launched. The original Julia process is often called the master process and always has id equal to 1. The launched processes are called workers. We can obtain a list of workers with the workers
function:
@show workers();
+
workers() = [2, 3]
+
By default, distributed processing operations use the workers only.
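To make the remote call and remote reference ideas concrete, here is a minimal sketch (it assumes worker processes have already been added with addprocs, as shown above):

fut = remotecall(sqrt, 2, 4.0)   # remote call: run sqrt(4.0) on worker 2
fetch(fut)                       # fut is a Future (a remote reference); returns 2.0
remotecall_fetch(+, 2, 1, 2)     # call and fetch in a single round trip; returns 3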
We can also start up worker processes from the command line using the -p or --procs option.
In order to launch Julia processes on other machines, we give addprocs
a vector of tuples where each tuple is the hostname as a string paired with the number of processes to start on that host.
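A hedged sketch of that tuple form (the host names below are placeholders, not real machines):

using Distributed
addprocs([("node001", 2), ("node002", 2)])  # start 2 workers on each host over ssh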
The Julia global state is not copied in the new processes. We need to manually load any modules and define any functions we need. This is done with the Distributed.@everywhere
macro:
@everywhere using Statistics
+@everywhere function mc_integrate(f::Function, a::Real=0, b::Real=1, n::Int=100000)
+ ihat = 0.0
+ for k in 1:n
+ x = (b - a)*rand() + a
+ ihat += (f(x) - ihat) / k
+ end
+ return ihat
+end;
+
The @distributed Macro
The @distributed macro is the distributed memory equivalent of the Threads.@threads macro. This macro partitions the range of the for loop and executes the computation on all worker processes.
function run_mci_dist()
+ N = 10
+ total = @distributed (+) for k in 1:N
+ mc_integrate(sin, -pi, pi)
+ end
+ return total/N
+end;
+
@btime run_mci_dist();
+
11.224 ms (157 allocations: 7.16 KiB)
+
Between the macro and the for loop is an optional reduction. Here we have used +
but this can be any valid reduction operator including a user defined function. The values given to the reduction are the values of the last expression in the loop.
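As a small illustrative sketch, a user-defined reduction might look like the following (my_max is a made-up name; the reducer must be defined on all workers):

@everywhere my_max(a, b) = a > b ? a : b
biggest = @distributed (my_max) for k in 1:1000
    mc_integrate(sin, -pi, pi)
end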
Note
+If we do not provide a reduction, @distributed
creates a task for each element of the loop and schedules them on worker processes and returns without waiting for the tasks to complete. To wait for completion of the tasks, the whole block can be wrapped with @sync
macro.
The @spawnat Macro
Julia also provides more fine-grained control for launching tasks on workers with the @spawnat macro:
function run_mci_dist2()
+ N = 10
+ futures = Vector{Future}(undef, N)
+ for k in 1:N
+ futures[k] = @spawnat(:any, mc_integrate(sin, -pi, pi))
+ end
+ return mean(fetch.(futures))
+end;
+
The first argument to @spawnat
is the worker to run the computation on. Here we have used :any
indicating that Julia should pick a process for us. If we wanted to execute the computation on a particular worker, we could specify which one with the worker id value. The second argument is the expression to compute.
@spawnat
returns a Future
which is a remote reference. We call fetch
on it to retrieve the value of the computation. Note that fetch
will block until the computation is complete.
@btime run_mci_dist2();
+
13.020 ms (1119 allocations: 44.34 KiB)
+
Warning
+The entire expression is sent to the worker process before anything in the expression is executed. This can cause performance issues if we need a small part of a big object or array.
+@everywhere struct MyData
+ Data::Vector{Float64}
+ N::Int
+end
+function slow(my_data::MyData)
+ return fetch(@spawnat(2, mean(rand(my_data.N))))
+end;
+
large_data = MyData(rand(1000000), 5)
+@btime slow(large_data);
+
1.731 ms (108 allocations: 4.08 KiB)
+
This is easily fixed using a local variable:
+function fast(my_data::MyData)
+ n = my_data.N
+ return fetch(@spawnat(2, mean(rand(n))))
+end;
+
@btime fast(large_data);
+
192.843 μs (100 allocations: 3.80 KiB)
+
As suggested by the name, these are the remote versions of the Channel type we've already seen. If you look at the source code, they actually wrap an AbstractChannel to provide the needed remote functionality. We can effectively treat them just like a Channel.
Let's redo our integrator
- accumulator
workflow, but this time let's do a better job of distributing the work:
@everywhere function integrator(output::RemoteChannel{Channel{Float64}}, N::Int)
+ for k in 1:N
+ result = mc_integrate(sin, -pi, pi)
+ put!(output, result)
+ end
+ put!(output, NaN)
+ return
+end;
+@everywhere function accumulator(input::RemoteChannel{Channel{Float64}}, nworkers::Int)
+ mean_val = 0.0
+ k = 0
+ finished = 0
+ while finished < nworkers
+ value = take!(input)
+ if value === NaN
+ finished += 1
+ else
+ k += 1
+ mean_val += (value - mean_val) / k
+ end
+ end
+ return mean_val
+end;
+
function run_mci_rc()
+ comm_ch = RemoteChannel(()->Channel{Float64}(10), 1)
+ @spawnat(2, integrator(comm_ch, 5))
+ @spawnat(3, integrator(comm_ch, 5))
+ atask = @async accumulator(comm_ch, nworkers())
+ return fetch(atask)
+end;
+
Here we create a RemoteChannel
on the master process, divide the computationally intensive integrator
function into two calls and remotely execute them on the worker processes. Then we start a task on the master process to accumulate the values and call fetch to wait for and retrieve the result.
@btime run_mci_rc();
+
12.328 ms (1066 allocations: 41.97 KiB)
+
To shutdown the worker processes we can use rmprocs
.
rmprocs(workers())
+
Task (done) @0x000000011cd3cde0
+
Alternatively, we can also just exit Julia and the workers will be shutdown as part of the exit process.
+MPI.jl
is a Julia wrapper around an MPI library. By default it will download an MPI library suitable for running on the installing system. However, it is easily configured to use an existing system MPI implementation (e.g. one of the MPI modules on the cluster). See the documentation for instructions on how to do this.
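As a hedged sketch, recent versions of MPI.jl are typically pointed at a system MPI through the MPIPreferences package (older MPI.jl versions used the JULIA_MPI_BINARY environment variable instead); run something like the following once, with the desired MPI module loaded, and then restart Julia:

using MPIPreferences
MPIPreferences.use_system_binary()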
MPI.jl
mostly requires transmitted things to be buffers of basic types (types that are easily converted to C). Some functions can transmit arbitrary data by serializing them, but this functionality is not as fleshed out as in mpi4py.
We first need to load and initialize MPI.
+using MPI
+MPI.Init()
+
MPI.Init
loads the MPI library and calls MPI_Init
as well as sets up types for that specific MPI library.
Now we can implement our Monte-Carlo integration workflow using MPI
+function run_mci_mpi()
+
+ comm = MPI.COMM_WORLD
+ rank = MPI.Comm_rank(comm)
+ size = MPI.Comm_size(comm)
+
+ if rank == 0
+ N = 10
+ num = [N]
+ else
+ num = Vector{Int}(undef, 1)
+ end
+ MPI.Bcast!(num, 0, comm)
+
+ rank_sum = 0.0
+ for k in rank+1:size:num[1]
+ rank_sum += mc_integrate(sin, -pi, pi)
+ end
+
+ total = MPI.Reduce([rank_sum], MPI.SUM, 0, comm)
+ if rank == 0
+ result = total / N
+ else
+ result = nothing
+ end
+
+ return result
+end
+
To benchmark this we time it many (10000) times and track the minimal value (this is similar to what the @btime
macro does).
function run_loop(nruns::Int)
+
+ min_time = 1e10
+ result = 0.0
+
+ for _ in 1:nruns
+ MPI.Barrier(MPI.COMM_WORLD)
+ start = time()
+ result = run_mci_mpi()
+ stop = time()
+ elapsed = stop - start
+ if elapsed < min_time
+ min_time = elapsed
+ end
+ end
+
+ if MPI.Comm_rank(MPI.COMM_WORLD) == 0
+ println("Elapsed time: ", min_time)
+ end
+
+ return
+end
+
+run_loop(10000)
+
Here are the results:
+mpirun -n 2 julia mpi_mci.jl
+ Activating environment at `~/HPC_Apps/julia-tutorial/Project.toml`
+ Activating environment at `~/HPC_Apps/julia-tutorial/Project.toml`
+Elapsed time: 0.01108694076538086
+
We provide a brief survey of available packages that can be used to get started.
Packages exist for NVIDIA's CUDA, AMD's ROCm, and Intel's oneAPI. CUDA.jl is the most mature, while the other two, as of this writing, are still under development.
+The package KernelAbstractions.jl is an abstraction layer for enabling different GPU backends.
+See the JuliaGPU organization's webpage or github repo for a great place to get started.
The following are great resources for learning more:
+"Julia aims to create an unprecedented combination of ease-of-use, power, and efficiency in a single language." --Julia Documentation
+Feature Highlights:
+julia
in a terminal (assuming Julia is in your path):
jmaack-32918s:~ jmaack$ julia
+ _
+ _ _ _(_)_ | Documentation: https://docs.julialang.org
+ (_) | (_) (_) |
+ _ _ _| |_ __ _ | Type "?" for help, "]?" for Pkg help.
+ | | | | | | |/ _` | |
+ | | |_| | | | (_| | | Version 1.6.1 (2021-04-23)
+ _/ |\__'_|_|_|\__'_| |
+|__/ |
+
julia> 4 * pi^2 + sqrt(2)im
+39.47841760435743 + 1.4142135623730951im
+
+help?> Int
+search: Int Int8 Int64 Int32 Int16 Int128 Integer intersect intersect! InteractiveUtils InterruptException
+
+ Int64 <: Signed
+
+ 64-bit signed integer type.
+
+julia> exit()
+
Tip
+When using the REPL, the result of the (last) expression is always printed. This is sometimes undesirable. We can suppress printing by ending the last expression with a semicolon ;
. This is used throughout this presentation for appearance purposes. Unless otherwise stated any semicolon in code is not needed.
There are two ways to define functions
+Standard way:
+function my_function(x)
+ return x^2
+end;
+
Short form way:
+my_func(x) = x^2;
+
It is also possible to define anonymous functions (and save pointers to them):
+f = (x)->x^2;
+
@show my_function(pi)
+@show my_func(pi)
+@show f(pi);
+
my_function(pi) = 9.869604401089358
+my_func(pi) = 9.869604401089358
+f(pi) = 9.869604401089358
+
Info
+Julia uses the standard control flow keywords such as for
, while
, if
, elseif
, else
. See the Control Flow section of the Julia documentation for more details. Obviously, these are helpful in writing functions.
Packages can be accessed in two ways:
+import
statement -- makes all module attributes (i.e. functions and types) available by prefixing the module name followed by a dot
x = rand(5)
+import Statistics
+Statistics.mean(x)
+
0.3339056277968421
+
using
statement -- everything exported by the module is directly accessible
using Statistics
+mean(x)
+
0.3339056277968421
+
Any attribute that is not exported by the module can still be accessed by prefixing the module name followed by a dot.
+Statistics._conj(x)
+
5-element Vector{Float64}:
+ 0.17922586649673145
+ 0.7155842248637634
+ 0.29280412953665125
+ 0.10325841440419592
+ 0.3786555036828685
+
Note
+Like in python, there are no private attributes. Users may access anything created by a module. Package authors can suggest attributes that users should not use by not exporting them or with naming conventions (e.g. prefixing _
to any name that is internal only).
Julia 1.6 introduced the "pythonic" import syntax
+import Statistics as Stats
+Stats.mean(x)
+
0.3339056277968421
+
In older Julia versions, we can declare a constant for our packages
+import Statistics
+const St = Statistics
+St.mean(x)
+
0.3339056277968421
+
Tip
+When writing Julia code, use import
rather than using
. This makes code easier to follow as well as giving hints on where to look for documentation.
Julia uses the MATLAB dot syntax to operate component-wise on arrays (i.e. vectors and matrices)
+x = rand(3)
+y = rand(3)
+(x.*y).^2
+
3-element Vector{Float64}:
+ 0.5367929263482071
+ 0.008092183589557244
+ 0.36146876615689527
+
Julia also extends this syntax to ANY function that operates on vector elements
+number_op(x) = x + 5
+number_op.(x)
+
3-element Vector{Float64}:
+ 5.754141942494573
+ 5.8412967567631
+ 5.637813968303307
+
In Julia, vectorizing is done for convenience rather than performance:
+function my_mult_for(x,y)
+ z = zeros(length(x))
+ for k in length(x)
+ z[k] = x[k] * y[k]
+ end
+ return z
+end
+
+function my_mult_vect(x,y)
+ return x .* y
+end;
+
# This forces Julia to compile the function definitions
+# so that the timing results in the next cell are correct
+x = rand(2)
+y = rand(2)
+@time my_mult_vect(x,y)
+@time my_mult_for(x,y);
+
0.055219 seconds (145.07 k allocations: 8.243 MiB, 99.96% compilation time)
+ 0.009099 seconds (15.42 k allocations: 873.090 KiB, 99.82% compilation time)
+
x = rand(10000)
+y = rand(10000)
+@time my_mult_vect(x,y)
+@time my_mult_for(x,y);
+
0.000015 seconds (2 allocations: 78.203 KiB)
+ 0.000032 seconds (2 allocations: 78.203 KiB)
+
Open the REPL and hit the ] key to enter package management mode. From here we can add or remove packages:
(@v1.6) pkg> add Compat
+ Resolving package versions...
+ Updating `~/.julia/environments/v1.6/Project.toml`
+ [34da2185] + Compat v3.31.0
+ Updating `~/.julia/environments/v1.6/Manifest.toml`
+ [34da2185] + Compat v3.31.0
+ [8bb1440f] + DelimitedFiles
+ [8ba89e20] + Distributed
+ [1a1011a3] + SharedArrays
+ [2f01184e] + SparseArrays
+ [10745b16] + Statistics
+
+(@v1.6) pkg> rm Compat
+ Updating `~/.julia/environments/v1.6/Project.toml`
+ [34da2185] - Compat v3.31.0
+ Updating `~/.julia/environments/v1.6/Manifest.toml`
+ [34da2185] - Compat v3.31.0
+ [8bb1440f] - DelimitedFiles
+ [8ba89e20] - Distributed
+ [1a1011a3] - SharedArrays
+ [2f01184e] - SparseArrays
+ [10745b16] - Statistics
+
We can also print out what packages are available +
(@v1.6) pkg> st
+ Status `~/.julia/environments/v1.6/Project.toml`
+ [7073ff75] IJulia v1.23.2
+ [438e738f] PyCall v1.92.3
+
(@v1.6) pkg> up
+ Updating registry at `~/.julia/registries/General`
+ Updating git-repo `https://github.com/JuliaRegistries/General.git`
+ No Changes to `~/.julia/environments/v1.6/Project.toml`
+ No Changes to `~/.julia/environments/v1.6/Manifest.toml`
+
Package management mode in the REPL is actually just a convenient interface to the Julia package Pkg.jl which is part of the Julia standard library.
+All package mode commands are functions in Pkg.jl:
+import Pkg; Pkg.add("Compat"); Pkg.rm("Compat")
+
+ Updating registry at `~/.julia/registries/General`
+ Updating git-repo `https://github.com/JuliaRegistries/General.git`
+ Resolving package versions...
+ Updating `~/.julia/environments/v1.6/Project.toml`
+ [34da2185] + Compat v3.31.0
+ Updating `~/.julia/environments/v1.6/Manifest.toml`
+ [34da2185] + Compat v3.31.0
+ [8bb1440f] + DelimitedFiles
+ [8ba89e20] + Distributed
+ [1a1011a3] + SharedArrays
+ [2f01184e] + SparseArrays
+ [10745b16] + Statistics
+ Updating `~/.julia/environments/v1.6/Project.toml`
+ [34da2185] - Compat v3.31.0
+ Updating `~/.julia/environments/v1.6/Manifest.toml`
+ [34da2185] - Compat v3.31.0
+ [8bb1440f] - DelimitedFiles
+ [8ba89e20] - Distributed
+ [1a1011a3] - SharedArrays
+ [2f01184e] - SparseArrays
+ [10745b16] - Statistics
+
Pkg.status(); Pkg.update()
+
+ Status `~/.julia/environments/v1.6/Project.toml`
+ [7073ff75] IJulia v1.23.2
+ [438e738f] PyCall v1.92.3
+ Updating registry at `~/.julia/registries/General`
+ Updating git-repo `https://github.com/JuliaRegistries/General.git`
+ No Changes to `~/.julia/environments/v1.6/Project.toml`
+ No Changes to `~/.julia/environments/v1.6/Manifest.toml`
+
Warning
+If you want to use Julia within Jupyter notebook, some package management features (like adding new packages) do not work well. It is best to add/remove/update either with a script or using the REPL.
+Environments allow us to install different versions of packages for use with different projects. Very similar to python virtual environments or conda environments.
+Pkg.activate("env-one"); Pkg.status()
+
+ Activating environment at `~/HPC_Apps/julia-tutorial/env-one/Project.toml`
+ Status `~/HPC_Apps/julia-tutorial/env-one/Project.toml`
+ [91a5bcdd] Plots v1.13.1
+
Pkg.activate("env-two"); Pkg.status()
+
+ Activating environment at `~/HPC_Apps/julia-tutorial/env-two/Project.toml`
+ Status `~/HPC_Apps/julia-tutorial/env-two/Project.toml`
+ [91a5bcdd] Plots v1.16.6
+
The environment names are given by the directory in which they reside. The explicitly added packages are given in the Project.toml
file. The entire environment with all the required dependencies (down to specific commits) are in the Manifest.toml
file.
There are 3 ways to activate an environment:
+Pkg.activate
function:
+ Pkg.activate("path/to/environment/")
+
activate
command:
+ activate path/to/environment
+
--project
option:
+ julia --project=<path/to/environment>
+
The first 2 ways can also be used to create new environments.
+To copy an environment, all you need is the Project.toml
file. Put it in the desired directory and activate that environment. Finally, in package management mode, use the instantiate
command:
(fake-env) pkg> st
+ Status `~/fake-env/Project.toml`
+→ [da04e1cc] MPI v0.18.1
+ Info packages marked with → not downloaded, use `instantiate` to download
+
+(fake-env) pkg> instantiate
+ Installed MPI ─ v0.18.1
+ Building MPI → `~/.julia/scratchspaces/44cfe95a-1eb2-52ea-b672-e2afdf69b78f/494d99052881a83f36f5ef08b23de07cc7c03a96/build.log`
+Precompiling project...
+ 1 dependency successfully precompiled in 2 seconds (11 already precompiled)
+
Note
+Alternatively, you can use the Pkg.instantiate
function.
Info
If you need to exactly copy an environment, copy both the Project.toml and Manifest.toml files into the desired directory and use the instantiate command.
Julia environments can be layered such that packages from more than just the top layer environment can be imported. This allows us to have access to debugging and development tools without putting them in whatever environment we're working on. This is a major difference from conda environments.
+Pkg.status()
+ Status `~/HPC_Apps/julia-tutorial/env-one/Project.toml`
+ [91a5bcdd] Plots v1.13.1
+
import BenchmarkTools as BT # THIS IS NOT IN OUR TOP ENVIRONMENT!!!
+
When loading a package, Julia has a hierarchy of environments that it checks for the package. Julia loads the first version of the package it encounters in this hierarchy. The environment hierarchy can be altered by the JULIA_LOAD_PATH
environment variable.
These environment stacks are discussed more in the Environments subsection of the Code Loading part of the Julia Manual.
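A small sketch for inspecting this from a running session (these are standard Base functions):

Base.active_project()  # Project.toml of the active (top) environment
Base.load_path()       # the full, expanded stack of environments Julia searches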
+In Julia everything has a type. We can access an object's type with the typeof
function:
typeof(7.5)
+
Float64
+
Even types have a type:
+typeof(Float64)
+
DataType
+
Julia also has a type hierarchy. There are subtypes and supertypes. We can explore these with the functions subtypes and supertype:
subtypes(Float64)
+
Type[]
+
supertype(Float64)
+
AbstractFloat
+
Float64
has no subtypes because it is a Concrete Type. All of its supertypes are Abstract Types. Only Concrete Types can actually be instantiated.
Every type has only one immediate supertype. However, each supertype has a supertype. We can get the whole chain with the supertypes
(plural) function:
supertypes(Float64)
+
(Float64, AbstractFloat, Real, Number, Any)
+
Let us see all the floating point types available in Julia:
+subtypes(AbstractFloat)
+
4-element Vector{Any}:
+ BigFloat
+ Float16
+ Float32
+ Float64
+
We can test whether or not a type is a subtype of something with the <:
operator:
Float64 <: AbstractFloat
+
true
+
Float64 <: Float64
+
true
+
Int <: AbstractFloat
+
false
+
Warning
+Subtypes and supertypes get complicated when dealing with containers:
+Float64 <: Real
+
true
+
Vector{Float64} <: Vector{Real}
+
false
+
Vector{Float64} <: Vector
+
true
+
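A common way around this (my addition, not from the original notes) is to constrain the element type instead of fixing it:

Vector{Float64} <: Vector{<:Real}   # true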
We can use this to write functions:
+function my_abs_sub(x)
+ if typeof(x) <: Complex
+ println("Complex!")
+ return sqrt(x.re^2 + x.im^2)
+ elseif typeof(x) <: Real
+ println("Real!")
+ return x < 0 ? -x : x
+ else
+ error("Not a number!")
+ end
+end
+@show my_abs_sub(-5)
+@show my_abs_sub(-5.0)
+@show my_abs_sub(-1 + 2im);
+
Real!
+my_abs_sub(-5) = 5
+Real!
+my_abs_sub(-5.0) = 5.0
+Complex!
+my_abs_sub(-1 + 2im) = 2.23606797749979
+
A more Julia way of doing this is to write the typing information directly into the function definition:
+function my_abs_md(x::Real)
+ println("Multiple Dispatch Real!")
+ return x < 0 ? -x : x
+end
+function my_abs_md(x::Complex)
+ println("Multiple Dispatch Complex!")
+ return sqrt(x.re^2 + x.im^2)
+end
+@show my_abs_md(-5)
+@show my_abs_md(-1 + 2im);
+
Multiple Dispatch Real!
+my_abs_md(-5) = 5
+Multiple Dispatch Complex!
+my_abs_md(-1 + 2im) = 2.23606797749979
+
Notice that the functions have the same name, but the correct one is executed based on the type of the argument. This is called Multiple Dispatch.
+Tip
+Add typing information for any function you are likely to use a lot. There are two reasons:
+Julia allows us to define our own (composite) types:
+struct Point
+ x::Float64
+ y::Float64
+end
+p0 = Point(0, 0)
+p1 = Point(1.0, 2.0)
+
Point(1.0, 2.0)
+
We can define functions with this type as the argument now
+function distance(p::Point, q::Point)
+ return sqrt((p.x - q.x)^2 + (p.y - q.y)^2)
+end
+distance(p0, p1)
+
2.23606797749979
+
We can build structs with other structs as components:
+struct Circle
+ center::Point
+ radius::Float64
+end
+
+my_circle = Circle(p1, 5)
+
Circle(Point(1.0, 2.0), 5.0)
+
function is_in(p::Point, c::Circle)
+ return distance(p, c.center) < c.radius
+end
+@show is_in(p0, my_circle)
+@show is_in(Point(100,0), my_circle);
+
is_in(p0, my_circle) = true
+is_in(Point(100, 0), my_circle) = false
+
What if we want to change the radius of the circle?
+my_circle.radius = 10.0 # Causes an error!!
+
setfield! immutable struct of type Circle cannot be changed
+
+
+
+Stacktrace:
+
+ [1] setproperty!(x::Circle, f::Symbol, v::Float64)
+
+ @ Base ./Base.jl:34
+
+ [2] top-level scope
+
+ @ In[34]:1
+
+ [3] eval
+
+ @ ./boot.jl:360 [inlined]
+
+ [4] include_string(mapexpr::typeof(REPL.softscope), mod::Module, code::String, filename::String)
+
+ @ Base ./loading.jl:1116
+
Structs are immutable (cannot be changed) by default in Julia. This allows for some optimizations behind the scenes and most of the time we do not need to change the values in a Struct.
+If we need to change fields in a struct, we add the mutable
keyword:
mutable struct MutableCircle
+ center::Point
+ radius::Float64
+end
+my_mutable_circle = MutableCircle(p1, 5.0)
+@show my_mutable_circle
+my_mutable_circle.radius = 10.0
+@show my_mutable_circle;
+
my_mutable_circle = MutableCircle(Point(1.0, 2.0), 5.0)
+my_mutable_circle = MutableCircle(Point(1.0, 2.0), 10.0)
+
Let us go back to our Point type:
+struct Point
+ x::Float64
+ y::Float64
+end
+
We locked in the types in the fields of this struct. What if we want to use a Point
struct with a different type? Such as an Int
. We use a Parametric Type.
We define a Parametric Type in the following way:
+struct ParametricPoint{R <: Real}
+ x::R
+ y::R
+end
+
+function distance(p::ParametricPoint{<:Real},
+ q::ParametricPoint{<:Real})
+ return sqrt((p.x - q.x)^2 + (p.y - q.y)^2)
+end;
+
p0 = ParametricPoint(1, -1)
+@show typeof(p0)
+p1 = ParametricPoint(2.0, 0.0)
+@show typeof(p1)
+@show distance(p0,p1);
+
typeof(p0) = ParametricPoint{Int64}
+typeof(p1) = ParametricPoint{Float64}
+distance(p0, p1) = 1.4142135623730951
+
At a very high level, Julia code is executed in two phases: a parse phase, in which the code is turned into expressions, and an execution phase, in which those expressions are compiled and run.
+Julia code is parsed and turned into expressions. These expressions are themselves Julia data structures.
+expr = Meta.parse("z^2 + 1")
+expr
+
:(z ^ 2 + 1)
+
While the expression prints as a human-readable mathematical expression, it is actually a tree:
+dump(expr)
+
Expr
+ head: Symbol call
+ args: Array{Any}((3,))
+ 1: Symbol +
+ 2: Expr
+ head: Symbol call
+ args: Array{Any}((3,))
+ 1: Symbol ^
+ 2: Symbol z
+ 3: Int64 2
+ 3: Int64 1
+
Since this is a data structure, we can change the expression
+expr.args[1] = :-
+expr.args[2].args[1] = :*
+expr
+
:(z * 2 - 1)
+
Then evaluate it
+z = 3
+@show eval(expr)
+z = 2.5
+@show eval(expr);
+
eval(expr) = 5
+eval(expr) = 4.0
+
Note we gave z
a value after we wrote the expression.
A macro is a special function that takes expressions, symbols and literal values as arguments and returns an expression. The biggest difference between a macro and a normal function is that a macro is executed during the parse phase. This means that in a macro we have access to the expression!
+Let's take a look at the @assert
macro:
x = 5; y = 4;
+@assert x == y
+
AssertionError: x == y
+
+
+
+Stacktrace:
+
+ [1] top-level scope
+
+ @ In[42]:2
+
+ [2] eval
+
+ @ ./boot.jl:360 [inlined]
+
+ [3] include_string(mapexpr::typeof(REPL.softscope), mod::Module, code::String, filename::String)
+
+ @ Base ./loading.jl:1116
+
The error contains the expression that caused the error! This is not possible to do with a function because that expression is not available at runtime.
+How do we write macros? More or less like we write functions but using the macro
keyword instead of the function
keyword:
macro fadd(name::Symbol, f::Symbol, g::Symbol, nargs::Int)
+ x = [gensym() for _ in 1:nargs]
+ quote
+ $(esc(name))($(x...)) = $(esc(f))($(x...)) + $(esc(g))($(x...))
+ end
+end
+
@fadd (macro with 1 method)
+
This macro takes two functions and creates an expression for a function that computes the sum of the two. It is actually generating code!
+p(x) = x^2
+q(x) = (2x + 5) / x^2
+@fadd(h, p, q, 1)
+@show p(pi) + q(pi)
+@show h(pi);
+
p(pi) + q(pi) = 11.012830091668627
+h(pi) = 11.012830091668627
+
We can look at the expression that the macro generates with the macro @macroexpand
:
@macroexpand(@fadd(h, p, q, 1))
+
quote
+ #= In[43]:4 =#
+ h(var"#73###258") = begin
+ #= In[43]:4 =#
+ p(var"#73###258") + q(var"#73###258")
+ end
+end
+
Ignoring all the stuff with #
symbols we can see that the expression returned by the macro looks more or less like a function definition.
Having seen how this works let's unpack the macro definition a bit more. For context, here's the whole definition again:
+macro fadd(name::Symbol, f::Symbol, g::Symbol, nargs::Int)
+ x = [gensym() for _ in 1:nargs]
+ quote
+ $(esc(name))($(x...)) = $(esc(f))($(x...)) + $(esc(g))($(x...))
+ end
+end
+
We'll unpack it one line at a time.
+Having seen how this works let's unpack the macro definition a bit more. For context, here's the whole definition again:
+macro fadd(name::Symbol, f::Symbol, g::Symbol, nargs::Int)
+ x = [gensym() for _ in 1:nargs]
+ quote
+ $(esc(name))($(x...)) = $(esc(f))($(x...)) + $(esc(g))($(x...))
+ end
+end
+
First Line:
+macro fadd(name::Symbol, f::Symbol, g::Symbol, nargs::Int)
+ ...
+end
+
The macro definition looks a lot like a function definition but with macro
instead of function
.
Second Line:
+ x = [gensym() for _ in 1:nargs]
+
Here we create a vector of symbols of size nargs
. The gensym
function generates a symbol for a variable that is guaranteed not to clash with existing variables. These symbols will be the arguments of our new function.
Third Line:
+ quote
+ # expression here
+ end
+
This is an easy way to generate an expression. The contents of this block is the expression returned by the macro.
+Fourth Line:
+ $(esc(name))($(x...)) = $(esc(f))($(x...)) + $(esc(g))($(x...))
+
This is the meat of the macro and it may seem a bit much at first. However, each term is essentially the same. So let's just focus on the left hand side of the equality.
+ $(esc(name))($(x...))
+
The name variable is local to the macro. Its value is what we want to put into the expression, so we interpolate it into the expression using $ and wrap it in the esc function.
Without esc, Julia will assume that the variable is local and needs to be transformed (renamed with gensym) so that it will not clash with other variables.
Finally, we splat the vector of argument symbols x into the expression. This is done with the splat operator ... in conjunction with $.
Why can't we just write a function to do this? Let's try:
+function fadd(name, f::Function, g::Function, nargs::Int)
+ x = [gensym() for _ in 1:nargs]
+ [WHAT HERE?](x...) = f(x...) + g(x...)
+ return [WHAT TO RETURN?]
+end
+
There are a couple problems here:
* Using the value of name, we would end up with a function called name.
* The definition happens inside fadd. Once we return from fadd, the name is no longer bound to this function.
, the name is no longer bound to this function.If we do not care about creating function names, we could construct and return an anonymous function:
+function fadd(f::Function, g::Function, nargs::Int)
+ x = [gensym() for _ in 1:nargs]
+ return (x...)->(f(x...) + g(x...))
+end
+h1 = fadd(p,q,1)
+h1(pi)
+
11.012830091668627
+
This gets us pretty close to the same functionality since we could assign the function pointer to any valid variable name.
+However, we did not maximize the value of the macro. We can actually generate documentation for our function as well:
+macro fadd(name::Symbol, f::Symbol, g::Symbol, nargs::Int)
+ x = [gensym() for _ in 1:nargs]
+ local help = "Functions $f and $g added together. Created with the `@fadd` macro!"
+ quote
+ @doc string($help)
+ $(esc(name))($(x...)) = $(esc(f))($(x...)) + $(esc(g))($(x...))
+ end
+end
+@fadd(h,p,q,1);
+
?h
+
Functions p and q added together. Created with the `@fadd` macro!
+
The Julia Documentation is a great place to read about Julia features. Numerous examples are normally given along with detailed explanation.
+The official Julia website is a great place to find Julia tutorials, learn about the Julia community or discover research using Julia.
Dask is a framework for parallelizing Python code. The most common use case is to enable Python programmers to scale scientific and machine learning analyses to run on distributed hardware. Dask has similarities to Apache Spark (see the Dask FAQ for a comparison), but Dask is more Python-native and interfaces with common scientific libraries such as NumPy and Pandas.
+Dask can be installed via Conda. For example, to install Dask into a new conda environment, first load the appropriate anaconda module (e.g., module load anaconda3
on Kestrel), and then run:
conda create -n dask python=3.9
+conda activate dask
+conda install dask
+
This installs Dask along with common dependencies such as NumPy. Additionally, the dask-jobqueue
package (discussed below), can be installed via:
conda install dask-jobqueue -c conda-forge
+
Further, there is the dask-mpi
package (also discussed below). To ensure compatibility with the system MPI libraries, it is recommended to install dask-mpi
using pip. As such, we recommend installing any conda packages first. dask-mpi
depends on mpi4py
, although we have found that the pip install command does not automatically install mpi4py
, so we install it explicitly. Also, installation of mpi4py
will link against the system libraries, so the desired MPI library should be loaded first. In addition, it may be necessary to explicitly specify the MPI compiler driver. For example, to install mpi4py on Kestrel using the default programming environment and MPI (PrgEnv-cray using Cray MPICH):
module load PrgEnv-cray
+env MPICC=cc pip install dask-mpi mpi4py
+
Dask can be used locally on your laptop or an individual node. Additionally, it provides wrappers for multiprocessing and threadpools. One advantage of using LocalCluster
is that you can easily drop in another cluster configuration to further parallelize, with minimal modification of the code.
The following is a simple example that uses a local cluster with the dask.delayed
interface, which can be used when the problem doesn't fit into one of the built-in collection types such as dask.array
or dask.dataframe
:
from distributed import Client, LocalCluster
+import dask
+import time
+import random
+
+@dask.delayed
+def inc(x):
+ time.sleep(random.random())
+ return x + 1
+
+@dask.delayed
+def dec(x):
+ time.sleep(random.random())
+ return x - 1
+
+@dask.delayed
+def add(x, y):
+ time.sleep(random.random())
+ return x + y
+
+def main ():
+ cluster = LocalCluster(n_workers=2)
+ client = Client(cluster)
+ zs = []
+ for i in range(256):
+ x = inc(i)
+ y = dec(x)
+ z = add(x, y)
+ zs.append(z)
+
+ result = dask.compute(*zs)
+ print (result)
+
+
+if __name__ == "__main__":
+ main()
+
The dask-jobqueue
library makes it easy to deploy Dask to a distributed cluster using Slurm. This is particularly useful when running an interactive notebook, where the workers can be scaled dynamically.
For the following example, first make sure that both dask
and dask-jobqueue
have been installed. Create a file named dask_slurm_example.py
with the following contents, and replace <project>
with your project allocation.
dask_slurm_example.py
from dask_jobqueue import SLURMCluster
+import socket
+from dask.distributed import Client
+from collections import Counter
+
+cluster = SLURMCluster(
+ cores=18,
+ memory='24GB',
+ account='<project>',
+ walltime='00:30:00',
+ processes=17,
+)
+
+client = Client(cluster)
+
+def test():
+ return socket.gethostname()
+
+result = []
+cluster.scale(jobs=2)
+
+for i in range(2000):
+ result.append(client.submit(test).result())
+
+print(Counter(result))
+print(cluster.job_script())
+
Then the script can simply be executed directly from a login node:
+python dask_slurm_example.py
+
Note that although 2 jobs are requested, Dask launches the jobs dynamically, so depending on the status of the job queue, your results may indicate that only a single node was used.
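You can watch the worker jobs that dask-jobqueue submits on your behalf with the usual Slurm tools; for example:
squeue -u $USER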
+Dask also provides a package called dask-mpi
that uses MPI to create the cluster. Note that dask-mpi
only uses MPI to start the cluster, not for inter-node communication.
Dask-MPI provides two interfaces to launch Dask, either from a batch script using the Python API, or from the command line.
+Here we show a simple example that uses Dask-MPI with a batch script. Make sure that you have installed dask-mpi
following the Installation Instructions. Create dask_mpi_example.py
and dask_mpi_launcher.sh
with the contents below. In dask_mpi_launcher.sh
, replace <project>
with your allocation.
dask_mpi_example.py
from dask_mpi import initialize
+from dask.distributed import Client
+import socket
+import time
+from collections import Counter
+
+def test():
+ return socket.gethostname()
+
+def main():
+ initialize(nthreads=5)
+ client = Client()
+ time.sleep(15)
+
+ result = []
+
+ for i in range (0,100):
+ result.append(client.submit(test).result())
+ time.sleep(1)
+
+ out = str(Counter(result))
+ print(f'nodes: {out}')
+
+main()
+
dask_mpi_launcher.sh
#!/bin/bash
+#SBATCH --nodes=2
+#SBATCH --ntasks=4
+#SBATCH --time=10
+#SBATCH --account=<project>
+
+srun -n 4 python dask_mpi_example.py
+
The job is then launched as:
+sbatch dask_mpi_launcher.sh
+
Warning
+We have observed errors such as distributed.comm.core.CommClosedError
when using dask-mpi
. These errors may be related to known issues such as GitHub Issue #94. Users that experience issues with dask-mpi
are encouraged to use dask-jobqueue
instead.
Links to External resources:
Bash (Bourne Again Shell) is one of the most widely available and used command line shell applications. Along with basic shell functionality, it offers a wide variety of features which, if utilized thoughtfully, can create powerful automated execution sequences that run software, manipulate text and files, parallelize otherwise single-process software, or anything else you may want to do from the command line.
+Shell scripts are also one of the most common ways our HPC community submits jobs, and running a large parallel workload often requires some initialization of the software environment before meaningful computations can begin. This typically involves tasks such as declaring environment variables, preparing input files or staging directories for data, loading modules and libraries that the software needs to run, preparing inputs, manipulating datasets, and so on. Bash can even be used to launch several single-core jobs, effectively taking on the role of an ad hoc batch executor, as well.
+This article provides a brief introduction to bash, as well as a list of tips, tricks, and good practices when it comes to writing effective bash scripts that can apply widely in both HPC and non-HPC environments. We will also provide links to some additional resources to help further your bash scripting skills.
+All bash commands work at the command prompt "live", i.e. interpreted line-by-line as you type commands and press enter. A bash "script" may be regarded as a list of bash commands that have been saved to a file for convenience, usually with some basic formatting, and possibly comments, for legibility.
+All bash scripts must begin with a special character combination, called the "shebang" or #!
character, followed by the name of an interpreter:
#!/bin/bash
This declares that the contents of the file that follow are to be interpreted as commands, using /bin/bash
as the interpreter. This includes commands, control structures, and comments.
Plenty of other interpreters exist. For example, Python scripts begin with: #!/usr/bin/python
or /usr/bin/env python
, perl scripts: #!/usr/bin/perl
, and so on.
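Putting this together, a minimal script might look like the following sketch (the file name and its contents are only an example):
#!/bin/bash
# hello.sh - a small example script
NAME="HPC user"            # declare a variable
echo "Hello, ${NAME}!"     # print a greeting using the variable
date                       # run an ordinary command
After saving the file, mark it executable with chmod +x hello.sh and run it with ./hello.sh.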
If you read a bash script, you may be tempted to default to your usual understanding of how code generally works. For example, with most languages, typically there is a binary or kernel which digests the code you write (compilers/gcc for C, the python interpreter/shell, Java Virtual Machine for Java, and so on.) The binary/kernel/interpreter then interprets the text into some sort of data structure which enforces the priority of certain commands over others, and finally generates some execution of operations based on that data structure.
+Bash isn't too far off from this model, and in some respects functions as any other interpreted language: you enter a command (or a control structure) and it is executed.
+However, as a shell that also serves as your major interface to the underlying operating system, it does have some properties and features that may blur the lines between what you think of as 'interpreted' versus 'compiled'.
+For instance, many aspects of the bash "language" are actually just the names of pre-compiled binaries which do the heavy lifting. Much the same way you can run python
or ssh
in a command line, under the hood normal bash operations such as if
, echo
, and exit
are actually just programs (or shell builtins that behave like programs) that expect a certain cadence for the arguments you give them. Take a block such as:
if true; then echo "true was true"; fi
+
A good example is the program [
which is just an oddly-named command you can invoke. Try running which [
at a command prompt. The results may surprise you: /usr/bin/[
is actually a compiled program on disk, not a "built-in" function!
This is why you need to have a space between the brackets and your conditional, because the conditional itself is passed as an argument to the command [
. In languages like C it's common to write the syntax as if (conditional) { ...; }
. However, in bash, if you try to run if [true]
you will likely get an error saying there isn't a command called [true]
that you can run. This is also why you often see stray semicolons that seem somewhat arbitrary, as semicolons separate the execution of two binaries. Take this snippet for example:
+
echo "First message." ; echo "Second message."
+
echo "First message."
+echo "Second message."
+
echo
would be interpreted as an argument to the first echo and would end up outputting: First message. echo Second message.
+Bash interprets ;
and \n
(newline) as separators. If you need to pass these characters into a function (for example, common in find
's -exec
flag) you need to escape them with a \
. This is useful for placing arguments on separate lines to improve readability like this example:
+
chromium-browser \
+--start-fullscreen \
+--new-window \
+--incognito \
+'https://google.com'
+
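As a sketch of the escaped separator mentioned above (the directory and file pattern are hypothetical), the \; below is passed to find itself rather than being treated by bash as a command separator:
find ./results -name '*~' -exec rm {} \;   # one rm invocation per matching file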
Similarly, normal if-then-else control flow that you would expect of any programming/scripting language has the same caveats. Consider this snippet: +
if true
+then
+ echo "true is true"
+else
+ echo "false is true?"
+fi
+
- if invokes the command true, which always exits with a successful exit code (0).
- if interprets a successful exit code (0) as a truism and runs the then branch.
- The then command will execute anything it's given until it reaches else, elif, or fi.
- The else command is the same as then, but will only execute if the command given to if returned an erroneous exit code.
- The fi command indicates that no more conditional branches exist relative to the logical expression given to the original if.
All this to say, this is why you often see if-then-else blocks written succinctly as if [ <CONDITIONAL> ]; then <COMMANDS>; fi with seemingly arbitrary semicolons and spaces. It is exactly because things work this way that bash is able to execute arbitrary executables (some of which you may end up writing) without requiring something like Python's subprocess module.
This is just to give you an understanding of why some of the syntax you will encounter is the way it is. Everything in bash is either a command or an argument to a command.
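For example, because if only inspects an exit code, any command can serve as the conditional. A small sketch (the log file name is hypothetical):
grep -q "ERROR" job.log
echo "grep exited with status $?"    # 0 means a match was found, 1 means no match
if grep -q "ERROR" job.log; then
    echo "found errors in job.log"
fi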
Bash utilizes many flavors of symbolic enclosures. A complete guide is beyond the scope of this document, but you may see the following (a few combined examples appear after this list):
- ( ) - Single parentheses: run enclosed commands in a subshell. Example: a='bad';(a='good'; mkdir $a); echo $a (result: directory "good" is made, echoes "bad" to screen)
- $( ) - Single parentheses with dollar sign: subshell output to string (command substitution; preferred method). Example: echo "my name is $( whoami )" (result: prints your username)
- <( ) - Parentheses with angle bracket: process substitution. Example: sort -n -k 5 <( ls -l ./dir1) <(ls -l ./dir2) (result: sorts the ls -l results of two directories by column 5, size)
- [ ] - Single brackets: truth testing with filename expansion or word splitting. Example: if [ -e myfile.txt ]; then echo "yay"; else echo "boo"; fi (result: if myfile.txt exists, celebrate)
- { } - Single braces/curly brackets: expansion of a range
- ${ } - Single braces with dollar sign: expansion with interpolation
- ` ` - Backticks: command/process substitution
- (( )) - Double parentheses: integer arithmetic
- $(( )) - Double parentheses with dollar sign: integer arithmetic to string
- [[ ]] - Double brackets: truth testing with regex
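A few combined examples of the enclosure types that have no example above (all file names and values here are arbitrary):
echo file{1..3}.txt              # { }: brace expansion prints file1.txt file2.txt file3.txt
FRUIT="apple"
echo "I ate ${FRUIT}s"           # ${ }: interpolation inside a string
echo "today is `date +%A`"       # backticks: command substitution (older style)
echo $(( 6 * 7 ))                # $(( )): integer arithmetic, prints 42
if [[ "$FRUIT" =~ ^a ]]; then echo "starts with a"; fi   # [[ ]]: regex truth test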
( ) (Single Parentheses)
There are 3 features in Bash which are denoted by a pair of parentheses: Bash subshells, Bash array declarations, and Bash function declarations. See the table below for when each feature is enacted:
| Syntax | Bash Feature |
|---|---|
| Command/line begins with ( | Run the contained expression(s) in a subshell. This will pass everything until a closing ) to a child-fork of Bash that inherits the environment from the invoking Bash instance, and exits with the exit code of the last command the subshell exited with. See the section on subshells for more info. |
| A valid Bash identifier is set equal to a parenthetically enclosed list of items (e.g. arr=("a" "b" "c")) | Creates a Bash array with elements enclosed by the parentheses. The default indexing of the elements is numerically incremental from 0 in the given order, but this order can be overridden or string-based keys can be used. See the section on arrays for more info. |
| A valid Bash identifier is followed by () and contains some function(s) enclosed by { } (i.e. func() { echo "test"; }) | Declares a function which can be reused throughout a Bash script. See either "{ }" or functions for more info. |
Note that whitespace is required, prohibited, or ignored in certain situations. See this block for specific examples of how to use whitespace in the various contexts of parentheses.
### Subshells
+(echo hi) # OK
+( echo hi) # OK
+(echo hi ) # OK
+( echo hi ) # OK
+
+### Arrays
+arr=("a" "b" "c") # Array of 3 strings
+arr =("a" "b" "c") # ERROR
+arr= ("a" "b" "c") # ERROR
+arr = ("a" "b" "c") # ERROR
+arr=("a""b""c") # Array of one element that is "abc"
+arr=("a","b","c") # Array of one element that is "a,b,c"
+arr=("a", "b", "c") # ${arr[0]} == "a,"
+
+### Functions
+func(){echo hi;} # ERROR
+func(){ echo hi;} # OK
+func (){ echo hi;} # OK
+func () { echo hi;} # OK
+func () { echo hi; } # OK
+
| Command | Behavior |
|---|---|
| (ls -1 \| head -n 1) | Run the command in a subshell. This will return the exit code of the last process that was run. |
| test_var=(ls -1) | Create a bash array with the elements ls and -1, meaning ${test_var[1]} will evaluate to -1. |
| test_var=$(ls -1) | Evaluate ls -1 and capture the output as a string. |
| test_var=(`ls -1`) or test_var=($(ls -1)) | Evaluate ls -1 and capture the output as an array. |
Correct:
- [ cmd ] - There must be spaces or terminating characters (\n or ;) surrounding any brackets. Like many common bash commands, "[" is actually a standalone executable, usually located at /usr/bin/[, so it requires spaces to invoke correctly.
Erroneous:
- [cmd] - tries to find a command called [cmd], which likely doesn't exist
- [cmd ] - tries to find a command called [cmd and pass ] as an argument to it
- [ cmd] - tries to pass cmd] as an argument to [, which expects a final argument of ] that isn't technically provided
There are many other examples of using enclosures in bash scripting beyond the scope of this introduction. Please see the resources section for more information.
+Variable assignment in bash is simply to assign a value to a string of characters. All subsequent references to that variable must be prefixed by $
:
$ MYSTRING="a string"
+$ echo $MYSTRING
+a string
+$ MYNUMBER="42"
+$ echo $MYNUMBER
+42
+
When you declare a variable in bash, that variable is only available in the shell in which it is declared; if you spawn a sub-shell, the variable will not be accessible. Using the export
command, you can essentially declare the variable to be inheritable.
# without exporting:
+$ TESTVAR=100
+$ echo $TESTVAR
+100 # returns a result
+$ bash # spawn a sub-shell
+$ echo $TESTVAR
+ # no result
+$ exit # exit the subshell
+# with exporting:
+$ export TESTVAR=100
+$ echo $TESTVAR
+100 # returns a result
+$ bash # spawn a sub-shell
+$ echo $TESTVAR
+100 # value is passed into the subshell
+$ exit # exit the subshell
+$
+
"Source" (shortcut: .
) is a built-in bash command that takes a bash script as an argument. Bash will execute the contents of that file in the current shell, instead of spawning a sub-shell. This will load any variables, function declarations, and so on into your current shell.
A common example of using the source
command is when making changes to your ~/.bashrc
, which is usually only parsed once upon login. Rather than logging out and logging back in every time you wish to make a change, you can simply run source ~/.bashrc
or . ~/.bashrc
and the changes will take effect immediately.
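As a small sketch (the file name and variable are hypothetical), sourcing a file makes its definitions available in the current shell:
# myvars.sh contains the line: export DATA_DIR=/scratch/$USER/data
source myvars.sh      # or equivalently: . myvars.sh
echo $DATA_DIR        # the variable is now set in this shell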
Variable typing in bash is implicit, and the need to declare a type is rare, but the declare
command can be used when necessary:
+
$ declare -i MYNUMBER # set type as an integer
+$ echo $MYNUMBER
+0
+$ declare -l MYWORD="LOWERCASE" # set type as lowercase
+$ echo $MYWORD
+lowercase
+$
+
Run help declare at the command line for more information on the types that can be declared.
+NREL HPC Github - User-contributed bash script and examples that you can use on HPC systems.
+BASH cheat sheet - A concise and extensive list of example commands, built-ins, control structures, and other useful bash scripting material.
"C++ is a general-purpose programming language providing a direct and efficient model of hardware combined with facilities for defining lightweight abstractions." - Bjarne Stroustrup, "The C++ Programming Language, Fourth Edition"
+This section illustrates the process to compile and run a basic C++ program on the HPC systems.
+Begin by creating a source file named hello.cpp
with the following contents:
#include <iostream>
+
+int main(void) {
+ std::cout << "Hello, World!\n";
+ return 0;
+}
+
Next, we must select the compiler to use for compiling our program. We can choose among GNU, Intel, and Cray compilers, depending on the system that we are using (see Compilers and Toolchains). To see available modules and versions, use module avail
. For this example, we will use the g++
compiler, which is part of GNU's gcc
package. We will load the default version of the compiler, which in this case is gcc 10.1:
$ module load gcc
+$ module list
+Currently Loaded Modules:
+ 1) gcc/10.1.0
+$ gcc --version | head -1
+gcc (Spack GCC) 10.1.0
+
With the gcc
package, the C++ compiler is provided by the g++
command. To compile the program, run:
$ g++ hello.cpp -o hello
+
This creates an executable named hello
. Now run the program and observe the output:
$ ./hello
+Hello, World!
+
The following is a summary of available compilers and toolchains. Users are encouraged to run module avail
to check for the most up-to-date information on a particular system.
| Toolchain | C++ Compiler | Module | Systems |
|---|---|---|---|
| gcc | g++ | gcc | All |
| Intel | icpc | intel-oneapi-compilers | Swift, Vermilion, Kestrel |
| Intel | icpc | comp-intel | Eagle |
| Cray | CC | PrgEnv-cray | Kestrel |
Note that Kestrel also provides the PrgEnv-intel
and PrgEnv-gnu
modules, which combine the Intel or gcc compilers together with Cray MPICH. Please refer to Kestrel Programming Environments Overview for details about the programming environments available on Kestrel.
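For example, to build the same program with the Cray toolchain listed in the table above, a sketch might look like this (module and wrapper names as shown in the table; installed versions will vary):
module purge
module load PrgEnv-cray
CC hello.cpp -o hello_cray    # CC is the Cray C++ compiler wrapper
./hello_cray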
For information specific to compiling MPI applications, refer to MPI.
Learn how to run the R statistical computing environment software.
+R is an open-source programming language designed for statistical computing and graphics. It is the current standard for the development of new statistical methodologies and enjoys a large user base.
+For more information related to the R project, see the R website.
+The supported method for using R on the HPC systems is via Anaconda. In order to access R, first load the anaconda module (on Kestrel, this is module load anaconda3
). Then, create a new conda environment that contains at least the r-base
package. Optionally, install the r-essentials
bundle, which provides many of the most popular R packages for data science.
For example, to create and activate a new environment named r_env
on Kestrel that includes the r-essentials
bundle:
module load anaconda3
+conda create -n r_env r-essentials r-base
+conda activate r_env
+
For more information about using R in the Anaconda framework, see Using R language with Anaconda.
+Note
+To avoid possible conflicts, remove any Intel compiler modules before loading R. One way to do this is via the following:
+$ module purge
+$ module load anaconda3
+
R on Eagle
+On Eagle, after loading the Anaconda module via module load conda
, R is
+available in the base conda environment. However, it is still possible to
+install R into a custom environment as outlined above.
R is most commonly used via an interactive shell. To do this, first request an interactive compute node (see running interactive jobs) using the srun
command. Alternatively, R can be used through Europa running Jupyterhub. For more details, see Jupyterhub.
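A minimal sketch of requesting an interactive session with srun (replace the account, and adjust the time and node count to suit your work):
srun --account=<your_allocation_id> --time=01:00:00 --nodes=1 --pty $SHELL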
Once on a compute node, R environments can be accessed through Anaconda as described above. To access the R interactive console, type R at the command line. You will be prompted with the familiar R console in your terminal window:
+$ R
+
+R version 4.0.5 (2021-03-31) -- "Shake and Throw"
+Copyright (C) 2021 The R Foundation for Statistical Computing
+Platform: x86_64-conda-linux-gnu (64-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under certain conditions.
+Type 'license()' or 'licence()' for distribution details.
+
+ Natural language support but running in an English locale
+
+R is a collaborative project with many contributors.
+Type 'contributors()' for more information and
+'citation()' on how to cite R or R packages in publications.
+
+Type 'demo()' for some demos, 'help()' for on-line help, or
+'help.start()' for an HTML browser interface to help.
+Type 'q()' to quit R.
+
Since running R programs line by line in the interactive console can be a little tedious, it is often better to combine R commands into a single script and have R execute them all at once. R scripts are text files containing R commands with file extension .R:
+hello_world.R
+message = "Hi there!"
+nums = sample(1:100, 5)
+cat(message, "\n")
+cat("Here are some random numbers: ", paste(nums, sep = ", "),"\n")
+
There are several options for running R scripts:
+The source() function will execute R scripts from inside the interactive console.
+> source("hello_world.R")
+ Hi there!
+ Here are some random numbers: 100 41 14 82 63
+
The Rscript command can be used to run R scripts from the command line. Output is piped to the stdout.
+$ Rscript hello_world.R
+Hi there!
+Here are some random numbers: 71 37 50 24 90
+
R CMD BATCH is an older function that behaves similar to Rscript. All output is piped to a corresponding .Rout file.
+$ R CMD BATCH --no-site-file hello_world.R
+$ cat hello_world.Rout
+
+> #hello_world.R
+>
+> message = "Hi there!"
+> nums = sample(1:100, 5)
+> cat(message, "\n")
+Hi there!
+> cat("Here are some random numbers: ", paste(nums, sep = ", "),"\n")
+Here are some random numbers: 41 51 61 70 43
+>
+> proc.time()
+ user system elapsed
+ 0.188 0.024 0.277
+
Another option for using R on the HPC systems is to submit batch jobs to be run on non-interactive nodes.
+An example job script for running the hello_world.R example is below (make sure to update your allocation name as well as the name of the conda environment where R has been installed):
+#! /bin/bash
+#SBATCH --job-name=helloworld
+#SBATCH --nodes=1
+#SBATCH --time=60
+#SBATCH --account=<your_allocation_id>
+
+module purge
+module load anaconda3
+conda activate <r_env>
+Rscript hello_world.R
+
R is a popular open-source language with an active development community. New versions of R are frequently released. Any version can be installed into a custom anaconda environment. Commands for using other versions are shown below:
+$ conda search r-essentials
+Loading channels: done
+# Name Version Build Channel
+r-essentials 1.0 r3.2.1_0 pkgs/r
+r-essentials 1.0 r3.2.1_0a pkgs/r
+r-essentials 1.1 r3.2.1_0 pkgs/r
+r-essentials 1.1 r3.2.1_0a pkgs/r
+r-essentials 1.1 r3.2.2_0 pkgs/r
+r-essentials 1.1 r3.2.2_0a pkgs/r
+r-essentials 1.1 r3.2.2_1 pkgs/r
+r-essentials 1.1 r3.2.2_1a pkgs/r
+r-essentials 1.4 0 pkgs/r
+r-essentials 1.4.1 r3.3.1_0 pkgs/r
+r-essentials 1.4.2 0 pkgs/r
+r-essentials 1.4.2 r3.3.1_0 pkgs/r
+r-essentials 1.4.3 r3.3.1_0 pkgs/r
+r-essentials 1.5.0 0 pkgs/r
+r-essentials 1.5.1 0 pkgs/r
+r-essentials 1.5.2 r3.3.2_0 pkgs/r
+r-essentials 1.5.2 r3.4.1_0 pkgs/r
+r-essentials 1.6.0 r3.4.1_0 pkgs/r
+r-essentials 1.7.0 r342hf65ed6a_0 pkgs/r
+r-essentials 3.4.3 mro343_0 pkgs/r
+r-essentials 3.4.3 r343_0 pkgs/r
+r-essentials 3.5.0 mro350_0 pkgs/r
+r-essentials 3.5.0 r350_0 pkgs/r
+r-essentials 3.5.1 mro351_0 pkgs/r
+r-essentials 3.5.1 r351_0 pkgs/r
+$ conda create -n otherr r-essentials==3.5.1
+<Text>
+$ . activate otherr
+(otherr) $ R --version
+R version 3.5.1 (2018-07-02) -- "Feather Spray"
+Copyright (C) 2018 The R Foundation for Statistical Computing
+Platform: x86_64-pc-linux-gnu (64-bit)
+
+R is free software and comes with ABSOLUTELY NO WARRANTY.
+You are welcome to redistribute it under the terms of the
+GNU General Public License versions 2 or 3.
+For more information about these matters see
+http://www.gnu.org/licenses/.
+
The install.packages()
command in R will download new packages from the CRAN source directory and install them for your account. If you are running R from within a custom Anaconda environment, they will be specific to that environment. In either case, these packages will not be visible to other users.
+The command installed.packages() in R lists details about all packages that are installed and visible to the current R session.
Packages are loaded into the current R environment through the library()
function.
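Packages can also be installed non-interactively from the shell, which is convenient inside batch scripts; a sketch (the package name and CRAN mirror are only examples):
conda activate <r_env>
Rscript -e 'install.packages("ggplot2", repos = "https://cran.r-project.org")'
Rscript -e 'library(ggplot2); packageVersion("ggplot2")'   # confirm the install worked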
R is commonly used to produce high-quality graphics based on data. This capability is built-in and can be extended through the use of packages such as ggplot2. To produce graphics on the HPC systems, the easiest method is to output graphical displays to an appropriate filetype (pdf, jpeg, etc.). Then this file can be moved to your local machine using command line tools such as scp or rsync.
+library(ggplot2)
+set.seed(8675309)
+numbers = rnorm(200, sd = 2)
+more.numbers = rnorm(100, mean = 10, sd = 2)
+
+df = data.frame(values = c(numbers, more.numbers))
+
+p = ggplot(df, aes(x = values, y = ..density..)) +
+ geom_histogram(fill = "dodgerblue",
+ colour = "black",
+ alpha = .5,
+ binwidth = .5) +
+ geom_density(size = 1.5) +
+ labs(y = "Density", x = "Value",
+ title = "Histogram Example")
+
+png(file = "histogram_example.png")
+print(p)
+dev.off()
+
Programming in R on the HPC systems has two distinct advantages. First, running jobs on a remote system means you do not have to tie up your local machine. This can be particularly useful for jobs that take considerable time and resources to run. Secondly, the increased computational capabilities of the HPC system provide an opportunity to improve performance through parallel processing. R code, like many programming languages, is typically written and executed serially. This means that the added benefits of having multiple processing cores available are typically lost.
+A major goal of the R community in recent years has been the development of specialized libraries and programming paradigms to better leverage modern HPC systems. The CRAN task view for High Performance Computing and Parallel Programming contains a detailed list of packages that address various aspects of these problems. For more information, see CRAN Task View: High-Performance and Parallel Computing with R.
+Notable examples are:
+Each package includes in-depth documentation and examples for how to implement parallel processing in R code. Learning these packages does require a moderate amount of time, but for many large problems the improvements in computational efficiency dramatically outweighs the initial investment.
+Most of these packages will have to be installed in a custom environment as many dependencies are incompatible with the version of openmpi installed in conda.
+The pbdR project "enables high-level distributed data parallelism in R, so that it can easily utilize large HPC platforms with thousands of cores, making the R language scale to unparalleled heights." There are several packages within this project: pbdMPI for easy MPI work, pbdDMAT for distributed data matrices and associated functions, and pbdDEMO for a tutorial/vignette describing most of the project's details.
+The pbdMPI
package provides the MPI interface, which requires Open MPI. Note that Open MPI must be loaded prior to installing the package. For example, on Kestrel:
$ module load openmpi/4.1.5-gcc
+$ R
+> install.packages("pbdMPI")
+
The following script is a ranknode.R example using the pbdMPI package:
+library(pbdMPI, quiet = TRUE)
+init()
+.comm.size <- comm.size()
+.comm.rank <- comm.rank()
+.hostname <- Sys.info()["nodename"]
+msg <- sprintf("I am %d of %d on %s.\n", .comm.rank, .comm.size, .hostname)
+comm.cat(msg, all.rank = TRUE, quiet = TRUE)
+comm.cat(msg, rank.print = sample(0:.comm.size, size = 1))
+comm.cat(msg, rank.print = sample(0:.comm.size, size = 1), quiet = TRUE)
+finalize()
+
You could run this interactively from a compute node or by submitting it to the job scheduler using a shell script similar to the one given below. For example, you would submit it using sbatch ranknode.sh from a login node, provided you name the script appropriately:
+#!/bin/bash
+#SBATCH --nodes=2
+#SBATCH --ntasks-per-node=24
+#SBATCH --time=5
+#SBATCH --account=<your_allocation_id>
+
+module purge
+module load anaconda3
+module load openmpi/4.1.5-gcc
+conda activate <r_env>
+
+INPUT_BASENAME=ranknode # JOB NAME - USER INPUT PARAMETER
+JOB_FILE=$INPUT_BASENAME.R
+OUT_FILE=$INPUT_BASENAME.Rout
+srun -n 48 Rscript $JOB_FILE > $OUT_FILE
+
In either case (interactive or queue submission), the output produced from the ranknode.R script should look like this:
+I am 0 of 48 on x1004c0s2b0n0.
+I am 1 of 48 on x1004c0s2b0n0.
+I am 2 of 48 on x1004c0s2b0n0.
+...
+I am 46 of 48 on x1004c0s2b0n1.
+I am 47 of 48 on x1004c0s2b0n1.
+I am 42 of 48 on x1004c0s2b0n1.
+I am 45 of 48 on x1004c0s2b0n1.
+
For questions on statistics, the R software environment itself, or advanced R package questions, please contact Lindy Williams.
+Additionally, NREL has an internal R Users Group that meets periodically to highlight interesting packages, problems, and share experiences related to R programming. For more details, contact Daniel Inman.
+Documentation: FFTW
+FFTW is a C library for computing discrete Fourier transforms of arbitrary input sizes and dimensions. It is optimized for speed and can perform discrete Fourier transforms up to several orders of magnitude faster than other commonly available Fourier transform libraries. FFTW supports both single-precision and double-precision transforms, as well as multithreading for parallel execution on shared-memory systems.
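As a sketch of building against FFTW on an NREL system (the module name, the source file, and the assumption that the module exposes the usual include and library paths are all illustrative; run module avail fftw to see what is actually installed):
module load fftw
gcc fft_example.c -lfftw3 -lm -o fft_example   # double-precision interface
# the threaded interface additionally links -lfftw3_threads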
Documentation: HDF5
+HDF5 is a versatile data storage and management library designed for storing and exchanging large and complex data collections. It provides a powerful and flexible data model for representing and organizing data, as well as a variety of high-level programming interfaces for accessing and manipulating data. HDF5 supports a wide range of data types and can handle data sets of virtually unlimited size.
+HDF5 supports both parallel and serial file I/O, achieving high performance with both.
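As a sketch, HDF5 installations typically ship compiler wrappers that add the correct include and link flags (the module name and source files below are assumptions):
module load hdf5
h5cc  my_serial_io.c   -o my_serial_io     # serial HDF5 program
h5pcc my_parallel_io.c -o my_parallel_io   # parallel (MPI) HDF5 program, when the build provides it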
This page is a tutorial explaining how to include scientific libraries when compiling software.
+There are a few common scientific libraries: LAPACK, BLAS, BLACS, scaLAPACK, FFTW, HDF5, and others. These libraries are generally highly optimized, and many scientific programs favor use of these libraries over in-house implementations of similar functionality. See our libraries overview page for more information.
+Scientific libraries can be packaged together, like in the Intel Math Kernel Library (MKL), or Cray’s LibSci. They can also be built completely separately and act as standalone libraries. These libraries can be built with different MPI implementations and compiler choices.
+If you’re building a code that relies on one or more of these libraries, you can choose how to include these libraries. By the end of this tutorial, how to include these libraries should be clearer. If you need help building a particular package on an NREL machine, please contact HPC help.
+Build tools like make, autoconf, and cmake are convenient ways to automate the compilation of a code. If you’re building a package, you may need to modify/customize how the code compiles, e.g., so it finds and includes the libraries you want. This may involve directly modifying the makefile, modifying the make.include (or make.inc, makefile.include, etc.) file, or using tools like autoconf or CMake to configure the makefile.
+Modifying a makefile (or make.include, etc.) so it compiles using the scientific libraries you want can be a daunting process. We’ll go through a prototypical example and show how different libraries can be included in the build of a program. To do this, we’ll use a makefile.include file for the electronic structure program VASP.
+Note
+We provide a walkthrough of linking scientific libraries using the VASP code as an example. This walkthrough tries to demonstrate key features of the general process of including scientific libraries in a build. We note that the exact build and modification process will vary between codes. Consulting the documentation of the code you’re trying to build is always the best place to start.
+We’ll use the VASP makefile.include file as our walkthrough example. We can find a number of VASP makefile.include files here. We’ll be looking specifically at this file.
+We’ll take a look at building with Intel MKL and the HDF5 package.
+We want to build with MKL and HDF5. If we look at the VASP documentation, we see that LAPACK, scaLAPACK, BLAS, and FFTW are required. MKL covers all of these needs. Thus, we need to tell the makefile where to look for MKL.
+We need our MKL to be built with the same compilers and MPI implementation as we’re building VASP with. Let’s see what sorts of MKL builds are available to us. Using the following command to show what builds of mkl are available as a module:
+module avail 2>&1 | grep mkl
Yields the output:
+intel-oneapi-mkl/2023.0.0-intel ucx/1.13.0
Thus, if we want to use the toolchains managed by NREL, we must use the Intel oneapi toolchain in our VASP build, since intel-oneapi-mkl/2023.0.0-intel
is the only available mkl module. If you want to use a different toolchain, you could build MKL yourself, but that’s outside the scope of this article.
To “use the Intel oneapi toolchain” means to use Intel compilers and Intel’s implementation of MPI to compile VASP. We’re doing this because mkl was built with this toolchain, and we want our toolchains to match as best as possible to minimize build errors and bugs.
+Let’s prepare our environment to use this toolchain. First,
+module purge
To clear your environment. Now, we want the Intel oneapi mkl module, the Intel fortran compiler (ifort), and the Intel MPI fortran compiler (mpiifort). Type:
+module avail 2>&1 | grep oneapi
to see which modules are related to the intel-oneapi toolchain. We can locate the three we want:
+module load intel-oneapi-mkl/2023.0.0-intel
+module load intel-oneapi-mpi/2021.8.0-intel
+module load intel-oneapi/2022.1.0
+
How do we know these are the ones we want? The first line loads the mkl module. The second line gives us mpiifort, the Intel MPI fortran compiler, and the third line gives us ifort, the Intel Fortran compiler. (test the latter two with which mpiifort
and which ifort
-- you’ll see that they’re now in your path. If you module purge
and try which mpiifort
again, you’ll see you’re not able to find mpiifort anymore.)
Now that we have the toolchain loaded into our environment, let’s take a look at the actual makefile.include file (link to file here). There are two important sections for the purpose of getting the code to build. The first:
+CPP = fpp -f_com=no -free -w0 $*$(FUFFIX) $*$(SUFFIX) $(CPP_OPTIONS)
+FC = mpiifort -qopenmp
+FCL = mpiifort
+
The first line says that the compiler pre-processor will be fpp (try which fpp
and you should get an output /sfs/nopt/nrel/apps/compilers/01-23/spack/opt/spack/linux-rhel8-icelake/gcc-8.4.0/intel-oneapi-compilers-2022.1.0-wosfexnwo5ag3gyfoco2w6upcew5yj6f/compiler/2022.1.0/linux/bin/intel64/fpp
, confirming that we’re pulling fpp from intel-oneapi).
The second and third lines say that we’ll be using Intel’s MPI (Try which mpiifort
to confirm that it is in your path). FC is the “Fortran Compiler” and FCL is the corresponding linker. Line 14 additionally says we’ll be compiling with openmp. Different compilers have different executable names (e.g. mpiifort for Intel MPI fortran compiler, mpifort for GNU). See the Fortran documentation page for a complete list.
The next important section is given below:
+# Intel MKL (FFTW, BLAS, LAPACK, and scaLAPACK)
+# (Note: for Intel Parallel Studio's MKL use -mkl instead of -qmkl)
+FCL += -qmkl
+MKLROOT ?= /path/to/your/mkl/installation
+LLIBS += -L$(MKLROOT)/lib/intel64 -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64
+INCS =-I$(MKLROOT)/include/fftw
+
This makefile.include file has been provided to us by VASP. Our job here is two-fold:
+MKLROOT
in the makefile.include file is set correctly.MKLROOT
.To do step 1, first type:
+module list
To see the modules you’ve loaded into your environment. You should have intel-oneapi-mkl/2023.0.0-intel
in the list. If not, review the environment preparation section. Now, we use the module show
command to find the root directory of mkl:
module show intel-oneapi-mkl/2023.0.0-intel
We see in the output of this command the following line:
+setenv MKLROOT /sfs/nopt/nrel/apps/libraries/01-23/spack/opt/spack/linux-rhel8-icelake/intel-2021.6.0/intel-oneapi-mkl-2023.0.0-gnkrgwyxskxitvptyoubqaxlhh2v2re2/mkl/2023.0.0
If we type echo $MKLROOT
, we can confirm that this environment variable is properly set from when we ran the command module load intel-oneapi-mkl/2023.0.0-intel
. In the VASP makefile, we have MKLROOT ?= /path/to/your/mkl/installation
. The ?= means that this variable will not be set if MKLROOT
has already been set. So, we can ignore this line if we’d like. However, to be safe, we should simply copy the path of the MKL root directory to this line in makefile.include, so that this line now reads:
MKLROOT ?= /sfs/nopt/nrel/apps/libraries/01-23/spack/opt/spack/linux-rhel8-icelake/intel-2021.6.0/intel-oneapi-mkl-2023.0.0-gnkrgwyxskxitvptyoubqaxlhh2v2re2/mkl/2023.0.0
Tip
+The name of the environment variable for mkl’s root directory set by its module (MKLROOT
, set when we module load intel-oneapi-mkl/2023.0.0-intel
) is not necessarily going to match the corresponding root directory variable in a given makefile. It did in this instance, but that’s not guaranteed. The VASP makefile.include could have just as easily used MKL_ROOT
, instead of MKLROOT
. This is one reason why it’s safer to use module show
to find the path of the root directory, then copy this path into the makefile, rather than rely on environment variables.
To do step 2, we should first look at the contents of $MKLROOT
. To show the contents of the MKL directory, type
ls /sfs/nopt/nrel/apps/libraries/01-23/spack/opt/spack/linux-rhel8-icelake/intel-2021.6.0/intel-oneapi-mkl-2023.0.0-gnkrgwyxskxitvptyoubqaxlhh2v2re2/mkl/2023.0.0
We should obtain the following output:
+benchmarks bin env examples include interfaces lib licensing modulefiles tools
If we look closely at the makefile, we see beneath the MKLROOT
line the following:
+
MKLROOT ?= /sfs/nopt/nrel/apps/libraries/01-23/spack/opt/spack/linux-rhel8-icelake/intel-2021.6.0/intel-oneapi-mkl-2023.0.0-gnkrgwyxskxitvptyoubqaxlhh2v2re2/mkl/2023.0.0
+LLIBS += -L$(MKLROOT)/lib/intel64 -lmkl_scalapack_lp64 -lmkl_blacs_intelmpi_lp64
+
the LLIBS
line is telling make which libraries in particular to pick out.
So, we want to go into the lib directory, and then the intel64 directory (since LLIBS is pointing to $MKLROOT/lib/intel64
). Let's see what's inside with the ls
command:
ls /sfs/nopt/nrel/apps/libraries/01-23/spack/opt/spack/linux-rhel8-icelake/intel-2021.6.0/intel-oneapi-mkl-2023.0.0-gnkrgwyxskxitvptyoubqaxlhh2v2re2/mkl/2023.0.0/lib/intel64
There's a lot of stuff in this directory! VASP helps us by telling us we need the mkl_scalapack_lp64
and mkl_blacs_intelmpi_lp64
builds specifically. You won't always be told exactly which libraries, and figuring this out, if the information is not provided to you in the package documentation, can require some tinkering.
In general, the .a
extension is for static linking, and the .so
extension is for dynamic linking. For MKL in particular, the part ilp64
vs lp64
refer to two different interfaces to the MKL library.
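To see which variants are present, you can list the directory and filter for the library you care about; for example:
ls $MKLROOT/lib/intel64 | grep scalapack
# expect to see static (.a) and dynamic (.so) versions of both the lp64 and ilp64 interfaces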
Tip
+Notice that, inside $MKLROOT/lib/intel64
, the filenames all start with libmkl
, but in our makefile, we reference lmkl_scalapack_lp64
. That's not a file in $MKLROOT/lib/intel64
, but libmkl_scalapack_lp64.so
is. The notation is that "big L" references the directories that the libraries are in, and the "little l" references the particular libraries. For example:
+
LLIBS += -L$(MKLROOT)/lib/intel64 -lmkl_scalapack_lp64
This is just a convention, but it is important to get right because your compile will fail otherwise.
Now that we have the correct MKLROOT
set in the makefile.include, and we have an idea about how it's referencing the libraries within, we can move on to linking the HDF5 library.
Because HDF5 is an optional library, we could compile the code now if we wanted to. However, for the sake of practice, let’s uncomment the block in the makefile.include file related to HDF5 and repeat the exercise of linking a library:
+# HDF5-support (optional but strongly recommended)
+CPP_OPTIONS+= -DVASP_HDF5
+HDF5_ROOT ?= /path/to/your/hdf5/installation
+LLIBS += -L$(HDF5_ROOT)/lib -lhdf5_fortran
+INCS += -I$(HDF5_ROOT)/include
+
Our job, again, is to give the makefile the correct directions to our library. In this case, it’s HDF5. Let’s see which HDF5 modules are available:
+module avail hdf5
Returns
+hdf5/1.12.2-intel-oneapi-mpi-intel hdf5/1.12.2-openmpi-gcc
So, we see that HDF5 has been built with the intel-oneapi-mpi toolchain, and also with the GCC/openmpi toolchain. Since we’re building vasp using the intel-oneapi toolchain, we need to load the corresponding module:
+module load hdf5/1.12.2-intel-oneapi-mpi-intel
Again, we must locate the root directory:
+module show hdf5/1.12.2-intel-oneapi-mpi-intel
We see the line for setting the HDF5 root directory environment variable:
+setenv HDF5_ROOT_DIR /sfs/nopt/nrel/apps/libraries/01-23/spack/opt/spack/linux-rhel8-icelake/intel-2021.6.0/hdf5-1.12.2-dzgeixsm2cd3mupx4ti77ozeh7rh6zdo
Like before, we copy this path into our makefile.include:
+# HDF5-support (optional but strongly recommended)
+CPP_OPTIONS+= -DVASP_HDF5
+HDF5_ROOT ?= /sfs/nopt/nrel/apps/libraries/01-23/spack/opt/spack/linux-rhel8-icelake/intel-2021.6.0/hdf5-1.12.2-dzgeixsm2cd3mupx4ti77ozeh7rh6zdo
+LLIBS += -L$(HDF5_ROOT)/lib -lhdf5_fortran
+INCS += -I$(HDF5_ROOT)/include
+
We’re ready to compile! In the case of VASP, the compile command is make DEPS=1 std
but in general, the command may be make all
or similar (consult the documentation of the code you’re trying to build).
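Once the build finishes, a quick sanity check is to confirm that the executable actually picked up the libraries you intended to link; a sketch (the executable path is just an example):
ldd bin/vasp_std | grep -i -e mkl -e hdf5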
If you’re working with a code that has a testsuite, now is a good time to run the testsuite to make sure that your compile was successful.
To recap the general process:
- Use the ldd command (e.g., ldd path/to/executable/executable), which will show you the dynamically linked libraries of the executable.
- module load the necessary modules to prepare your environment (see the environment preparation step of the VASP example).
- Use the module show command to find the root directories of the libraries you need to reference in your makefile.
If you're still stuck and unable to successfully link the scientific libraries you need, get in contact with HPC help.
HSL (Harwell Subroutine Library) for Ipopt is a set of linear solvers that can greatly accelerate the speed of the optimization over the default MUMPS solver.
+Go to the HSL for Ipopt site and follow the instructions to request the source code for all the available solvers. Note that the solver MA27 is free to obtain, but MA27 is a serial solver. Other solvers will require a license. Please request a license that applies to your use case.
+The default version of Ipopt distributed with Ipopt.jl
on Linux links to the OpenBLAS library. This causes issues when linking the HSL library to the MKL libraries. For this reason, to use HSL linear solvers with Ipopt on Eagle, either we must compile Ipopt from scratch or compile HSL with OpenBLAS instead of MKL. For performance reasons, we have elected to compile Ipopt from scratch so that we can use the MKL libraries.
The following provides detailed instructions for compiling Ipopt with HSL and Mumps on Eagle.
+Metis helps the HSL solvers perform better. Therefore, it is recommended that you also install or build the Metis library. If you do want to install Metis, it must be done before compiling the HSL library.
+On Eagle, the easiest way to install Metis is to use anaconda:
+module load conda
+conda create -n <conda_environment>
+conda activate <conda_environment>
+conda install -c conda-forge metis
+
Info
+module load conda
loads the default anaconda module. You may use a different conda module based on your needs.
Note
+Anaconda packages sometimes have issues when they come from different channels. We tend to pull everything from conda-forge
hence the channel choice above.
pkg-config is a helper tool for specifying compiler options while building your code. It is available by default on Eagle.
+We will be using the GNU compiler suite (gcc
and gfortran
). These can be accessed on Eagle by loading the appropriate module. This should work with any version of the GNU compilers. We use version 8.4.0 here. These can be loaded by typing module load gcc/8.4.0
.
We will make use of the following environment variables.
+# Location of metis.h
+export METIS_HEADER=${HOME}/.conda-envs/<conda_environment>/include
+# Location of metis library
+export METIS_LIBRARY=${HOME}/.conda-envs/<conda_environment>/lib
+
+# Directory for keeping source code and build products
+export MYAPPS=${HOME}/apps
+# Location of header files
+export MYINC=${MYAPPS}/include
+# Location of static and dynamic libraries
+export MYLIB=${MYAPPS}/lib
+
These can be added to the .bash_profile file (or equivalent for other shells). Remember after adding these to source .bash_profile
(or equivalent) or to open a new terminal and do all building there.
+To make the Metis header and dynamic library easily accessible to the HSL, MUMPS and Ipopt libraries, we will put symbolic links in the ${MYINC}
and ${MYLIB}
directories. Do this by doing the following:
cd ${MYINC}
+ln -s ${METIS_HEADER}/metis.h
+cd ${MYLIB}
+ln -s ${METIS_LIBRARY}/libmetis.so
+
This has a couple of advantages. First, the coinbrew
build will automatically add the ${MYLIB}
directory to the rpath of all constructed libraries and executables. This means that we don't need to add ${MYLIB}
to the LD_LIBRARY_PATH. The other advantage is that anaconda puts all the environments libraries and include files in the same directories with libmetis.so
and metis.h
. Many of these libraries overlap with those used by HSL, Mumps and Ipopt but are not necessarily the same versions. Loading a different version of a library than those compiled against can cause unexpected behavior.
Follow the Ipopt installation instructions here to finish the installation of HSL solvers on Eagle.
+The following installation has been tested on Apple's M1 ARM based processors.
+We will use Homebrew and ThirdParty-HSL to install HSL libraries (and IPOPT). As per the default IPOPT installation instructions, we will rely on GNU compilers for the installation. Run the following commands
+# Update homebrew and download packages
+brew update
+brew install bash gcc metis pkg-config
+# Create a directory of your choice to install HSL
+mkdir -p ${HOME}/UserApps/IPOPT/HSL/hsl_install
+cd ${HOME}/UserApps
+# Clone ThirdParty-HSL
+git clone git@github.com:coin-or-tools/ThirdParty-HSL.git
+cd ThirdParty-HSL
+# Place the HSL source code here
+cp -r ${HSL_SOURCE_CODE_LOCATION} coinhsl
+
Assuming that you allow Homebrew to install to its default locations, we will declare the following environment variables
+# Location of metis.h
+export METIS_HEADER=/opt/homebrew/Cellar/metis/5.1.0/include
+# Location of metis library
+export METIS_LIBRARY=/opt/homebrew/Cellar/metis/5.1.0/lib
+# Directory for keeping source code and build products
+mkdir -p ${HOME}/UserApps/IPOPT/HSL/hsl_install
+export MYAPPS=${HOME}/UserApps/IPOPT/HSL/hsl_install
+# Location of static and dynamic libraries
+mkdir -p ${MYAPPS}/lib
+export MYLIB=${MYAPPS}/lib
+
Go to the requisite directory and run the following commands
cd ${HOME}/UserApps/ThirdParty-HSL/
+mkdir build && cd build
+../configure F77=gfortran-12 FC=gfortran-12 CC=gcc-12 --prefix="${MYAPPS}" \
+--with-metis --with-metis-lflags="-L${METIS_LIBRARY} -lmetis" \
+--with-metis-cflags="-I${METIS_HEADER}"
+make && make install
+
This should install the HSL libraries in ${MYAPPS}
. Finally add MYLIB
to your DYLD_LIBRARY_PATH
. You can append the following line to your .bash_profile
to make it permanent or call it every time you need to run Ipopt with HSL solvers.
export DYLD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}:${MYLIB}
+
Ipopt has a feature called the linear solver loader (read about it here). This allows for loading linear solvers from a dynamic library at run time. We will use this feature to use the HSL solvers.
+The only thing you have to do is to make the HSL dynamic library findable. This is done by adding the directory containing the HSL library to the environment variable DYLD_LIBRARY_PATH
in MacOS and LD_LIBRARY_PATH
on Linux-based systems. See above for MacOS and here for NREL systems. To use the new linear solvers just use the linear_solver="<solver>"
argument to Ipopt.Optimizer
.
Info
+The Ipopt build that comes with Ipopt.jl
seems to expect the HSL library to have the name libhsl.dylib
on MacOS. The repo ThirdParty-HSL builds the library libcoinhsl.dylib
. The simplest fix is to do the following:
cd ${MYLIB}
+# Create a symbolic link called libhsl.dylib
+ln -s libcoinhsl.dylib libhsl.dylib
+
The following Julia code is useful for testing the HSL linear solvers are working
+using JuMP, Ipopt
+
+m = JuMP.Model(()->Ipopt.Optimizer(linear_solver="ma97"))
+@variable(m, x)
+@objective(m, Min, x^2)
+JuMP.optimize!(m)
+
Scientific math libraries are a collection of highly optimized software tools that provide functions and algorithms for performing mathematical operations commonly used in scientific applications. They provide developers with a variety of tools for solving complex problems. These libraries are highly optimized for performance and generally designed to be portable across different platforms and operating systems.
+We support some of the most widely used scientific math libraries including:
+For details on how to build an application with scientific libraries, see our how-to guide
+For more information on a given scientific library, see our individual library pages under our "Libraries" drop-down menu.
Ipopt (Interior Point OPTimizer, pronounced "Eye-Pea-Opt") is an open-source non-linear optimizer using the interior point method.
+Ipopt is commonly used in solving power flow, e.g., AC Optimal Power Flow, and controls problems. Please refer to their project website for the source code. The documentation can be found here.
+The default installation instructions can be found in the Ipopt documentation here. The remainder of the page describes what has worked for NREL HPC users.
+We will use COIN-OR's coinbrew repo to build Ipopt along with the dependencies ASL, HSL and Mumps libraries.
+Note
+Follow the instructions to setup the environment for HSL before proceeding with the steps below.
+module load gcc/8.4.0 mkl
coinbrew
script must be made executable before first use: chmod u+x coinbrew/coinbrew
cd into the cloned coinbrew directory and run ./coinbrew fetch Ipopt@stable/3.13. This downloads branch stable/3.13 of the Ipopt repository as well as the dependency COIN-OR repositories ThirdParty-ASL, ThirdParty-HSL and ThirdParty-Mumps (other versions of Ipopt can also be downloaded in this manner).
cd ThirdParty/HSL and create a symbolic link named coinhsl that points to the HSL source code (or rename the directory), then cd ../..
Configure and build everything:
+./coinbrew build Ipopt --disable-java --prefix="${MYAPPS}" --with-metis-cflags="-I${MYINC}" --with-metis-lflags="-L${MYLIB} -lmetis" --with-lapack-lflags="-L${MKLROOT}/lib/intel64 -Wl,--no-as-needed -lmkl_rt -lpthread -lm -ldl" --with-lapack-cflags="-m64 -I${MKLROOT}/include" ADD_CFLAGS="-march=skylake-avx512" ADD_FCFLAGS="-march=skylake-avx512" ADD_FFLAGS="-march=skylake-avx512"
+
- build Ipopt tells coinbrew to configure and build Ipopt and its dependencies
- --disable-java says to build Ipopt without the java interface
- --prefix says to install the library in "${MYAPPS}"
- --with-metis-cflags gives the compiler the location of the metis header "metis.h"
- --with-metis-lflags gives the linker the location and name of the metis library
- --with-lapack-lflags gives the location of the LAPACK and BLAS libraries as well as the needed linker lines. Here we are using Intel's single dynamic library interface (google "mkl single dynamic library" for more details on this).
- ADD_CFLAGS, ADD_FCFLAGS and ADD_FFLAGS say to use those extra flags when compiling C and Fortran code, respectively. Specifically, -march=skylake-avx512 tells the compiler to optimize code for the Skylake CPUs on Eagle, which is recommended for performance reasons.
+When linking with MKL libraries, Intel's link line advisor is extremely helpful.
+Note
+When compiling Julia with MKL libraries, the single dynamic library interface is used to link against. This is why we are also using that linking method. Using a different linking method will cause unusual behaviors when using Ipopt with Julia (e.g. through JuMP).
+Note
+When running your custom Ipopt build on Eagle, you will need to do two things:
+module load mkl
+
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${MYLIB}
+
To use our custom installation of Ipopt with Ipopt.jl, we do the following:
1. Tell Ipopt.jl the location of our Ipopt library and executable:
ENV["JULIA_IPOPT_LIBRARY_PATH"] = ENV["MYLIB"]
ENV["JULIA_IPOPT_EXECUTABLE_PATH"] = ENV["MYBIN"]
2. Rebuild Ipopt.jl with the above environment variables set so it picks up the new library and executable:
using Pkg; Pkg.build("Ipopt");
3. Check the path Ipopt.jl has stored for libipopt.so. This should be the location of your compiled version:
using Ipopt; println(Ipopt.libipopt_path)
Documentation: LAPACK, scaLAPACK
+LAPACK is a highly optimized library of linear algebra routines written in Fortran 90. These routines include matrix multiplication, factorization (LU, Cholesky, QR, etc.) least squares solutions of linear systems, eigenvalue problems, and many others. LAPACK routines are available in both single and double precision, and for complex and real numbers.
+LAPACK depends on BLAS (Basic Linear Algebra Subprograms).
+ScaLAPACK is a parallel-distributed version of LAPACK (i.e., scalaPACK is MPI-parallel)
+Both LAPACK and ScaLAPACK are available as either standalone libraries (netlib-lapack
), or as part of the "package-of-packages" libraries MKL and LibSci.
Documentation: LibSci
+LibSci is a collection of numerical libraries developed by Cray for scientific and engineering computing. LibSci is optimized for performance on Cray architectures, including multi-core processors, and supports both single-precision and double-precision arithmetic. It also includes multithreading support for parallel execution on shared-memory systems. Like MKL, LibSci includes the following math functions:
+And additionally, libraries that are unique to Cray systems including:
+Documentation: MKL
+MKL includes a wealth of routines to accelerate technical application performance on modern multicore architectures. The library is designed to take full advantage of the latest Intel processors, including multi-core processors, and can significantly improve the performance of numerical applications. Core math functions include:
+Note
+If you are mixing an Anaconda environment with modules to build, always activate the conda environment before loading any library modules like MKL. cmake discovery, for example, is very sensitive to the order in which these actions are taken.
+With the Intel toolchain, linking against MKL is as simple as adding -mkl
to the link command. This by default links in the threaded MKL routines. To limit to strictly sequential (i.e., not threaded) routines, use -mkl=sequential
; to enable multi-process Scalapack routines, use -mkl=cluster
.
+To link MKL with GCC, the mkl
module includes some convenience environment variables defined as the appropriate LDFLAGS
setting. See the module show mkl
output; the variable naming is intended to be self-explanatory.
If you have needs not covered by these, use Intel's interactive MKL Link Line Advisor website to discover the appropriate linking options. Don't use mkl_link_tool in your build automation, as Intel only provides a 32-bit version of this tool which will cause builds to fail.
MKL will provide optimized library code based on the most advanced instruction set able to run on the discovered hardware. So for floating-point math, although GNU and Intel compilers will generate application code with SSE 4.2 instructions by default, MKL libraries will use the AVX-512 floating-point instructions available on Skylake processors.
As the code executes, rapid transitions between such different floating-point instruction sets may cause a significant performance penalty. Consider compiling the base code optimized for AVX-512 instructions, i.e., adding -xCORE-AVX512 for Intel and -march=skylake-avx512 for GNU.
Using -mkl by default generates code that uses the multithreaded MKL routines. There is extra initialization overhead associated with multithreaded MKL, so for smaller problem sizes or for sparse vectors it may be more beneficial from a performance standpoint to use the sequential MKL routines (-mkl=sequential).
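As a sketch of these linking options (module names follow the Eagle conventions used elsewhere on this page; source file names are placeholders):
module load comp-intel mkl
ifort my_solver.f90 -mkl -o my_solver                  # threaded MKL (default)
ifort my_solver.f90 -mkl=sequential -o my_solver       # strictly sequential MKL
mpiifort my_pdsolver.f90 -mkl=cluster -o my_pdsolver   # MKL with ScaLAPACK/BLACS
# With GCC, use the link variables reported by "module show mkl" instead of -mkl.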
Documentation: PETSc
+PETSc is a suite of data structures and routines for the scalable (parallel) solution of scientific applications modeled by partial differential equations.
+On Kestrel, PETSc is provided under multiple toolchains
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ petsc:
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ Versions:
+ petsc/3.14.6-cray-mpich-intel
+ petsc/3.19.3-intel-oneapi-mpi-intel
+ petsc/3.19.3-openmpi-gcc
+
petsc/3.14.6-cray-mpich-intel
is a PETSc installation that uses HPE provided PrgEnv-intel
.
+Therefore, the MPI used here is cray-mpich and the compiler is intel/2023.
petsc/3.19.3-intel-oneapi-mpi-intel
is a PETSc installation that uses intel-oneapi-compilers and intel-oneapi-mpi for the compilers and MPI, respectively.
petsc/3.19.3-openmpi-gcc
is a PETSc installation that uses gcc/10.1.0 and openmpi/4.1.5-gcc for the compilers and MPI, respectively.
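As a sketch of compiling against one of these installations (this assumes the module exports PETSC_DIR; run module show on the module to see exactly which variables it defines and adjust accordingly; file names are placeholders):
module load petsc/3.19.3-openmpi-gcc
mpicc my_petsc_app.c -I${PETSC_DIR}/include -L${PETSC_DIR}/lib -lpetsc -o my_petsc_app
srun -n 4 ./my_petsc_app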
Intel Parallel Studio is a set of tools that enable developing and optimizing software for the latest processor architectures.
Some of the tools available as part of Intel Parallel Studio include VTune Amplifier XE, Trace Analyzer and Collector, Advisor, Inspector XE, and Application Performance Snapshot; each is described below.
+Intel VTune Amplifier XE is a performance profiler for C, C++, C#, Fortran, Assembly and Java code. Hot spots analysis provides a sorted list of functions that use a lot of CPU time. Other features enable the user to quickly find common causes of slow performance in parallel programs, including waiting too long at locks and load imbalance among threads and processes. VTune Amplifier XE uses the Performance Monitoring Unit (PMU) on Intel processors to collect data with very low overhead.
+The recommended way to use this tool is to run the profiler from the command line and view the data using the GUI or generate a text report from the command line.
+You can list all the available profiling options for the machine you're profiling on, from the GUI or from the command line using amplxe-cl -collect-list
.
Include the following in your batch script to get an HPC-characterization profile of your application:
+#!/bin/bash --login
+#SBATCH -J <job name>
+#SBATCH -N <nodes>
+#SBATCH -t 00:30:00
+#SBATCH -A <Allocation handle>
+
+# set your tmpdir, and don't forget to clean it after your job
+# completes.
+export TMPDIR=/scratch/$USER/tmp
+# load application specific modules
+module load comp-intel
+# Setup the environment to use parallel studio
+. /nopt/nrel/apps/compilers/intel/2019.5/parallel_studio_xe_2019/psxevars.sh
+# profile the executable
+amplxe-cl --collect hpc-performance ./executable.exe
+
GUI:
+amplxe-gui
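After the job finishes, you can also generate a text report from the collected result directory instead of opening the GUI (a sketch; the result directory name, r000hpc here, will vary):
amplxe-cl -report summary -result-dir r000hpc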
Intel Trace Analyzer and Collector is a tool for understanding the behavior of MPI applications. Use this tool to visualize and understand MPI parallel application behavior, evaluate load balancing, learn more about communication patterns, and identify communication hot spots.
+The recommended way to use this tool is to collect data from the command line and view the data using the GUI.
+Example batch script to collect MPI communication data:
+#!/bin/bash --login
+#SBATCH -J <job name>
+#SBATCH -q <queue>
+#SBATCH -N <nodes>
+#SBATCH -t 00:30:00
+#SBATCH -A <Allocation handle>
+
+# set your tmpdir, and don't forget to clean it after your job
+# completes.
+export TMPDIR=/scratch/$USER/tmp
+
+# load application specific modules
+module load comp-intel
+# Setup the environment to use parallel studio and load the Intel MPI module
+module load intel-mpi
+. /nopt/nrel/apps/compilers/intel/2019.5/parallel_studio_xe_2019/psxevars.sh
+
+# to profile the executable, just append '-trace' to mpirun
+mpirun -trace -n 4 ./executable.exe
+# this generates a .stf file that can be viewed using the GUI
+
GUI:
+traceanalyzer
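To view the collected trace, pass the generated .stf file to the GUI (a sketch; the trace file is named after your executable):
traceanalyzer ./executable.exe.stf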
Intel Advisor helps with vectorization and threading in your C++ and Fortran Applications. This tool helps identify areas that would benefit the most from vectorization. It also helps with identifying what is blocking vectorization and gives insights to overcome it:
+# load application specific modules
+module load comp-intel
+# Setup the environment to use parallel studio
+. /nopt/nrel/apps/compilers/intel/2019.5/parallel_studio_xe_2019/psxevars.sh
+
+# set your tmpdir, and don't forget to clean it after your job
+# completes.
+export TMPDIR=/scratch/$USER/tmp
+
You can list all the available profiling options for the machine you're profiling on, from the GUI or from the command line using:
+advixe-cl -collect-list
This tool has a lot of features that can be accessed from the GUI:
+advixe-gui
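For example, a command-line sketch that collects a vectorization survey and then summarizes it (the project directory name is arbitrary):
# collect a survey of loops and their vectorization status
advixe-cl -collect survey -project-dir ./advisor_results -- ./executable.exe
# print a text summary of the survey
advixe-cl -report survey -project-dir ./advisor_results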
Intel Inspector XE is an easy to use memory checker and thread checker for serial and parallel applications written in C, C++, C#, F#, and Fortran. It takes you to the source locations of threading and memory errors and provides a call stack to help you determine how you got there. This tool has a GUI and a command line interface.
+# load application specific modules
+module load comp-intel
+# Setup the environment to use parallel studio
+. /nopt/nrel/apps/compilers/intel/2019.5/parallel_studio_xe_2019/psxevars.sh
+
+# set your tmpdir, and don't forget to clean it after your job
+# completes.
+export TMPDIR=/scratch/$USER/tmp
+
You can list all the available profiling options for the machine you're running this tool on, from the GUI or from the command line using:
+inspxe-cl -collect-list
This tool has a lot of features that can be accessed from the GUI:
+inspxe-gui
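For example, a command-line sketch (mi2 and ti2 are the medium-depth memory and threading analyses; the result directory names are arbitrary):
# check for memory errors
inspxe-cl -collect mi2 -result-dir ./inspector_mem -- ./executable.exe
# check for threading errors
inspxe-cl -collect ti2 -result-dir ./inspector_thr -- ./executable.exe
# print the detected problems as text
inspxe-cl -report problems -result-dir ./inspector_mem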
The new Application Performance Snapshot merges the earlier MPI Performance Snapshot and Application Performance Snapshot Tech Preview. MPI Performance Snapshot is no longer available separately, but all of its capabilities and more are available in the new combined snapshot. This tool lets you take a quick look at your application's performance to see if it is well optimized for modern hardware. It also includes recommendations for further analysis if you need more in-depth information.
+Using This Tool:
+# load application specific modules
+module load comp-intel
+# Setup the environment to use parallel studio
+. /nopt/nrel/apps/compilers/intel/2019.5/parallel_studio_xe_2019/psxevars.sh
+
+# serial/SMP executable
+$ aps <executable> # this generates an aps result directory
+# DMP executable
+$ mpirun -n 4 aps <executable>
+# this generates an aps result directory. To generate text and .html result files:
+$ aps --report=<the generated results directory from the previous step>
+# the result file can be viewed in a browser or text editor
+
Before you begin, please make sure that your application is compiled with the debug flag (-g), to enable profiling and debugging.
+When using the suite of tools from Intel Parallel Studio on Eagle, we recommend that you set your TMPDIR
to point to a location in your SCRATCH
directory:
export TMPDIR=/scratch/$USER/tmp
Important:
+Please make sure that you clean up this directory after your job completes.
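For example, at the end of your batch script:
# remove the temporary directory created for the profiling run
rm -rf /scratch/$USER/tmp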
+Documentation: Linaro Forge Documentation Page
+Linaro MAP (Memory Access Profiler) is a tool that provides insight into how memory is being accessed by an application. It can help developers understand the memory access patterns of an application and identify performance issues caused by memory bottlenecks. Linaro MAP can profile code running on multiple cores as well as code running on a system with hardware accelerators, such as GPUs, allowing developers to identify memory access patterns specific to these accelerators. The profiling data generated by Linaro MAP can be visualized in a variety of ways, including call graphs, heat maps, and histograms, making it easy to identify patterns and potential bottlenecks. The tool can also generate reports that provide a summary of memory usage and access patterns, as well as recommendations for optimizing memory usage. Here we will go through some of the information you can obtain with Linaro MAP using VASP as an example and in the next section MAP we show how to start up such a MAP profile. If you need help with profiling your programs, reach out to HPC help and we can work with you.
+Here is some profiling information obtained for VASP.
Across the top we see our metrics data for the default metrics: main thread activity, percent time each rank spends on floating-point instructions, and memory usage. The horizontal axis is wall clock time. The colors indicate the kind of activity in each interval; for example, blue indicates time spent in MPI communication and waiting.
+Across the bottom we have different view tabs. The I/O view displays your program I/O. The Project Files view allows you to navigate through your code base. The Functions view shows a flat profile of the functions in your program. The Stacks view allows you to follow down from the main function to see which code paths took the most time. Each line of the Stacks view shows the performance of one line of your source code, including all the functions called by that line.
You can select different metrics to view from the metrics menu, as well as zoom in on specific times in your program run.
By clicking on the functions in the "Main Thread Stacks," the profiler will take you to those calls in your code. Here we see that the call to the Davidson algorithm takes 68.6% of the program time.
Digging in further, we can find that most of the time is spent in the CALLMPI function, and the activity shows as blue, indicating MPI communication and wait time.
See the next section, MAP, for how to obtain these profiles.
Linaro MAP can show you how much time was spent on each line of code. To see the source code in MAP, you must use a version of your code that is compiled with the debug flag; for most compilers, this is -g. Note: you should not use an unoptimized debug build; keep your usual optimization flags (e.g., -O2, not -O0) turned on when profiling.
For more information, see the Linaro Forge Documentation on getting started with MAP. In particular, if your program uses statically linked libraries, the MAP profiler libraries will not be automatically linked and you will need to do so yourself.
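For example, a build suitable for MAP keeps the usual optimization level and simply adds debug information (a sketch; the compiler, flags, and file name depend on your code and toolchain):
# optimized build with debug symbols so MAP can map samples back to source lines
mpicc -O2 -g -o my_app my_app.c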
+Note
+Ensure that your program is working before trying to run it in MAP
There are two options for how to run MAP. The first method is to use the remote client (recommended, to reduce latency from X-forwarding the display). The second method is to use FastX. Both are described here.
Download the remote client from the Linaro Forge website. Select the client for your platform (Mac/Windows/Linux) and ensure the client version number matches the version number of the Linaro suite you are using. You can see all the versions of linaro-forge available using:
+$ module avail linaro-forge
Once you have the client installed, you will need to configure it to connect to the host. Run
$ module show linaro-forge/##.#.#
to get the installation path (do not include "/lib..." in the path). Once the remote client is correctly set up, start a terminal and connect to the desired HPC system.
+$ ssh USER@$HOST.hpc.nrel.gov
Continue to the profiling section
+To run MAP with FastX, follow instructions to download and install the desktop client and connect to a host on the FastX page.
+Once you have FastX installed and an appropriate build of your program to profile, start an xterm window from within FastX connected to an HPC host (We recommend using DAV nodes if available on your system). Then continue to the profiling section
+Once you have an appropriate build of your program to profile and either the Linaro Forge Client or FastX installed, you can obtain profiling data through map with the following steps. We will profile VASP as an example.
+$ salloc --nodes=<N> --time=<time> --account=<handle>
$ module load linaro-forge
$ module load mkl intel-mpi #for VASP
Start a map session using the command map --connect
if you are using the desktop client or simply map
if you are using FastX.
+ Optionally, navigate to your working directory and give map the path to your exe
+$ cd PATH/TO/YOUR/WORKING/DIRECTORY
+$ map --connect PATH/TO/YOUR/PROGRAM/exe
(remove --connect if using FastX)
+ If using the remote client, it will send a Reverse Connection request. Click 'Accept'.
You should now see the Linaro Forge GUI appear and a submission box with some information filled out if you followed the optional directions; otherwise, use the GUI to input them now. Make sure the path to the application includes your program exe. Make sure your working directory includes your input files, or specify your stdin file and its path. Adjust other parameters as needed for profiling.
Start your profile by clicking "Run".
+You should now see the profiling data we described in the previous section MAP. Please refer to that page as well as the Linaro Forge Documentation for more details on what you can learn from such profiles.
Documentation: Linaro Performance Reports
Linaro Performance Reports is a low-overhead tool that produces one-page text and HTML reports summarizing and characterizing both scalar and MPI application performance. (Only ~5% application slowdown even with thousands of MPI processes.) These reports answer broad questions about where the runtime goes, such as how much time is spent in computation, MPI communication, and I/O, and whether the application is making good use of the resources it runs on.
+Here we show the information you can obtain with Linaro Performance reports using VASP as an example. In the next section, we will detail how to obtain these reports. If you need help with profiling your programs, reach out to HPC help and we can work with you.
+Here is the header of performance report obtained for a VASP run on 1 node with 36 processes:
This shows time spent running application code, making MPI calls, and doing I/O. In this case, we see that we are MPI-bound, which makes sense given that we are running a small, simple test case on more MPI tasks than necessary, which creates unnecessary MPI communication overhead.
+The rest of the report shows a further breakdown of each of these categories:
+ +All you need to do is load the module and prefix your execution command with perf-report
:
$ salloc --nodes=<N> --time=<time> --account=<handle>
$ module load linaro-forge
$ module load mkl intel-mpi #for VASP
Then prefix your run command with perf-report:
$ perf-report srun -n 36 PATH/TO/YOUR/PROGRAM/exe
This will generate an .html file and a .txt file that you can view in a browser or text editor. You should now see the overview we described in the previous section.
Documentation: Cray-MPICH
+Cray's MPICH is a high performance and widely portable implementation of the Message Passing Interface (MPI) standard.
+Note Cray-MPICH is only available on Kestrel.
In order to use Cray-MPICH, it is recommended to use the HPE Cray compiler wrappers cc, CC, and ftn.
+The wrappers will find the necessary MPI headers and libraries as well as scientific libraries provided by LibSci.
Depending on the compiler of choice, we can load a different instance of Cray-MPICH.
+For example, if we decide to use PrgEnv-intel
, we can load the module PrgEnv-intel
which will invoke an Intel instance of cray-mpich
that can be used through cc
, CC
and ftn
.
+We can also use the usual MPI compilers mpicc
, mpicxx
and mpif90
/mpifort
but it is recommended to use the wrappers.
Cray-MPICH takes into consideration the processor architecture through craype-x86-spr
and the network type through craype-network-ofi
.
For codes compiled using intel-mpi
or mpich
, we can load the module cray-mpich-abi
, an HPE provided MPI that allows pre-compiled software to leverage MPICH benefits on Kestrel's network topology.
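As a sketch, building an MPI code with the wrappers after selecting a programming environment looks like (file names are placeholders):
module load PrgEnv-intel
cc  -o my_app_c   my_app.c     # C
CC  -o my_app_cxx my_app.cpp   # C++
ftn -o my_app_f   my_app.f90   # Fortran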
Documentation: OpenMPI
+The Open MPI Project is an open source Message Passing Interface implementation that is developed and maintained by a consortium of academic, research, and industry partners. Open MPI is therefore able to combine the expertise, technologies, and resources from all across the High Performance Computing community in order to build the best MPI library available. Open MPI offers advantages for system and software vendors, application developers and computer science researchers.
+The Open MPI framework is a free and open-source communications library that is commonly developed against by many programmers. As an open-source package with strong academic support, the latest ideas may appear as implementations here prior to commercial MPI libraries.
+Note that the Slurm-integrated builds of OpenMPI do not create the mpirun
or mpiexec
wrapper scripts that you may be used to. Ideally you should use srun
(to take advantage of Slurm integration), but you can also use OpenMPI's native job launcher orterun
. Some have also had success simply symlinking mpirun
to orterun
.
OpenMPI implements two Byte Transfer Layers for data transport between ranks in the same physical memory space: sm
and vader
.
+Both use a memory-mapped file, which by default is placed in /tmp
.
+The node-local /tmp
filesystem is quite small, and it is easy to fill this and crash or hang your job.
+Non-default locations of this file may be set through the OMPI_TMPDIR
environment variable.
If you are running only a few ranks per node with modest buffer space requirements, consider setting OMPI_TMPDIR to /dev/shm in your job script. If you are running many ranks per node, you should set OMPI_TMPDIR to /tmp/scratch, which holds at least 1 TB depending on Eagle node type.
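For example, in a job script (a sketch):
# few ranks per node with modest buffers: use the in-memory filesystem
export OMPI_TMPDIR=/dev/shm
# many ranks per node: use the larger node-local scratch instead
# export OMPI_TMPDIR=/tmp/scratch
srun ./my_app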
Kestrel | Eagle | Swift | Vermilion
---|---|---|---
openmpi/4.1.5-gcc | openmpi/1.10.7/gcc-8.4.0 | openmpi/4.1.1-6vr2flz | openmpi/4.1.4-gcc
openmpi/4.1.5-intel | openmpi/3.1.6/gcc-8.4.0 | |
 | openmpi/4.0.4/gcc-8.4.0 | |
 | openmpi/4.1.1/gcc+cuda | |
 | openmpi/4.1.2/gcc | |
 | openmpi/4.1.2/intel | |
 | openmpi/4.1.3/gcc-11.3.0-cuda-11.7 | |
 | openmpi/4.1.0/gcc-8.4.0 | |
Documentation: IntelMPI
+Intel® MPI Library is a multifabric message-passing library that implements the open source MPICH specification. Use the library to create, maintain, and test advanced, complex applications that perform better on HPC clusters based on Intel® and compatible processors.
+Intel's MPI library enables tight interoperability with its processors and software development framework, and is a solid choice for most HPC applications.
Kestrel | Eagle | Swift | Vermilion
---|---|---|---
intel-oneapi-mpi/2021.10.0-intel | intel-mpi/2020.1.217 | intel-oneapi-mpi/2021.3.0-hcp2lkf | intel-oneapi-mpi/2021.7.1-intel
Documentation: MPT
+HPE's Message Passing Interface (MPI) is a component of the HPE Message Passing Toolkit (MPT), a software package that supports parallel programming across a network of computer systems through a technique known as message passing.
+Hewlett-Packard Enterprise (HPE)—Eagle's creator—offers a very performant MPI library as well, built on top of and colloquially known via its underlying Message Passing Toolkit high-performance communications component as "MPT."
Eagle
---
mpt/2.23
mpt/2.22
Note:
+MPT is only installed on Eagle.
+Documentation: MPICH
MPICH is a high-performance and widely portable implementation of the Message Passing Interface (MPI) standard. MPICH and its derivatives form the most widely used implementations of MPI in the world. They are used exclusively on nine of the top 10 supercomputers (June 2016 ranking), including the world's fastest supercomputer: Taihu Light.
Kestrel | Eagle | Swift | Vermilion
---|---|---|---
mpich/4.1-gcc | | mpich/3.4.2-h2s5tru | mpich/4.0.2-gcc
mpich/4.1-intel | | |
To run MPI (message-passing interface) jobs on the Eagle system's NVidia GPUs, the MPI library must be "CUDA-aware." A suitable OpenMPI build has been made available via the openmpi/4.0.4/gcc+cuda module. This module is currently in test.
+srun
does not work with this OpenMPI build when running interactively, so please use orterun
instead.
+However, OpenMPI is cognizant of the Slurm environment, so one should request the resources needed via salloc
(for example, the number of available "slots" is determined by the number of tasks requested via salloc
).
+Ranks are mapped round-robin to the GPUs on a node.
+nvidia-smi
shows, for example,
Processes:
GPU | PID | Type | Process name | GPU Memory Usage
---|---|---|---|---
0 | 24625 | C | ./jacobi | 803MiB
0 | 24627 | C | ./jacobi | 803MiB
1 | 24626 | C | ./jacobi | 803MiB
when oversubscribing 3 ranks onto the 2 GPUs via the commands
+srun --nodes=1 --ntasks-per-node=3 --account=<allocation_id> --time=10:00 --gres=gpu:2 --pty $SHELL
+...<getting node>...
+orterun -np 3 ./jacobi
+
If more ranks are desired than were originally requested via srun, the OpenMPI flag --oversubscribe could be added to the orterun command.
+An example batch script to run 4 MPI ranks across two nodes is as follows.
+#!/bin/bash --login
+#SBATCH --nodes=2
+#SBATCH --ntasks-per-node=2
+#SBATCH --time=2:00
+#SBATCH --gres=gpu:2
+#SBATCH --job-name=GPU_MPItest
+#SBATCH --account=<allocation_id>
+#SBATCH --error=%x-%j.err
+#SBATCH --output=%x-%j.out
+
+ml use -a /nopt/nrel/apps/modules/test/modulefiles
+ml gcc/8.4.0 cuda/10.2.89 openmpi/4.0.4/gcc+cuda
+
+cd $SLURM_SUBMIT_DIR
+srun ./jacobi
+
To run multiple ranks per GPU, you may find it beneficial to run NVidia's Multi-Process Service. This process management service can increase GPU utilization, reduce on-GPU storage requirements, and reduce context switching. To do so, include the following functionality in your Slurm script or interactive session:
+export CUDA_MPS_PIPE_DIRECTORY=/tmp/scratch/nvidia-mps
+if [ -d $CUDA_MPS_PIPE_DIRECTORY ]
+then
+ rm -rf $CUDA_MPS_PIPE_DIRECTORY
+fi
+mkdir $CUDA_MPS_PIPE_DIRECTORY
+
+export CUDA_MPS_LOG_DIRECTORY=/tmp/scratch/nvidia-log
+if [ -d $CUDA_MPS_LOG_DIRECTORY ]
+then
+ rm -rf $CUDA_MPS_LOG_DIRECTORY
+fi
+mkdir $CUDA_MPS_LOG_DIRECTORY
+
+# Start user-space daemon
+nvidia-cuda-mps-control -d
+
+# Run OpenMPI job.
+orterun ...
+
+# To clean up afterward, shut down daemon, remove directories, and unset variables
+echo quit | nvidia-cuda-mps-control
+for i in `env | grep CUDA_MPS | sed 's/=.*//'`; do rm -rf ${!i}; unset $i; done
+
For more information on MPS, see the NVidia guide.
Change working directory to the location where you'll build the package. A convenient location is /scratch/$USER
, which we'll use for this example. cd /scratch/$USER
OpenMPI can be found at https://www.open-mpi.org/software/ompi/. This will automatically redirect you to the latest version, but older releases can be seen in the left menu bar. For this, choose version 4.1.
+There are several packaging options.
+Here, we'll get the bzipped tarball openmpi-4.1.0.tar.bz2
.
+You can either download it to a local machine (laptop) and then scp
the file over to the HPC cluster, or get it directly on the supercomputer using wget
.
+
wget https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.0.tar.bz2
+
tar -tf openmpi-4.1.0.tar.bz2
+
tar -xjf openmpi-4.1.0.tar.bz2
+
If you want to watch the files as they are extracted, add the -v option. The tarball unpacks into the openmpi-4.1.0 directory.
+cd openmpi-4.1.0
, at which point you are in the top level of the package distribution.
You can now proceed to configuring, making, and installing.
We will illustrate a package build that relies on the popular autotools system.
+Colloquially, this is the configure; make; make install
process that is often encountered first by those new to package builds on Linux.
+Other build systems like CMake (which differ primarily in the configuration steps) won't be covered.
+If you need to build a package that relies on CMake, please contact hpc-help@nrel.gov for assistance.
We'll use GCC version 8.4.0 for this illustration, so load the associated module first (i.e., gcc/8.4.0
).
You can use any version of GCC available to you on Kestrel. The paths in step 3 are for Eagle; please make the necessary changes for Kestrel.
+Now that you've acquired and unpacked the package tarball and changed into the top-level directory of the package, you should see a script named "configure".
+In order to see all available options to an autotools configure script, use ./configure -h
(don't forget to include the ./
explicit path, otherwise the script will not be found in the default Linux search paths, or worse, a different script will be found).
We will build with the following command: +
./configure --prefix=/scratch/$USER/openmpi/4.1.0-gcc-8.4.0 --with-slurm --with-pmi=/nopt/slurm/current --with-gnu-ld --with-lustre --with-zlib --without-psm --without-psm2 --with-ucx --without-verbs --with-hwloc=external --with-hwloc-libdir=/nopt/nrel/apps/base/2020-05-12/spack/opt/spack/linux-centos7-x86_64/gcc-8.4.0/hwloc-1.11.11-mb5lwdajmllvrdtwltwe3r732aca76ny/lib --enable-cxx-exceptions --enable-mpi-cxx --enable-mpi-fortran --enable-static LDFLAGS="-L/nopt/nrel/apps/base/2020-05-12/spack/opt/spack/linux-centos7-x86_64/gcc-8.4.0/11.0.2-4x2ws7fkooqbrerbsnfbzs6wyr5xutdk/lib64 -L/nopt/nrel/apps/base/2020-05-12/spack/opt/spack/linux-centos7-x86_64/gcc-8.4.0/cuda-11.0.2-4x2ws7fkooqbrerbsnfbzs6wyr5xutdk/lib64 -Wl,-rpath=/nopt/nrel/apps/base/2020-05-12/spack/opt/spack/linux-centos7-x86_64/gcc-8.4.0/hwloc-1.11.11-mb5lwdajmllvrdtwltwe3r732aca76ny/lib -Wl,-rpath=/nopt/nrel/apps/base/2020-05-12/spack/opt/spack/linux-centos7-x86_64/gcc-8.4.0/cuda-11.0.2-4x2ws7fkooqbrerbsnfbzs6wyr5xutdk/lib64" CPPFLAGS=-I/nopt/nrel/apps/base/2020-05-12/spack/opt/spack/linux-centos7-x86_64/gcc-8.4.0/hwloc-1.11.11-mb5lwdajmllvrdtwltwe3r732aca76ny/include
+
--prefix= : This sets the location that "make install" will ultimately populate. If this isn't given, generally the default is to install into /usr or /usr/local, both of which require privileged access. We'll set up the environment using environment modules to point to this custom location.
--with-slurm : Enables the interface with the Slurm resource manager.
--with-pmi= : Point to the Process Management Interface, the abstraction layer for MPI options.
--with-gnu-ld : Letting the build system know that linking will be done with GNU's linker, rather than a commercial or alternative open one.
--with-lustre : Enable Lustre features.
--with-zlib : Enable compression library.
--without-psm[2] : Explicitly turn off interfaces to Intel's Performance Scaled Messaging for the now-defunct Omni-Path network.
--with-ucx= : Point to UCX, an intermediate layer between the network drivers and MPI.
--without-verbs : For newer MPIs, communications go through UCX and/or libfabric, not directly to the Verbs layer.
--with-hwloc[-libdir]= : Point to a separately built hardware localization library for process pinning.
--enable-cxx-exceptions, --enable-mpi-cxx : Build the C++ interface for the libraries.
--enable-mpi-fortran : Build the Fortran interface for the libraries.
--enable-static : Build the .a archive files for static linking of applications.
LDFLAGS : -L options point to non-standard library locations. -Wl,-rpath options embed paths into the binaries, so that having LD_LIBRARY_PATH set correctly is not necessary (i.e., no separate module for these components).
CPPFLAGS : Point to header files in non-standard locations.
NOTE: The CUDA paths are not needed for CUDA function per se, but the resulting MPI errors out without setting them. There appears to be a lack of modularity that sets up a seemingly unneeded dependency.
After lots of messages scroll by, you should be returned to a prompt following a summary of options. It's not a bad idea to glance through these and make sure everything makes sense and is what you intended.
+Now that the build is configured, you can "make" it.
+For packages that are well integrated with automake, you can speed the build up by parallelizing it over multiple processes with the -j #
option.
+If you're building this on a compute node, feel free to set this option to the total number of cores available.
+On the other hand, if you're using a login node, be a good citizen and leave cores available for other users (i.e., don't use more than 4; Arbiter should limit access at any rate regardless of this setting).
make -j 4
+
Try a make check
and/or a make test
.
+Not every package enables these tests, but if they do, it's a great idea to run these sanity checks to find if your build is perfect, maybe-good-enough, or totally wrong before building lots of other software on top of it.
Assuming checks passed if present, it's now time for make install
.
Assuming that completes without errors, you can move on to creating an environment module to use your new MPI library.
This training module will walk through how to build a reasonably complex package, OpenMPI, and deploy it for use by yourself or members of a project.
+Sometimes, the package version that you need, or the capabilities you want, +are only available as source code.
Other times, a package has dependencies on other ones with application programming interfaces that change rapidly. A source code build might have code to adapt to the (older, newer) libraries you have available, whereas a binary distribution will likely not. In other cases, a binary distribution may be associated with a particular Linux distribution and version different from Kestrel's or Eagle's. One example is a package for Linux version X+1 (with a shiny new libc). If you try to run this on Linux version X, you will almost certainly get errors associated with the GLIBC version required. If you build the application against your own, older libc version, those dependencies are not created.
Performance; for example, if a more performant numerical library is available, you may be able to link against it. A pre-built binary may have been built against a more universally available but lower-performance library. The same holds for optimizing compilers.
+Curiosity to know more about the tools you use.
+Pride of building one's tools oneself.
+For the sheer thrill of building packages.
+Now that the package has been installed to your preferred location, we can set up an environment module.
+a. If this is your first package, then you probably need to create a place to collect modulefiles.
+For example, mkdir -p /scratch/$USER/modules/default
.
b. You can look at the systems module collection(s), e.g., /nopt/nrel/apps/modules/default/modulefiles
on Eagle or /nopt/nrel/apps/modules/default
on Kestrel, to see how modules are organized from a filesystem perspective.
+In short, each library, application, or framework has its own directory in the modulefiles
directory, and the modulefile itself sits either in this directory, or one level lower to accomodate additional versioning.
+In this example, there is the MPI version (4.1.0), as well as the compiler type and version (GCC 8.4.0) to keep track of.
+So, we'll make a /scratch/$USER/modules/default/openmpi/4.1.0
directory, and name the file by the compiler version used to build (gcc-8.4.0).
+You're free to modify this scheme to suit your own intentions.
c. In the openmpi/4.1.0/gcc840
directory you just made, or whatever directory name you chose, goes the actual modulefile.
+It's much easier to copy an example from the system collection than to write one de novo, so you can do
cp /nopt/nrel/apps/modules/default/modulefiles/openmpi/4.0.4/gcc-8.4.0.lua /scratch/$USER/modules/default/openmpi/4.1.0/.
+
cp /nopt/nrel/apps/modules/default/compilers_mpi/openmpi/4.1.5-gcc /scratch/$USER/modules/default/openmpi/4.1.0/.
+
Please note that the OpenMPI modulefile on Kestrel is of TCL type. It is not necessary for you to know the language to modify our examples.
The Lmod modules system uses the Lua language natively for module code. Tcl modules will also work under Lmod, but don't offer quite as much flexibility.
+d. For this example, (a) the OpenMPI version we're building is 4.1.0 instead of 4.0.4 on Eagle or 4.1.5 on Kestrel, and (b) the location is in /scratch/$USER
, rather than /nopt/nrel/apps
.
+So, edit /scratch/$USER/modules/default/openmpi/4.1.0/gcc-8.4.0.lua
to make the required changes.
+Most of these changes only need to be made at the top of the file; variable definitions take care of the rest.
e. Now you need to make a one-time change in order to see modules that you put in this collection (/scratch/$USER/modules/default
).
+In your $HOME/.bash_profile
, add the following line near the top:
module use /scratch/$USER/modules/default
+
Obviously, if you've built packages before and enabled them this way, you don't have to do this again!
+Now logout, log back in, and you should see your personal modules collection with a brand new module.
+[$USER@el1 ~]$ module avail
+
+---------------------------------- /scratch/$USER/modules/default -----------------------------------
+openmpi/4.1.0/gcc-8.4.0
+
Notice that the ".lua" extension does not appear. The converse is also true: if the extension is missing, the module will not appear via module commands! As a sanity check, it's a good idea to load the module and check that an executable file you know exists there is in fact on your PATH:
+[$USER@el1 ~]$ module load openmpi/4.1.0/gcc-8.4.0
+[$USER@el1 ~]$ which mpirun
+/scratch/$USER/openmpi/4.1.0-gcc-8.4.0/bin/mpirun
+
Conda is a package manager which allows you to easily create and switch between different software environments in different languages for different purposes. With Conda, it's easy to:
+Manage different (potentially conflicting) versions of the same software without complication
+Quickly stand up even complicated dependencies for stacks of software
+Share your specific programming environment with others for reproducible results
+To create a basic Conda environment, we'll start by running
+conda create --name mypy python
+
where the --name
option (or the shortened -n
) means the environment will be specified by name, and mypy
will be the name of the created environment. Any arguments following the environment name are the packages to be installed.
To specify a specific version of a package, simply add the version number after the "=" sign
+conda create --name mypy37 python=3.7
+
You can specify multiple packages for installation during environment creation
+conda create --name mynumpy python=3.7 numpy
+
Conda ensures dependencies are satisfied when installing packages, so the version of the numpy package installed will be consistent with Python 3.7 (and any other packages specified).
+Tip
+It’s recommended to install all the packages you want to include in an environment at the same time to help avoid dependency conflicts.
+To see a list of all existing environments (useful to confirm the successful creation of a new environment):
+conda env list
+
To activate your new environment:
+conda activate mypy
+
Your usual command prompt should now be prefixed with (mypy)
, which helps keep track of which environment is currently activated.
To see which packages are installed from within a currently active environment:
+conda list
+
When finished with this programming session, deactivate your environment with:
+conda deactivate
+
Creating environments by location is especially helpful when working on the HPC systems, as the default location is your /home/<username>/
directory, which is limited to 50 GB. To create a Conda environment somewhere besides the default location, use the --prefix
flag (or the shortened -p
) instead of --name
when creating:
conda create --prefix /path/to/mypy python=3.7 numpy
+
This re-creates the python+numpy environment from earlier, but with all downloaded packages stored in the specified location.
+Warning
+Keep in mind that /scratch/<username>
is temporary, and files are purged after 28 days of inactivity.
Unfortunately, placing an environment outside of the default folder means that it needs to be activated with the full path (conda activate /path/to/mypy
) and will show the full path rather than the environment name at the command prompt.
To fix the cumbersome command prompt, simply modify the env_prompt
setting in your .condarc
file:
conda config --set env_prompt '({name}) '
+
Note that '({name})'
is not a placeholder for your desired environment name but text to be copied literally. This will edit your .condarc
file if you already have one or create a .condarc
file if you do not. For more on modifying your .condarc
file, check out the User Guide. Once you've completed this step, the command prompt will show the shortened name (mypy, in the previous example).
Over time, it may become necessary to add additional packages to your environments. New packages can be installed in the currently active environment with:
+conda install pandas
+
Conda will ensure that all dependencies are satisfied which may include upgrades to existing packages in this repository. To install packages from other sources, specify the channel
option:
conda install --channel conda-forge fenics
+
To add a pip-installable package to your environment:
+conda install pip
+pip <pip_subcommand>
+
Warning: Mixing Conda and Pip
+Issues may arise when using pip and conda together. When combining conda and pip, it is best to use an isolated conda environment. Only after conda has been used to install as many packages as possible should pip be used to install any remaining software. If modifications are needed to the environment, it is best to create a new environment rather than running conda after pip. When appropriate, conda and pip requirements should be stored in text files.
+For more information, see the User Guide.
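For example, to record both sets of requirements as text files so the environment can be rebuilt later:
conda env export --from-history > environment.yaml
pip freeze > requirements.txt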
+We can use conda list
to see which packages are currently installed, but for a more version-control-flavored approach:
conda list --revisions
+
which shows changes to the environment over time. To revert back to a previous environment
+conda install --revision 1
+
To remove packages from the currently activated environment:
+conda remove pkg1
+
To completely remove an environment and all installed packages:
+conda remove --name mypy --all
+
Conda environments can become large quickly due to the liberal creation of cached files. To remove these files and free up space you can use
+conda clean --all
+
or to simply preview the potential changes before doing any actual deletion
+conda clean --all --dry-run
+
To create a file with the the exact "recipe" used to create the current environment:
+conda env export > environment.yaml
+
In practice, this recipe may be overly-specific to the point of creating problems on different hardware. To save an abbreviated version of the recipe with only the packages you explicitly requested:
+conda env export --from-history > environment.yaml
+
To create a new environment with the recipe specified in the .yaml file:
+conda env create --name mypyhpc --file environment.yaml
+
If a name or prefix isn't specified, the environment will be given the same name as the original environment the recipe was exported from (which may be desirable if you're moving to a different computer).
+To speed up dependency solving, substitute the mamba command for conda. Mamba is a dependency solver written in C++ designed to speed up the conda environment solve.
+mamba create --prefix /path/to/mypy python=3.7 numpy
+
By default, the conda module uses the home directory for package caches and named environments. This results in a lot of the home directory quota used. Some ways to reduce home directory usage include:
+Use the -p PATH_NAME
switch when creating or updating your environment. Make sure PATH_NAME
isn't in the home directory. Keep in mind files in /scratch are deleted after about a month of inactivity.
Change the directory used for caching. This location is set by the module file to ~/.conda-pkgs
. A simple way to avoid filling up the home directory with cached conda data is to soft link a location on scratch to ~/.conda-pkgs
, for example ln -s /scratch/$USER/.conda-pkgs /home/$USER/.conda-pkgs
. Alternatively, calling export CONDA_PKGS_DIRS=PATH_NAME
to specify somewhere to store downloads and cached files such as /scratch/$USER/.conda-pkgs
will reduce home directory usage.
Interacting with your Conda environments on the HPC systems should feel exactly the same as working on your desktop. An example desktop-to-HPC workflow might go:
Export the environment from your desktop machine (conda env export > environment.yaml).
Copy the .yaml file to the HPC system and re-create the environment there (conda env create -f environment.yaml).
Submit a batch job that activates the environment and runs your code:
#!/bin/bash
+#SBATCH --nodes=1
+#SBATCH --time=60
+#SBATCH --account=<project_handle>
+
+module purge
+module load conda
+conda activate mypy
+
+python my_main.py
+
As mentioned previously, Conda stores environments in your home directory by default. This can cause problems on the HPC systems, because Conda environments can require a lot of storage space, and the home directory typically has a limited amount of space on the HPC systems.
+By default, the conda module uses the home directory for package caches and named environments. This results in a lot of the home directory quota used. Some ways to reduce home directory usage include:
+Use the -p PATH_NAME
switch when creating or updating your environment. Make sure PATH_NAME
isn't in the home directory.
Change the directory used for caching. This location is set by the module file to ~/.conda-pkgs
. Calling export CONDA_PKGS_DIRS=PATH_NAME
to specify somewhere to store downloads and cached files such as /scratch/$USER/.conda-pkgs
will reduce home directory usage.
Following are some guidelines and suggestions regarding where to store environments:
Path | When to use | Caveats
---|---|---
/home | $HOME/.conda is the default location for environments. For one-off environments, or if you don't create environments often, this is a reasonable location for your environments and doesn't require any extra flags or parameters. | On systems such as Eagle, $HOME is limited to 50 GB.
/scratch | /scratch or /projects are well-suited for multiple-node jobs because these locations provide enhanced filesystem performance for parallel access. | The contents of /scratch are purged after 28 days of inactivity.
/projects | Ideal location for storing environments that will be shared with colleagues that are working on the same project. | Storage under /projects is contingent on having an HPC project allocation, and the project allocation has its own storage quota.
Task | ... outside environment | ... inside environment
---|---|---
Create by name | conda create -n mypy pkg1 pkg2 | N/A
Create by path | conda create -p path/to/mypy pkg1 pkg2 | N/A
Create by file | conda env create -f environment.yaml | N/A
Show environments | conda env list | N/A
Activate | conda activate mypy | N/A
Deactivate | N/A | conda deactivate
Install New Package | conda install -n mypy pkg1 pkg2 | conda install pkg1 pkg2
List All Packages | conda list -n mypy | conda list
Revision Listing | conda list --revisions -n mypy | conda list --revisions
Export Environment | conda env export -n mypy > environment.yaml | conda env export > environment.yaml
Remove Package | conda remove -n mypy pkg1 pkg2 | conda remove pkg1 pkg2
>6)M36kxd!5_tIF0N-hbP0_1Mb>HSv!ciFd!(0P4Is8i1nJ8 zvKj?%<9+C$D5XqUSBJ|_WXOjf##-kxD12nox7KP|aVOdSyOX4VOBEESXrHg84{7y_ zb}v@z7~8%Y(1Zk#7$u6MVRi9?Q~uodVl^ez$PCcFIL}+sir_ByWGCBZ^@+F-Xe-?U z2(q{&ReR0>E^_SeCHn<@&>Hyh8$4xfR>Ap*^y@hPw^^dOBCI9MK5jR 9u6ok@bPZ~YyA2xVL|E-#+w#&(%7)g_ax#1B&qn+&)`h{@=6rM=S!9A!VP zAr6Kgl0;7>Ye2=2-*kNVGLd_gn@HV4K-!!T&4JcEWpaq$Yzy<(aF-BuC1?g=unaT1 zlc7!=;5sLE($K(f{Ki79Zz$QT25wdE5yh&<_%kOa(EGO6 NUBi zq;`-VZ9sa~S8xxd_^fK2i)tvF3kCRqv0@CtQ>F$hw>+?!*MH&3Q#wDCWu7rUv_w^A zf@{~GJeXdR_p~=%YtOrH`S_3W+lS-1cuB7rjUX52f4zmS567uiUV5+31-$?HsNAmK z0Q8TEY}XYyMa)?^ke~c;dVk->kUt0Kx8e1#+npLGaN^QItDgH_#ug`o*ZKs8HW8(k z{Wxz?>$~~iqtG%sAo_Mkp=@la)1p`ydE#l&B7KgN;lrx1$X3DAhi6BXOWYBv28VCW z`g_AT-}Ci0#{-UhWIm1HF*exop65qP?8Zb=4+EMd%(pu(IPDpxN^pt<%Y=ITO%2yI zwyCj K7h-AfXcNlqc+``m=trcv>h>UDDV>hSMw7wms|O z(?9#2L%SG5+<;p *8<}sVOt#Oi79~AAZJ8IdB-10r>Q6Y%o2S zE7Ez_{ 3_IV+?Qw>R=H4CC z2GXI#dbUh4SqynyEF520&>g7%fjVJ8FuRS~E~cH-5{-kj`RsURv^`eO>>pc )d(_1WRTW@JZ7#KSoZNn?V*;t8n_Mk~E*f?=RxSCWKu-QLF9RGvuBJ zsoUxxb1TFYu5QHbQF7PHnVw^JT5={7+X=<)@`Gu$Q*>u}65gfBtjb26!Wp4%mH!G& z#uu)qI?p=9y>_Fm2;2)T)2b7W#974o2>$al86Y?(n*7g3TdofNyI@N1#>&mSr9T?9 z-OE!*u)}POYQB}psvEc7eSE#q)4@vxIoyNE>k0L1R9prqDTxqotW6DnTj(@B&Ff?3 zQoZ&>@~SLT8}s7wEG}32P*u`c@Av640$uLVge8w8N%Zi_!7O7MQ#rJ2fAfiGr|hTk zOwC->9Dr!C??l-rX&0TiO=Kl&Q|D-Z-I1x)$66yXF*B~zL3i%! XFm zPigCYh}6RMC5Z|_YOnO{0SD?z;%y{M^8UYx{M{Lpv*OTy+imerNm{JNo>tcj%BV&@ zGGTMY{Bf0JsR=L?PnZArTK7w%(Ig7w7QzSv%^d`&1j49MPX|~{EzC?Wg&Rl`-W(fz zMLBQ70Z}4NNKO3l4U0C;2&95_UGAv-xy(R@edX2>tlfg%bDispiy0{4F<*LsC`npY zrgtv%@Ymy4Q<0$iu7`Fn87GDvH$7fB@=4V^leE@1Ag2~$P*JOe;ePCy|KC8gZ)l{b zT< -S_#mit|LV)Oc83F18Tv21Lj~ zA`afl`vY)QgG+^<0EBj=n!mclqnkhb8=<=J2N|CT&R<)4@CfcFym#y+!sMm;gl(dD z!@}* dY z$}*))8WHw3;C*6AYy+Q3kqCX4un&YFvUdD#Fjx=pHW_wO;ga7zqsG-xN{RmY8aW|} z2Is*e)+VynF5-ms15Nos5Y9Ld68;3gJkx&G{?~^BZ -0WZtEdyRe_ntCGP*YkZp~;y;_%$>{u%4U%cq zu+xkSnnqlj714&VHTRRJ-3E-t;^&*5*Oa|Wupi#>|LA`|(dK{&SIE<`=b6al4E`RZ zyd#&HdeLUbnikpFu??N(0S$hG1Y5lOi+~o40256?uP{~pjzRUy%`v#jy>ls&|3_N< zczmMd5o6{kXfDvcs5cy*!k#Oy4$SotoT-!ewxy$Mgkm;;~iBx014zon+yW<#X z vZY98FC5v+sX=S2?^Khxb!fE2f5oLq`faPtF!^TVU?= z;uL(&uO?Kn54(>w4OcBjE|G=s85!13+jop)z}^U5oT(A>E5f4R$ZN3@3-5ubMk*Gv zVU Ti^FEikI* z7rfit@71oIZ46uy@qOXNSl;)83GTzPMVN>yg8L5tn{WIE2V S=s&Q%VDWcYk{rGViytCx6}BJLTm4v# z5vve(N4lCPoNmNgqr+V6P0}7fCll&@j5Un?03yJFSJL*w;ckizFq*OMr1oMjx+GjX z{^2;8KUwY2czy~$MFP2k2|bx`?+Vf$WmaqY0K0d)vvSvHBDx Km047qX_!yX5H+0pv_!`g=p ziHO_v1AZInB1yvW2 H|R^ZVSyXvcjzt|eEcuVcLyx2wZ`=x&D5 zodU&K;19o>7%PmYJEil&i#pbf<{;#?d_E5Es%YFBN&NNhf2oMMENUN?testP>vN7< z?b*cyNBgI(BrW)QaR)Vy1TqLG4v=cM`ztWhe~QQDb|J6pJ-L^jSss2FdBg4|=egn! 
zW94p($Av+=`9r-1AozlW{7fh~vKv!}xFC_OqLSq(VAOP>E@~IoI-u^M7kZC0VLBRj z0DJBvs(D&t>Q6ps$5-}pqWH7RN9Sdotm)t5^YckJWlJJf8*_-=GR~jAyi79N%fO6T zi?gGq^2_piH2m;mGS zPl~|>4@wYAY{a58e?zdywdtU~?A1lj9vVO*KW(>n`!(<=p0+T4mD`72d8^s+#@N() z ZbgN(_7`3 zV6R(>*mq^Jm5t*&7oH}KBp)E-@DuFdl8$Sa5L1bATIEqP^=KWZv kB3@A;@wE4SM^y{@fUjvMAa1=-SYJGO25c3BJL_=-+Bi Q$96%%oqncvMk!(jcC}X65gnGJrAz?BSf8KxTLi;-`7q$ zZQJc!qbJYg=#zRMDiMTxstY2Mhdr=B`1ZeibJ*o2W8Sv1^}<_``Sah(jECXh_t>Eb zUSd`57i~sY{COeHEzn>; iz5aV<0#>!A&pjiYv29^IzX z&NNx-ujq0l4k+A)J)Ch1T2sh^)Yc!Ae=W<$xWRtQT9^4xQ3IuEN-s)v{890>4S2QO zjYK-$4zg3u*Gk?x2s7T9^HBx& YlF?seR4ESo 4oRK8JHP!PjHj$w&QZpTAy)U(H)K|>lUip z4jCqz+rg3iD30$R_(V3zvKpywPP(tdvif0`gCwwsgYAWkz9&4u<9to+QQqAr;p*5| z9o+wG09U!rh$k5VXyp*HogLPFk>)ugGbvYd9~(vWO_AFz9uhZ -OX0J7&K@$X?s zV5XR(MQSXi#8kG#uMEJ^6%Y8w+@3&(xylPIhuet|T~L=GTHu V|LF8j;#3_L6 zM{+r)> H8&&iB!7M+ccPot9S-Z$H_i*@HNZGPL60&_q^+$aW H|`^Tm6DId+bU1=Y &*m@y+tDOvnMrN$^mF--o)@%Ft#C z<#Okw62&phHBoTd@MFOb4(bi_Z)3f$$sN?d3~N6S=QQ&G#cAHxR#6WD7%0G;qBdv7 zPchFjepxCGO_?!#Sku&S@Jay>00O@rD@oCVdHY1U2?q8%9LMYZQYb?16S`KWJ|K>| zs?*^vwngZ!O$2|w9rl3N{#c3Tn?d4~3yW^8cm+R(n*d4Zj~tGh)sIKlu$f}F|6+c+ z&|bYCuHoK~jc3*^#32>i!NA5(I}=zL107jkz?|$nfA`10&?Rp&Fw-cHF&MZuIcCni z8;REw7H5JbFC~S+Zqd#Cb6)=`CtGo@<+P?@@bu{Nw6)MF*2x$s;wiY+A(G|VvO?Lq zQomBsP(NSr_HWBSW@*U)8f2;9U%~I-5z*;_@va0@n=9{=t@a<%lqCG=c&^7@7Jf0R zW<5+MJauGwpB<;maM>T?U~qd2le~~cQ6Hi8Sf2yRf&FGWChQ8|_Rahp1xqj-yG8^X zsTD^l7dD8Ul*ri>dE>6DSvwJo^=_Dw#tp-PLU&}m#@2mD9h&Ip8!T-xS@k 3%doj5v^e-6r^UcBqggxuYOMaBaKd4TF^%PO}fRUJRAA3Et$_Wbr|?wV{ {2t04nSgMf?H|B1IKH0}=WOTV E{%1aVrx`ZAXl*Jbf^!FFyAT zk-7O$L-Y|7SHGF{?-bgRy3<`d>D}#Sz(rsX*^vTCqOcKu^DKjT1eChw)ubRhS$~v8 z{tauZrT8$RRs~9=ZO^A&g~0jVmx#%4n(C!Hp8)Oi-A>hAT~Iy^*54gA>&9w6*~uR3 z3Hgk}*7 6M_;I~j*DU@j{O0j#j%Jo=J&ec+@ zn^3*p5ND1a?%dAUP)=dA995Yf3dtbECH )R8wH9T;eRh=MR>|MbhBTeQ z#%Pa|NKQa!`6FDroJZmoc>Gy(CChWG+V6kNpjMIW)TO9=D-*6hK1L^0L5w8N63W%5 zOR N >h=1aJ;0LpcFkZRU^VPZ`HI?aU%qcOmf2QCxCaskQ&hINnXzgM z#Q}E#zp&ih?i;ahaNNEdZQ6U59hj>E&8t)!ldh0=a|wfl9kLb2PG|x_DHx#}rljsP z{@1<0u&|$3aFpvl$kraSML-vvpBUY{jT=CuukB^u+>0 q-L_N8_nVw%GOdXq?ubFm8u3M^^F_Npu~`N;X`9CgT4UR8!Fo z3^9K(XIU}-(mQdr!oD4L2HrCpYeVg;9La$=)RV=OX7Ll|Fv&iRi?rt!?B@HsFI$Ok z^I)yAiuEgqa>P>oisUmfy9kX)(mx!~51CeWRPbX@Hom&>+hs{Z`;KI1DN15K(=BC_TiQ=`CQ81b+RJNhgY&+g$PT zoGIofeq+O7@xrPb-KbH0I@54_`xKPUl68SLg7_7i?qTp92qU@=U<>~&0PYC{9t*D3 z+A%W=AcL51X*sH-2*GdgDgCAN?5_d;z(HU&sVn!Xwa!9EGg}K|7N#bW5%{v~UzWPM zh#;ztPL!>)Ef7cW1mpLr{^W!4>$kcwA~g=AQPIER#;wnvEu~iCYRNKakGa(EcA1{v zlgv<5x)zSlW>$osxz|1MZ)|h3agf?HeeA@jMfm(x9OCA2y<6&Wej<8< U|Jxg#mRPKoY0LKuDnp7JgE{AR>RaLUE! 
zi^D?x(6CZ@+aj%SfL!o(uuKRwJ57OvsUh|@IRiVqTHpU+=_&)FY@06KNOwsiAtfcT zlpx*R-QC?K-OUn;bV|ok(ka~y(j5y6eCzXmzwdu@UlZrdnHe}Ay0-5u?%iAS8GOnr z&cc>pD`P25*oI$Ks3dQ(6BpoKH-6W1gU)~_tfhb_wyvlv^|3>O8;MBR6|(K&UEwn# z&kHvd_EZy}>&IWn#I30?o6)XY#!D=pbPlWR?PgmX`PbqNIZ7MXvmFi?0%cTjW4H%g zqY#`>Q9B9Tw3+2OJe{nM5H4?_o?82ue1}Nrj~?PgQ*25P%p9KT!@4}nDWyPl{P1Ej zu3X7MYAQSe4O&P5laHeFeOh!G!bd*3=@edL+$IR{Jq^f%TBe+SPs`Vm@+KxMW&=#m z#4SJN-hpy-pGDi80_PDJZ7xa$LmPz!fP|rd5lOqR*Qf63K=Ui~Vq6rLpz}`^Sem;{ zy7PM6ZgJ1cD--hCLRFO=4>w*rkFd at}C0 zQYsF(_Yf*lA4e9ZkB`4sh^{^J0P6w9T=#RnwVU+lgf`QTNG23nj#BdHgQU>$ECdpt zPLP)4CZ5C5_rgLB4Iqv!3O@cHbndQnmY7QdVHXXhyd=woQUjBCEyI_34C9}piZS*) zyja=7C;dmKq=~M1(Qn#bco~!Ws8~W~_em5YL>Ur)$^Qn256Hi Z&al0w&|Jvpsis}tO_!aCD8l!ZJb(g;q;Jf(TmFC63>O!-2 zC@|xGD>@}<5hwK=^r sCEJ#}Fw#AmJ+8JVC%YW7iW8gy4l T z)4W$SNh!*ls~Ozfd`iC7BHj=bb0-SkZVIcx>Ts0>Ixc=JC8bI~P4qR +0gE?FJkZKb zjHll*chmX~j%R2@bQ}kAW2|s=LTO|Sk03JHBb&k`&j^;`1#q*$jl{2D5- Vad4Ar7Lalk6Y8cNf^e$Ve976$QO-2hxvSdUO6PUm_CPB+0?Jpa BD-!ZO$4uyJAHKR^wC@} z>wGFGRAL??yV$|}9OM15FX+~vf`%Na@uE2B!q^< l=#~?&aSgd>d?|Iys@{L+R(THf^8CXhTWg_ zkMu+nW_ EX%PG&@{~!?v*TQWuLjUe+UbC9 ^E{_>8j;mYGC$(1IXu)`Twt!Ynq2 zL ?nvHp0QTa3-W d_je3;D6 zKYAGXU|#-LC`r7?gd4V8 kD+G|H
?kKpjFbUy*g|{gC3waT{m@#H=1#mp+FR74S z;PjxC@gOe@_IK*V%qV-d!iU0$66s2|;&)fe0h1|_gB>Z$w-enlvTF|$lR>RF6vB;} zLPU#IU42m@UjeVvPe`xsmQm*j-6n%vYs#${i+ft{x3V)XF9!L!MSofm>@1`xQR9 wiJCtn45>6=EgsSIO@P0 zu8bvk@}<>#*Fui-QpgT0r*yd(IU0*;9&$u2Hh8p?_^6h1-ckmQ8s_9eK!H17PHk5s zGYZU5#)ez>tQz)jxE}lEm&X3&PZ|Z3Y)u5h^f QHbV4CKr$6!sjuNf&n7n7vF%sr?ke=9yqGB}OasKI6_| z ^@F #;@AnnKlDEP?9Dsa-kK8VN@J?Sul?p4JqAJcy3X>!|YDnP_BW` 4}5pwmmle*CPoa4o1&Vx-{ zJ`uy~K9)|@A%|bMgR~8LG5rXixV;78FTP41_Z1acPHi8k9aY&k1zH*zu{fjj6h-i$ z2T1oxxFz0`*IxE(@c_Ke;~OE|P6a?UE)ZAkxYumlm#lPy?+?0^b`WG*c>XmX7|`>U zK {?j0-%*P&>JT1Hx+ENk(2A_`)O=U9 zf3J`p>+p?eHSQ(BzHa-?pm$0zx}JW#v(3|>pY!f0on{H44Sm9jaKZS7+z2ez^PFJb zZ3Gj`B`3n33gjdUpRzdg#zL&T%HX0~DI)2Brw8EuW>=>Fq!MtFaWZhRNZ}(kqD>k~ z5wqPKKpbC&ZGB$5ltJ!T;MQyi8^5-szyo5&I?c0$uGc>ko*+oUG5}RLt~XayG<;-{ z>y{1`8S1h9(-@+r9;g}#WPy1S%3!sIOQ2N#v!}Q>HL3?k$P&%3H1hW21|8QefV3J~ zE @N7IiRII{Jc ?rbFRY_2kg4FEt8UA;?h7t^ zPHBtVDhe9|kJfB{S2pLr?jDn&SGa0U>VLcLR7D}A9RyjGkvSFk6TAib#~PvuvG!-u z15(+5y9dZQH00o+0&XJxBM213_)retSbQ~PdM{~@ofnX@>-#}Z$M{P4J)2ySM*D}4 zc7!&QN 1aIv9L=YaX@;OS{Xw*Ak!3CDxH`30uc3Gzfe29C6h zi<;t+Nu0SJLsA!pkLn|V{p@&iK$6^nmQRoxR_=SR;_bNx#zUc8HFf8&cn|@?*32wf zRh2;1=uP-bQY@Pb1Yotb%8$bJC<~=Y)ZpKYVx7eDvaitRi_jhVdw&X6fvsHR5M-pa z*$PE {c6oJ3s8#F)2yd(!&LkLUtDal+$|%Ujc13;9vGdaCuH zhEVZRF@azL{68>(+b?k3>*OrkCUVYnUlab4G?g$P@FK5a1aYGIN4R7|ubodiXPJi2 zrsjpCww>KmVh`>CrmuL 03LXcY$0D9-d=2E<~>C*C{U6d^{#K=&68Bs;unooV-}L6!2%>M`1?W z6REX@)famNBU9#Mhu)7QYvVn5JzHar?yv7?t#Te&acV6}$s01$S{{ObNe8BMopvVQ zS)e0Z=nWE^`3p15MgYqcxe72M#MznX8HMZoB;*eJNISXI)aU#@ohiw?(u%3KsfsWr zY$ErqivYc#A0e7|znaY+lZlkCxm_R#)G&r{eI>m_H77M)&%>JZRyGLDPObI{YI?^g zIICRlr )l=ng(Nx7ZuhyLCj-<8aQJ}8+iKq8^!g|f}S_@FGFxZ zIX$3!fE|t4AH=99NL)2IRuLN8kp$odp}~z&e@IM+7>NK% M z#!1-fFEFER(8OGdcjtz1t0ZX huQS9-4ojm826&j+EFKf zr7N+A7^6cAzQAMW#hH#;yRK&ciK;}?8UPl^!57VM5(2wkRKt=19Lbn+xdg)80H7#L zs0I21Gg%h%bL_@stbt~wxQ**(`U{y|DXNeg1icmtyCgG}l8oZ?R%G+`D?AWub`YHe z#cFy=s$ukJ4zd%ixwO2I`@K)~Nr~9A6Owx_@&1Y&08ux@0i#K5f~bEG ?ap((;jeA0W`)qw)xb5;RIo_04)^K9Ed z#OJcljpb_`pmxfyqp*1mO9(o#r5VN(hPBD}7BF6T_WMq$tbp>tsc +to+qwQlVR1AeXtq1M)33wZ+^Fh z+^a4$zdIHBxz}4qbq|bP=XL0Z>A?KnP>i~%kh}*C?L!=tYlMrx#MWm0UgqKQeP?3k zPy5-61Jk>45|i*ovV&{?Sx#X)q)$ah31-r7NKqh`n#FRqiZSNRp@i#zN!)hYGe6JQ z^J$+htkXZ)EVvu&zex}e_YK4BT#2^2CId>Yfl!uzsK;iVq#G2#cdmDtv?2It3PIiE z_FUtfj#*T67@|@H2~RKkd7qT`ZWwvKn+fEc^JUe=)#+6owCnfiL4{!?TtGO%x;J`M zkh>`7xyY}49uq&;p8Iptz2WCe^RMP~V-3XC&ARd)2d^HDdK{Lkf`$&4VGj0 g4Os!>$NRKdA2B??fc zRfC1m`Ir_?v@zyO=Z5KBDy_7VNJ)610U^_5>iB)vXU15giF$`&eSXmQ>!4YfCbAX3 z(D}0tS~+GtA;}B5tuX|Z6twG9TSzE%mNR280I?;lK*f*G%ed65R~)xBJv)|ZfnIUs zZT|+|Rb}H8;uo?2{FzIMZY*6_`b%aOjy;9p0(O@dn=NnrDwB>LC*O1=_s1R|jL00N zE&mUD8Z@+W)O{FD<*z2zc>&0 6mw}8P=L*9oN2dI_T+6Jg+T38iZ0Bf%jjMM+L{MWPv+Uo(nwe4r;s4 z_YgC +_KAl*xQ1~WgUFPY;`x4!#u zim_d}1+a_GtdI7;0n~bS3kU$Tm@hJ!K>R&MDQdh|T&_Cg9!Gj%$HyuHxgKp&yX@qJ z>ug*ItB`RZO3&1+tMHn$eoHT+oh;e*o$wD%cGwsY%naJsk9k>Fi^vtRi1$eY7c}G> zA Z8!;2k7mtx@*eII#mNDigF7 drh-+63bsoCub*`G?|J46uD6I&YIdX6spE4vT%TkZm?t#@DB%3Ij4PYaL}JDN9d zpxEwatM@YpA@nFr_HufmQ3+cKdR^yVODUq|Z)nSn%%& PRd}yO`(*D2z&j^^&MELz5RXF&mfCFT9KFfDaLD zF^76e9qtrR1gYQLTvz`Y{xPCg(>~ax$I^q4OK^soa2)NXlODOWe&%txlzVA3TuA_= z+-RrJI3qFr b@M241({S38hx$Y0#$lZQT)lm(_w@h^@D1{fg!&s
t`k=7Fl&FD`4YP!KwFTCB|*OTyop*YbJ}DJ zt0JRY^pa&XgQqTuSkwJmQrP!l`;*u-e%{LW-p)mhnoS^8<*#J_u?yd5 =}< zrfhaLnlzE>UOqSeirSd!``m4-`@!-tcE-PZCi&>^c*^%=o=K^(ieFs|l&~TzJV x zI@B*CV{ftVm!o>^u57Bk%bwm^2HwAG;+v @laP}(W8>F8BW@|FUI1$A%C9IVuH zLA`@X8FQ!-k1-XRDB1_VT?1B73B1PJ=4H6ZHde6l7$5$T^sVm6HcPlDK3Fs0ZNfzr zSF1eARLYQxTirf6(3~i{{7@?zXz4i*Ieu!Eg;5kd^=oZn*IsXDY4GkHWL5Bwq834a zztwzqDEo-GWhRE@P2)|g9nU?3VH@Edu7$?}(;ZKT>KWr-iU6Q4M7_d _0ua<^2M8|rBrvyBI8aAcjOw_@+s66Cs)R9feTBUD|=%JQ$c1~)QhL1v`g%8l_ z(%pQgfukZu&GtVBkKS@(ae~`Um1m;Kh0Ce)Vhc_pm+WtQAb+P2_h+%$d_@Q!96OXw zViUxdFyG75z f#V3zNGYK&CFQlGrIsL5|nw19j)-* Y$Sw|&b2Mb>&0p6^EWasd4NXC1X zG5ZcPcF=$&Lb(g I6o z_rF<<7VuJBkYT&!)81ylDtpvd^IJRDrMjP*%xj=aF5^U7CY)@q@tBa~o`t$a`L)%a z9QF|uRkGN4hb>Di8s{UaKPH@hik!%{*76)4KQLK^rH?TZ2yvJt_iJAN1WMq_LOW{m zBHI9JQ3*ql{Pl?rEQaWgAx4m8tD;D4G-#=0ravr}=o6fDZu;E!qUuZVH7;BLhGgRe z*fY_eA(M(OtIquFpyJ2s+8;-AJaabppJ$g^zv6Xx6j?hdkt4AhrzWnfXG^oYi@rKJ z@eduQp3NqVwfCd2 NqaW!8gvJrOFeT+OdS!*u#?IB;I!gG1%gq1Cq% zb_DU`%8~L5w+Goe@GScW_#9?dH&{rLm_)(uV$3>2>l^w?-9s9uku?c2-h-FCLm !qEWQ>ddZvDi7;BMwSGVQN7u^NE-bXBB1TL9Z_C9u)ZoRsPa$}wqF1}Pc zT(xq2K2SMyJ@q8~CE^5G5|rD&fF@}))3^nmQb%0wC0#dimQ(Agm6#F4Aw#7MWBPcb zA5nvt@nZ*k74T&OidF`0jHHTeP~nHxjn#e==Rgqo>$r2OzadW0T=%U_t@J9cf5okO z_2_||Aj(7s@fJ^e^|ZrvHrfe*3AwNq_OUp3apW`ceYK{>yu27So`~QaHWSl#;_DYX zVs|pmYj-VnJscbRChgdKz6R2ekguR@M0 f9D9P9hK0K8AK?7R)> zB@A(KDWwBLSN{l??XdHb<` 4J?__w79*ud~(riLQrJ2hGXNa +_F~=WCZHG|e<>xRZ3 jR*3Ov;0&}h>VNgb=z z^2hbpyKEuQkln$@$6tf{uevSATpuS-EK~~pZA$xnrQT|WqDa#Qk8AW0=j7eilN&Nc zZl?EP)|H!&7=b0?!2r|pfp^5&TJ)AcEcZEasl>{0ZrX4**%Z|_M{_geV^QPh85f)L zCT(qR+EO3a!QWItZ~eq665bbUf#5E$x6PVOiq&iP#p#G=to4@_7G-*a)X)_Ll*JmS zHO}|Q7mP8s*!f8V*RBU%#d~;3@JZK2e2a^3LbT+8^VlMZPVw*SSB^6=R+s8+3ixE5 zb9SBFhI40{EG>Dy_}yk5QJZVt$omAo&;e)iK?VVDm7ZZ8egKv@r)uAu^T^t;D+6q= z4&7JwWU9C(QlE1a5sl+1YigGxNGUUDV=7597= S^!_?K?ED!S!}xt^TH^Xo(s zVgq8x3j3!}{RyuAdppzT7V}G%r3_)y#fjP+Gq1Dt=JoYUg*wP4ZbR-iCAd>H@GGmu zmgn7awjp*wRm6aQuFCyV^>wy?AoH})4z)T9FeWrKR89o&pC6!9P>0uA^Wd$sqI7Gg zq74}i;v~zPk7d8dhTe87T5m6;%o;#JYx zrS <$ mIAJW+92=!34At+%J~+eEj- zLfi)|j9R|N-iZ>}V=)6eaY<9(uW3cY?|M&a+0;^g78Y(Hm0|fJM&OHGKK(I;+muMj z$kD-;igfC1z#J>HET9!PY9)#{h}!YhrmIuJHMxq* HyhmRpchJM66C9t$w+IqNtch0VFRjZN+qd*+RTHr(Y4rK?NVWSi|NoAH%KaQFYCP z-CWGf$Le_F?lR!lAB6>;*%}=>DTK-=YLTNkbbD^$eY)R*m=0^j#U*yuF+mo?wZX0Z z3fDmv5Yrg!0zsmMo=hKHoEf<<=1))A`hm=mh0< Yh3s zU#PcmC+m8OQ^cs#u&rlqCC&!qeF{YUsd&^3ZqmY5b)MNct9crVevBGlOTE7q5Q7+x z-($b*UnA}_qw~+JU?!)Qu+b<~jp2jkp>1gb)Y^+NW>Il5v0nO|?9fngX8XV4e04dZ zd}PZpmTf8gYVbp{Ig-!774jb#P?5>*)^gXoA9=XpoLELd4uL<;VF>9=rFfe)j&_$U zM++jx>u>HeNTCaIzQzXM1+czCrEPplU1%e&lAs;Q2jK2RKa?$#l9`0KT&-fn35Q;O z<^({VJ>uD!isVcXU(QS0Q62&? 
VvNHrxJNKIE>7WZWeVlWJeL;sz%%3VkNBZ$-;s>0 zl;ye_U(61@=Z>ATcQLC)GKW~HdvMALsQ!5**m;;c&oLq`vHC8abCTsX4~Vp8aMsct zI}E?xt#x%%F3!_0kKachb&Iw^Bp&0I6G )FbR%$ zLmm53lY_YmGui^CL^}hXd&t3dX*`d(yU?qQ$Ri*5R^IsM6|12V {RigM Ha#UEq*iZ+1@`JkjnnZ=`; zq4XQA?P9w*hVa$MggK6@670F`jcWiW1cq&Lw9hJPF+)BT8P)vqDO(&8w~Qc^l3%^v zeL5ll+oVd5Oj@-tbuc(4^42U0(^#BbO+rNm8!#x~$HcnK<&rwUK9AqeiTyFxXdYo{ zJ-X9vPY(k;)sDxnu BGH7~R&~v9s90(uOEC1TsabtI_Ua+(`Ep@oap(2d%*8*tIM#}_A0z1M3m8^J zg52i`a&(h;x;E`EE~D?eJ<&l{fEWdmVl`qcE jBc~`obb-F2Ln9 zam|F<;2a;1*ZVEWlk^Zp^NS^y+*AB-0QUJ1R3r@4sABezR#MeOxC$T!yZJ 0oM8+sZZKWemg2iIx~v>u$O z8%+rGv+u|gv)9v#V|6J(Yt_LOl7XNt+(>=QGvXPl8M+Jv(DWzbUE=m^yQfZ}9$Mks z8J=xN@LYa4`YMc%f0!Bkm%V3i>MK~D2Ed7mJqovB1Jp< @Ywr#5HD?4Xhsx(cp)-PWVUaZwyRN1G8sxxp_n_qRlg|x_BV0AvWXz;9 zWc_um0<@(L*&~*}H&h2&?{>N=`D*Ce#av#K}!ia^$%Uh!bMI~>-NL28Qmx6)^k z54lg`!kh1>@i55K5PT4?m*M*sIA+cM9&hF(?*|fPPFaeHnKE va3LvlIIp%%vK>(8rzH zY}Y`+dEp{E1rXq&LtLDS- 6|dQiPs$r7-$F< zFtI!wel`-Z<6%Z8>}BzP7l=e uw%*~FtaQ_7md)KNT)EX>b!cNZiAAL^q`2J5OY9Q6$h&Ao z={>+CN%D{ qkOHrAh*GCb&_9! zrRV@yIU&i!Ny2nK3sa|K^WI}v`Gl8l9at25bgTFb`?)brt}rS7&8m|#1!2=vARW}& zk5G)GGvUTyj7Rql6ck5OW_bXAL ^@RfHlqkE`Qgn*i~Pp060qd 10uvf7bUu#YameB4Dc zp%Nt{jPSxRRB1g<*_Z3R(_1b7Ql?P{>UJD?&*(Q3#A`G8`i2|J;0P6uS!N!m46TNA zanlAkuBEs-3qP31AFe+*0Kw}xDVtC^bwmso-9q=`67j+1u5lm>p>s<8R3`SL?4X5# z QKv@)H|X_9&<8!T#BY_m<>ajzRU|2-A&%W@qKu(x(lEFr!{x zhL3rhtq;o1?2#V5U%#6HL;=R)ZY#rwbXVLn0WFT@70M~eq{JK2=o1}TA*-{tFYdH} zS^Ho|%zqVSN;%Zj3IB=T=}i9B^V4 6WBpTgH? zoD3u6C_dOn5%?*t8y1X>-&<$z1GA~$2*wkmaLV6BAW@}Xkb|%OjlXdDoPGjjA@@-r z=(W)a_w@N{PO!2hap%fj6iHZ$B4v~4> >lzWfuSdvLZo24-s@IZGX-LAzd0ksmwZc7l?(&fTG7 z8#|065%RxOWWLxMTei|9274Iq{g{0^k`0s!j?M^sVM#M*hH@*k>iEAx#Z!{-F4MxU zNN65Uv+GL5;}L{| r6P>q`Gudx-NtY?VSVx}>-|Qf z&CMN!%7