Various fixes #95

Draft · wants to merge 16 commits into main
53 changes: 38 additions & 15 deletions CMakeLists.txt
@@ -34,6 +34,10 @@ elseif (BACKEND STREQUAL "hip_amd")
set(USE_HIP ON)
set(USE_HIP_AMD ON)
message(STATUS "Using HIP backend for AMD GPUs. (Experimental!)")
elseif (BACKEND STREQUAL "cpu")
message(STATUS "Using CPU only (no GPU backend). Experimental!")
add_definitions(-DUSE_CPU_ONLY)
#set(USE_CPU_ONLY ON)
else()
message(STATUS "Backend ${BACKEND} is not known!")
endif()
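Note: a sketch of how the new `USE_CPU_ONLY` definition would be consumed from C++; the file and messages are hypothetical, not part of this PR:

```cpp
// cpu_only_example.cpp -- hypothetical illustration of consuming -DUSE_CPU_ONLY.
#include <cstdio>

int main() {
#ifdef USE_CPU_ONLY
    std::printf("Built with BACKEND=cpu: everything runs on the host.\n");
#else
    std::printf("Built with a GPU backend.\n");
#endif
    return 0;
}
```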
@@ -49,6 +53,8 @@ elseif (USE_HIP_NVIDIA)
# This will hopefully work in the future
# project(SIMULATeQCD LANGUAGES CXX HIP)
project(SIMULATeQCD LANGUAGES CXX CUDA)
else()
project(SIMULATeQCD LANGUAGES CXX)
endif()

set(CMAKE_CXX_STANDARD 17)
@@ -109,39 +115,42 @@ elseif (USE_HIP_AMD)
set(CMAKE_HIP_ARCHITECTURES "${ARCHITECTURE}")
endif()

if(NOT ARCHITECTURE)
if(NOT ARCHITECTURE AND NOT BACKEND MATCHES "cpu")
message(FATAL_ERROR "No GPU architecture set!")
endif()

set(USE_GPU_AWARE_MPI OFF CACHE BOOL "Set to ON to build gpu-aware MPI code (default = OFF)")
if (USE_GPU_AWARE_MPI)
add_definitions(-DUSE_GPU_AWARE_MPI)
endif()
set(USE_GPU_P2P ON CACHE BOOL "Set to ON to build with GPU Direct P2P (default = ON)")
if (USE_GPU_P2P)
if(USE_HIP)
message(FATAL_ERROR "GPU_P2P is not supported by HIP!")
if(NOT BACKEND MATCHES "cpu")
if (USE_GPU_AWARE_MPI)
add_definitions(-DUSE_GPU_AWARE_MPI)
endif()
if (USE_GPU_P2P)
if(USE_HIP)
message(FATAL_ERROR "GPU_P2P is not supported by HIP!")
endif()
add_definitions(-DUSE_GPU_P2P)
endif()
if (USE_GPU_P2P AND NOT USE_GPU_AWARE_MPI)
message(FATAL_ERROR "USE_GPU_P2P only works with USE_GPU_AWARE_MPI")
endif()
add_definitions(-DUSE_GPU_P2P)
else()
set(USE_GPU_AWARE_MPI OFF)
set(USE_GPU_P2P OFF)
endif()


# Additional compiler flags
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall -Wextra -Wno-comment -fPIC")
if (USE_CUDA)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall -Wextra -Wno-comment -fPIC")
set(CMAKE_CUDA_FLAGS
"${CMAKE_CUDA_FLAGS} -O3 -Wno-deprecated-gpu-targets --std=c++17 -arch=sm_${ARCHITECTURE} -Xcudafe --display_error_number -prec-div=true -prec-sqrt=true")

elseif (USE_HIP_AMD)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall -Wextra -Wno-comment -fPIC -fgpu-rdc")

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fgpu-rdc")
add_definitions(-D__HIP_PLATFORM_AMD__)
set(HIP_HIPCC_FLAGS ${HIP_HIPCC_FLAGS} "-O3 -std=c++17 -D__HIP_PLATFORM_AMD__ --amdgpu-target=${ARCHITECTURE} -fgpu-rdc")
set(CMAKE_EXE_LINKER_FLAGS "-O3 -fgpu-rdc --hip-link")

elseif (USE_HIP_NVIDIA)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall -Wextra -Wno-comment -fPIC")

add_definitions(-D__HIP_PLATFORM_NVIDIA__ -D__HIP_PLATFORM_NVCC__)
set(HIP_NVCC_FLAGS "${HIP_NVCC_FLAGS} -O3 -std=c++17 -D__HIP_PLATFORM_NVCC__ -D__HIP_PLATFORM_NVIDIA__ --generate-code arch=compute_${ARCHITECTURE},code=sm_${ARCHITECTURE} --generate-code arch=compute_${ARCHITECTURE},code=compute_${ARCHITECTURE} -rdc=true")
set(NVCC_LINK_FLAGS "${NVCC_LINK_FLAGS} -rdc=true")
@@ -299,6 +308,7 @@ if (USE_CUDA)
add_library(CodeBase OBJECT ${SOURCE_FILES_BASE})
target_compile_definitions(CodeBase PRIVATE
ARCHITECTURE=${ARCHITECTURE}
GIT_HASH="${GIT_HASH}"
SINGLEPREC=1 DOUBLEPREC=1
COMP_R18=1 COMP_U3R14=1 COMP_R14=1 COMP_R12=1 COMP_STAGGR12=1
${ALL_HALODEPTHS}
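The added `GIT_HASH="${GIT_HASH}"` definition reaches C++ as a string literal. A hypothetical consumption sketch (the fallback macro is illustrative, not project code):

```cpp
// Hypothetical: consuming a GIT_HASH="<sha>" compile definition.
#include <cstdio>

#ifndef GIT_HASH
#define GIT_HASH "unknown"  // illustrative fallback when the build does not define it
#endif

int main() {
    std::printf("built from commit %s\n", GIT_HASH);
    return 0;
}
```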
@@ -361,6 +371,14 @@ FUNCTION(add_SIMULATeQCD_executable TARGET)
set_target_properties(${TARGET} _${TARGET}
PROPERTIES
COMPILE_FLAGS "${MPI_COMPILE_FLAGS}" LINK_FLAGS "${MPI_LINK_FLAGS}" HIP_SEPARABLE_COMPILATION ON CUDA_SEPARABLE_COMPILATION ON LINKER_LANGUAGE CUDA CUDA_RESOLVE_DEVICE_SYMBOLS ON)
else()
add_executable(${TARGET} ${ARGN} ${SOURCE_FILES_BASE}) # single target
add_executable(_${TARGET} ${ARGN}) # compound target (e.g. in "tests", "applications")

target_link_libraries("_${TARGET}" CodeBase)
set_target_properties(${TARGET} _${TARGET}
PROPERTIES
COMPILE_FLAGS "${MPI_COMPILE_FLAGS}" LINK_FLAGS "${MPI_LINK_FLAGS}")
endif()
ENDFUNCTION()

@@ -645,6 +663,10 @@ elseif (USE_HIP_NVIDIA)
hip_add_executable(_SimpleFunctorTest src/testing/main_SimpleFunctorTest.cpp src/base/communication/communicationBase_mpi.cpp src/base/gutils.cpp)
set_target_properties(SimpleFunctorTest _SimpleFunctorTest PROPERTIES
COMPILE_FLAGS "${MPI_COMPILE_FLAGS}" LINK_FLAGS "${MPI_LINK_FLAGS}" HIP_SEPARABLE_COMPILATION ON CUDA_SEPARABLE_COMPILATION ON LINKER_LANGUAGE CUDA CUDA_RESOLVE_DEVICE_SYMBOLS ON RUNTIME_OUTPUT_DIRECTORY "testing")
else()
add_executable(SimpleFunctorTest src/testing/main_SimpleFunctorTest.cpp src/base/communication/communicationBase_mpi.cpp src/base/gutils.cpp)
add_executable(_SimpleFunctorTest src/testing/main_SimpleFunctorTest.cpp src/base/communication/communicationBase_mpi.cpp src/base/gutils.cpp)
set_target_properties(SimpleFunctorTest _SimpleFunctorTest PROPERTIES COMPILE_FLAGS "${MPI_COMPILE_FLAGS}" LINK_FLAGS "${MPI_LINK_FLAGS}" RUNTIME_OUTPUT_DIRECTORY "testing")
endif()
target_compile_definitions(SimpleFunctorTest PRIVATE HALODEPTH_0=1 DOUBLEPREC=1 SINGLEPREC=1 ARCHITECTURE=${ARCHITECTURE} GIT_HASH="${GIT_HASH}")
target_compile_definitions(_SimpleFunctorTest PRIVATE HALODEPTH_0=1 DOUBLEPREC=1 SINGLEPREC=1 ARCHITECTURE=${ARCHITECTURE} GIT_HASH="${GIT_HASH}")
@@ -754,6 +776,7 @@ set_SIMULATeQCD_property(configConverter PROPERTIES RUNTIME_OUTPUT_DIRECTORY "ap
SIMULATeQCD_target_compile_definitions(configConverter PRIVATE HALODEPTH_0=1 SINGLEPREC=1 DOUBLEPREC=1 COMP_R18=1 NSTACKS_1=1 LAYOUT_ALL=1)
add_to_compound_SIMULATeQCD_target(applications configConverter)

set_SIMULATeQCD_gpu_backend(src/applications/main_CheckConf.cpp)
add_SIMULATeQCD_executable(CheckConf src/applications/main_CheckConf.cpp)
set_SIMULATeQCD_property(CheckConf PROPERTIES RUNTIME_OUTPUT_DIRECTORY "applications")
SIMULATeQCD_target_compile_definitions(CheckConf PRIVATE HALODEPTH_0=1 COMP_R18=1 SINGLEPREC=1 DOUBLEPREC=1 CPUONLY=1)
24 changes: 14 additions & 10 deletions src/applications/main_CheckConf.cpp
@@ -8,12 +8,12 @@ struct CheckParams : LatticeParameters {
}
};

template<class floatT, bool onDevice, size_t HaloDepth, CompressionType comp=R18>
template<class floatT, size_t HaloDepth, CompressionType comp=R18>
struct do_check_unitarity
{
explicit do_check_unitarity(Gaugefield<floatT,onDevice,HaloDepth,comp> &gauge) : gAcc(gauge.getAccessor()) {};
explicit do_check_unitarity(Gaugefield<floatT,false,HaloDepth,comp> &gauge) : gAcc(gauge.getAccessor()) {};
gaugeAccessor<floatT, comp> gAcc;
__device__ __host__ floatT operator()(gSite site){
__host__ __device__ floatT operator()(gSite site){
typedef GIndexer<All,HaloDepth> GInd;
floatT ret=0.0;
for (size_t mu = 0; mu < 4; ++mu)
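The qualifier reordering (`__device__ __host__` to `__host__ __device__`) is cosmetic in CUDA; either order marks the call operator as callable from both host and device. Dual annotation is what lets the same functor drive the new CPU path. A standalone sketch of the pattern; the macro fallback is an assumption about how such headers typically handle non-GPU builds:

```cpp
// Sketch: one functor usable from host loops (CPU backend) and device kernels.
#ifndef __CUDACC__
#define __host__    // assumed: defined away when no GPU compiler is in play
#define __device__
#endif

struct SquareSite {
    __host__ __device__ float operator()(float x) const { return x * x; }
};

int main() {
    SquareSite f;                 // host-side call; the same f works inside a kernel
    return f(3.0f) == 9.0f ? 0 : 1;
}
```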
@@ -25,13 +25,13 @@ struct do_check_unitarity
}
};

template <class floatT, bool onDevice, size_t HaloDepth>
void check_unitarity(Gaugefield<floatT,onDevice,HaloDepth> &gauge)
template <class floatT, size_t HaloDepth>
void check_unitarity(Gaugefield<floatT,false,HaloDepth> &gauge)
{
LatticeContainer<onDevice,floatT> unitarity(gauge.getComm());
LatticeContainer<false,floatT> unitarity(gauge.getComm());
const size_t elems = GIndexer<All,HaloDepth>::getLatData().vol4;
unitarity.adjustSize(elems);
unitarity.template iterateOverBulk<All, HaloDepth>(do_check_unitarity<floatT, onDevice, HaloDepth>(gauge));
unitarity.template iterateOverBulk<All, HaloDepth>(do_check_unitarity<floatT, HaloDepth>(gauge));
floatT unit_norm;
unitarity.reduce(unit_norm, elems);
unit_norm /= static_cast<floatT>(GIndexer<All,HaloDepth>::getLatData().globvol4);
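For intuition on what check_unitarity computes: each link matrix U should satisfy U†U = 1, so the functor accumulates a per-site deviation that is volume-averaged after the reduce. A self-contained sketch of that measure for one 3x3 complex matrix, independent of the project's SU(3) types (the exact norm used by do_check_unitarity is not visible in this hunk, so Frobenius is an assumption):

```cpp
// Sketch: Frobenius norm of (U^dagger * U - 1) for a 3x3 complex matrix.
#include <array>
#include <complex>
#include <cmath>
#include <cstdio>

using Mat3 = std::array<std::array<std::complex<double>, 3>, 3>;

double unitarity_deviation(const Mat3& U) {
    double sum = 0.0;
    for (int i = 0; i < 3; ++i)
        for (int j = 0; j < 3; ++j) {
            std::complex<double> e{0.0, 0.0};
            for (int k = 0; k < 3; ++k)
                e += std::conj(U[k][i]) * U[k][j];   // (U^dagger U)_{ij}
            if (i == j) e -= 1.0;                    // subtract the identity
            sum += std::norm(e);
        }
    return std::sqrt(sum);
}

int main() {
    Mat3 id{};                                       // identity is trivially unitary
    for (int i = 0; i < 3; ++i) id[i][i] = 1.0;
    std::printf("deviation = %g\n", unitarity_deviation(id));  // prints 0
}
```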
@@ -54,7 +54,7 @@ void CheckConf(CommunicationBase &commBase, const std::string& format, std::stri
} else {
throw (std::runtime_error(rootLogger.fatal("Invalid specification for format ", format)));
}
check_unitarity<floatT,false,HaloDepth>(gauge);
check_unitarity<floatT,HaloDepth>(gauge);

GaugeAction<floatT, false, HaloDepth> gaugeAction(gauge);
floatT plaq = gaugeAction.plaquette();
@@ -70,7 +70,8 @@ int main(int argc, char *argv[]) {
int main(int argc, char *argv[]) {

try {
stdLogger.setVerbosity(INFO);
stdLogger.setVerbosity(RESULT);
rootLogger.setVerbosity(RESULT);
const size_t HaloDepth = 0;

CheckParams param;
@@ -81,6 +82,9 @@ int main(int argc, char *argv[]) {

commBase.init(param.nodeDim());
initIndexer(HaloDepth, param, commBase);
rootLogger.setVerbosity(INFO);
rootLogger.info("Checking Gaugefile ", param.GaugefileName());
rootLogger.setVerbosity(RESULT);

if (param.prec() == "single"){
CheckConf<float, HaloDepth>(commBase, param.format(), param.GaugefileName());
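The raise-log-restore sequence above (set INFO, print one message, set back to RESULT) could also be expressed as a small RAII guard. A sketch against a hypothetical logger interface; the diff only shows setVerbosity(), so getVerbosity() here is an assumption:

```cpp
// Sketch: scoped verbosity change; restores the previous level on scope exit.
// Assumes the logger exposes getVerbosity()/setVerbosity(); getVerbosity() is
// hypothetical, the diff only demonstrates setVerbosity().
template <class LoggerT, class LevelT>
class ScopedVerbosity {
    LoggerT& log_;
    LevelT saved_;
public:
    ScopedVerbosity(LoggerT& log, LevelT lvl)
        : log_(log), saved_(log.getVerbosity()) { log_.setVerbosity(lvl); }
    ~ScopedVerbosity() { log_.setVerbosity(saved_); }
};

// Usage at the call site shown above:
//   { ScopedVerbosity raise(rootLogger, INFO);
//     rootLogger.info("Checking Gaugefile ", param.GaugefileName()); }
```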
@@ -94,6 +98,6 @@ int main(int argc, char *argv[]) {
catch (const std::runtime_error &error) {
return 1;
}
rootLogger.info("Gaugefile seems to be fine.");
rootLogger.result("Gaugefile OK! (readin, plaquette, unitarity)");
return 0;
}
10 changes: 7 additions & 3 deletions src/applications/main_CheckRand.cpp
@@ -17,7 +17,9 @@ void CheckRand(CommunicationBase &commBase, const std::string& rand_file){
int main(int argc, char *argv[]) {

try {
stdLogger.setVerbosity(INFO);
rootLogger.info("Checking Randfile...");
stdLogger.setVerbosity(RESULT);
rootLogger.setVerbosity(RESULT);
const size_t HaloDepth = 0;

CheckParams param;
@@ -27,12 +29,14 @@ int main(int argc, char *argv[]) {
param.readfile(commBase, "../parameter/applications/CheckRand.param", argc, argv);
commBase.init(param.nodeDim());
initIndexer(HaloDepth, param, commBase);

rootLogger.setVerbosity(INFO);
rootLogger.info("Checking Randfile ", param.Randfile());
rootLogger.setVerbosity(RESULT);
CheckRand(commBase, param.Randfile());
}
catch (const std::runtime_error &error) {
return 1;
}
rootLogger.info("Random state seems to be fine.");
rootLogger.result("Randfile OK!");
return 0;
}
8 changes: 4 additions & 4 deletions src/applications/main_configConverter.cpp
@@ -30,8 +30,8 @@ int main(int argc, char *argv[]) {
CommunicationBase commBase(&argc, &argv);

param.readfile(commBase, "../parameter/applications/configConverter.param", argc, argv);
if( param.compress_out()==true && param.format_out()=="ildg" ) {
throw(rootLogger.fatal("ILDG format does not support compression."));
if( param.compress_out() && param.format_out()=="ildg" ) {
throw std::runtime_error(rootLogger.fatal("ILDG format does not support compression."));
}

commBase.init(param.nodeDim());
@@ -47,7 +47,7 @@ int main(int argc, char *argv[]) {
} else if(param.format()=="milc"){
gauge.readconf_milc(param.GaugefileName());
} else {
throw(rootLogger.fatal("Invalid specification for format ",param.format()));
throw std::runtime_error(rootLogger.fatal("Invalid specification for format ",param.format()));
}

/// Print out:
@@ -60,7 +60,7 @@ int main(int argc, char *argv[]) {
} else if(param.format_out()=="ildg") {
gauge.writeconf_ildg(param.GaugefileName_out(), param);
} else {
throw(rootLogger.fatal("Invalid specification for format_out ",param.format_out()));
throw std::runtime_error(rootLogger.fatal("Invalid specification for format_out ",param.format_out()));
}

return 0;
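The repeated fix in this file, `throw(rootLogger.fatal(...))` becoming `throw std::runtime_error(rootLogger.fatal(...))`, matters because main() catches `const std::runtime_error &`: throwing the bare return value of fatal() (apparently a string) would bypass that handler and terminate the program. A minimal sketch with a stand-in fatal():

```cpp
// Sketch: why the thrown type must match the catch clause.
#include <stdexcept>
#include <string>
#include <cstdio>

std::string fatal(const std::string& msg) { return "FATAL: " + msg; }  // stand-in

int main() {
    try {
        // throw fatal("bad format");   // a std::string: NOT caught below,
        //                              // so std::terminate would be called
        throw std::runtime_error(fatal("bad format"));  // caught below
    } catch (const std::runtime_error& e) {
        std::printf("%s\n", e.what());
        return 1;
    }
}
```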
28 changes: 18 additions & 10 deletions src/base/IO/logging.h
@@ -1,10 +1,10 @@
/*
* logging.h
*
* L. Mazur
*
* The logger class, which is used to output information to screen.
*
/*
* logging.h
*
* L. Mazur
*
* The logger class, which is used to output information to screen.
*
*/

#ifndef LOGGER
@@ -22,8 +22,9 @@
#include "stringFunctions.h"


enum LogLevel { ALL, ALLOC, TRACE, DEBUG, INFO, WARN, ERROR, FATAL, OFF };
static const char *LogLevelStr[] = {"ALL", "ALLOC", "TRACE", "DEBUG", "INFO", "WARN", "ERROR", "FATAL", "OFF"};
enum LogLevel { ALL, ALLOC, TRACE, DEBUG, INFO, RESULT, WARN, ERROR, FATAL, OFF };
static const char *LogLevelStr[] = {"ALL", "ALLOC", "TRACE", "DEBUG", "INFO", "RESULT",
"WARN", "ERROR", "FATAL", "OFF"};

class Logger {
private:
Expand Down Expand Up @@ -110,10 +111,17 @@ class Logger {
template <typename... Args> inline std::string debug(Args&&... args) {
return message<DEBUG>(std::forward<Args>(args)...);
};

/// Something seems odd but the program will continue running fine nevertheless.
template <typename... Args> inline std::string warn(Args&&... args) {
return message<WARN>(std::forward<Args>(args)...);
};

/// Use this for test results
template <typename... Args> inline std::string result(Args&&... args) {
return message<RESULT>(std::forward<Args>(args)...);
};

/*! Use this when something goes wrong but the program can still continue
* Example: a test gives the wrong results
*/
@@ -129,7 +137,7 @@
};
};

/// This logger prints something on each node. It is created in base/communicationBase_*.cpp and its verbosity
/// This logger prints something on each node. It is created in base/communicationBase_*.cpp and its verbosity
/// should be set at the beginning of main()
extern Logger stdLogger;
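The placement of RESULT between INFO and WARN is what drives the new application behavior: a leveled logger emits a message only if its level is at or above the configured verbosity, so at verbosity RESULT the info() chatter is suppressed while result() and anything more severe still print. A minimal standalone sketch (not the project's Logger):

```cpp
// Sketch: level filtering with the extended enum from this diff.
#include <cstdio>

enum LogLevel { ALL, ALLOC, TRACE, DEBUG, INFO, RESULT, WARN, ERROR, FATAL, OFF };
static const char* LogLevelStr[] = {"ALL", "ALLOC", "TRACE", "DEBUG", "INFO",
                                    "RESULT", "WARN", "ERROR", "FATAL", "OFF"};

struct MiniLogger {
    LogLevel verbosity = ALL;
    void message(LogLevel lvl, const char* text) {
        if (lvl >= verbosity)                 // only levels at/above the threshold print
            std::printf("[%s] %s\n", LogLevelStr[lvl], text);
    }
};

int main() {
    MiniLogger log;
    log.verbosity = RESULT;
    log.message(INFO,   "Checking Gaugefile ...");  // suppressed
    log.message(RESULT, "Gaugefile OK!");           // printed
}
```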

13 changes: 7 additions & 6 deletions src/base/LatticeContainer.cpp
@@ -12,23 +12,23 @@
#define gpucub hipcub
#endif


#ifndef USE_CPU_ONLY
template<class floatT>
gpuError_t CubReduce(void *helpArr, size_t *temp_storage_bytes, floatT *Arr, floatT *out, size_t size) {
GPUERROR_T CubReduce(void *helpArr, size_t *temp_storage_bytes, floatT *Arr, floatT *out, size_t size) {

return gpucub::DeviceReduce::Sum(helpArr, *temp_storage_bytes, static_cast<floatT *>(Arr), out,
size);
}

template<class floatT>
gpuError_t CubReduceMax(void *helpArr, size_t *temp_storage_bytes, void *Arr, floatT *out, size_t size) {
GPUERROR_T CubReduceMax(void *helpArr, size_t *temp_storage_bytes, void *Arr, floatT *out, size_t size) {

return gpucub::DeviceReduce::Max(helpArr, *temp_storage_bytes, static_cast<floatT *>(Arr), out,
size);
}

template<class floatT>
gpuError_t
GPUERROR_T
CubReduceStacked(void *helpArr, size_t *temp_storage_bytes, void *Arr, void *out, int Nt, void *StackOffsets) {

return gpucub::DeviceSegmentedReduce::Sum(helpArr, *temp_storage_bytes, static_cast<floatT *>(Arr),
@@ -37,8 +37,8 @@ CubReduceStacked(void *helpArr, size_t *temp_storage_bytes, void *Arr, void *out
}

#define CLASS_INIT(floatT) \
template gpuError_t CubReduce<floatT>(void * helpArr, size_t *temp_storage_bytes, floatT* Arr, floatT* out, size_t size); \
template gpuError_t CubReduceStacked<floatT>(void * helpArr, size_t *temp_storage_bytes, void * Arr, void* out, int Nt, void *StackOffsets); \
template GPUERROR_T CubReduce<floatT>(void * helpArr, size_t *temp_storage_bytes, floatT* Arr, floatT* out, size_t size); \
template GPUERROR_T CubReduceStacked<floatT>(void * helpArr, size_t *temp_storage_bytes, void * Arr, void* out, int Nt, void *StackOffsets); \


CLASS_INIT(float)
@@ -68,3 +68,4 @@ CLASS_INITMAX(double)

CLASS_INITMAX(int)

#endif
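Background for these wrappers: CUB device-wide reductions use a two-phase protocol. Called once with a null scratch pointer, they only report the required temporary-storage size; a second call performs the reduction. That is why helpArr and temp_storage_bytes thread through every signature. (Reading GPUERROR_T as a macro that resolves to cudaError_t or hipError_t per backend is an assumption.) A sketch of the canonical CUDA pattern, independent of the project code:

```cpp
// Sketch: canonical two-phase cub::DeviceReduce::Sum call (CUDA backend).
#include <cub/cub.cuh>
#include <cstdio>

int main() {
    const int n = 1024;
    float *d_in = nullptr, *d_out = nullptr;
    cudaMalloc(&d_in, n * sizeof(float));
    cudaMalloc(&d_out, sizeof(float));
    cudaMemset(d_in, 0, n * sizeof(float));

    void*  d_temp = nullptr;
    size_t temp_bytes = 0;
    // Phase 1: d_temp == nullptr, so CUB only writes the required size.
    cub::DeviceReduce::Sum(d_temp, temp_bytes, d_in, d_out, n);
    cudaMalloc(&d_temp, temp_bytes);
    // Phase 2: the actual reduction.
    cub::DeviceReduce::Sum(d_temp, temp_bytes, d_in, d_out, n);

    float result = 0.0f;
    cudaMemcpy(&result, d_out, sizeof(float), cudaMemcpyDeviceToHost);
    std::printf("sum = %g\n", result);

    cudaFree(d_temp); cudaFree(d_out); cudaFree(d_in);
    return 0;
}
```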