Various fixes #95

Draft · wants to merge 16 commits into main
53 changes: 38 additions & 15 deletions CMakeLists.txt
@@ -34,6 +34,10 @@ elseif (BACKEND STREQUAL "hip_amd")
set(USE_HIP ON)
set(USE_HIP_AMD ON)
message(STATUS "Using HIP backend for AMD GPUs. (Experimental!)")
elseif (BACKEND STREQUAL "cpu")
message(STATUS "Using CPU only (no GPU backend). Experimental!")
add_definitions(-DUSE_CPU_ONLY)
#set(USE_CPU_ONLY ON)
else()
message(STATUS "Backend ${BACKEND} is not known!")
endif()
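Note: a sketch of how the new `USE_CPU_ONLY` definition would be consumed from C++; the file and messages are hypothetical, not part of this PR:

```cpp
// cpu_only_example.cpp -- hypothetical illustration of consuming -DUSE_CPU_ONLY.
#include <cstdio>

int main() {
#ifdef USE_CPU_ONLY
    std::printf("Built with BACKEND=cpu: everything runs on the host.\n");
#else
    std::printf("Built with a GPU backend.\n");
#endif
    return 0;
}
```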
@@ -49,6 +53,8 @@ elseif (USE_HIP_NVIDIA)
# This will hopefully work in the future
# project(SIMULATeQCD LANGUAGES CXX HIP)
project(SIMULATeQCD LANGUAGES CXX CUDA)
else()
project(SIMULATeQCD LANGUAGES CXX)
endif()

set(CMAKE_CXX_STANDARD 17)
@@ -109,39 +115,42 @@ elseif (USE_HIP_AMD)
set(CMAKE_HIP_ARCHITECTURES "${ARCHITECTURE}")
endif()

if(NOT ARCHITECTURE)
if(NOT ARCHITECTURE AND NOT BACKEND MATCHES "cpu")
message(FATAL_ERROR "No GPU architecture set!")
endif()

set(USE_GPU_AWARE_MPI OFF CACHE BOOL "Set to ON to build gpu-aware MPI code (default = OFF)")
if (USE_GPU_AWARE_MPI)
add_definitions(-DUSE_GPU_AWARE_MPI)
endif()
set(USE_GPU_P2P ON CACHE BOOL "Set to ON to build with GPU Direct P2P (default = ON)")
if (USE_GPU_P2P)
if(USE_HIP)
message(FATAL_ERROR "GPU_P2P is not supported by HIP!")
if(NOT BACKEND MATCHES "cpu")
if (USE_GPU_AWARE_MPI)
add_definitions(-DUSE_GPU_AWARE_MPI)
endif()
if (USE_GPU_P2P)
if(USE_HIP)
message(FATAL_ERROR "GPU_P2P is not supported by HIP!")
endif()
add_definitions(-DUSE_GPU_P2P)
endif()
if (USE_GPU_P2P AND NOT USE_GPU_AWARE_MPI)
message(FATAL_ERROR "USE_GPU_P2P only works with USE_GPU_AWARE_MPI")
endif()
add_definitions(-DUSE_GPU_P2P)
else()
set(USE_GPU_AWARE_MPI OFF)
set(USE_GPU_P2P OFF)
endif()


# Additional compiler flags
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall -Wextra -Wno-comment -fPIC")
if (USE_CUDA)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall -Wextra -Wno-comment -fPIC")
set(CMAKE_CUDA_FLAGS
"${CMAKE_CUDA_FLAGS} -O3 -Wno-deprecated-gpu-targets --std=c++17 -arch=sm_${ARCHITECTURE} -Xcudafe --display_error_number -prec-div=true -prec-sqrt=true")

elseif (USE_HIP_AMD)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall -Wextra -Wno-comment -fPIC -fgpu-rdc")

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fgpu-rdc")
add_definitions(-D__HIP_PLATFORM_AMD__)
set(HIP_HIPCC_FLAGS ${HIP_HIPCC_FLAGS} "-O3 -std=c++17 -D__HIP_PLATFORM_AMD__ --amdgpu-target=${ARCHITECTURE} -fgpu-rdc")
set(CMAKE_EXE_LINKER_FLAGS "-O3 -fgpu-rdc --hip-link")

elseif (USE_HIP_NVIDIA)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -Wall -Wextra -Wno-comment -fPIC")

add_definitions(-D__HIP_PLATFORM_NVIDIA__ -D__HIP_PLATFORM_NVCC__)
set(HIP_NVCC_FLAGS "${HIP_NVCC_FLAGS} -O3 -std=c++17 -D__HIP_PLATFORM_NVCC__ -D__HIP_PLATFORM_NVIDIA__ --generate-code arch=compute_${ARCHITECTURE},code=sm_${ARCHITECTURE} --generate-code arch=compute_${ARCHITECTURE},code=compute_${ARCHITECTURE} -rdc=true")
set(NVCC_LINK_FLAGS "${NVCC_LINK_FLAGS} -rdc=true")
@@ -299,6 +308,7 @@ if (USE_CUDA)
add_library(CodeBase OBJECT ${SOURCE_FILES_BASE})
target_compile_definitions(CodeBase PRIVATE
ARCHITECTURE=${ARCHITECTURE}
GIT_HASH="${GIT_HASH}"
SINGLEPREC=1 DOUBLEPREC=1
COMP_R18=1 COMP_U3R14=1 COMP_R14=1 COMP_R12=1 COMP_STAGGR12=1
${ALL_HALODEPTHS}
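The added `GIT_HASH="${GIT_HASH}"` definition reaches C++ as a string literal. A hypothetical consumption sketch (the fallback macro is illustrative, not project code):

```cpp
// Hypothetical: consuming a GIT_HASH="<sha>" compile definition.
#include <cstdio>

#ifndef GIT_HASH
#define GIT_HASH "unknown"  // illustrative fallback when the build does not define it
#endif

int main() {
    std::printf("built from commit %s\n", GIT_HASH);
    return 0;
}
```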
@@ -361,6 +371,14 @@ FUNCTION(add_SIMULATeQCD_executable TARGET)
set_target_properties(${TARGET} _${TARGET}
PROPERTIES
COMPILE_FLAGS "${MPI_COMPILE_FLAGS}" LINK_FLAGS "${MPI_LINK_FLAGS}" HIP_SEPARABLE_COMPILATION ON CUDA_SEPARABLE_COMPILATION ON LINKER_LANGUAGE CUDA CUDA_RESOLVE_DEVICE_SYMBOLS ON)
else()
add_executable(${TARGET} ${ARGN} ${SOURCE_FILES_BASE}) # single target
add_executable(_${TARGET} ${ARGN}) # compound target (e.g. in "tests", "applications")

target_link_libraries("_${TARGET}" CodeBase)
set_target_properties(${TARGET} _${TARGET}
PROPERTIES
COMPILE_FLAGS "${MPI_COMPILE_FLAGS}" LINK_FLAGS "${MPI_LINK_FLAGS}")
endif()
ENDFUNCTION()

@@ -645,6 +663,10 @@ elseif (USE_HIP_NVIDIA)
hip_add_executable(_SimpleFunctorTest src/testing/main_SimpleFunctorTest.cpp src/base/communication/communicationBase_mpi.cpp src/base/gutils.cpp)
set_target_properties(SimpleFunctorTest _SimpleFunctorTest PROPERTIES
COMPILE_FLAGS "${MPI_COMPILE_FLAGS}" LINK_FLAGS "${MPI_LINK_FLAGS}" HIP_SEPARABLE_COMPILATION ON CUDA_SEPARABLE_COMPILATION ON LINKER_LANGUAGE CUDA CUDA_RESOLVE_DEVICE_SYMBOLS ON RUNTIME_OUTPUT_DIRECTORY "testing")
else()
add_executable(SimpleFunctorTest src/testing/main_SimpleFunctorTest.cpp src/base/communication/communicationBase_mpi.cpp src/base/gutils.cpp)
add_executable(_SimpleFunctorTest src/testing/main_SimpleFunctorTest.cpp src/base/communication/communicationBase_mpi.cpp src/base/gutils.cpp)
set_target_properties(SimpleFunctorTest _SimpleFunctorTest PROPERTIES COMPILE_FLAGS "${MPI_COMPILE_FLAGS}" LINK_FLAGS "${MPI_LINK_FLAGS}" RUNTIME_OUTPUT_DIRECTORY "testing")
endif()
target_compile_definitions(SimpleFunctorTest PRIVATE HALODEPTH_0=1 DOUBLEPREC=1 SINGLEPREC=1 ARCHITECTURE=${ARCHITECTURE} GIT_HASH="${GIT_HASH}")
target_compile_definitions(_SimpleFunctorTest PRIVATE HALODEPTH_0=1 DOUBLEPREC=1 SINGLEPREC=1 ARCHITECTURE=${ARCHITECTURE} GIT_HASH="${GIT_HASH}")
@@ -754,6 +776,7 @@ set_SIMULATeQCD_property(configConverter PROPERTIES RUNTIME_OUTPUT_DIRECTORY "ap
SIMULATeQCD_target_compile_definitions(configConverter PRIVATE HALODEPTH_0=1 SINGLEPREC=1 DOUBLEPREC=1 COMP_R18=1 NSTACKS_1=1 LAYOUT_ALL=1)
add_to_compound_SIMULATeQCD_target(applications configConverter)

set_SIMULATeQCD_gpu_backend(src/applications/main_CheckConf.cpp)
add_SIMULATeQCD_executable(CheckConf src/applications/main_CheckConf.cpp)
set_SIMULATeQCD_property(CheckConf PROPERTIES RUNTIME_OUTPUT_DIRECTORY "applications")
SIMULATeQCD_target_compile_definitions(CheckConf PRIVATE HALODEPTH_0=1 COMP_R18=1 SINGLEPREC=1 DOUBLEPREC=1 CPUONLY=1)
24 changes: 14 additions & 10 deletions src/applications/main_CheckConf.cpp
@@ -8,12 +8,12 @@ struct CheckParams : LatticeParameters {
}
};

template<class floatT, bool onDevice, size_t HaloDepth, CompressionType comp=R18>
template<class floatT, size_t HaloDepth, CompressionType comp=R18>
struct do_check_unitarity
{
explicit do_check_unitarity(Gaugefield<floatT,onDevice,HaloDepth,comp> &gauge) : gAcc(gauge.getAccessor()) {};
explicit do_check_unitarity(Gaugefield<floatT,false,HaloDepth,comp> &gauge) : gAcc(gauge.getAccessor()) {};
gaugeAccessor<floatT, comp> gAcc;
__device__ __host__ floatT operator()(gSite site){
__host__ __device__ floatT operator()(gSite site){
typedef GIndexer<All,HaloDepth> GInd;
floatT ret=0.0;
for (size_t mu = 0; mu < 4; ++mu)
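The qualifier reordering (`__device__ __host__` to `__host__ __device__`) is cosmetic in CUDA; either order marks the call operator as callable from both host and device. Dual annotation is what lets the same functor drive the new CPU path. A standalone sketch of the pattern; the macro fallback is an assumption about how such headers typically handle non-GPU builds:

```cpp
// Sketch: one functor usable from host loops (CPU backend) and device kernels.
#ifndef __CUDACC__
#define __host__    // assumed: defined away when no GPU compiler is in play
#define __device__
#endif

struct SquareSite {
    __host__ __device__ float operator()(float x) const { return x * x; }
};

int main() {
    SquareSite f;                 // host-side call; the same f works inside a kernel
    return f(3.0f) == 9.0f ? 0 : 1;
}
```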
@@ -25,13 +25,13 @@ struct do_check_unitarity
}
};

template <class floatT, bool onDevice, size_t HaloDepth>
void check_unitarity(Gaugefield<floatT,onDevice,HaloDepth> &gauge)
template <class floatT, size_t HaloDepth>
void check_unitarity(Gaugefield<floatT,false,HaloDepth> &gauge)
{
LatticeContainer<onDevice,floatT> unitarity(gauge.getComm());
LatticeContainer<false,floatT> unitarity(gauge.getComm());
const size_t elems = GIndexer<All,HaloDepth>::getLatData().vol4;
unitarity.adjustSize(elems);
unitarity.template iterateOverBulk<All, HaloDepth>(do_check_unitarity<floatT, onDevice, HaloDepth>(gauge));
unitarity.template iterateOverBulk<All, HaloDepth>(do_check_unitarity<floatT, HaloDepth>(gauge));
floatT unit_norm;
unitarity.reduce(unit_norm, elems);
unit_norm /= static_cast<floatT>(GIndexer<All,HaloDepth>::getLatData().globvol4);
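For intuition on what check_unitarity computes: each link matrix U should satisfy U†U = 1, so the functor accumulates a per-site deviation that is volume-averaged after the reduce. A self-contained sketch of that measure for one 3x3 complex matrix, independent of the project's SU(3) types (the exact norm used by do_check_unitarity is not visible in this hunk, so Frobenius is an assumption):

```cpp
// Sketch: Frobenius norm of (U^dagger * U - 1) for a 3x3 complex matrix.
#include <array>
#include <complex>
#include <cmath>
#include <cstdio>

using Mat3 = std::array<std::array<std::complex<double>, 3>, 3>;

double unitarity_deviation(const Mat3& U) {
    double sum = 0.0;
    for (int i = 0; i < 3; ++i)
        for (int j = 0; j < 3; ++j) {
            std::complex<double> e{0.0, 0.0};
            for (int k = 0; k < 3; ++k)
                e += std::conj(U[k][i]) * U[k][j];   // (U^dagger U)_{ij}
            if (i == j) e -= 1.0;                    // subtract the identity
            sum += std::norm(e);
        }
    return std::sqrt(sum);
}

int main() {
    Mat3 id{};                                       // identity is trivially unitary
    for (int i = 0; i < 3; ++i) id[i][i] = 1.0;
    std::printf("deviation = %g\n", unitarity_deviation(id));  // prints 0
}
```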
@@ -54,7 +54,7 @@ void CheckConf(CommunicationBase &commBase, const std::string& format, std::stri
} else {
throw (std::runtime_error(rootLogger.fatal("Invalid specification for format ", format)));
}
check_unitarity<floatT,false,HaloDepth>(gauge);
check_unitarity<floatT,HaloDepth>(gauge);

GaugeAction<floatT, false, HaloDepth> gaugeAction(gauge);
floatT plaq = gaugeAction.plaquette();
@@ -70,7 +70,8 @@ int main(int argc, char *argv[]) {
int main(int argc, char *argv[]) {

try {
stdLogger.setVerbosity(INFO);
stdLogger.setVerbosity(RESULT);
rootLogger.setVerbosity(RESULT);
const size_t HaloDepth = 0;

CheckParams param;
@@ -81,6 +82,9 @@ int main(int argc, char *argv[]) {

commBase.init(param.nodeDim());
initIndexer(HaloDepth, param, commBase);
rootLogger.setVerbosity(INFO);
rootLogger.info("Checking Gaugefile ", param.GaugefileName());
rootLogger.setVerbosity(RESULT);

if (param.prec() == "single"){
CheckConf<float, HaloDepth>(commBase, param.format(), param.GaugefileName());
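The raise-log-restore sequence above (set INFO, print one message, set back to RESULT) could also be expressed as a small RAII guard. A sketch against a hypothetical logger interface; the diff only shows setVerbosity(), so getVerbosity() here is an assumption:

```cpp
// Sketch: scoped verbosity change; restores the previous level on scope exit.
// Assumes the logger exposes getVerbosity()/setVerbosity(); getVerbosity() is
// hypothetical, the diff only demonstrates setVerbosity().
template <class LoggerT, class LevelT>
class ScopedVerbosity {
    LoggerT& log_;
    LevelT saved_;
public:
    ScopedVerbosity(LoggerT& log, LevelT lvl)
        : log_(log), saved_(log.getVerbosity()) { log_.setVerbosity(lvl); }
    ~ScopedVerbosity() { log_.setVerbosity(saved_); }
};

// Usage at the call site shown above:
//   { ScopedVerbosity raise(rootLogger, INFO);
//     rootLogger.info("Checking Gaugefile ", param.GaugefileName()); }
```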
@@ -94,6 +98,6 @@ int main(int argc, char *argv[]) {
catch (const std::runtime_error &error) {
return 1;
}
rootLogger.info("Gaugefile seems to be fine.");
rootLogger.result("Gaugefile OK! (readin, plaquette, unitarity)");
return 0;
}
10 changes: 7 additions & 3 deletions src/applications/main_CheckRand.cpp
@@ -17,7 +17,9 @@ void CheckRand(CommunicationBase &commBase, const std::string& rand_file){
int main(int argc, char *argv[]) {

try {
stdLogger.setVerbosity(INFO);
rootLogger.info("Checking Randfile...");
stdLogger.setVerbosity(RESULT);
rootLogger.setVerbosity(RESULT);
const size_t HaloDepth = 0;

CheckParams param;
@@ -27,12 +29,14 @@ int main(int argc, char *argv[]) {
param.readfile(commBase, "../parameter/applications/CheckRand.param", argc, argv);
commBase.init(param.nodeDim());
initIndexer(HaloDepth, param, commBase);

rootLogger.setVerbosity(INFO);
rootLogger.info("Checking Randfile ", param.Randfile());
rootLogger.setVerbosity(RESULT);
CheckRand(commBase, param.Randfile());
}
catch (const std::runtime_error &error) {
return 1;
}
rootLogger.info("Random state seems to be fine.");
rootLogger.result("Randfile OK!");
return 0;
}
8 changes: 4 additions & 4 deletions src/applications/main_configConverter.cpp
@@ -30,8 +30,8 @@ int main(int argc, char *argv[]) {
CommunicationBase commBase(&argc, &argv);

param.readfile(commBase, "../parameter/applications/configConverter.param", argc, argv);
if( param.compress_out()==true && param.format_out()=="ildg" ) {
throw(rootLogger.fatal("ILDG format does not support compression."));
if( param.compress_out() && param.format_out()=="ildg" ) {
throw std::runtime_error(rootLogger.fatal("ILDG format does not support compression."));
}

commBase.init(param.nodeDim());
@@ -47,7 +47,7 @@ int main(int argc, char *argv[]) {
} else if(param.format()=="milc"){
gauge.readconf_milc(param.GaugefileName());
} else {
throw(rootLogger.fatal("Invalid specification for format ",param.format()));
throw std::runtime_error(rootLogger.fatal("Invalid specification for format ",param.format()));
}

/// Print out:
@@ -60,7 +60,7 @@ int main(int argc, char *argv[]) {
} else if(param.format_out()=="ildg") {
gauge.writeconf_ildg(param.GaugefileName_out(), param);
} else {
throw(rootLogger.fatal("Invalid specification for format_out ",param.format_out()));
throw std::runtime_error(rootLogger.fatal("Invalid specification for format_out ",param.format_out()));
}

return 0;
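The repeated fix in this file, `throw(rootLogger.fatal(...))` becoming `throw std::runtime_error(rootLogger.fatal(...))`, matters because main() catches `const std::runtime_error &`: throwing the bare return value of fatal() (apparently a string) would bypass that handler and terminate the program. A minimal sketch with a stand-in fatal():

```cpp
// Sketch: why the thrown type must match the catch clause.
#include <stdexcept>
#include <string>
#include <cstdio>

std::string fatal(const std::string& msg) { return "FATAL: " + msg; }  // stand-in

int main() {
    try {
        // throw fatal("bad format");   // a std::string: NOT caught below,
        //                              // so std::terminate would be called
        throw std::runtime_error(fatal("bad format"));  // caught below
    } catch (const std::runtime_error& e) {
        std::printf("%s\n", e.what());
        return 1;
    }
}
```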
28 changes: 18 additions & 10 deletions src/base/IO/logging.h
@@ -1,10 +1,10 @@
/*
* logging.h
*
* L. Mazur
*
* The logger class, which is used to output information to screen.
*
/*
* logging.h
*
* L. Mazur
*
* The logger class, which is used to output information to screen.
*
*/

#ifndef LOGGER
@@ -22,8 +22,9 @@
#include "stringFunctions.h"


enum LogLevel { ALL, ALLOC, TRACE, DEBUG, INFO, WARN, ERROR, FATAL, OFF };
static const char *LogLevelStr[] = {"ALL", "ALLOC", "TRACE", "DEBUG", "INFO", "WARN", "ERROR", "FATAL", "OFF"};
enum LogLevel { ALL, ALLOC, TRACE, DEBUG, INFO, RESULT, WARN, ERROR, FATAL, OFF };
static const char *LogLevelStr[] = {"ALL", "ALLOC", "TRACE", "DEBUG", "INFO", "RESULT",
"WARN", "ERROR", "FATAL", "OFF"};

class Logger {
private:
Expand Down Expand Up @@ -110,10 +111,17 @@ class Logger {
template <typename... Args> inline std::string debug(Args&&... args) {
return message<DEBUG>(std::forward<Args>(args)...);
};

/// Something seems odd but the program will continue running fine nevertheless.
template <typename... Args> inline std::string warn(Args&&... args) {
return message<WARN>(std::forward<Args>(args)...);
};

/// Use this for test results
template <typename... Args> inline std::string result(Args&&... args) {
return message<RESULT>(std::forward<Args>(args)...);
};

/*! Use this when something goes wrong but the program can still continue
* Example: a test gives the wrong results
*/
@@ -129,7 +137,7 @@
};
};

/// This logger prints something on each node. It is created in base/communicationBase_*.cpp and its verbosity
/// This logger prints something on each node. It is created in base/communicationBase_*.cpp and its verbosity
/// should be set at the beginning of main()
extern Logger stdLogger;
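The placement of RESULT between INFO and WARN is what drives the new application behavior: a leveled logger emits a message only if its level is at or above the configured verbosity, so at verbosity RESULT the info() chatter is suppressed while result() and anything more severe still print. A minimal standalone sketch (not the project's Logger):

```cpp
// Sketch: level filtering with the extended enum from this diff.
#include <cstdio>

enum LogLevel { ALL, ALLOC, TRACE, DEBUG, INFO, RESULT, WARN, ERROR, FATAL, OFF };
static const char* LogLevelStr[] = {"ALL", "ALLOC", "TRACE", "DEBUG", "INFO",
                                    "RESULT", "WARN", "ERROR", "FATAL", "OFF"};

struct MiniLogger {
    LogLevel verbosity = ALL;
    void message(LogLevel lvl, const char* text) {
        if (lvl >= verbosity)                 // only levels at/above the threshold print
            std::printf("[%s] %s\n", LogLevelStr[lvl], text);
    }
};

int main() {
    MiniLogger log;
    log.verbosity = RESULT;
    log.message(INFO,   "Checking Gaugefile ...");  // suppressed
    log.message(RESULT, "Gaugefile OK!");           // printed
}
```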

13 changes: 7 additions & 6 deletions src/base/LatticeContainer.cpp
@@ -12,23 +12,23 @@
#define gpucub hipcub
#endif


#ifndef USE_CPU_ONLY
template<class floatT>
gpuError_t CubReduce(void *helpArr, size_t *temp_storage_bytes, floatT *Arr, floatT *out, size_t size) {
GPUERROR_T CubReduce(void *helpArr, size_t *temp_storage_bytes, floatT *Arr, floatT *out, size_t size) {

return gpucub::DeviceReduce::Sum(helpArr, *temp_storage_bytes, static_cast<floatT *>(Arr), out,
size);
}

template<class floatT>
gpuError_t CubReduceMax(void *helpArr, size_t *temp_storage_bytes, void *Arr, floatT *out, size_t size) {
GPUERROR_T CubReduceMax(void *helpArr, size_t *temp_storage_bytes, void *Arr, floatT *out, size_t size) {

return gpucub::DeviceReduce::Max(helpArr, *temp_storage_bytes, static_cast<floatT *>(Arr), out,
size);
}

template<class floatT>
gpuError_t
GPUERROR_T
CubReduceStacked(void *helpArr, size_t *temp_storage_bytes, void *Arr, void *out, int Nt, void *StackOffsets) {

return gpucub::DeviceSegmentedReduce::Sum(helpArr, *temp_storage_bytes, static_cast<floatT *>(Arr),
@@ -37,8 +37,8 @@ CubReduceStacked(void *helpArr, size_t *temp_storage_bytes, void *Arr, void *out
}

#define CLASS_INIT(floatT) \
template gpuError_t CubReduce<floatT>(void * helpArr, size_t *temp_storage_bytes, floatT* Arr, floatT* out, size_t size); \
template gpuError_t CubReduceStacked<floatT>(void * helpArr, size_t *temp_storage_bytes, void * Arr, void* out, int Nt, void *StackOffsets); \
template GPUERROR_T CubReduce<floatT>(void * helpArr, size_t *temp_storage_bytes, floatT* Arr, floatT* out, size_t size); \
template GPUERROR_T CubReduceStacked<floatT>(void * helpArr, size_t *temp_storage_bytes, void * Arr, void* out, int Nt, void *StackOffsets); \


CLASS_INIT(float)
@@ -68,3 +68,4 @@ CLASS_INITMAX(double)

CLASS_INITMAX(int)

#endif
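Background for these wrappers: CUB device-wide reductions use a two-phase protocol. Called once with a null scratch pointer, they only report the required temporary-storage size; a second call performs the reduction. That is why helpArr and temp_storage_bytes thread through every signature. (Reading GPUERROR_T as a macro that resolves to cudaError_t or hipError_t per backend is an assumption.) A sketch of the canonical CUDA pattern, independent of the project code:

```cpp
// Sketch: canonical two-phase cub::DeviceReduce::Sum call (CUDA backend).
#include <cub/cub.cuh>
#include <cstdio>

int main() {
    const int n = 1024;
    float *d_in = nullptr, *d_out = nullptr;
    cudaMalloc(&d_in, n * sizeof(float));
    cudaMalloc(&d_out, sizeof(float));
    cudaMemset(d_in, 0, n * sizeof(float));

    void*  d_temp = nullptr;
    size_t temp_bytes = 0;
    // Phase 1: d_temp == nullptr, so CUB only writes the required size.
    cub::DeviceReduce::Sum(d_temp, temp_bytes, d_in, d_out, n);
    cudaMalloc(&d_temp, temp_bytes);
    // Phase 2: the actual reduction.
    cub::DeviceReduce::Sum(d_temp, temp_bytes, d_in, d_out, n);

    float result = 0.0f;
    cudaMemcpy(&result, d_out, sizeof(float), cudaMemcpyDeviceToHost);
    std::printf("sum = %g\n", result);

    cudaFree(d_temp); cudaFree(d_out); cudaFree(d_in);
    return 0;
}
```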