Minor fixes of GPU support and auto-cpmp.

ochubar · Mar 24, 2024 · 2830104 · 2830104
1 parent 8a61040
commit 2830104
Show file tree

Hide file tree

Showing 10 changed files with 63 additions and 245 deletions.
diff --git a/Makefile b/Makefile
@@ -23,6 +23,12 @@ examples_dir = env/python/srwpy/examples
 #example10_data_dir = $(examples_dir)/data_example_10
 export MODE ?= 0
 
+#HG20042024 Default CUDA locations for MODE=cuda builds: "?=" keeps any value already set by the caller, "export" hands the values on to sub-makes
+ifeq ($(MODE), cuda)
+export CUDA_PATH ?= /usr/local/cuda
+export CUDA_MATHLIBS_PATH ?= /usr/local/cuda
+endif
+
 nofftw: core pylib
 
 all: clean fftw core pylib

diff --git a/README.md b/README.md
@@ -253,16 +253,16 @@ pip install -e .
 
 ## VI. GPU Acceleration of SRW
 
-SRW has basic support for GPU acceleration of some routines through CUDA. Compilation of SRW with GPU acceleration requires the CUDA HPC SDK to be installed and, on Linux can be performed with:
+SRW has basic support for GPU acceleration of some routines through CUDA. Compilation of SRW with GPU acceleration requires the NVIDIA HPC SDK or the CUDA Toolkit to be installed.
+To compile on Linux, run the following from the SRW base directory:
 
 ```bash
 MODE=cuda make
 ```
 
-To compile on Windows, open the SRW solution in Visual Studio, set the target to the `_cuda` variants and update the library and include paths for the SRWLIB project. You may also have to copy the following DLLs from the HPC SDK install into the env/python/srwpy directory:
+To compile on Windows, ensure that CUDA Toolkit v12.4 is installed, open the SRW solution in Visual Studio, set the target for the SRWLIB project to the `_cuda` variant, and update the library and include paths. If importing the module fails with the `DLL load failed while importing srwlpy` error, you may also have to copy the following DLL from the HPC SDK or CUDA Toolkit install into the env/python/srwpy directory:
 
-- cudart64_110.dll
-- cufft64_10.dll
+- cufft64_11.dll
 
 ## Authors and Contributors to SRW project
 

diff --git a/README.txt b/README.txt
@@ -144,6 +144,29 @@ IV. Compiling and testing SRW Library and its Python binding on Mac OSX.
 	sudo port install gcc47
 	Modify the SRW_Dev/cpp/gcc/Makefile so that CC=<path to macports>/gcc and CXX=<path to macports>/g++, and proceed to the compilation as described in III.1.2.2.	
 
+V. Compiling and testing SRW Library and its Python binding on Windows and Linux (via CMake/Pip)
+------------------------------------------------------------------
+
+	Run the following in a Developer Command Prompt for Visual Studio (Windows) or in a Linux terminal:
+
+	cmake -B build
+	cmake --build build -j
+
+	The pip-installable version of the package can be obtained by running the following in a Developer Command Prompt for Visual Studio (Windows) or in a Linux terminal:
+
+	cd env/python
+	pip install -e .
+
+VI. GPU Acceleration of SRW
+------------------------------------------------------------------
+
+	SRW has basic support for GPU acceleration of some routines through CUDA. Compilation of SRW with GPU acceleration requires the NVIDIA HPC SDK or the CUDA Toolkit to be installed.
+	To compile on Linux, run the following from the SRW base directory:
+
+	MODE=cuda make
+
+	To compile on Windows, ensure that CUDA Toolkit v12.4 is installed, open the SRW solution in Visual Studio, set the target for the SRWLIB project to the `_cuda` variant, and update the library and include paths. If importing the module fails with the `DLL load failed while importing srwlpy` error, you may also have to copy the following DLL from the HPC SDK or CUDA Toolkit install into the env/python/srwpy directory:
+	- cufft64_11.dll
 
 Authors and Contributors to SRW project:
 ----------------------------------------

diff --git a/cpp/gcc/Makefile b/cpp/gcc/Makefile
@@ -4,6 +4,8 @@ SRW_SRC_GEN_DIR=$(SRW_SRC_DIR)/core
 SRW_SRC_LIB_DIR=$(SRW_SRC_DIR)/lib
 SRW_SRC_GENESIS_DIR=$(SRW_SRC_DIR)/ext/genesis/genesis_july08
 SH_SRC_PARSE_DIR=$(SRW_SRC_DIR)/ext/auxparse
+#HG20032024 Directory of the auxiliary GPU sources; the value must not end in whitespace, because make keeps trailing spaces and they would split any path built from it
+SH_SRC_AUXGPU_DIR=$(SRW_SRC_DIR)/ext/auxgpu
 SH_SRC_GEN_MATH_DIR=$(SRW_SRC_DIR)/ext/genmath
 LIB_DIR=$(SOFT_DEV_DIR)/../ext_lib
 
@@ -47,8 +49,9 @@ CUDA_INCLUDES = -I$(CUDA_PATH)/include -I$(CUDA_MATHLIBS_PATH)/include
 CUDA_LIBS = -L$(CUDA_PATH)/lib64 -L$(CUDA_MATHLIBS_PATH)/lib64
 
 SRW_SRC_DEF += -D_OFFLOAD_GPU -DUSE_CUDA -D_FFTW3
-SRW_INCLUDES += $(CUDA_INCLUDES) 
-#SRW_CFLAGS += -std=c++17 #HG01012024
+#HG20032024 Add SH_SRC_AUXGPU_DIR to SRW_INCLUDES
+SRW_INCLUDES += $(CUDA_INCLUDES) -I$(SH_SRC_AUXGPU_DIR)
+#SRW_CFLAGS += -std=c++17 #HG01012023
 LDFLAGS += $(CUDA_LIBS) -lcudart_static -lcudadevrt -lcufft -lrt
 NVCFLAGS = -O3 -arch=sm_80 -dlto -rdc=true
 CUDA_OBJ=gmfft_gpu.o srradstr_gpu.o sroptelm_gpu.o sroptdrf_gpu.o sroptgtr_gpu.o srradmnp_gpu.o
@@ -128,6 +131,10 @@ lib: $(CUDA_OBJ) $(OBJ)
 %.o: $(SRW_SRC_GEN_DIR)/%.cu
 	$(NVCC) -dc $(NVCFLAGS) $(SRW_INCLUDES) $(SRW_SRC_DEF) -Xcompiler="$(CFLAGS)" -c $<
 
+#HG20032024 Compile C++ sources located in SH_SRC_AUXGPU_DIR into object files using the host C++ compiler
+%.o: $(SH_SRC_AUXGPU_DIR)/%.cpp
+	$(CXX) $(CFLAGS) -c $<  
+
 else
 lib:	$(OBJ)
 	ar -cvq $(PRG) *.o

diff --git a/cpp/src/core/srercode.h b/cpp/src/core/srercode.h
@@ -282,6 +282,7 @@
 #define WARN_ELEC_BEAM_IS_NOT_ULTRARELATIVISTIC 23 + SRW_WARNINGS_OFFSET
 #define GENESIS_RAD_HARM_CALC_NEEDS_ELEC_DISTRIB 24 + SRW_WARNINGS_OFFSET
 #define ZERO_WFR_RAD_CURV_PH_TERM_NOT_TREATED 25 + SRW_WARNINGS_OFFSET
+#define GPU_COMPUTATION_FAILED 26 + SRW_WARNINGS_OFFSET /*HG21032024*/
 
 //-------------------------------------------------------------------------