Skip to content

Commit

Permalink
Merge branch 'develop' of github.com:ccsb-scripps/AutoDock-GPU into t…
Browse files Browse the repository at this point in the history
…ensorcores
  • Loading branch information
atillack committed Jul 25, 2024
2 parents d3b3299 + 5aa1a3e commit b8e5512
Show file tree
Hide file tree
Showing 7 changed files with 84 additions and 84 deletions.
27 changes: 22 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,25 @@
# DEVICE=OCLGPU
# ------------------------------------------------------
# Choose OpenCL device
# Valid values: CPU, GPU, CUDA, OCLGPU
# Valid values: CPU, GPU, CUDA, OCLGPU, OPENCL

OVERLAP = ON

ifeq ($(DEVICE), $(filter $(DEVICE),GPU CUDA))
TEST_CUDA := $(shell ./test_cuda.sh nvcc "$(GPU_INCLUDE_PATH)" "$(GPU_LIBRARY_PATH)")
# if user specifies DEVICE=CUDA it will be used (whether the test succeeds or not)
TARGETS_SUPPORTED := $(shell ./test_cuda.sh nvcc "$(GPU_INCLUDE_PATH)" "$(GPU_LIBRARY_PATH)" "$(TARGETS)" "$(DEVICE)")
# if user specifies DEVICE=GPU the test result determines whether CUDA will be used or not
ifeq ($(DEVICE)$(TEST_CUDA),GPUyes)
ifeq ($(TARGETS_SUPPORTED),)
ifeq ($(DEVICE),CUDA)
$(error Cuda verification failed)
else
$(info Cuda is not available, using OpenCL)
$(info )
override DEVICE:=GPU
export
endif
else
override TARGETS:=$(TARGETS_SUPPORTED)
export
override DEVICE:=CUDA
endif
endif
Expand All @@ -37,9 +49,14 @@ override DEVICE:=GPU
export
include Makefile.Cuda
else
ifeq ($(DEVICE),OCLGPU)
ifeq ($(DEVICE),$(filter $(DEVICE),OCLGPU OPENCL))
override DEVICE:=GPU
export
$(info Using OpenCL)
$(info )
endif
$(info Please make sure to set environment variables)
$(info GPU_INCLUDE_PATH and GPU_LIBRARY_PATH)
$(info )
include Makefile.OpenCL
endif
24 changes: 3 additions & 21 deletions Makefile.Cuda
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,8 @@ else
NWI=-DN16WI
TARGET:=$(TARGET)_16wi
else ifeq ($(DEVICE), GPU)
NWI=-DN64WI
TARGET:=$(TARGET)_64wi
NWI=-DN128WI
TARGET:=$(TARGET)_128wi
endif
endif

Expand Down Expand Up @@ -215,7 +215,7 @@ NRUN := 100
NGEN := 27000
POPSIZE := 150
TESTNAME := test
TESTLS := sw
TESTLS := ad

test: odock
$(BIN_DIR)/$(TARGET) \
Expand All @@ -228,22 +228,4 @@ test: odock
-gfpop 0 \
-lsmet $(TESTLS)

ASTEX_PDB := 2bsm
ASTEX_NRUN:= 10
ASTEX_POPSIZE := 10
ASTEX_TESTNAME := test_astex
ASTEX_LS := sw

astex: odock
$(BIN_DIR)/$(TARGET) \
-ffile ./input_tsri/search-set-astex/$(ASTEX_PDB)/protein.maps.fld \
-lfile ./input_tsri/search-set-astex/$(ASTEX_PDB)/flex-xray.pdbqt \
-nrun $(ASTEX_NRUN) \
-psize $(ASTEX_POPSIZE) \
-resnam $(ASTEX_TESTNAME) \
-gfpop 1 \
-lsmet $(ASTEX_LS)

# $(BIN_DIR)/$(TARGET) -ffile ./input_tsri/search-set-astex/$(ASTEX_PDB)/protein.maps.fld -lfile ./input_tsri/search-set-astex/$(ASTEX_PDB)/flex-xray.pdbqt -nrun $(ASTEX_NRUN) -psize $(ASTEX_POPSIZE) -resnam $(ASTEX_TESTNAME) -gfpop 1 | tee ./input_tsri/search-set-astex/intrapairs/$(ASTEX_PDB)_intrapair.txt

.PHONY: clean
24 changes: 3 additions & 21 deletions Makefile.OpenCL
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,8 @@ else
NWI=-DN16WI
TARGET:=$(TARGET)_16wi
else ifeq ($(DEVICE), GPU)
NWI=-DN64WI
TARGET:=$(TARGET)_64wi
NWI=-DN128WI
TARGET:=$(TARGET)_128wi
endif
endif

Expand Down Expand Up @@ -276,7 +276,7 @@ NRUN := 100
NGEN := 27000
POPSIZE := 150
TESTNAME := test
TESTLS := sw
TESTLS := ad

test: odock
$(BIN_DIR)/$(TARGET) \
Expand All @@ -289,22 +289,4 @@ test: odock
-gfpop 0 \
-lsmet $(TESTLS)

ASTEX_PDB := 2bsm
ASTEX_NRUN:= 10
ASTEX_POPSIZE := 10
ASTEX_TESTNAME := test_astex
ASTEX_LS := sw

astex: odock
$(BIN_DIR)/$(TARGET) \
-ffile ./input_tsri/search-set-astex/$(ASTEX_PDB)/protein.maps.fld \
-lfile ./input_tsri/search-set-astex/$(ASTEX_PDB)/flex-xray.pdbqt \
-nrun $(ASTEX_NRUN) \
-psize $(ASTEX_POPSIZE) \
-resnam $(ASTEX_TESTNAME) \
-gfpop 1 \
-lsmet $(ASTEX_LS)

# $(BIN_DIR)/$(TARGET) -ffile ./input_tsri/search-set-astex/$(ASTEX_PDB)/protein.maps.fld -lfile ./input_tsri/search-set-astex/$(ASTEX_PDB)/flex-xray.pdbqt -nrun $(ASTEX_NRUN) -psize $(ASTEX_POPSIZE) -resnam $(ASTEX_TESTNAME) -gfpop 1 | tee ./input_tsri/search-set-astex/intrapairs/$(ASTEX_PDB)_intrapair.txt

.PHONY: clean
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ See [more relevant papers](https://github.com/ccsb-scripps/AutoDock-GPU/wiki/Pub

| Operating system | CPU | GPU |
|:----------------------------------------:|:----------------------------:|:----------------------------------------------:|
|CentOS 6.7 & 6.8 / Ubuntu 14.04 & 16.04 | Intel SDK for OpenCL 2017 | AMD APP SDK v3.0 / CUDA 9, 10, and 11 |
|CentOS 6.7 & 6.8 / Ubuntu 14.04 & 16.04 | Intel SDK for OpenCL 2017 | OpenCL / CUDA >= 11 |
|macOS Catalina 10.15.1 | Apple / Intel | Apple / Intel Iris, Radeon Vega 64, Radeon VII |


Other environments or configurations likely work as well, but are untested.
Other environments or configurations likely work as well, but are untested. Since commit 846dc2b, AutoDock-GPU requires a C++17-capable compiler, which in practice means GCC >= 9. This also means the minimum version supported for Cuda compilation is Cuda 11. However, since every Cuda release also ships with OpenCL, older Cuda versions can still be used via the OpenCL code path (`DEVICE=OCLGPU`).

# Compilation

Expand All @@ -45,7 +45,7 @@ make DEVICE=<TYPE> NUMWI=<NWI>

| Parameters | Description | Values |
|:----------:|:----------------------------:|:--------------------------------------------------:|
| `<TYPE>` | Accelerator chosen | `CPU`, `GPU`, `CUDA`, `OCLGPU` |
| `<TYPE>` | Accelerator chosen | `CPU`, `GPU`, `CUDA`, `OCLGPU`, `OPENCL` |
| `<NWI>` | work-group/thread block size | `1`, `2`, `4`, `8`, `16`, `32`, `64`, `128`, `256` |

When `DEVICE=GPU` is chosen, the Makefile automatically tests whether it can compile Cuda successfully. To override, use `DEVICE=CUDA` or `DEVICE=OCLGPU`. The cpu target is only supported using OpenCL. Furthermore, an OpenMP-enabled overlapped pipeline (for setup and processing) can be compiled with `OVERLAP=ON`.
Expand Down
18 changes: 6 additions & 12 deletions host/src/getparameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,10 +198,8 @@ int parse_dpf(
if(mypars->ligandfile) free(mypars->ligandfile);
if(strincmp(argstr,"empty",5) != 0){
if(check_path && !has_absolute_path(argstr)){
len = strlen(argstr);
mypars->ligandfile = (char*)malloc((dpf_path.size()+len+1)*sizeof(char));
mypars->ligandfile[dpf_path.size()] = '\0'; // make sure first part to copy is terminated
strncat(strncpy(mypars->ligandfile, dpf_path.c_str(), dpf_path.size()), argstr, len);
mypars->ligandfile = (char*)malloc((dpf_path.size()+strlen(argstr)+1)*sizeof(char));
strcat(strcpy(mypars->ligandfile, dpf_path.c_str()), argstr);
} else mypars->ligandfile = strdup(argstr);
}
}
Expand All @@ -211,10 +209,8 @@ int parse_dpf(
sscanf(line.c_str(),"%*s %255s",argstr);
if(mypars->flexresfile) free(mypars->flexresfile);
if(check_path && !has_absolute_path(argstr)){
len = strlen(argstr);
mypars->flexresfile = (char*)malloc((dpf_path.size()+len+1)*sizeof(char));
mypars->flexresfile[dpf_path.size()] = '\0'; // make sure first part to copy is terminated
strncat(strncpy(mypars->flexresfile, dpf_path.c_str(), dpf_path.size()), argstr, len);
mypars->flexresfile = (char*)malloc((dpf_path.size()+strlen(argstr)+1)*sizeof(char));
strcat(strcpy(mypars->flexresfile, dpf_path.c_str()), argstr);
} else mypars->flexresfile = strdup(argstr);
}
break;
Expand All @@ -224,10 +220,8 @@ int parse_dpf(
// Add the .fld file
if(mypars->fldfile) free(mypars->fldfile);
if(check_path && !has_absolute_path(argstr)){
len = strlen(argstr);
mypars->fldfile = (char*)malloc((dpf_path.size()+len+1)*sizeof(char));
mypars->fldfile[dpf_path.size()] = '\0'; // make sure first part to copy is terminated
strncat(strncpy(mypars->fldfile, dpf_path.c_str(), dpf_path.size()), argstr, len);
mypars->fldfile = (char*)malloc((dpf_path.size()+strlen(argstr)+1)*sizeof(char));
strcat(strcpy(mypars->fldfile, dpf_path.c_str()), argstr);
} else mypars->fldfile = strdup(argstr); // this allows using the dpf to set up all parameters but the ligand
// Filling mygrid according to the specified fld file
if (get_gridinfo(mypars->fldfile, mygrid) != 0)
Expand Down
38 changes: 19 additions & 19 deletions host/src/performdocking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -403,11 +403,11 @@ std::vector<int> get_gpu_pool()
int gpuCount=0;
cudaError_t status;
status = cudaGetDeviceCount(&gpuCount);
RTERROR(status, "cudaGetDeviceCount failed");
RTERROR(status, "ERROR in cudaGetDeviceCount:");
std::vector<int> result;
cudaDeviceProp props;
for(unsigned int i=0; i<gpuCount; i++){
RTERROR(cudaGetDeviceProperties(&props,i),"cudaGetDeviceProperties failed");
RTERROR(cudaGetDeviceProperties(&props,i),"ERROR in cudaGetDeviceProperties:");
if(props.major>=3) result.push_back(i);
}
if (result.size() == 0)
Expand All @@ -430,7 +430,7 @@ void setup_gpu_for_docking(
// Initialize CUDA
int gpuCount=0;
cudaError_t status = cudaGetDeviceCount(&gpuCount);
RTERROR(status, "cudaGetDeviceCount failed");
RTERROR(status, "ERROR in cudaGetDeviceCount:");
if (gpuCount == 0)
{
printf("No CUDA-capable devices found, exiting.\n");
Expand All @@ -448,19 +448,19 @@ void setup_gpu_for_docking(
// Now that we have a device, gather some information
size_t freemem, totalmem;
cudaDeviceProp props;
RTERROR(cudaGetDevice(&(cData.devnum)),"cudaGetDevice failed");
RTERROR(cudaGetDeviceProperties(&props,cData.devnum),"cudaGetDeviceProperties failed");
RTERROR(cudaGetDevice(&(cData.devnum)),"ERROR in cudaGetDevice:");
RTERROR(cudaGetDeviceProperties(&props,cData.devnum),"ERROR in cudaGetDeviceProperties:");
tData.device_name = (char*) malloc(strlen(props.name)+32); // make sure array is large enough to hold device number text too
strcpy(tData.device_name, props.name);
if(gpuCount>1) snprintf(&tData.device_name[strlen(props.name)], strlen(props.name)+32, " (#%d / %d)",cData.devnum+1,gpuCount);
printf("Cuda device: %s\n",tData.device_name);
RTERROR(cudaMemGetInfo(&freemem,&totalmem), "cudaGetMemInfo failed");
RTERROR(cudaMemGetInfo(&freemem,&totalmem), "ERROR in cudaGetMemInfo:");
printf("Available memory on device: %lu MB (total: %lu MB)\n",(freemem>>20),(totalmem>>20));
cData.devid=cData.devnum;
cData.devnum=-2;
#ifdef SET_CUDA_PRINTF_BUFFER
status = cudaDeviceSetLimit(cudaLimitPrintfFifoSize, 200000000ull);
RTERROR(status, "cudaDeviceSetLimit failed");
RTERROR(status, "ERROR in cudaDeviceSetLimit:");
#endif
auto const t1 = std::chrono::steady_clock::now();
printf("\nCUDA Setup time %fs\n", elapsed_seconds(t0 ,t1));
Expand Down Expand Up @@ -512,16 +512,16 @@ void finish_gpu_from_docking(
status = cudaFree(cData.pKerconst_conform);
RTERROR(status, "cudaFree: error freeing cData.pKerconst_conform\n");
status = cudaFree(cData.pMem_rotbonds_const);
RTERROR(status, "cudaFree: error freeing cData.pMem_rotbonds_const");
RTERROR(status, "cudaFree: error freeing cData.pMem_rotbonds_const\n");
status = cudaFree(cData.pMem_rotbonds_atoms_const);
RTERROR(status, "cudaFree: error freeing cData.pMem_rotbonds_atoms_const");
RTERROR(status, "cudaFree: error freeing cData.pMem_rotbonds_atoms_const\n");
status = cudaFree(cData.pMem_num_rotating_atoms_per_rotbond_const);
RTERROR(status, "cudaFree: error freeing cData.pMem_num_rotating_atoms_per_rotbond_const");
RTERROR(status, "cudaFree: error freeing cData.pMem_num_rotating_atoms_per_rotbond_const\n");

// Non-constant
if(tData.pMem_fgrids){
status = cudaFree(tData.pMem_fgrids);
RTERROR(status, "cudaFree: error freeing pMem_fgrids");
RTERROR(status, "cudaFree: error freeing pMem_fgrids\n");
}
free(tData.device_name);
}
Expand Down Expand Up @@ -1342,7 +1342,7 @@ parameters argc and argv:
#endif
#ifdef USE_CUDA
status = cudaMemcpy(sim_state.cpu_energies.data(), pMem_energies_current, size_energies, cudaMemcpyDeviceToHost);
RTERROR(status, "cudaMemcpy: couldn't download pMem_energies_current");
RTERROR(status, "cudaMemcpy: couldn't download pMem_energies_current.\n");
#endif
if (autostop.check_if_satisfactory(generation_cnt, sim_state.cpu_energies.data(), total_evals))
if (total_evals>min_as_evals)
Expand Down Expand Up @@ -1643,19 +1643,19 @@ parameters argc and argv:
#endif
#ifdef USE_CUDA
status = cudaFree(tData.pMem_conformations1);
RTERROR(status, "cudaFree: error freeing pMem_conformations1");
RTERROR(status, "cudaFree: error freeing pMem_conformations1.\n");
status = cudaFree(tData.pMem_conformations2);
RTERROR(status, "cudaFree: error freeing pMem_conformations2");
RTERROR(status, "cudaFree: error freeing pMem_conformations2.\n");
status = cudaFree(tData.pMem_energies1);
RTERROR(status, "cudaFree: error freeing pMem_energies1");
RTERROR(status, "cudaFree: error freeing pMem_energies1.\n");
status = cudaFree(tData.pMem_energies2);
RTERROR(status, "cudaFree: error freeing pMem_energies2");
RTERROR(status, "cudaFree: error freeing pMem_energies2.\n");
status = cudaFree(tData.pMem_evals_of_new_entities);
RTERROR(status, "cudaFree: error freeing pMem_evals_of_new_entities");
RTERROR(status, "cudaFree: error freeing pMem_evals_of_new_entities.\n");
status = cudaFree(tData.pMem_gpu_evals_of_runs);
RTERROR(status, "cudaFree: error freeing pMem_gpu_evals_of_runs");
RTERROR(status, "cudaFree: error freeing pMem_gpu_evals_of_runs.\n");
status = cudaFree(tData.pMem_prng_states);
RTERROR(status, "cudaFree: error freeing pMem_prng_states");
RTERROR(status, "cudaFree: error freeing pMem_prng_states.\n");
#endif
delete KerConst_interintra;
delete KerConst_intracontrib;
Expand Down
31 changes: 28 additions & 3 deletions test_cuda.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,37 @@

current_dir=`pwd`
script_dir=`dirname $0`
CUDA_VERSION=`nvcc --version 2>/dev/null | grep release | awk '{ print $(NF-1) }' | sed "s/,//g"`
if [[ $CUDA_VERSION != "" ]]; then
printf "Using Cuda %s\n" $CUDA_VERSION >&2
else
if [[ $DEVICE == "CUDA" ]]; then
printf "Error: nvcc command does not exist/is not working properly.\n" >&2
fi
exit 1
fi
TARGETS_SUPPORTED=`nvcc --list-gpu-arch 2>/dev/null`
if [[ $TARGETS_SUPPORTED == "" ]]; then # might be an older Cuda version that doesn't have the --list-gpu-arch option
TARGETS_SUPPORTED=`nvcc --help | grep -oP "compute_\d+" | sort -u`
fi
if [[ "$4" != "" ]]; then
for T in $4; do
SUPPORTED=`grep -o $T <<< $TARGETS_SUPPORTED`
if [[ $SUPPORTED == "" ]]; then
printf "Error: Specified compute target <$T> not supported by installed Cuda version.\n" >&2
exit 1
fi
done
TARGETS="$4"
else
TARGETS=`awk -F'_' '{ if(\$2>50) print \$2 }' <<< "$TARGETS_SUPPORTED" | tr "\n" " "`
fi
printf "Compiling for targets: %s\n" "$TARGETS" >&2
cd "$script_dir"
if [[ ! -f "test_cuda" ]]; then
$1 -I$2 -L$3 -lcuda -lcudart -o test_cuda test_cuda.cpp &> /dev/null
test -e test_cuda && echo yes || echo no
test -e test_cuda && echo $TARGETS
else
test -e test_cuda && echo yes || echo no
test -e test_cuda && echo $TARGETS
fi
cd "$current_dir"

0 comments on commit b8e5512

Please sign in to comment.