Skip to content

Commit

Permalink
Merge branch 'develop' of github.com:ccsb-scripps/AutoDock-GPU into t…
Browse files Browse the repository at this point in the history
…ensorcores
  • Loading branch information
atillack committed Jul 25, 2024
2 parents d3b3299 + 5aa1a3e commit b8e5512
Show file tree
Hide file tree
Showing 7 changed files with 84 additions and 84 deletions.
27 changes: 22 additions & 5 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,25 @@
# DEVICE=OCLGPU
# ------------------------------------------------------
# Choose OpenCL device
# Valid values: CPU, GPU, CUDA, OCLGPU
# Valid values: CPU, GPU, CUDA, OCLGPU, OPENCL

OVERLAP = ON

ifeq ($(DEVICE), $(filter $(DEVICE),GPU CUDA))
TEST_CUDA := $(shell ./test_cuda.sh nvcc "$(GPU_INCLUDE_PATH)" "$(GPU_LIBRARY_PATH)")
# if user specifies DEVICE=CUDA it will be used (whether the test succeeds or not)
TARGETS_SUPPORTED := $(shell ./test_cuda.sh nvcc "$(GPU_INCLUDE_PATH)" "$(GPU_LIBRARY_PATH)" "$(TARGETS)" "$(DEVICE)")
# if user specifies DEVICE=GPU the test result determines whether CUDA will be used or not
ifeq ($(DEVICE)$(TEST_CUDA),GPUyes)
ifeq ($(TARGETS_SUPPORTED),)
ifeq ($(DEVICE),CUDA)
$(error Cuda verification failed)
else
$(info Cuda is not available, using OpenCL)
$(info )
override DEVICE:=GPU
export
endif
else
override TARGETS:=$(TARGETS_SUPPORTED)
export
override DEVICE:=CUDA
endif
endif
Expand All @@ -37,9 +49,14 @@ override DEVICE:=GPU
export
include Makefile.Cuda
else
ifeq ($(DEVICE),OCLGPU)
ifeq ($(DEVICE),$(filter $(DEVICE),OCLGPU OPENCL))
override DEVICE:=GPU
export
$(info Using OpenCL)
$(info )
endif
$(info Please make sure to set environment variables)
$(info GPU_INCLUDE_PATH and GPU_LIBRARY_PATH)
$(info )
include Makefile.OpenCL
endif
24 changes: 3 additions & 21 deletions Makefile.Cuda
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,8 @@ else
NWI=-DN16WI
TARGET:=$(TARGET)_16wi
else ifeq ($(DEVICE), GPU)
NWI=-DN64WI
TARGET:=$(TARGET)_64wi
NWI=-DN128WI
TARGET:=$(TARGET)_128wi
endif
endif

Expand Down Expand Up @@ -215,7 +215,7 @@ NRUN := 100
NGEN := 27000
POPSIZE := 150
TESTNAME := test
TESTLS := sw
TESTLS := ad

test: odock
$(BIN_DIR)/$(TARGET) \
Expand All @@ -228,22 +228,4 @@ test: odock
-gfpop 0 \
-lsmet $(TESTLS)

ASTEX_PDB := 2bsm
ASTEX_NRUN:= 10
ASTEX_POPSIZE := 10
ASTEX_TESTNAME := test_astex
ASTEX_LS := sw

astex: odock
$(BIN_DIR)/$(TARGET) \
-ffile ./input_tsri/search-set-astex/$(ASTEX_PDB)/protein.maps.fld \
-lfile ./input_tsri/search-set-astex/$(ASTEX_PDB)/flex-xray.pdbqt \
-nrun $(ASTEX_NRUN) \
-psize $(ASTEX_POPSIZE) \
-resnam $(ASTEX_TESTNAME) \
-gfpop 1 \
-lsmet $(ASTEX_LS)

# $(BIN_DIR)/$(TARGET) -ffile ./input_tsri/search-set-astex/$(ASTEX_PDB)/protein.maps.fld -lfile ./input_tsri/search-set-astex/$(ASTEX_PDB)/flex-xray.pdbqt -nrun $(ASTEX_NRUN) -psize $(ASTEX_POPSIZE) -resnam $(ASTEX_TESTNAME) -gfpop 1 | tee ./input_tsri/search-set-astex/intrapairs/$(ASTEX_PDB)_intrapair.txt

.PHONY: clean
24 changes: 3 additions & 21 deletions Makefile.OpenCL
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,8 @@ else
NWI=-DN16WI
TARGET:=$(TARGET)_16wi
else ifeq ($(DEVICE), GPU)
NWI=-DN64WI
TARGET:=$(TARGET)_64wi
NWI=-DN128WI
TARGET:=$(TARGET)_128wi
endif
endif

Expand Down Expand Up @@ -276,7 +276,7 @@ NRUN := 100
NGEN := 27000
POPSIZE := 150
TESTNAME := test
TESTLS := sw
TESTLS := ad

test: odock
$(BIN_DIR)/$(TARGET) \
Expand All @@ -289,22 +289,4 @@ test: odock
-gfpop 0 \
-lsmet $(TESTLS)

ASTEX_PDB := 2bsm
ASTEX_NRUN:= 10
ASTEX_POPSIZE := 10
ASTEX_TESTNAME := test_astex
ASTEX_LS := sw

astex: odock
$(BIN_DIR)/$(TARGET) \
-ffile ./input_tsri/search-set-astex/$(ASTEX_PDB)/protein.maps.fld \
-lfile ./input_tsri/search-set-astex/$(ASTEX_PDB)/flex-xray.pdbqt \
-nrun $(ASTEX_NRUN) \
-psize $(ASTEX_POPSIZE) \
-resnam $(ASTEX_TESTNAME) \
-gfpop 1 \
-lsmet $(ASTEX_LS)

# $(BIN_DIR)/$(TARGET) -ffile ./input_tsri/search-set-astex/$(ASTEX_PDB)/protein.maps.fld -lfile ./input_tsri/search-set-astex/$(ASTEX_PDB)/flex-xray.pdbqt -nrun $(ASTEX_NRUN) -psize $(ASTEX_POPSIZE) -resnam $(ASTEX_TESTNAME) -gfpop 1 | tee ./input_tsri/search-set-astex/intrapairs/$(ASTEX_PDB)_intrapair.txt

.PHONY: clean
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ See [more relevant papers](https://github.com/ccsb-scripps/AutoDock-GPU/wiki/Pub

| Operating system | CPU | GPU |
|:----------------------------------------:|:----------------------------:|:----------------------------------------------:|
|CentOS 6.7 & 6.8 / Ubuntu 14.04 & 16.04 | Intel SDK for OpenCL 2017 | AMD APP SDK v3.0 / CUDA 9, 10, and 11 |
|CentOS 6.7 & 6.8 / Ubuntu 14.04 & 16.04 | Intel SDK for OpenCL 2017 | OpenCL / CUDA >= 11 |
|macOS Catalina 10.15.1 | Apple / Intel | Apple / Intel Iris, Radeon Vega 64, Radeon VII |


Other environments or configurations likely work as well, but are untested.
Other environments or configurations likely work as well, but are untested. Since commit 846dc2b, AutoDock-GPU requires a C++17-capable compiler, which in practice means GCC >= 9. This also means the minimum version supported for Cuda compilation is Cuda 11. However, since every Cuda release also ships with OpenCL, older Cuda versions can still be used via the OpenCL code path (`DEVICE=OCLGPU`).

# Compilation

Expand All @@ -45,7 +45,7 @@ make DEVICE=<TYPE> NUMWI=<NWI>

| Parameters | Description | Values |
|:----------:|:----------------------------:|:--------------------------------------------------:|
| `<TYPE>` | Accelerator chosen | `CPU`, `GPU`, `CUDA`, `OCLGPU` |
| `<TYPE>` | Accelerator chosen | `CPU`, `GPU`, `CUDA`, `OCLGPU`, `OPENCL` |
| `<NWI>` | work-group/thread block size | `1`, `2`, `4`, `8`, `16`, `32`, `64`, `128`, `256` |

When `DEVICE=GPU` is chosen, the Makefile automatically tests whether it can compile Cuda successfully. To override, use `DEVICE=CUDA` or `DEVICE=OCLGPU`. The cpu target is only supported using OpenCL. Furthermore, an OpenMP-enabled overlapped pipeline (for setup and processing) can be compiled with `OVERLAP=ON`.
Expand Down
18 changes: 6 additions & 12 deletions host/src/getparameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,10 +198,8 @@ int parse_dpf(
if(mypars->ligandfile) free(mypars->ligandfile);
if(strincmp(argstr,"empty",5) != 0){
if(check_path && !has_absolute_path(argstr)){
len = strlen(argstr);
mypars->ligandfile = (char*)malloc((dpf_path.size()+len+1)*sizeof(char));
mypars->ligandfile[dpf_path.size()] = '\0'; // make sure first part to copy is terminated
strncat(strncpy(mypars->ligandfile, dpf_path.c_str(), dpf_path.size()), argstr, len);
mypars->ligandfile = (char*)malloc((dpf_path.size()+strlen(argstr)+1)*sizeof(char));
strcat(strcpy(mypars->ligandfile, dpf_path.c_str()), argstr);
} else mypars->ligandfile = strdup(argstr);
}
}
Expand All @@ -211,10 +209,8 @@ int parse_dpf(
sscanf(line.c_str(),"%*s %255s",argstr);
if(mypars->flexresfile) free(mypars->flexresfile);
if(check_path && !has_absolute_path(argstr)){
len = strlen(argstr);
mypars->flexresfile = (char*)malloc((dpf_path.size()+len+1)*sizeof(char));
mypars->flexresfile[dpf_path.size()] = '\0'; // make sure first part to copy is terminated
strncat(strncpy(mypars->flexresfile, dpf_path.c_str(), dpf_path.size()), argstr, len);
mypars->flexresfile = (char*)malloc((dpf_path.size()+strlen(argstr)+1)*sizeof(char));
strcat(strcpy(mypars->flexresfile, dpf_path.c_str()), argstr);
} else mypars->flexresfile = strdup(argstr);
}
break;
Expand All @@ -224,10 +220,8 @@ int parse_dpf(
// Add the .fld file
if(mypars->fldfile) free(mypars->fldfile);
if(check_path && !has_absolute_path(argstr)){
len = strlen(argstr);
mypars->fldfile = (char*)malloc((dpf_path.size()+len+1)*sizeof(char));
mypars->fldfile[dpf_path.size()] = '\0'; // make sure first part to copy is terminated
strncat(strncpy(mypars->fldfile, dpf_path.c_str(), dpf_path.size()), argstr, len);
mypars->fldfile = (char*)malloc((dpf_path.size()+strlen(argstr)+1)*sizeof(char));
strcat(strcpy(mypars->fldfile, dpf_path.c_str()), argstr);
} else mypars->fldfile = strdup(argstr); // this allows using the dpf to set up all parameters but the ligand
// Filling mygrid according to the specified fld file
if (get_gridinfo(mypars->fldfile, mygrid) != 0)
Expand Down
38 changes: 19 additions & 19 deletions host/src/performdocking.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -403,11 +403,11 @@ std::vector<int> get_gpu_pool()
int gpuCount=0;
cudaError_t status;
status = cudaGetDeviceCount(&gpuCount);
RTERROR(status, "cudaGetDeviceCount failed");
RTERROR(status, "ERROR in cudaGetDeviceCount:");
std::vector<int> result;
cudaDeviceProp props;
for(unsigned int i=0; i<gpuCount; i++){
RTERROR(cudaGetDeviceProperties(&props,i),"cudaGetDeviceProperties failed");
RTERROR(cudaGetDeviceProperties(&props,i),"ERROR in cudaGetDeviceProperties:");
if(props.major>=3) result.push_back(i);
}
if (result.size() == 0)
Expand All @@ -430,7 +430,7 @@ void setup_gpu_for_docking(
// Initialize CUDA
int gpuCount=0;
cudaError_t status = cudaGetDeviceCount(&gpuCount);
RTERROR(status, "cudaGetDeviceCount failed");
RTERROR(status, "ERROR in cudaGetDeviceCount:");
if (gpuCount == 0)
{
printf("No CUDA-capable devices found, exiting.\n");
Expand All @@ -448,19 +448,19 @@ void setup_gpu_for_docking(
// Now that we have a device, gather some information
size_t freemem, totalmem;
cudaDeviceProp props;
RTERROR(cudaGetDevice(&(cData.devnum)),"cudaGetDevice failed");
RTERROR(cudaGetDeviceProperties(&props,cData.devnum),"cudaGetDeviceProperties failed");
RTERROR(cudaGetDevice(&(cData.devnum)),"ERROR in cudaGetDevice:");
RTERROR(cudaGetDeviceProperties(&props,cData.devnum),"ERROR in cudaGetDeviceProperties:");
tData.device_name = (char*) malloc(strlen(props.name)+32); // make sure array is large enough to hold device number text too
strcpy(tData.device_name, props.name);
if(gpuCount>1) snprintf(&tData.device_name[strlen(props.name)], strlen(props.name)+32, " (#%d / %d)",cData.devnum+1,gpuCount);
printf("Cuda device: %s\n",tData.device_name);
RTERROR(cudaMemGetInfo(&freemem,&totalmem), "cudaGetMemInfo failed");
RTERROR(cudaMemGetInfo(&freemem,&totalmem), "ERROR in cudaGetMemInfo:");
printf("Available memory on device: %lu MB (total: %lu MB)\n",(freemem>>20),(totalmem>>20));
cData.devid=cData.devnum;
cData.devnum=-2;
#ifdef SET_CUDA_PRINTF_BUFFER
status = cudaDeviceSetLimit(cudaLimitPrintfFifoSize, 200000000ull);
RTERROR(status, "cudaDeviceSetLimit failed");
RTERROR(status, "ERROR in cudaDeviceSetLimit:");
#endif
auto const t1 = std::chrono::steady_clock::now();
printf("\nCUDA Setup time %fs\n", elapsed_seconds(t0 ,t1));
Expand Down Expand Up @@ -512,16 +512,16 @@ void finish_gpu_from_docking(
status = cudaFree(cData.pKerconst_conform);
RTERROR(status, "cudaFree: error freeing cData.pKerconst_conform\n");
status = cudaFree(cData.pMem_rotbonds_const);
RTERROR(status, "cudaFree: error freeing cData.pMem_rotbonds_const");
RTERROR(status, "cudaFree: error freeing cData.pMem_rotbonds_const\n");
status = cudaFree(cData.pMem_rotbonds_atoms_const);
RTERROR(status, "cudaFree: error freeing cData.pMem_rotbonds_atoms_const");
RTERROR(status, "cudaFree: error freeing cData.pMem_rotbonds_atoms_const\n");
status = cudaFree(cData.pMem_num_rotating_atoms_per_rotbond_const);
RTERROR(status, "cudaFree: error freeing cData.pMem_num_rotating_atoms_per_rotbond_const");
RTERROR(status, "cudaFree: error freeing cData.pMem_num_rotating_atoms_per_rotbond_const\n");

// Non-constant
if(tData.pMem_fgrids){
status = cudaFree(tData.pMem_fgrids);
RTERROR(status, "cudaFree: error freeing pMem_fgrids");
RTERROR(status, "cudaFree: error freeing pMem_fgrids\n");
}
free(tData.device_name);
}
Expand Down Expand Up @@ -1342,7 +1342,7 @@ parameters argc and argv:
#endif
#ifdef USE_CUDA
status = cudaMemcpy(sim_state.cpu_energies.data(), pMem_energies_current, size_energies, cudaMemcpyDeviceToHost);
RTERROR(status, "cudaMemcpy: couldn't download pMem_energies_current");
RTERROR(status, "cudaMemcpy: couldn't download pMem_energies_current.\n");
#endif
if (autostop.check_if_satisfactory(generation_cnt, sim_state.cpu_energies.data(), total_evals))
if (total_evals>min_as_evals)
Expand Down Expand Up @@ -1643,19 +1643,19 @@ parameters argc and argv:
#endif
#ifdef USE_CUDA
status = cudaFree(tData.pMem_conformations1);
RTERROR(status, "cudaFree: error freeing pMem_conformations1");
RTERROR(status, "cudaFree: error freeing pMem_conformations1.\n");
status = cudaFree(tData.pMem_conformations2);
RTERROR(status, "cudaFree: error freeing pMem_conformations2");
RTERROR(status, "cudaFree: error freeing pMem_conformations2.\n");
status = cudaFree(tData.pMem_energies1);
RTERROR(status, "cudaFree: error freeing pMem_energies1");
RTERROR(status, "cudaFree: error freeing pMem_energies1.\n");
status = cudaFree(tData.pMem_energies2);
RTERROR(status, "cudaFree: error freeing pMem_energies2");
RTERROR(status, "cudaFree: error freeing pMem_energies2.\n");
status = cudaFree(tData.pMem_evals_of_new_entities);
RTERROR(status, "cudaFree: error freeing pMem_evals_of_new_entities");
RTERROR(status, "cudaFree: error freeing pMem_evals_of_new_entities.\n");
status = cudaFree(tData.pMem_gpu_evals_of_runs);
RTERROR(status, "cudaFree: error freeing pMem_gpu_evals_of_runs");
RTERROR(status, "cudaFree: error freeing pMem_gpu_evals_of_runs.\n");
status = cudaFree(tData.pMem_prng_states);
RTERROR(status, "cudaFree: error freeing pMem_prng_states");
RTERROR(status, "cudaFree: error freeing pMem_prng_states.\n");
#endif
delete KerConst_interintra;
delete KerConst_intracontrib;
Expand Down
31 changes: 28 additions & 3 deletions test_cuda.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,37 @@

current_dir=`pwd`
script_dir=`dirname $0`
CUDA_VERSION=`nvcc --version 2>/dev/null | grep release | awk '{ print $(NF-1) }' | sed "s/,//g"`
if [[ $CUDA_VERSION != "" ]]; then
printf "Using Cuda %s\n" $CUDA_VERSION >&2
else
if [[ $DEVICE == "CUDA" ]]; then
printf "Error: nvcc command does not exist/is not working properly.\n" >&2
fi
exit 1
fi
TARGETS_SUPPORTED=`nvcc --list-gpu-arch 2>/dev/null`
if [[ $TARGETS_SUPPORTED == "" ]]; then # might be an older Cuda version that doesn't have the --list-gpu-arch option
TARGETS_SUPPORTED=`nvcc --help | grep -oP "compute_\d+" | sort -u`
fi
if [[ "$4" != "" ]]; then
for T in $4; do
SUPPORTED=`grep -o $T <<< $TARGETS_SUPPORTED`
if [[ $SUPPORTED == "" ]]; then
printf "Error: Specified compute target <$T> not supported by installed Cuda version.\n" >&2
exit 1
fi
done
TARGETS="$4"
else
TARGETS=`awk -F'_' '{ if(\$2>50) print \$2 }' <<< "$TARGETS_SUPPORTED" | tr "\n" " "`
fi
printf "Compiling for targets: %s\n" "$TARGETS" >&2
cd "$script_dir"
if [[ ! -f "test_cuda" ]]; then
$1 -I$2 -L$3 -lcuda -lcudart -o test_cuda test_cuda.cpp &> /dev/null
test -e test_cuda && echo yes || echo no
test -e test_cuda && echo $TARGETS
else
test -e test_cuda && echo yes || echo no
test -e test_cuda && echo $TARGETS
fi
cd "$current_dir"

0 comments on commit b8e5512

Please sign in to comment.