diff --git a/Makefile b/Makefile
index 3e44cdc1..3b41fa03 100755
--- a/Makefile
+++ b/Makefile
@@ -27,7 +27,12 @@ OVERLAP = ON
 
 
 ifeq ($(DEVICE), $(filter $(DEVICE),GPU CUDA))
-TARGETS_SUPPORTED := $(shell ./test_cuda.sh nvcc "$(GPU_INCLUDE_PATH)" "$(GPU_LIBRARY_PATH)" "$(TARGETS)" "$(DEVICE)")
+# Lowest compute capability handed to test_cuda.sh as its 5th argument; raised to 80 when TENSOR=ON
+MIN_COMPUTE:=50
+ifeq ($(TENSOR), ON)
+MIN_COMPUTE:=80
+endif
+TARGETS_SUPPORTED := $(shell ./test_cuda.sh nvcc "$(GPU_INCLUDE_PATH)" "$(GPU_LIBRARY_PATH)" "$(TARGETS)" "$(MIN_COMPUTE)")
 # if user specifies DEVICE=GPU the test result determines wether CUDA will be used or not
 ifeq ($(TARGETS_SUPPORTED),)
 ifeq ($(DEVICE),CUDA)
diff --git a/host/src/performdocking.cpp b/host/src/performdocking.cpp
index 2375386c..a6e3d661 100644
--- a/host/src/performdocking.cpp
+++ b/host/src/performdocking.cpp
@@ -450,6 +450,13 @@ void setup_gpu_for_docking(
 	cudaDeviceProp props;
 	RTERROR(cudaGetDevice(&(cData.devnum)),"ERROR in cudaGetDevice:");
 	RTERROR(cudaGetDeviceProperties(&props,cData.devnum),"ERROR in cudaGetDeviceProperties:");
+#ifdef USE_NVTENSOR
+	if(props.major < 8){
+		printf("Error: Compute capability 8.0 or higher is needed for tensor core sum reductions.\n");
+		printf(" Available device %s has compute capability %d.%d.\n", props.name, props.major, props.minor);
+		exit(-1);
+	}
+#endif
 	tData.device_name = (char*) malloc(strlen(props.name)+32); // make sure array is large enough to hold device number text too
 	strcpy(tData.device_name, props.name);
 	if(gpuCount>1) snprintf(&tData.device_name[strlen(props.name)], strlen(props.name)+32, " (#%d / %d)",cData.devnum+1,gpuCount);
diff --git a/test_cuda.sh b/test_cuda.sh
index 12fb3d86..e39cceb0 100755
--- a/test_cuda.sh
+++ b/test_cuda.sh
@@ -26,7 +26,8 @@ if [[ "$4" != "" ]]; then
 	done
 	TARGETS="$4"
 else
-	TARGETS=`awk -F'_' '{ if(\$2>50) print \$2 }' <<< "$TARGETS_SUPPORTED" | tr "\n" " "`
+	# $5 is the minimum compute capability (50 if not given); it is passed with -v so it is treated as data, never as awk code
+	TARGETS=$(awk -F'_' -v min="${5:-50}" '{ if ($2 >= min) print $2 }' <<< "$TARGETS_SUPPORTED" | tr "\n" " ")
 fi
 printf "Compiling for targets: %s\n" "$TARGETS" >&2
 cd "$script_dir"