From 620f8a4c75367812e34014491f4abb46c0ad68ee Mon Sep 17 00:00:00 2001 From: Akifumi Imanishi Date: Mon, 19 Aug 2024 14:54:21 +0900 Subject: [PATCH] Merge pull request #386 from kmaehashi/fix-cuda-header Install `nvidia-cuda-runtime-cu12` in verifier --- verifier/Dockerfile.debian | 1 + verifier/Dockerfile.el8 | 1 + verifier/Dockerfile.rhel | 1 + verifier/agent.py | 5 +++++ verifier/setup_cuda_runtime_headers.py | 31 ++++++++++++++++++++++++++ 5 files changed, 39 insertions(+) create mode 100644 verifier/setup_cuda_runtime_headers.py diff --git a/verifier/Dockerfile.debian b/verifier/Dockerfile.debian index 3ffa9fb6..233f7571 100644 --- a/verifier/Dockerfile.debian +++ b/verifier/Dockerfile.debian @@ -61,5 +61,6 @@ ENV LD_LIBRARY_PATH="/opt/rocm/lib:${LD_LIBRARY_PATH}" # Workaround for bug specific in ROCm 4.3 (https://github.com/cupy/cupy/issues/6605) ENV LLVM_PATH="/opt/rocm/llvm" +COPY setup_cuda_runtime_headers.py / COPY agent.py / ENTRYPOINT ["/agent.py"] diff --git a/verifier/Dockerfile.el8 b/verifier/Dockerfile.el8 index fa155187..ce7312e9 100644 --- a/verifier/Dockerfile.el8 +++ b/verifier/Dockerfile.el8 @@ -26,4 +26,5 @@ RUN [ -z "${system_packages}" ] || ( \ ENV HOME /tmp COPY agent.py / +COPY setup_cuda_runtime_headers.py / ENTRYPOINT ["/agent.py"] diff --git a/verifier/Dockerfile.rhel b/verifier/Dockerfile.rhel index 3fc2e6c6..4ad7209f 100644 --- a/verifier/Dockerfile.rhel +++ b/verifier/Dockerfile.rhel @@ -29,4 +29,5 @@ RUN [ -z "${system_packages}" ] || ( \ ENV HOME /tmp COPY agent.py / +COPY setup_cuda_runtime_headers.py / ENTRYPOINT ["/agent.py"] diff --git a/verifier/agent.py b/verifier/agent.py index 1485145d..b573be4b 100755 --- a/verifier/agent.py +++ b/verifier/agent.py @@ -58,6 +58,11 @@ def main(self): ] self._run(*cmdline) + self._log('Installing CUDA Runtime headers (if necessary)...') + verifier_dir = os.path.abspath(os.path.dirname(sys.argv[0])) + cmdline = pycommand + [f'{verifier_dir}/setup_cuda_runtime_headers.py'] + 
self._run(*cmdline) + # Importing CuPy should not be emit warnings, # Raise on warning to to catch bugs of preload warnings, e.g.: # https://github.com/cupy/cupy/pull/4933
diff --git a/verifier/setup_cuda_runtime_headers.py b/verifier/setup_cuda_runtime_headers.py
new file mode 100644
index 00000000..1425cad0
--- /dev/null
+++ b/verifier/setup_cuda_runtime_headers.py
@@ -0,0 +1,48 @@
+"""Install the CUDA Runtime headers wheel for the CUDA version in use.
+
+The verifier agent runs this file as a standalone script.
+"""
+
+import shlex
+import subprocess
+import sys
+
+import cupy
+
+
+def main():
+    """Install ``nvidia-cuda-runtime-cu12`` when running on CUDA 12.2+.
+
+    The version is taken from ``cupy.cuda.nvrtc.getVersion()``. Nothing is
+    installed on HIP builds, on CUDA 11.x, or on CUDA 12.0/12.1.
+
+    Raises:
+        RuntimeError: If the major version is neither 11 nor 12.
+        subprocess.CalledProcessError: If ``pip install`` fails.
+    """
+    if cupy.cuda.runtime.is_hip:
+        return
+
+    major, minor = cupy.cuda.nvrtc.getVersion()
+    if major not in (11, 12):
+        # Raise instead of `assert False`: asserts are stripped under
+        # `python -O`, which would let an unknown version fall through to
+        # the `pip install` below.
+        raise RuntimeError(f'Unsupported CUDA version: {major}.{minor}')
+
+    # Only install if CUDA 12.2+.
+    if (major, minor) < (12, 2):
+        return
+
+    cmdline = [
+        sys.executable, '-m', 'pip', 'install', '--user',
+        f'nvidia-cuda-runtime-cu{major}=={major}.{minor}.*',
+    ]
+    # Flush so the message is not emitted after pip's own output when
+    # stdout is block-buffered (e.g. redirected to a pipe).
+    print(f'Running: {shlex.join(cmdline)}', flush=True)
+    subprocess.run(cmdline, check=True)
+
+
+if __name__ == '__main__':
+    main()