From c26aeccd90622745d60906850e4cf5968038a2b4 Mon Sep 17 00:00:00 2001 From: Dan Webb Date: Tue, 10 Dec 2024 15:49:12 +0000 Subject: [PATCH] Add GPU support Signed-off-by: Dan Webb --- CHANGELOG.md | 2 ++ documentation/docker_container.md | 24 ++++++++++++++++++++++++ resources/container.rb | 14 +++++++++++--- 3 files changed, 37 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fb39adb12..30dd423c2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +- Added GPU support for the `docker_container` resource + ## 11.6.1 - *2024-12-10* ## 11.6.0 - *2024-12-03* diff --git a/documentation/docker_container.md b/documentation/docker_container.md index 219671b74..17cf438f3 100644 --- a/documentation/docker_container.md +++ b/documentation/docker_container.md @@ -42,6 +42,8 @@ Most `docker_container` properties are the `snake_case` version of the `CamelCas - `env_file` - Read environment variables from a file and set in the container. Accepts an Array or String to the file location. lazy evaluator must be set if the file passed is created by Chef. - `extra_hosts` - An array of hosts to add to the container's `/etc/hosts` in the form `['host_a:10.9.8.7', 'host_b:10.9.8.6']` - `force` - A boolean to use in container operations that support a `force` option. Defaults to `false` +- `gpus` - GPU devices to add to the container. Use 'all' to pass all GPUs to the container. +- `gpu_driver` - GPU driver to use for container. Defaults to 'nvidia'. - `health_check` - A hash containing the health check options - [healthcheck reference](https://docs.docker.com/engine/reference/run/#healthcheck) - `host` - A string containing the host the API should communicate with. Defaults to ENV['DOCKER_HOST'] if set - `host_name` - The hostname for the container. 
@@ -521,3 +523,25 @@ docker_container 'health_check' do action :run end ``` + +### Run a container with GPU support + +```ruby +# Using default NVIDIA driver +docker_container 'gpu_container' do + repo 'nvidia/cuda' + tag 'latest' + command 'nvidia-smi' + gpus 'all' + action :run_if_missing +end + +# Using a custom GPU driver +docker_container 'custom_gpu_container' do + repo 'custom/gpu-image' + tag 'latest' + gpus 'all' + gpu_driver 'custom_driver' + action :run_if_missing +end +``` diff --git a/resources/container.rb b/resources/container.rb index a145d80f0..d6484d177 100644 --- a/resources/container.rb +++ b/resources/container.rb @@ -77,6 +77,8 @@ property :volumes_from, [String, Array], coerce: proc { |v| v.nil? ? nil : Array(v) } property :volume_driver, String property :working_dir, String +property :gpus, [String, nil], description: 'GPU devices to add to the container (e.g., all or device=0)' +property :gpu_driver, String, default: 'nvidia', description: 'GPU driver to use for container (e.g., nvidia)' # Used to store the bind property since binds is an alias to volumes property :volumes_binds, Array, coerce: proc { |v| v.sort } @@ -326,11 +328,11 @@ def coerce_port_bindings(v) # # If you say: `image 'repo/blah'` # Repo will be: `repo/blah` -# Tag will be: `latest` +# Tag will be: `latest` # # If you say: `image 'repo/blah:3.1'` # Repo will be: `repo/blah` -# Tag will be: `3.1` +# Tag will be: `3.1` # # If you say: `image 'repo:1337/blah'` # Repo will be: `repo:1337/blah` @@ -338,7 +340,7 @@ def coerce_port_bindings(v) # # If you say: `image 'repo:1337/blah:3.1'` # Repo will be: `repo:1337/blah` -# Tag will be: `3.1` +# Tag will be: `3.1` # def image(image = nil) if image @@ -544,6 +546,12 @@ def load_container_labels # Store the state of the options and create the container new_resource.create_options = config + config['HostConfig']['DeviceRequests'] = [{ + 'Driver' => new_resource.gpu_driver, + 'Count' => -1, # -1 means no limit + 'Capabilities' => 
[['gpu']], + }] if new_resource.gpus == 'all' + Docker::Container.create(config, connection) end end