Re-adding nvidia-device-plugin to production cluster #4436

Merged (1 commit) on May 31, 2024
@@ -1,6 +1,6 @@
# resource "kubernetes_manifest" "nvidia_device_plugin" {
#   manifest = yamldecode(file("src/kubernetes/nvidia-device-plugin.yml"))
# }
resource "kubernetes_manifest" "nvidia_device_plugin" {
  manifest = yamldecode(file("src/kubernetes/nvidia-device-plugin.yml"))
}

resource "kubernetes_manifest" "nvidia_gpu_slicing" {
  manifest = yamldecode(file("src/kubernetes/nvidia-gpu-slicing.yml"))
src/kubernetes/nvidia-device-plugin.yml
@@ -1,136 +1,136 @@
# # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
# #
# # Licensed under the Apache License, Version 2.0 (the "License");
# # you may not use this file except in compliance with the License.
# # You may obtain a copy of the License at
# #
# # http://www.apache.org/licenses/LICENSE-2.0
# #
# # Unless required by applicable law or agreed to in writing, software
# # distributed under the License is distributed on an "AS IS" BASIS,
# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# # See the License for the specific language governing permissions and
# # limitations under the License.
# ---
# apiVersion: apps/v1
# kind: DaemonSet
# metadata:
# name: nvidia-device-plugin-daemonset
# namespace: kube-system
# spec:
# selector:
# matchLabels:
# name: nvidia-device-plugin-ds
# updateStrategy:
# type: RollingUpdate
# template:
# metadata:
# labels:
# name: nvidia-device-plugin-ds
# spec:
# # @ministryofjustice/analytical-platform: We added this nodeSelector
# nodeSelector:
# gpu-compute: "true"
# tolerations:
# - key: nvidia.com/gpu
# operator: Exists
# effect: NoSchedule
# # @ministryofjustice/analytical-platform: We added this toleration
# - key: gpu-compute
# operator: Exists
# effect: NoSchedule
# # Mark this pod as a critical add-on; when enabled, the critical add-on
# # scheduler reserves resources for critical add-on pods so that they can
# # be rescheduled after a failure.
# # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
# priorityClassName: "system-node-critical"
# shareProcessNamespace: true
# serviceAccountName: nvidia-device-plugin-daemonset-service-account
# initContainers:
# - image: nvcr.io/nvidia/k8s-device-plugin:v0.15.0
# name: nvidia-device-plugin-init
# command: ["config-manager"]
# env:
# - name: ONESHOT
# value: "true"
# - name: NODE_NAME
# valueFrom:
# fieldRef:
# fieldPath: "spec.nodeName"
# - name: NODE_LABEL
# value: "nvidia.com/device-plugin.config"
# - name: CONFIG_FILE_SRCDIR
# value: "/available-configs"
# - name: CONFIG_FILE_DST
# value: "/config/config.yaml"
# - name: FALLBACK_STRATEGIES
# value: named,single
# - name: SEND_SIGNAL
# value: "false"
# volumeMounts:
# - name: available-configs
# mountPath: /available-configs
# - name: config
# mountPath: /config
# containers:
# - image: nvcr.io/nvidia/k8s-device-plugin:v0.15.0
# name: nvidia-device-plugin-sidecar
# command: ["config-manager"]
# env:
# - name: ONESHOT
# value: "false"
# - name: NODE_NAME
# valueFrom:
# fieldRef:
# fieldPath: "spec.nodeName"
# - name: NODE_LABEL
# value: "nvidia.com/device-plugin.config"
# - name: CONFIG_FILE_SRCDIR
# value: "/available-configs"
# - name: CONFIG_FILE_DST
# value: "/config/config.yaml"
# - name: FALLBACK_STRATEGIES
# value: named,single
# - name: SEND_SIGNAL
# value: "true"
# - name: SIGNAL
# value: "1" # SIGHUP
# - name: PROCESS_TO_SIGNAL
# value: "nvidia-device-plugin"
# volumeMounts:
# - name: available-configs
# mountPath: /available-configs
# - name: config
# mountPath: /config
# securityContext:
# allowPrivilegeEscalation: false
# capabilities:
# drop: ["ALL"]
# - image: nvcr.io/nvidia/k8s-device-plugin:v0.15.0
# name: nvidia-device-plugin-ctr
# env:
# - name: FAIL_ON_INIT_ERROR
# value: "false"
# - name: CONFIG_FILE
# value: /config/config.yaml
# securityContext:
# allowPrivilegeEscalation: false
# capabilities:
# drop: ["ALL"]
# volumeMounts:
# - name: device-plugin
# mountPath: /var/lib/kubelet/device-plugins
# - name: available-configs
# mountPath: /available-configs
# - name: config
# mountPath: /config
# volumes:
# - name: device-plugin
# hostPath:
# path: /var/lib/kubelet/device-plugins
# - name: available-configs
# configMap:
# name: nvidia-device-plugin-daemonset
# defaultMode: 444
# - name: config
# emptyDir: {}
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nvidia-device-plugin-daemonset
  namespace: kube-system
spec:
  selector:
    matchLabels:
      name: nvidia-device-plugin-ds
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        name: nvidia-device-plugin-ds
    spec:
      # @ministryofjustice/analytical-platform: We added this nodeSelector
      nodeSelector:
        gpu-compute: "true"
      tolerations:
        - key: nvidia.com/gpu
          operator: Exists
          effect: NoSchedule
        # @ministryofjustice/analytical-platform: We added this toleration
        - key: gpu-compute
          operator: Exists
          effect: NoSchedule
      # Mark this pod as a critical add-on; when enabled, the critical add-on
      # scheduler reserves resources for critical add-on pods so that they can
      # be rescheduled after a failure.
      # See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
      priorityClassName: "system-node-critical"
      shareProcessNamespace: true
      serviceAccountName: nvidia-device-plugin-daemonset-service-account
      initContainers:
        - image: nvcr.io/nvidia/k8s-device-plugin:v0.15.0
          name: nvidia-device-plugin-init
          command: ["config-manager"]
          env:
            - name: ONESHOT
              value: "true"
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: "spec.nodeName"
            - name: NODE_LABEL
              value: "nvidia.com/device-plugin.config"
            - name: CONFIG_FILE_SRCDIR
              value: "/available-configs"
            - name: CONFIG_FILE_DST
              value: "/config/config.yaml"
            - name: FALLBACK_STRATEGIES
              value: named,single
            - name: SEND_SIGNAL
              value: "false"
          volumeMounts:
            - name: available-configs
              mountPath: /available-configs
            - name: config
              mountPath: /config
      containers:
        - image: nvcr.io/nvidia/k8s-device-plugin:v0.15.0
          name: nvidia-device-plugin-sidecar
          command: ["config-manager"]
          env:
            - name: ONESHOT
              value: "false"
            - name: NODE_NAME
              valueFrom:
                fieldRef:
                  fieldPath: "spec.nodeName"
            - name: NODE_LABEL
              value: "nvidia.com/device-plugin.config"
            - name: CONFIG_FILE_SRCDIR
              value: "/available-configs"
            - name: CONFIG_FILE_DST
              value: "/config/config.yaml"
            - name: FALLBACK_STRATEGIES
              value: named,single
            - name: SEND_SIGNAL
              value: "true"
            - name: SIGNAL
              value: "1" # SIGHUP
            - name: PROCESS_TO_SIGNAL
              value: "nvidia-device-plugin"
          volumeMounts:
            - name: available-configs
              mountPath: /available-configs
            - name: config
              mountPath: /config
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop: ["ALL"]
        - image: nvcr.io/nvidia/k8s-device-plugin:v0.15.0
          name: nvidia-device-plugin-ctr
          env:
            - name: FAIL_ON_INIT_ERROR
              value: "false"
            - name: CONFIG_FILE
              value: /config/config.yaml
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop: ["ALL"]
          volumeMounts:
            - name: device-plugin
              mountPath: /var/lib/kubelet/device-plugins
            - name: available-configs
              mountPath: /available-configs
            - name: config
              mountPath: /config
      volumes:
        - name: device-plugin
          hostPath:
            path: /var/lib/kubelet/device-plugins
        - name: available-configs
          configMap:
            name: nvidia-device-plugin-daemonset
            defaultMode: 444
        - name: config
          emptyDir: {}
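Once this DaemonSet is running again, each node labelled gpu-compute: "true" should advertise nvidia.com/gpu in its allocatable resources (kubectl describe node will show it, multiplied by the replica count if time-slicing is configured). A minimal smoke-test pod under those assumptions is sketched below; the pod name and CUDA image tag are illustrative and not part of this PR:

```yaml
# Hypothetical smoke test; names and image tag are illustrative, not taken from this PR.
apiVersion: v1
kind: Pod
metadata:
  name: gpu-smoke-test
spec:
  restartPolicy: Never
  nodeSelector:
    gpu-compute: "true"                 # matches the nodeSelector used by the DaemonSet
  tolerations:                          # mirror the DaemonSet tolerations in case the GPU nodes carry either taint
    - key: nvidia.com/gpu
      operator: Exists
      effect: NoSchedule
    - key: gpu-compute
      operator: Exists
      effect: NoSchedule
  containers:
    - name: cuda
      image: nvcr.io/nvidia/cuda:12.4.1-base-ubuntu22.04   # illustrative image/tag
      command: ["nvidia-smi"]
      resources:
        limits:
          nvidia.com/gpu: 1             # schedulable once the plugin has registered GPUs on the node
```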