From e11ebcfb792570efa96078880c0dc2d369e4c418 Mon Sep 17 00:00:00 2001 From: Sam Stoelinga Date: Sat, 2 Nov 2024 08:56:03 -0700 Subject: [PATCH] run make manifests --- manifests/models/llama-3.1-70b-instruct-awq-int4-gh200.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/manifests/models/llama-3.1-70b-instruct-awq-int4-gh200.yaml b/manifests/models/llama-3.1-70b-instruct-awq-int4-gh200.yaml index 5c595d7d..dd2ef872 100644 --- a/manifests/models/llama-3.1-70b-instruct-awq-int4-gh200.yaml +++ b/manifests/models/llama-3.1-70b-instruct-awq-int4-gh200.yaml @@ -1,3 +1,4 @@ +# Source: models/templates/models.yaml apiVersion: kubeai.org/v1 kind: Model metadata: @@ -13,5 +14,4 @@ spec: - --enable-prefix-caching - --disable-log-requests targetRequests: 50 - minReplicas: 1 - resourceProfile: nvidia-gpu-gh200:1 \ No newline at end of file + resourceProfile: nvidia-gpu-gh200:1