diff --git a/manifests/models/llama-3.1-70b-instruct-awq-int4-gh200.yaml b/manifests/models/llama-3.1-70b-instruct-awq-int4-gh200.yaml index 5c595d7d..dd2ef872 100644 --- a/manifests/models/llama-3.1-70b-instruct-awq-int4-gh200.yaml +++ b/manifests/models/llama-3.1-70b-instruct-awq-int4-gh200.yaml @@ -1,3 +1,4 @@ +# Source: models/templates/models.yaml apiVersion: kubeai.org/v1 kind: Model metadata: @@ -13,5 +14,4 @@ spec: - --enable-prefix-caching - --disable-log-requests targetRequests: 50 - minReplicas: 1 - resourceProfile: nvidia-gpu-gh200:1 \ No newline at end of file + resourceProfile: nvidia-gpu-gh200:1