From fff7959fdd2044736d221fd9b92b881cb2683223 Mon Sep 17 00:00:00 2001
From: Luca Giorgi
Date: Fri, 10 May 2024 17:44:17 +0200
Subject: [PATCH 1/6] Add base test for vLLM and its metrics

Signed-off-by: Luca Giorgi
---
 .../Resources/CLI/ModelServing/llm.resource   |   5 +-
 .../Files/llm/vllm/download_model.yaml        |  74 +++++++++++
 .../tests/Resources/Files/llm/vllm/query.json |  13 ++
 .../llm/vllm/vllm-gpt2_inferenceservice.yaml  |  14 +++
 .../Files/llm/vllm/vllm_servingruntime.yaml   |  79 ++++++++++++
 .../426__model_serving_vllm_metrics.robot     | 119 ++++++++++++++++++
 6 files changed, 302 insertions(+), 2 deletions(-)
 create mode 100644 ods_ci/tests/Resources/Files/llm/vllm/download_model.yaml
 create mode 100644 ods_ci/tests/Resources/Files/llm/vllm/query.json
 create mode 100644 ods_ci/tests/Resources/Files/llm/vllm/vllm-gpt2_inferenceservice.yaml
 create mode 100644 ods_ci/tests/Resources/Files/llm/vllm/vllm_servingruntime.yaml
 create mode 100644 ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot

diff --git a/ods_ci/tests/Resources/CLI/ModelServing/llm.resource b/ods_ci/tests/Resources/CLI/ModelServing/llm.resource
index 2b502357d..48d1c935e 100644
--- a/ods_ci/tests/Resources/CLI/ModelServing/llm.resource
+++ b/ods_ci/tests/Resources/CLI/ModelServing/llm.resource
@@ -775,8 +775,9 @@ Get KServe Default Deployment Mode From DSC
     RETURN    ${mode}
 
 Start Port-forwarding
-    [Arguments]    ${namespace}    ${pod_name}    ${process_alias}=llm-query-process
-    ${process}=    Start Process    oc -n ${namespace} port-forward pod/${pod_name} 8033:8033
+    [Arguments]    ${namespace}    ${pod_name}    ${process_alias}=llm-query-process    ${local_port}=8033
+    ...    ${remote_port}=8033
+    ${process}=    Start Process    oc -n ${namespace} port-forward pod/${pod_name} ${local_port}:${remote_port}
     ...    alias=${process_alias}    stderr=STDOUT    shell=yes
     Process Should Be Running    ${process}
     sleep    7s
diff --git a/ods_ci/tests/Resources/Files/llm/vllm/download_model.yaml b/ods_ci/tests/Resources/Files/llm/vllm/download_model.yaml
new file mode 100644
index 000000000..c7851289c
--- /dev/null
+++ b/ods_ci/tests/Resources/Files/llm/vllm/download_model.yaml
@@ -0,0 +1,74 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: vllm-gpt2
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: vlmm-gpt2-claim
+  namespace: vllm-gpt2
+spec:
+  accessModes:
+    - ReadWriteOnce
+  volumeMode: Filesystem
+  resources:
+    requests:
+      storage: 10Gi
+---
+apiVersion: v1
+kind: Pod
+metadata:
+  name: setup-gpt2-binary
+  namespace: vllm-gpt2
+  labels:
+    gpt-download-pod: 'true'
+spec:
+  volumes:
+    - name: model-volume
+      persistentVolumeClaim:
+        claimName: vlmm-gpt2-claim
+  restartPolicy: Never
+  initContainers:
+    - name: fix-volume-permissions
+      image: quay.io/quay/busybox:latest
+      imagePullPolicy: IfNotPresent
+      securityContext:
+        allowPrivilegeEscalation: true
+      resources:
+        requests:
+          memory: "64Mi"
+          cpu: "250m"
+          nvidia.com/gpu: "1"
+        limits:
+          memory: "128Mi"
+          cpu: "500m"
+          nvidia.com/gpu: "1"
+      command: ["sh"]
+      args: ["-c", "chown -R 1001:1001 /mnt/models"]
+      volumeMounts:
+        - mountPath: "/mnt/models/"
+          name: model-volume
+  containers:
+    - name: download-model
+      image: registry.access.redhat.com/ubi9/python-311:latest
+      imagePullPolicy: IfNotPresent
+      securityContext:
+        allowPrivilegeEscalation: true
+      resources:
+        requests:
+          memory: "1Gi"
+          cpu: "1"
+          nvidia.com/gpu: "1"
+        limits:
+          memory: "1Gi"
+          cpu: "1"
+          nvidia.com/gpu: "1"
+      command: ["sh"]
+      args: [ "-c", "pip install --upgrade pip && pip install --upgrade huggingface_hub && python3 -c 'from huggingface_hub import snapshot_download\nsnapshot_download(repo_id=\"gpt2\", local_dir=\"/mnt/models/gpt2\", local_dir_use_symlinks=False)'"]
+      volumeMounts:
+        - mountPath: "/mnt/models/"
+          name: model-volume
+      env:
+        - name: TRANSFORMERS_CACHE
+          value: /tmp
\ No newline at end of file
diff --git a/ods_ci/tests/Resources/Files/llm/vllm/query.json b/ods_ci/tests/Resources/Files/llm/vllm/query.json
new file mode 100644
index 000000000..156795eda
--- /dev/null
+++ b/ods_ci/tests/Resources/Files/llm/vllm/query.json
@@ -0,0 +1,13 @@
+{
+    "model": "gpt2",
+    "messages": [
+        {
+            "role": "system",
+            "content": "You are a poetic assistant, skilled in explaining complex programming concepts with creative flair."
+        },
+        {
+            "role": "user",
+            "content": "Compose a poem that explains the concept of recursion in programming."
+ } + ] +} \ No newline at end of file diff --git a/ods_ci/tests/Resources/Files/llm/vllm/vllm-gpt2_inferenceservice.yaml b/ods_ci/tests/Resources/Files/llm/vllm/vllm-gpt2_inferenceservice.yaml new file mode 100644 index 000000000..6ef4bbda2 --- /dev/null +++ b/ods_ci/tests/Resources/Files/llm/vllm/vllm-gpt2_inferenceservice.yaml @@ -0,0 +1,14 @@ +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + name: vllm-gpt2-openai + namespace: vllm-gpt2 + labels: + modelmesh-enabled: "true" +spec: + predictor: + model: + runtime: kserve-vllm + modelFormat: + name: vLLM + storageUri: pvc://vlmm-gpt2-claim/ \ No newline at end of file diff --git a/ods_ci/tests/Resources/Files/llm/vllm/vllm_servingruntime.yaml b/ods_ci/tests/Resources/Files/llm/vllm/vllm_servingruntime.yaml new file mode 100644 index 000000000..4d02cdcd0 --- /dev/null +++ b/ods_ci/tests/Resources/Files/llm/vllm/vllm_servingruntime.yaml @@ -0,0 +1,79 @@ +apiVersion: serving.kserve.io/v1alpha1 +kind: ServingRuntime +metadata: + name: kserve-vllm + namespace: vllm-gpt2 +spec: + annotations: + sidecar.istio.io/inject: "true" + sidecar.istio.io/rewriteAppHTTPProbers: "true" + serving.knative.openshift.io/enablePassthrough: "true" + opendatahub.io/dashboard: "true" + openshift.io/display-name: "vLLLM Openai entry point" + serving.kserve.io/enable-prometheus-scraping: "false" + serving.kserve.io/enable-metric-aggregation: "true" + prometheus.io/port: '8080' + prometheus.io/path: "/metrics/" + multiModel: false + supportedModelFormats: + - name: vLLM + autoSelect: true + containers: + - name: kserve-container + #image: kserve/vllmserver:latest + image: quay.io/wxpe/tgis-vllm:release.74803b6 + startupProbe: + httpGet: + port: 8080 + path: /health + # Allow 12 minutes to start + failureThreshold: 24 + periodSeconds: 30 + readinessProbe: + httpGet: + port: 8080 + path: /health + periodSeconds: 30 + timeoutSeconds: 5 + livenessProbe: + httpGet: + port: 8080 + path: /health + periodSeconds: 100 + timeoutSeconds: 8 + terminationMessagePolicy: "FallbackToLogsOnError" + terminationGracePeriodSeconds: 120 + args: + - --port + - "8080" + - --model + - /mnt/models/gpt2 + - --served-model-name + - "gpt2" + command: + - python3 + - -m + - vllm.entrypoints.openai.api_server + env: + - name: STORAGE_URI + value: pvc://vlmm-gpt2-claim/ + - name: HF_HUB_CACHE + value: /tmp + - name: TRANSFORMERS_CACHE + value: $(HF_HUB_CACHE) + - name: NUM_GPUS + value: "1" + - name: CUDA_VISIBLE_DEVICES + value: "0" + ports: + - containerPort: 8080 + protocol: TCP + resources: + limits: + cpu: "4" + memory: 8Gi + nvidia.com/gpu: "1" + requests: + cpu: "1" + memory: 4Gi + nvidia.com/gpu: "1" diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot new file mode 100644 index 000000000..85e701c76 --- /dev/null +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot @@ -0,0 +1,119 @@ +*** Settings *** +Documentation Basic vLLM deploy test to validate metrics being correctly exposed in OpenShift +Resource ../../../../../Resources/Page/ODH/ODHDashboard/ODHModelServing.resource +Resource ../../../../../Resources/OCP.resource +Resource ../../../../../Resources/Page/Operators/ISVs.resource +Resource ../../../../../Resources/Page/ODH/ODHDashboard/ODHDashboardAPI.resource +Resource 
../../../../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/ModelServer.resource +Resource ../../../../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/DataConnections.resource +Resource ../../../../../Resources/CLI/ModelServing/llm.resource +Resource ../../../../../Resources/Page/ODH/ODHDashboard/ODHDataScienceProject/Permissions.resource +Library OpenShiftLibrary +Suite Setup Suite Setup +Suite Teardown Suite Teardown +Test Tags KServe + + +*** Variables *** +${VLLM_RESOURCES_DIRPATH}= ods_ci/tests/Resources/Files/llm/vllm +${DL_POD_FILEPATH}= ${VLLM_RESOURCES_DIRPATH}/download_model.yaml +${SR_FILEPATH}= ${VLLM_RESOURCES_DIRPATH}/vllm_servingruntime.yaml +${IS_FILEPATH}= ${VLLM_RESOURCES_DIRPATH}/vllm-gpt2_inferenceservice.yaml +${INFERENCE_INPUT}= @${VLLM_RESOURCES_DIRPATH}/query.json +${INFERENCE_URL}= http://localhost:8080/v1/chat/completions +${METRICS_URL}= http://localhost:8080/metrics/ +${TEST_NS}= vllm-gpt2 +@{SEARCH_METRICS}= vllm:cache_config_info +... vllm:num_requests_running +... vllm:num_requests_swapped +... vllm:num_requests_waiting +... vllm:gpu_cache_usage_perc +... vllm:cpu_cache_usage_perc +... vllm:prompt_tokens_total +... vllm:generation_tokens_total +... vllm:time_to_first_token_seconds_bucket +... vllm:time_to_first_token_seconds_count +... vllm:time_to_first_token_seconds_sum +... vllm:time_per_output_token_seconds_bucket +... vllm:time_per_output_token_seconds_count +... vllm:time_per_output_token_seconds_sum +... vllm:e2e_request_latency_seconds_bucket +... vllm:e2e_request_latency_seconds_count +... vllm:e2e_request_latency_seconds_sum +... vllm:avg_prompt_throughput_toks_per_s +... vllm:avg_generation_throughput_toks_per_s + + +*** Test Cases *** +Verify User Can Deploy A Model With Vllm Via CLI + [Documentation] Deploy a model (gpt2) using the vllm runtime and confirm that it's running + [Tags] Tier1 Sanity Resources-GPU ODS-XXX + ${rc} ${out}= Run And Return Rc And Output oc apply -f ${DL_POD_FILEPATH} + Should Be Equal As Integers ${rc} ${0} + Wait For Pods To Succeed label_selector=gpt-download-pod=true namespace=${TEST_NS} + ${rc} ${out}= Run And Return Rc And Output oc apply -f ${SR_FILEPATH} + Should Be Equal As Integers ${rc} ${0} + ${rc} ${out}= Run And Return Rc And Output oc apply -f ${IS_FILEPATH} + Should Be Equal As Integers ${rc} ${0} + Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=vllm-gpt2-openai + ... namespace=${TEST_NS} + ${pod_name}= Get Pod Name namespace=${TEST_NS} + ... label_selector=serving.kserve.io/inferenceservice=vllm-gpt2-openai + Start Port-forwarding namespace=${TEST_NS} pod_name=${pod_name} local_port=8080 remote_port=8080 + ${rc} ${out}= Run And Return Rc And Output + ... curl -ks ${INFERENCE_URL} -H "Content-Type: application/json" -d ${INFERENCE_INPUT} | jq . + Should Be Equal As Integers ${rc} ${0} + Log ${out} + +Verify Vllm Metrics Are Present + [Documentation] Confirm vLLM metrics are exposed in OpenShift metrics + [Tags] Tier1 Sanity Resources-GPU ODS-XXX + ${rc} ${out}= Run And Return Rc And Output + ... 
curl -ks ${METRICS_URL} + Should Be Equal As Integers ${rc} ${0} + Log ${out} + ${thanos_url}= Get OpenShift Thanos URL + ${token}= Generate Thanos Token + Metrics Should Exist In UserWorkloadMonitoring ${thanos_url} ${token} ${SEARCH_METRICS} + + +*** Keywords *** +Suite Setup + Skip If Component Is Not Enabled kserve + RHOSi Setup + Set Default Storage Class In GCP default=ssd-csi + ${is_self_managed}= Is RHODS Self-Managed + IF ${is_self_managed} + Configure User Workload Monitoring + Enable User Workload Monitoring + END + +Suite Teardown + Set Default Storage Class In GCP default=standard-csi + Terminate Process llm-query-process kill=true + ${rc}= Run And Return Rc oc delete inferenceservice -n ${TEST_NS} --all + Should Be Equal As Integers ${rc} ${0} + ${rc}= Run And Return Rc oc delete servingruntime -n ${TEST_NS} --all + Should Be Equal As Integers ${rc} ${0} + ${rc}= Run And Return Rc oc delete pod -n ${TEST_NS} --all + Should Be Equal As Integers ${rc} ${0} + ${rc}= Run And Return Rc oc delete namespace ${TEST_NS} + Should Be Equal As Integers ${rc} ${0} + RHOSi Teardown + +Set Default Storage Class In GCP + [Documentation] If the storage class exists we can assume we are in GCP. We force ssd-csi to be the default class + ... for the duration of this test suite. + [Arguments] ${default} + ${rc}= Run And Return Rc oc get storageclass ${default} + IF ${rc} == ${0} + IF "${default}" == "ssd-csi" + Run oc patch storageclass standard-csi -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"false"}}}' #robocop: disable + Run oc patch storageclass ssd-csi -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}' #robocop: disable + ELSE + Run oc patch storageclass ssd-csi -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"false"}}}' #robocop: disable + Run oc patch storageclass standard-csi -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}' #robocop: disable + END + ELSE + Log Proceeding with default storage class because we're not in GCP + END From ddaea0648d25417bc87090ac89250e47bf67c24b Mon Sep 17 00:00:00 2001 From: Luca Giorgi Date: Mon, 13 May 2024 14:42:52 +0200 Subject: [PATCH 2/6] Update ods_ci/tests/Resources/Files/llm/vllm/vllm_servingruntime.yaml Co-authored-by: Vedant Mahabaleshwarkar --- ods_ci/tests/Resources/Files/llm/vllm/vllm_servingruntime.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/ods_ci/tests/Resources/Files/llm/vllm/vllm_servingruntime.yaml b/ods_ci/tests/Resources/Files/llm/vllm/vllm_servingruntime.yaml index 4d02cdcd0..15309d851 100644 --- a/ods_ci/tests/Resources/Files/llm/vllm/vllm_servingruntime.yaml +++ b/ods_ci/tests/Resources/Files/llm/vllm/vllm_servingruntime.yaml @@ -10,8 +10,6 @@ spec: serving.knative.openshift.io/enablePassthrough: "true" opendatahub.io/dashboard: "true" openshift.io/display-name: "vLLLM Openai entry point" - serving.kserve.io/enable-prometheus-scraping: "false" - serving.kserve.io/enable-metric-aggregation: "true" prometheus.io/port: '8080' prometheus.io/path: "/metrics/" multiModel: false From 787048ed5a277574c44e2aa78d34191989c2796f Mon Sep 17 00:00:00 2001 From: Luca Giorgi Date: Tue, 14 May 2024 18:28:20 +0200 Subject: [PATCH 3/6] reimplement using common keywords Signed-off-by: Luca Giorgi --- .../Files/llm/model_expected_responses.json | 13 ++++++++++++ .../Files/llm/runtime_query_formats.json | 17 ++++++++++++++++ .../426__model_serving_vllm_metrics.robot | 20 +++++++++---------- 3 files 
changed, 39 insertions(+), 11 deletions(-) diff --git a/ods_ci/tests/Resources/Files/llm/model_expected_responses.json b/ods_ci/tests/Resources/Files/llm/model_expected_responses.json index 996c2ed02..c38dfeb7b 100644 --- a/ods_ci/tests/Resources/Files/llm/model_expected_responses.json +++ b/ods_ci/tests/Resources/Files/llm/model_expected_responses.json @@ -146,6 +146,19 @@ } } } + }, + { + "query_text": "{'role': 'system','content': 'You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.'},{'role': 'user','content': 'Compose a poem that explains the concept of recursion in programming.'}", + "models": { + "gpt2": { + "completion_tokens": 992, + "response_text": "A friend of mine came over to the house to play with his wife. He was asleep, and he felt like he'd been hit by a speeding car. He's a big guy. He's like the kind of guy who may not have a very good head, but he's big enough to stand up at a table and read something. I was like, \"I'm going to play with this.\"\n\nThat's where I started playing with my car. It was something I never dreamed of doing, but I'd never imagined that it would be such a big deal.\n\nWe started playing with it. When I was about 12, we started playing with it to see how it would turn out. I was 26, and I was playing it for the first time for the first time ever. It was fun. I remember thinking it was like a different game than I had ever played before. I remember thinking the first time we played would be like, \"Oh my god, I've never played a game like this before before.\"\n\nIt was surreal. I was in my 20s at the time. We got to have a party in my house at the time. I was sitting in the living room with my friend, who's 28. We're from Dallas, and his wife is a pretty big girl. He's about 6 feet tall and 250 pounds. On the phone with his friend said, \"Dad, is it possible you'll be able to do this without your beard?\" I was like, \"Absolutely, actually.\" I thought, \"I'm going to do it.\"\n\nI finally did it and it turned out pretty well. I was able to take our photo with our friend, and he got excited and started laughing. He was like, \"That's awesome.\" I sat in his living room for two hours and made sure he was really excited. He was really excited. We ended up having a workshop and we have a lot of stuff to do.\n\nHe just started playing. It's been amazing. I'm like, \"It's going to be huge.\" At first I was like, \"Wow, my god that's amazing.\" I was like, \"Wow, my God that's awesome.\" He's like, \"I'm so excited about this!\" He was like, \"Oh my god, I can't wait to do it!\"\n\nHe had that awesome physique. He was super skinny. He was like, \"I'm so excited about it.\" He was like, \"Really?\" I was like, \"Yeah, I'm so excited! I'm so excited.\" We did it for two weeks and it turned out pretty well.\n\nHe's like, \"I hope it stays that way.\" I was like, \"I hope it stays that way.\" He was like, \"Oh my god, I've never even played with a computer before!\" I was like, \"Yeah, it's just fun to play with a computer.\" He was like, \"Oh my god, I can't wait to play with a computer!\" He was like, \"It's just a cool thing to do!\"\n\nI was doing it with my friend's dog, a puppy.\n\nI was doing it with my friend's dog. People said, \"You think that's cool?\" I said, \"Yeah, that's cool.\" We had the dog. He was a little bit shy and it was a little bit intimidating and scary.\n\nWe played it twice. It was like a game. 
He was like, \"Oh my God I've never played with a computer before!\" I was like, \"I hope it stays that way.\" He was like, \"Yeah, it's just a cool thing to do!\" He was like, \"Oh my god, I can't wait to do it!\"\n\nWe played it again on the bus, on the weekend.\n\nWe played it again on the weekend.\n\nThen we went to the store and bought a new Canon 5D Mark II.\n\nI couldn't believe what the customer was saying. I was like, \"That sounds amazing!\" He was like, \"That's amazing!\"\n\nHe was like, \"Wow! That's awesome!\" So we were like, \"Wow! That looks awesome!\" He's like, \"Yeah, that looks awesome!\" I was like, \"Wow! That looks awesome! That looks awesome!\"\n\nWe played it twice again.\n\nI was like, \"Wow! That sounds awesome!\" He was like, \"Wow! That sounds awesome! That sounds awesome!\" I was like, \"Wow! That looks awesome!\"\n\nHe was like, \"Wow! That sounds awesome! That looks awesome!\"\n\nI was just like, \"Wow! That looks awesome! That looks awesome!\" He was like", + "streamed_response_text": "", + "vllm": { + "chat-completions_response_text": "" + } + } + } } ], "model-info": { diff --git a/ods_ci/tests/Resources/Files/llm/runtime_query_formats.json b/ods_ci/tests/Resources/Files/llm/runtime_query_formats.json index 17c9c1ec5..f17a0db3f 100644 --- a/ods_ci/tests/Resources/Files/llm/runtime_query_formats.json +++ b/ods_ci/tests/Resources/Files/llm/runtime_query_formats.json @@ -96,5 +96,22 @@ } }, "containers": ["kserve-container"] + }, + "vllm": { + "endpoints": { + "chat-completions": { + "http": { + "endpoint": "v1/chat/completions", + "header": "Content-Type:application/json", + "body": "{'model': '${model_name}','messages': [${query_text}]}", + "response_fields_map": { + "response": "choices", + "completion_tokens": "completion_tokens", + "response_text": "content" + } + } + } + }, + "containers": ["kserve-container"] } } diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot index 85e701c76..35b0cc421 100644 --- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot +++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot @@ -53,23 +53,19 @@ Verify User Can Deploy A Model With Vllm Via CLI Wait For Pods To Succeed label_selector=gpt-download-pod=true namespace=${TEST_NS} ${rc} ${out}= Run And Return Rc And Output oc apply -f ${SR_FILEPATH} Should Be Equal As Integers ${rc} ${0} - ${rc} ${out}= Run And Return Rc And Output oc apply -f ${IS_FILEPATH} - Should Be Equal As Integers ${rc} ${0} + Deploy Model Via CLI ${IS_FILEPATH} ${TEST_NS} Wait For Pods To Be Ready label_selector=serving.kserve.io/inferenceservice=vllm-gpt2-openai ... namespace=${TEST_NS} - ${pod_name}= Get Pod Name namespace=${TEST_NS} - ... label_selector=serving.kserve.io/inferenceservice=vllm-gpt2-openai - Start Port-forwarding namespace=${TEST_NS} pod_name=${pod_name} local_port=8080 remote_port=8080 - ${rc} ${out}= Run And Return Rc And Output - ... curl -ks ${INFERENCE_URL} -H "Content-Type: application/json" -d ${INFERENCE_INPUT} | jq . - Should Be Equal As Integers ${rc} ${0} - Log ${out} + Query Model Multiple Times model_name=gpt2 isvc_name=vllm-gpt2-openai runtime=vllm protocol=http + ... inference_type=chat-completions n_times=3 query_idx=8 + ... 
namespace=${TEST_NS} string_check_only=${TRUE} Verify Vllm Metrics Are Present [Documentation] Confirm vLLM metrics are exposed in OpenShift metrics [Tags] Tier1 Sanity Resources-GPU ODS-XXX + ${host} = llm.Get KServe Inference Host Via CLI isvc_name=vllm-gpt2-openai namespace=${TEST_NS} ${rc} ${out}= Run And Return Rc And Output - ... curl -ks ${METRICS_URL} + ... curl -ks ${host}/metrics/ Should Be Equal As Integers ${rc} ${0} Log ${out} ${thanos_url}= Get OpenShift Thanos URL @@ -86,11 +82,13 @@ Suite Setup IF ${is_self_managed} Configure User Workload Monitoring Enable User Workload Monitoring + #TODO: Find reliable signal for UWM being ready + #Sleep 10m END + Load Expected Responses Suite Teardown Set Default Storage Class In GCP default=standard-csi - Terminate Process llm-query-process kill=true ${rc}= Run And Return Rc oc delete inferenceservice -n ${TEST_NS} --all Should Be Equal As Integers ${rc} ${0} ${rc}= Run And Return Rc oc delete servingruntime -n ${TEST_NS} --all From 4d813e2ea136f60d21f6ff995bab5dad9cd53c85 Mon Sep 17 00:00:00 2001 From: Luca Giorgi Date: Wed, 15 May 2024 16:31:53 +0200 Subject: [PATCH 4/6] Better handle missing metrics from UWM, change expected response format Signed-off-by: Luca Giorgi --- .../Resources/Files/llm/model_expected_responses.json | 5 +---- .../Resources/Page/ODH/Monitoring/Monitoring.resource | 10 +++++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/ods_ci/tests/Resources/Files/llm/model_expected_responses.json b/ods_ci/tests/Resources/Files/llm/model_expected_responses.json index c38dfeb7b..4616259fc 100644 --- a/ods_ci/tests/Resources/Files/llm/model_expected_responses.json +++ b/ods_ci/tests/Resources/Files/llm/model_expected_responses.json @@ -151,11 +151,8 @@ "query_text": "{'role': 'system','content': 'You are a poetic assistant, skilled in explaining complex programming concepts with creative flair.'},{'role': 'user','content': 'Compose a poem that explains the concept of recursion in programming.'}", "models": { "gpt2": { - "completion_tokens": 992, - "response_text": "A friend of mine came over to the house to play with his wife. He was asleep, and he felt like he'd been hit by a speeding car. He's a big guy. He's like the kind of guy who may not have a very good head, but he's big enough to stand up at a table and read something. I was like, \"I'm going to play with this.\"\n\nThat's where I started playing with my car. It was something I never dreamed of doing, but I'd never imagined that it would be such a big deal.\n\nWe started playing with it. When I was about 12, we started playing with it to see how it would turn out. I was 26, and I was playing it for the first time for the first time ever. It was fun. I remember thinking it was like a different game than I had ever played before. I remember thinking the first time we played would be like, \"Oh my god, I've never played a game like this before before.\"\n\nIt was surreal. I was in my 20s at the time. We got to have a party in my house at the time. I was sitting in the living room with my friend, who's 28. We're from Dallas, and his wife is a pretty big girl. He's about 6 feet tall and 250 pounds. On the phone with his friend said, \"Dad, is it possible you'll be able to do this without your beard?\" I was like, \"Absolutely, actually.\" I thought, \"I'm going to do it.\"\n\nI finally did it and it turned out pretty well. I was able to take our photo with our friend, and he got excited and started laughing. 
He was like, \"That's awesome.\" I sat in his living room for two hours and made sure he was really excited. He was really excited. We ended up having a workshop and we have a lot of stuff to do.\n\nHe just started playing. It's been amazing. I'm like, \"It's going to be huge.\" At first I was like, \"Wow, my god that's amazing.\" I was like, \"Wow, my God that's awesome.\" He's like, \"I'm so excited about this!\" He was like, \"Oh my god, I can't wait to do it!\"\n\nHe had that awesome physique. He was super skinny. He was like, \"I'm so excited about it.\" He was like, \"Really?\" I was like, \"Yeah, I'm so excited! I'm so excited.\" We did it for two weeks and it turned out pretty well.\n\nHe's like, \"I hope it stays that way.\" I was like, \"I hope it stays that way.\" He was like, \"Oh my god, I've never even played with a computer before!\" I was like, \"Yeah, it's just fun to play with a computer.\" He was like, \"Oh my god, I can't wait to play with a computer!\" He was like, \"It's just a cool thing to do!\"\n\nI was doing it with my friend's dog, a puppy.\n\nI was doing it with my friend's dog. People said, \"You think that's cool?\" I said, \"Yeah, that's cool.\" We had the dog. He was a little bit shy and it was a little bit intimidating and scary.\n\nWe played it twice. It was like a game. He was like, \"Oh my God I've never played with a computer before!\" I was like, \"I hope it stays that way.\" He was like, \"Yeah, it's just a cool thing to do!\" He was like, \"Oh my god, I can't wait to do it!\"\n\nWe played it again on the bus, on the weekend.\n\nWe played it again on the weekend.\n\nThen we went to the store and bought a new Canon 5D Mark II.\n\nI couldn't believe what the customer was saying. I was like, \"That sounds amazing!\" He was like, \"That's amazing!\"\n\nHe was like, \"Wow! That's awesome!\" So we were like, \"Wow! That looks awesome!\" He's like, \"Yeah, that looks awesome!\" I was like, \"Wow! That looks awesome! That looks awesome!\"\n\nWe played it twice again.\n\nI was like, \"Wow! That sounds awesome!\" He was like, \"Wow! That sounds awesome! That sounds awesome!\" I was like, \"Wow! That looks awesome!\"\n\nHe was like, \"Wow! That sounds awesome! That looks awesome!\"\n\nI was just like, \"Wow! That looks awesome! That looks awesome!\" He was like", - "streamed_response_text": "", "vllm": { - "chat-completions_response_text": "" + "chat-completions_response_text": "A friend of mine came over to the house to play with his wife. He was asleep, and he felt like he'd been hit by a speeding car. He's a big guy. He's like the kind of guy who may not have a very good head, but he's big enough to stand up at a table and read something. I was like, \"I'm going to play with this.\"\n\nThat's where I started playing with my car. It was something I never dreamed of doing, but I'd never imagined that it would be such a big deal.\n\nWe started playing with it. When I was about 12, we started playing with it to see how it would turn out. I was 26, and I was playing it for the first time for the first time ever. It was fun. I remember thinking it was like a different game than I had ever played before. I remember thinking the first time we played would be like, \"Oh my god, I've never played a game like this before before.\"\n\nIt was surreal. I was in my 20s at the time. We got to have a party in my house at the time. I was sitting in the living room with my friend, who's 28. We're from Dallas, and his wife is a pretty big girl. 
He's about 6 feet tall and 250 pounds. On the phone with his friend said, \"Dad, is it possible you'll be able to do this without your beard?\" I was like, \"Absolutely, actually.\" I thought, \"I'm going to do it.\"\n\nI finally did it and it turned out pretty well. I was able to take our photo with our friend, and he got excited and started laughing. He was like, \"That's awesome.\" I sat in his living room for two hours and made sure he was really excited. He was really excited. We ended up having a workshop and we have a lot of stuff to do.\n\nHe just started playing. It's been amazing. I'm like, \"It's going to be huge.\" At first I was like, \"Wow, my god that's amazing.\" I was like, \"Wow, my God that's awesome.\" He's like, \"I'm so excited about this!\" He was like, \"Oh my god, I can't wait to do it!\"\n\nHe had that awesome physique. He was super skinny. He was like, \"I'm so excited about it.\" He was like, \"Really?\" I was like, \"Yeah, I'm so excited! I'm so excited.\" We did it for two weeks and it turned out pretty well.\n\nHe's like, \"I hope it stays that way.\" I was like, \"I hope it stays that way.\" He was like, \"Oh my god, I've never even played with a computer before!\" I was like, \"Yeah, it's just fun to play with a computer.\" He was like, \"Oh my god, I can't wait to play with a computer!\" He was like, \"It's just a cool thing to do!\"\n\nI was doing it with my friend's dog, a puppy.\n\nI was doing it with my friend's dog. People said, \"You think that's cool?\" I said, \"Yeah, that's cool.\" We had the dog. He was a little bit shy and it was a little bit intimidating and scary.\n\nWe played it twice. It was like a game. He was like, \"Oh my God I've never played with a computer before!\" I was like, \"I hope it stays that way.\" He was like, \"Yeah, it's just a cool thing to do!\" He was like, \"Oh my god, I can't wait to do it!\"\n\nWe played it again on the bus, on the weekend.\n\nWe played it again on the weekend.\n\nThen we went to the store and bought a new Canon 5D Mark II.\n\nI couldn't believe what the customer was saying. I was like, \"That sounds amazing!\" He was like, \"That's amazing!\"\n\nHe was like, \"Wow! That's awesome!\" So we were like, \"Wow! That looks awesome!\" He's like, \"Yeah, that looks awesome!\" I was like, \"Wow! That looks awesome! That looks awesome!\"\n\nWe played it twice again.\n\nI was like, \"Wow! That sounds awesome!\" He was like, \"Wow! That sounds awesome! That sounds awesome!\" I was like, \"Wow! That looks awesome!\"\n\nHe was like, \"Wow! That sounds awesome! That looks awesome!\"\n\nI was just like, \"Wow! That looks awesome! That looks awesome!\" He was like" } } } diff --git a/ods_ci/tests/Resources/Page/ODH/Monitoring/Monitoring.resource b/ods_ci/tests/Resources/Page/ODH/Monitoring/Monitoring.resource index 7e026d1ad..6127a192c 100644 --- a/ods_ci/tests/Resources/Page/ODH/Monitoring/Monitoring.resource +++ b/ods_ci/tests/Resources/Page/ODH/Monitoring/Monitoring.resource @@ -176,9 +176,13 @@ Metrics Should Exist In UserWorkloadMonitoring Log ${index}: ${metric_search_text} ${metrics_names}= Get Thanos Metrics List thanos_url=${thanos_url} thanos_token=${thanos_token} ... 
search_text=${metric_search_text}
-        Should Not Be Empty    ${metrics_names}
-        ${metrics_names}=    Split To Lines    ${metrics_names}
-        Append To List    ${metrics}    @{metrics_names}
+        ${found} =    Run Keyword And Return Status    Should Not Be Empty    ${metrics_names}
+        IF    ${found}
+            ${metrics_names}=    Split To Lines    ${metrics_names}
+            Append To List    ${metrics}    @{metrics_names}
+        ELSE
+            Run Keyword And Continue On Failure    Fail    msg=${metric_search_text} not found
+        END
     END
     RETURN    ${metrics}

From 6c0c3d9c999c36995281a8f0d6c61c350c047411 Mon Sep 17 00:00:00 2001
From: Luca Giorgi
Date: Thu, 16 May 2024 14:12:45 +0200
Subject: [PATCH 5/6] small cleanup

Signed-off-by: Luca Giorgi
---
 ods_ci/tests/Resources/Files/llm/vllm/query.json     | 13 -------------
 .../LLMs/vllm/426__model_serving_vllm_metrics.robot  |  7 ++-----
 2 files changed, 2 insertions(+), 18 deletions(-)
 delete mode 100644 ods_ci/tests/Resources/Files/llm/vllm/query.json

diff --git a/ods_ci/tests/Resources/Files/llm/vllm/query.json b/ods_ci/tests/Resources/Files/llm/vllm/query.json
deleted file mode 100644
index 156795eda..000000000
--- a/ods_ci/tests/Resources/Files/llm/vllm/query.json
+++ /dev/null
@@ -1,13 +0,0 @@
-{
-    "model": "gpt2",
-    "messages": [
-        {
-            "role": "system",
-            "content": "You are a poetic assistant, skilled in explaining complex programming concepts with creative flair."
-        },
-        {
-            "role": "user",
-            "content": "Compose a poem that explains the concept of recursion in programming."
-        }
-    ]
-}
\ No newline at end of file
diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot
index 35b0cc421..755bccc55 100644
--- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot
+++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot
@@ -19,9 +19,6 @@ ${VLLM_RESOURCES_DIRPATH}=    ods_ci/tests/Resources/Files/llm/vllm
 ${DL_POD_FILEPATH}=           ${VLLM_RESOURCES_DIRPATH}/download_model.yaml
 ${SR_FILEPATH}=               ${VLLM_RESOURCES_DIRPATH}/vllm_servingruntime.yaml
 ${IS_FILEPATH}=               ${VLLM_RESOURCES_DIRPATH}/vllm-gpt2_inferenceservice.yaml
-${INFERENCE_INPUT}=           @${VLLM_RESOURCES_DIRPATH}/query.json
-${INFERENCE_URL}=             http://localhost:8080/v1/chat/completions
-${METRICS_URL}=               http://localhost:8080/metrics/
 ${TEST_NS}=                   vllm-gpt2
 @{SEARCH_METRICS}=            vllm:cache_config_info
 ...                           vllm:num_requests_running
@@ -47,7 +44,7 @@ ${TEST_NS}=    vllm-gpt2
 *** Test Cases ***
 Verify User Can Deploy A Model With Vllm Via CLI
     [Documentation]    Deploy a model (gpt2) using the vllm runtime and confirm that it's running
-    [Tags]    Tier1    Sanity    Resources-GPU    ODS-XXX
+    [Tags]    Tier1    Sanity    Resources-GPU    RHOAIENG-6264
     ${rc}    ${out}=    Run And Return Rc And Output    oc apply -f ${DL_POD_FILEPATH}
     Should Be Equal As Integers    ${rc}    ${0}
     Wait For Pods To Succeed    label_selector=gpt-download-pod=true    namespace=${TEST_NS}
     ${rc}    ${out}=    Run And Return Rc And Output    oc apply -f ${SR_FILEPATH}
     Should Be Equal As Integers    ${rc}    ${0}
@@ -62,7 +59,7 @@ Verify User Can Deploy A Model With Vllm Via CLI
 Verify Vllm Metrics Are Present
     [Documentation]    Confirm vLLM metrics are exposed in OpenShift metrics
-    [Tags]    Tier1    Sanity    Resources-GPU    ODS-XXX
+    [Tags]    Tier1    Sanity    Resources-GPU    RHOAIENG-6264
     ${host} =    llm.Get KServe Inference Host Via CLI    isvc_name=vllm-gpt2-openai    namespace=${TEST_NS}
     ${rc}    ${out}=    Run And Return Rc And Output
     ...    curl -ks ${host}/metrics/
     Should Be Equal As Integers    ${rc}    ${0}

From 95093e418517b929ab03e2b904da0dfc34742898 Mon Sep 17 00:00:00 2001
From: Luca Giorgi
Date: Fri, 17 May 2024 11:58:38 +0200
Subject: [PATCH 6/6] Move keyword, some cleanup, comments

Signed-off-by: Luca Giorgi
---
 ods_ci/tests/Resources/OCP.resource           | 17 ++++++++++++++
 .../LLMs/422__model_serving_llm_models.robot  | 19 ++--------------
 .../426__model_serving_vllm_metrics.robot     | 22 +++++--------------
 3 files changed, 24 insertions(+), 34 deletions(-)

diff --git a/ods_ci/tests/Resources/OCP.resource b/ods_ci/tests/Resources/OCP.resource
index 8b04230b2..7658f4256 100644
--- a/ods_ci/tests/Resources/OCP.resource
+++ b/ods_ci/tests/Resources/OCP.resource
@@ -258,3 +258,20 @@ Check If Pod Does Not Exist
     ${rc}    ${output}=    Run And Return Rc And Output
     ...    oc get pod -l {label_selector} -n ${namespace}
     Should Be Equal    "${rc}"    "1"    msg=${output}
+
+Set Default Storage Class In GCP
+    [Documentation]    If the storage class exists we can assume we are in GCP. We force ssd-csi to be the default class
+    ...    for the duration of this test suite.
+    [Arguments]    ${default}
+    ${rc}=    Run And Return Rc    oc get storageclass ${default}
+    IF    ${rc} == ${0}
+        IF    "${default}" == "ssd-csi"
+            Run    oc patch storageclass standard-csi -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"false"}}}'    #robocop: disable
+            Run    oc patch storageclass ssd-csi -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'    #robocop: disable
+        ELSE
+            Run    oc patch storageclass ssd-csi -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"false"}}}'    #robocop: disable
+            Run    oc patch storageclass standard-csi -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'    #robocop: disable
+        END
+    ELSE
+        Log    Proceeding with default storage class because we're not in GCP
+    END
diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_models.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_models.robot
index eee446ab7..493c1bcdb 100644
--- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_models.robot
+++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_models.robot
@@ -19,6 +19,8 @@ ${KSERVE_MODE}=    RawDeployment
 ${MODEL_FORMAT}=    pytorch    #vLLM
 ${PROTOCOL}=    grpc    #http
 ${OVERLAY}=    vllm
+
+
 *** Test Cases ***
 Verify User Can Serve And Query A bigscience/mt0-xxl Model
     [Documentation]    Basic tests for preparing, deploying and querying a LLM model
@@ -454,23 +456,6 @@ Suite Teardown
     Set Default Storage Class In GCP    default=standard-csi
     RHOSi Teardown
 
-Set Default Storage Class In GCP
-    [Documentation]    If the storage class exists we can assume we are in GCP. We force ssd-csi to be the default class
-    ...    for the duration of this test suite.
-    [Arguments]    ${default}
-    ${rc}=    Run And Return Rc    oc get storageclass ${default}
-    IF    ${rc} == ${0}
-        IF    "${default}" == "ssd-csi"
-            Run    oc patch storageclass standard-csi -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"false"}}}'    #robocop: disable
-            Run    oc patch storageclass ssd-csi -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'    #robocop: disable
-        ELSE
-            Run    oc patch storageclass ssd-csi -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"false"}}}'    #robocop: disable
-            Run    oc patch storageclass standard-csi -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'    #robocop: disable
-        END
-    ELSE
-        Log    Proceeding with default storage class because we're not in GCP
-    END
-
 Setup Test Variables
     [Arguments]    ${model_name}    ${kserve_mode}=Serverless    ${use_pvc}=${FALSE}    ${use_gpu}=${FALSE}
     ...    ${model_path}=${model_name}
diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot
index 755bccc55..09afd76e3 100644
--- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot
+++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/vllm/426__model_serving_vllm_metrics.robot
@@ -50,6 +50,10 @@ Verify User Can Deploy A Model With Vllm Via CLI
     Wait For Pods To Succeed    label_selector=gpt-download-pod=true    namespace=${TEST_NS}
     ${rc}    ${out}=    Run And Return Rc And Output    oc apply -f ${SR_FILEPATH}
     Should Be Equal As Integers    ${rc}    ${0}
+    #TODO: Switch to common keyword for model DL and SR deploy
+    #Set Project And Runtime    runtime=vllm    namespace=${TEST_NS}
+    #...    download_in_pvc=${DOWNLOAD_IN_PVC}    model_name=gpt2
+    #...    storage_size=10Gi
     Deploy Model Via CLI    ${IS_FILEPATH}    ${TEST_NS}
     Wait For Pods To Be Ready    label_selector=serving.kserve.io/inferenceservice=vllm-gpt2-openai
     ...    namespace=${TEST_NS}
@@ -60,6 +64,7 @@ Verify User Can Deploy A Model With Vllm Via CLI
 Verify Vllm Metrics Are Present
     [Documentation]    Confirm vLLM metrics are exposed in OpenShift metrics
     [Tags]    Tier1    Sanity    Resources-GPU    RHOAIENG-6264
+    ...    Depends On Test    Verify User Can Deploy A Model With Vllm Via CLI
     ${host} =    llm.Get KServe Inference Host Via CLI    isvc_name=vllm-gpt2-openai    namespace=${TEST_NS}
     ${rc}    ${out}=    Run And Return Rc And Output
     ...    curl -ks ${host}/metrics/
     Should Be Equal As Integers    ${rc}    ${0}
     Log    ${out}
     ${thanos_url}=    Get OpenShift Thanos URL
     ${token}=    Generate Thanos Token
     Metrics Should Exist In UserWorkloadMonitoring    ${thanos_url}    ${token}    ${SEARCH_METRICS}
@@ -95,20 +100,3 @@ Suite Teardown
     ${rc}=    Run And Return Rc    oc delete namespace ${TEST_NS}
     Should Be Equal As Integers    ${rc}    ${0}
     RHOSi Teardown
-
-Set Default Storage Class In GCP
-    [Documentation]    If the storage class exists we can assume we are in GCP. We force ssd-csi to be the default class
-    ...    for the duration of this test suite.
-    [Arguments]    ${default}
-    ${rc}=    Run And Return Rc    oc get storageclass ${default}
-    IF    ${rc} == ${0}
-        IF    "${default}" == "ssd-csi"
-            Run    oc patch storageclass standard-csi -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"false"}}}'    #robocop: disable
-            Run    oc patch storageclass ssd-csi -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'    #robocop: disable
-        ELSE
-            Run    oc patch storageclass ssd-csi -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"false"}}}'    #robocop: disable
-            Run    oc patch storageclass standard-csi -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'    #robocop: disable
-        END
-    ELSE
-        Log    Proceeding with default storage class because we're not in GCP
-    END
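
A quick manual walk-through of what the suite automates, for reviewers who want to poke at the deployment by hand. This is only an illustrative sketch, not part of the patches: the predictor pod name placeholder, the local port-forward, the trimmed-down request body, and the availability of jq are assumptions layered on top of the files added above.

  # Forward the runtime's port 8080 locally, as the Start Port-forwarding keyword does
  # (<predictor-pod-name> is a placeholder for the vllm-gpt2-openai predictor pod).
  oc -n vllm-gpt2 port-forward pod/<predictor-pod-name> 8080:8080 &

  # Chat-completions request against the OpenAI-compatible endpoint (same payload shape as query.json).
  curl -ks http://localhost:8080/v1/chat/completions \
      -H "Content-Type: application/json" \
      -d '{"model": "gpt2", "messages": [{"role": "user", "content": "Compose a poem that explains the concept of recursion in programming."}]}' | jq .

  # Scrape the runtime's Prometheus endpoint and keep only the vLLM series the test asserts on.
  curl -ks http://localhost:8080/metrics/ | grep '^vllm:'

  # Optionally confirm user-workload monitoring has scraped one of the series; THANOS_HOST and TOKEN
  # stand in for the values the Get OpenShift Thanos URL / Generate Thanos Token keywords return.
  curl -ks -H "Authorization: Bearer ${TOKEN}" \
      "https://${THANOS_HOST}/api/v1/query?query=vllm:num_requests_running" | jq '.data.result'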