diff --git a/ods_ci/tests/Resources/CLI/ModelServing/llm.resource b/ods_ci/tests/Resources/CLI/ModelServing/llm.resource
index 73d7c334f..2b502357d 100644
--- a/ods_ci/tests/Resources/CLI/ModelServing/llm.resource
+++ b/ods_ci/tests/Resources/CLI/ModelServing/llm.resource
@@ -13,8 +13,10 @@ ${INFERENCESERVICE_FILEPATH}=    ${LLM_RESOURCES_DIRPATH}/serving_runtimes/base/
 ${INFERENCESERVICE_FILLED_FILEPATH}=    ${LLM_RESOURCES_DIRPATH}/serving_runtimes/isvc_filled.yaml
 ${DEFAULT_BUCKET_SECRET_NAME}=    models-bucket-secret
 ${DEFAULT_BUCKET_SA_NAME}=    models-bucket-sa
+${DEFAULT_BUCKET_PREFIX}=    models-bucket
 ${BUCKET_SECRET_FILEPATH}=    ${LLM_RESOURCES_DIRPATH}/bucket_secret.yaml
 ${BUCKET_SA_FILEPATH}=    ${LLM_RESOURCES_DIRPATH}/bucket_sa.yaml
+${ROLE_BINDING_FILEPATH}=    ${LLM_RESOURCES_DIRPATH}/rolebinding_view.yaml
 ${USE_BUCKET_HTTPS}=    "1"
 ${MODELS_BUCKET}=    ${S3.BUCKET_3}
 ${SERVICEMESH_CR_NS}=    istio-system
@@ -315,6 +317,7 @@ Query Model Multiple Times
     ...    ${port}=443
     ...    ${body_params}=&{EMPTY}
     ...    ${cert}=${False}
+    ...    ${token}=${None}
     ...    &{args}
     IF    "${inference_type}" == "streaming"
         ${streamed_response}=    Set Variable    ${TRUE}
@@ -339,6 +342,9 @@ Query Model Multiple Times
         ${body}    ${header}    ${extra_args}=    llm.Prepare Payload    runtime=${runtime}    protocol=${protocol}
         ...    inference_type=${inference_type}    model_name=${model_name}    body_params=${body_params}
         ...    query_text=${EXP_RESPONSES}[queries][${query_idx}][query_text]
+        IF    "${token}" != "${None}"
+            ${header}=    Set Variable    "Authorization: Bearer ${token}" -H ${header}
+        END
         ${runtime_details}=    Set Variable    ${RUNTIME_FORMATS}[${runtime}][endpoints][${inference_type}][${protocol}]
         ${endpoint}=    Set Variable    ${runtime_details}[endpoint]
         Set To Dictionary    ${args}    &{extra_args}
@@ -775,6 +781,23 @@ Start Port-forwarding
     Process Should Be Running    ${process}
     sleep    7s
 
+Create Role Binding For Authorino
+    [Arguments]    ${name}    ${namespace}
+    Set Test Variable    ${name}
+    Set Test Variable    ${namespace}
+    Set Test Variable    ${nameview}    ${name}-view
+    Set Test Variable    ${namesa}    ${name}-sa
+    Create File From Template    ${ROLE_BINDING_FILEPATH}    ${LLM_RESOURCES_DIRPATH}/rb.filled.yaml
+    ${rc}    ${out}=    Run And Return Rc And Output
+    ...    oc apply -f ${LLM_RESOURCES_DIRPATH}/rb.filled.yaml
+    Should Be Equal As Integers    ${rc}    ${0}
+
+Create Inference Access Token
+    [Arguments]    ${test_namespace}    ${bucket_sa_name}
+    ${rc}    ${out}=    Run And Return Rc And Output    oc create token -n ${test_namespace} ${bucket_sa_name}
+    Should Be Equal As Strings    ${rc}    0
+    [Return]    ${out}
+
 Wait For Model KServe Deployment To Be Ready
     [Documentation]    Waits for the Pod to be Ready (i.e., Running status) and checks that
     ...    the deployment has the expected pods and containers
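Note: the two new keywords simply shell out to oc. For reference, a minimal sketch of the
equivalent manual flow; the namespace, URL, and payload below are illustrative placeholders,
not values taken from the suite:

    # mint a short-lived ServiceAccount token (what Create Inference Access Token runs)
    TOKEN=$(oc create token -n kserve-cli models-bucket-sa)

    # pass it as a Bearer header, which is what the new token branch in
    # Query Model Multiple Times prepends to the curl header list
    curl -sk "https://flan-t5-small-caikit-kserve-cli.apps.example.com/api/v1/task/text-generation" \
        -H "Authorization: Bearer ${TOKEN}" \
        -H "Content-Type: application/json" \
        -d '{"model_id": "flan-t5-small-caikit", "inputs": "..."}'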
diff --git a/ods_ci/tests/Resources/Files/llm/rolebinding_view.yaml b/ods_ci/tests/Resources/Files/llm/rolebinding_view.yaml
new file mode 100644
index 000000000..9e8b2ee45
--- /dev/null
+++ b/ods_ci/tests/Resources/Files/llm/rolebinding_view.yaml
@@ -0,0 +1,13 @@
+apiVersion: rbac.authorization.k8s.io/v1
+kind: RoleBinding
+metadata:
+  name: ${NAMEVIEW}
+  namespace: ${NAMESPACE}
+subjects:
+  - kind: ServiceAccount
+    name: ${NAMESA}
+    namespace: ${NAMESPACE}
+roleRef:
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: view
diff --git a/ods_ci/tests/Resources/Files/llm/serving_runtimes/overlay/authorino/kustomization.yaml b/ods_ci/tests/Resources/Files/llm/serving_runtimes/overlay/authorino/kustomization.yaml
new file mode 100644
index 000000000..33dd636cc
--- /dev/null
+++ b/ods_ci/tests/Resources/Files/llm/serving_runtimes/overlay/authorino/kustomization.yaml
@@ -0,0 +1,10 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+metadata:
+  name: authorino
+resources:
+  - ../../base/
+
+commonAnnotations:
+  security.opendatahub.io/enable-auth: "true"
+
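Note: the authorino overlay changes nothing in the base manifests except stamping the
enable-auth annotation on every resource via commonAnnotations. A quick way to confirm the
annotation reached the deployed InferenceService; resource and namespace names are illustrative:

    oc get inferenceservice flan-t5-small-caikit -n kserve-cli \
        -o jsonpath='{.metadata.annotations.security\.opendatahub\.io/enable-auth}'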
diff --git a/ods_ci/tests/Resources/RHOSi.resource b/ods_ci/tests/Resources/RHOSi.resource
index 957ce6857..56b61ae9a 100644
--- a/ods_ci/tests/Resources/RHOSi.resource
+++ b/ods_ci/tests/Resources/RHOSi.resource
@@ -40,6 +40,7 @@ Resource    Common.robot
 ...    Set Thanos Credentials Variables
 ...    Generate Minio Random Credentials
 ...    Get Minio Credentials
+...    Create Inference Access Token
 
 
 *** Keywords ***
diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot
index e76872568..0afce48e1 100644
--- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot
+++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm.robot
@@ -552,6 +552,36 @@ Verify User Can Query A Model Using HTTP Calls
     [Teardown]    Clean Up Test Project    test_ns=${test_namespace}
     ...    isvc_names=${models_names}    wait_prj_deletion=${FALSE}
 
+Verify User Can Serve And Query A Model With Token
+    [Documentation]    Basic test for preparing, deploying and querying an LLM model
+    ...    with a token using Kserve and the Caikit+TGIS runtime
+    [Tags]    RHOAIENG-6333
+    ...    Tier1
+    [Setup]    Set Project And Runtime    namespace=${TEST_NS}-cli
+    ${test_namespace}=    Set Variable    ${TEST_NS}-cli
+    ${flan_model_name}=    Set Variable    flan-t5-small-caikit
+    ${models_names}=    Create List    ${flan_model_name}
+    ${overlays}=    Create List    authorino
+    Compile Inference Service YAML    isvc_name=${flan_model_name}
+    ...    sa_name=${DEFAULT_BUCKET_SA_NAME}
+    ...    model_storage_uri=${FLAN_STORAGE_URI}
+    ...    overlays=${overlays}
+
+    Deploy Model Via CLI    isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH}
+    ...    namespace=${test_namespace}
+    Wait For Pods To Be Ready    label_selector=serving.kserve.io/inferenceservice=${flan_model_name}
+    ...    namespace=${test_namespace}
+    Create Role Binding For Authorino    name=${DEFAULT_BUCKET_PREFIX}    namespace=${test_namespace}
+    ${inf_token}=    Create Inference Access Token    ${test_namespace}    ${DEFAULT_BUCKET_SA_NAME}
+    Query Model Multiple Times    model_name=${flan_model_name}
+    ...    inference_type=all-tokens    n_times=1
+    ...    namespace=${test_namespace}    token=${inf_token}
+    Query Model Multiple Times    model_name=${flan_model_name}
+    ...    inference_type=streaming    n_times=1
+    ...    namespace=${test_namespace}    token=${inf_token}
+
+    [Teardown]    Clean Up Test Project    test_ns=${test_namespace}
+    ...    isvc_names=${models_names}    wait_prj_deletion=${FALSE}
 
 *** Keywords ***
 Install Model Serving Stack Dependencies
diff --git a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot
index 6f081b005..03e9e45b3 100644
--- a/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot
+++ b/ods_ci/tests/Tests/400__ods_dashboard/420__model_serving/LLMs/422__model_serving_llm_tgis.robot
@@ -626,6 +626,53 @@ Verify User Can Query A Model Using HTTP Calls
     [Teardown]    Clean Up Test Project    test_ns=${test_namespace}
     ...    isvc_names=${models_names}    wait_prj_deletion=${FALSE}
 
+Verify User Can Serve And Query A Model With Token
+    [Documentation]    Basic test for preparing, deploying and querying an LLM model
+    ...    with a token using Kserve and the standalone TGIS runtime
+    [Tags]    RHOAIENG-6306
+    ...    Tier1
+    [Setup]    Set Project And Runtime    runtime=${TGIS_RUNTIME_NAME}    namespace=${TEST_NS}-cli
+    ${test_namespace}=    Set Variable    ${TEST_NS}-cli
+    ${flan_model_name}=    Set Variable    flan-t5-small-caikit
+    ${models_names}=    Create List    ${flan_model_name}
+    ${overlays}=    Create List    authorino
+
+    Compile Inference Service YAML    isvc_name=${flan_model_name}
+    ...    sa_name=${DEFAULT_BUCKET_SA_NAME}
+    ...    model_storage_uri=${FLAN_STORAGE_URI}
+    ...    model_format=pytorch    serving_runtime=${TGIS_RUNTIME_NAME}
+    ...    limits_dict=${GPU_LIMITS}
+    ...    overlays=${overlays}
+    Deploy Model Via CLI    isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH}
+    ...    namespace=${test_namespace}
+    Wait For Pods To Be Ready    label_selector=serving.kserve.io/inferenceservice=${flan_model_name}
+    ...    namespace=${test_namespace}
+    Create Role Binding For Authorino    name=${DEFAULT_BUCKET_PREFIX}    namespace=${test_namespace}
+    ${inf_token}=    Create Inference Access Token    ${test_namespace}    ${DEFAULT_BUCKET_SA_NAME}
+    ${pod_name}=    Get Pod Name    namespace=${test_namespace}    label_selector=serving.kserve.io/inferenceservice=${flan_model_name}
+    IF    ${IS_KSERVE_RAW}    Start Port-forwarding    namespace=${test_namespace}    pod_name=${pod_name}
+    Query Model Multiple Times    model_name=${flan_model_name}    runtime=${TGIS_RUNTIME_NAME}
+    ...    inference_type=all-tokens    n_times=1
+    ...    namespace=${test_namespace}    port_forwarding=${IS_KSERVE_RAW}    token=${inf_token}
+    Query Model Multiple Times    model_name=${flan_model_name}    runtime=${TGIS_RUNTIME_NAME}
+    ...    inference_type=tokenize    n_times=1    port_forwarding=${IS_KSERVE_RAW}
+    ...    namespace=${test_namespace}    validate_response=${TRUE}    string_check_only=${TRUE}
+    ...    token=${inf_token}
+    Query Model Multiple Times    model_name=${flan_model_name}    runtime=${TGIS_RUNTIME_NAME}
+    ...    inference_type=model-info    n_times=1    port_forwarding=${IS_KSERVE_RAW}
+    ...    namespace=${test_namespace}    validate_response=${TRUE}    string_check_only=${TRUE}
+    ...    token=${inf_token}
+    Query Model Multiple Times    model_name=${flan_model_name}    runtime=${TGIS_RUNTIME_NAME}
+    ...    inference_type=streaming    n_times=1    port_forwarding=${IS_KSERVE_RAW}
+    ...    namespace=${test_namespace}    validate_response=${FALSE}
+    ...    token=${inf_token}
+
+    [Teardown]    Run Keywords
+    ...    Clean Up Test Project    test_ns=${test_namespace}
+    ...    isvc_names=${models_names}    wait_prj_deletion=${FALSE}
+    ...    AND
+    ...    Run Keyword If    ${IS_KSERVE_RAW}    Terminate Process    llm-query-process    kill=true
+
 
 *** Keywords ***
 Suite Setup
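Note: the token queries authenticate only because the new RoleBinding grants the
ServiceAccount the "view" ClusterRole in the model's namespace. A quick sanity check that
the binding took effect; namespace and ServiceAccount names are illustrative:

    oc auth can-i --list -n kserve-cli \
        --as=system:serviceaccount:kserve-cli:models-bucket-sa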