diff --git a/ods_ci/tests/Tests/1000__model_serving/1009__model_serving_triton_on_kserve/1009__model_serving_triton_on_kserve_api.robot b/ods_ci/tests/Tests/1000__model_serving/1009__model_serving_triton_on_kserve/1009__model_serving_triton_on_kserve_api.robot
index 7319280db..263edf3d3 100644
--- a/ods_ci/tests/Tests/1000__model_serving/1009__model_serving_triton_on_kserve/1009__model_serving_triton_on_kserve_api.robot
+++ b/ods_ci/tests/Tests/1000__model_serving/1009__model_serving_triton_on_kserve/1009__model_serving_triton_on_kserve_api.robot
@@ -38,6 +38,9 @@ ${INFERENCESERVICE_FILEPATH}= ${LLM_RESOURCES_DIRPATH}/serving_runtimes/base/
 ${INFERENCESERVICE_FILEPATH_NEW}=    ${LLM_RESOURCES_DIRPATH}/serving_runtimes/isvc
 ${INFERENCESERVICE_FILLED_FILEPATH}=    ${INFERENCESERVICE_FILEPATH_NEW}/isvc_filled.yaml
 ${KSERVE_RUNTIME_REST_NAME}=    triton-kserve-runtime
+${PYTORCH_MODEL_NAME}=    resnet50
+${INFERENCE_REST_INPUT_PYTORCH}=    @tests/Resources/Files/triton/kserve-triton-resnet-rest-input.json
+${EXPECTED_INFERENCE_REST_OUTPUT_FILE_PYTORCH}=    tests/Resources/Files/triton/kserve-triton-resnet-rest-output.json
 ${PATTERN}=    https:\/\/([^\/:]+)
 ${PROTOBUFF_FILE}=    tests/Resources/Files/triton/grpc_predict_v2.proto
 
@@ -82,6 +85,47 @@ Test Python Model Rest Inference Via API (Triton on Kserve) # robocop: off=to
     ...    AND
     ...    Run Keyword If    "${KSERVE_MODE}"=="RawDeployment"    Terminate Process    triton-process    kill=true
 
+Test Pytorch Model Rest Inference Via API (Triton on Kserve)    # robocop: off=too-long-test-case
+    [Documentation]    Test the deployment of pytorch model in Kserve using Triton
+    [Tags]    Tier2    RHOAIENG-16909
+    Setup Test Variables    model_name=${PYTORCH_MODEL_NAME}    use_pvc=${FALSE}    use_gpu=${FALSE}
+    ...    kserve_mode=${KSERVE_MODE}    model_path=triton/model_repository/
+    Set Project And Runtime    runtime=${KSERVE_RUNTIME_REST_NAME}    protocol=${PROTOCOL}    namespace=${test_namespace}
+    ...    download_in_pvc=${DOWNLOAD_IN_PVC}    model_name=${PYTORCH_MODEL_NAME}
+    ...    storage_size=100Mi    memory_request=100Mi
+    ${requests}=    Create Dictionary    memory=1Gi
+    Compile Inference Service YAML    isvc_name=${PYTORCH_MODEL_NAME}
+    ...    sa_name=models-bucket-sa
+    ...    model_storage_uri=${storage_uri}
+    ...    model_format=python    # NOTE(review): same value as the Python test; confirm vs. pytorch
+    ...    serving_runtime=${KSERVE_RUNTIME_REST_NAME}    version="1"
+    ...    limits_dict=${limits}    requests_dict=${requests}    kserve_mode=${KSERVE_MODE}
+    Deploy Model Via CLI    isvc_filepath=${INFERENCESERVICE_FILLED_FILEPATH}
+    ...    namespace=${test_namespace}
+    # File is not needed anymore after applying
+    Remove File    ${INFERENCESERVICE_FILLED_FILEPATH}
+    Wait For Pods To Be Ready    label_selector=serving.kserve.io/inferenceservice=${PYTORCH_MODEL_NAME}
+    ...    namespace=${test_namespace}
+    ${pod_name}=    Get Pod Name    namespace=${test_namespace}
+    ...    label_selector=serving.kserve.io/inferenceservice=${PYTORCH_MODEL_NAME}
+    ${service_port}=    Extract Service Port    service_name=${PYTORCH_MODEL_NAME}-predictor    protocol=TCP
+    ...    namespace=${test_namespace}
+    IF    "${KSERVE_MODE}"=="RawDeployment"
+        Start Port-forwarding    namespace=${test_namespace}    pod_name=${pod_name}    local_port=${service_port}
+        ...    remote_port=${service_port}    process_alias=triton-process
+    END
+    ${EXPECTED_INFERENCE_REST_OUTPUT_PYTORCH}=    Load Json File
+    ...    file_path=${EXPECTED_INFERENCE_REST_OUTPUT_FILE_PYTORCH}    as_string=${TRUE}
+    Verify Model Inference With Retries    model_name=${PYTORCH_MODEL_NAME}    inference_input=${INFERENCE_REST_INPUT_PYTORCH}
+    ...    expected_inference_output=${EXPECTED_INFERENCE_REST_OUTPUT_PYTORCH}    project_title=${test_namespace}
+    ...    deployment_mode=Cli    kserve_mode=${KSERVE_MODE}    service_port=${service_port}
+    ...    end_point=/v2/models/${model_name}/infer    retries=3
+    [Teardown]    Run Keywords
+    ...    Clean Up Test Project    test_ns=${test_namespace}
+    ...    isvc_names=${models_names}    wait_prj_deletion=${FALSE}    kserve_mode=${KSERVE_MODE}
+    ...    AND
+    ...    Run Keyword If    "${KSERVE_MODE}"=="RawDeployment"    Terminate Process    triton-process    kill=true
+
 Test Python Model Grpc Inference Via API (Triton on Kserve)    # robocop: off=too-long-test-case
     [Documentation]    Test the deployment of python model in Kserve using Triton
     [Tags]    Tier2    RHOAIENG-16912
@@ -166,6 +210,7 @@ Test Onnx Model Rest Inference Via API (Triton on Kserve) # robocop: off=too-
     ...    AND
     ...    Run Keyword If    "${KSERVE_MODE}"=="RawDeployment"    Terminate Process    triton-process    kill=true
 
+
 *** Keywords ***
 Suite Setup
     [Documentation]    Suite setup keyword