From 88db0fbc6a68af28527a878c5110e2bfb86b84d8 Mon Sep 17 00:00:00 2001 From: Lucas Fernandez Date: Mon, 13 Jan 2025 19:46:21 +0100 Subject: [PATCH] Add annotation for kserve serverless (#3629) --- .../mockInferenceServiceK8sResource.ts | 22 ++++++---- .../modelServing/modelServingGlobal.cy.ts | 6 +-- .../modelServing/servingRuntimeList.cy.ts | 6 +-- .../k8s/__tests__/inferenceServices.spec.ts | 22 ++++++---- frontend/src/api/k8s/inferenceServices.ts | 43 +++++++++---------- frontend/src/k8sTypes.ts | 24 ++++++----- .../modelServing/screens/projects/utils.ts | 19 +++++++- frontend/src/pages/modelServing/utils.ts | 4 +- 8 files changed, 88 insertions(+), 58 deletions(-) diff --git a/frontend/src/__mocks__/mockInferenceServiceK8sResource.ts b/frontend/src/__mocks__/mockInferenceServiceK8sResource.ts index c4ca4b6bb4..b2aced0f67 100644 --- a/frontend/src/__mocks__/mockInferenceServiceK8sResource.ts +++ b/frontend/src/__mocks__/mockInferenceServiceK8sResource.ts @@ -1,5 +1,5 @@ import { K8sStatus } from '@openshift/dynamic-plugin-sdk-utils'; -import { InferenceServiceKind, KnownLabels } from '~/k8sTypes'; +import { DeploymentMode, InferenceServiceKind, KnownLabels } from '~/k8sTypes'; import { genUID } from '~/__mocks__/mockUtils'; import { ContainerResources } from '~/types'; @@ -27,6 +27,7 @@ type MockResourceConfigType = { additionalLabels?: Record; args?: string[]; env?: Array<{ name: string; value: string }>; + isKserveRaw?: boolean; }; type InferenceServicek8sError = K8sStatus & { @@ -90,19 +91,24 @@ export const mockInferenceServiceK8sResource = ({ additionalLabels = {}, args = [], env = [], + isKserveRaw = false, }: MockResourceConfigType): InferenceServiceKind => ({ apiVersion: 'serving.kserve.io/v1beta1', kind: 'InferenceService', metadata: { annotations: { 'openshift.io/display-name': displayName, - ...(isModelMesh - ?
{ 'serving.kserve.io/deploymentMode': 'ModelMesh' } - : { - 'serving.knative.openshift.io/enablePassthrough': 'true', - 'sidecar.istio.io/inject': 'true', - 'sidecar.istio.io/rewriteAppHTTPProbers': 'true', - }), + 'serving.kserve.io/deploymentMode': isModelMesh + ? DeploymentMode.ModelMesh + : isKserveRaw + ? DeploymentMode.RawDeployment + : DeploymentMode.Serverless, + ...(!isModelMesh && + !isKserveRaw && { + 'serving.knative.openshift.io/enablePassthrough': 'true', + 'sidecar.istio.io/inject': 'true', + 'sidecar.istio.io/rewriteAppHTTPProbers': 'true', + }), }, creationTimestamp: '2023-03-17T16:12:41Z', ...(deleted ? { deletionTimestamp: new Date().toUTCString() } : {}), diff --git a/frontend/src/__tests__/cypress/cypress/tests/mocked/modelServing/modelServingGlobal.cy.ts b/frontend/src/__tests__/cypress/cypress/tests/mocked/modelServing/modelServingGlobal.cy.ts index 26caee8dfe..4bb5ebf160 100644 --- a/frontend/src/__tests__/cypress/cypress/tests/mocked/modelServing/modelServingGlobal.cy.ts +++ b/frontend/src/__tests__/cypress/cypress/tests/mocked/modelServing/modelServingGlobal.cy.ts @@ -24,7 +24,7 @@ import { ServingRuntimeModel, TemplateModel, } from '~/__tests__/cypress/cypress/utils/models'; -import type { InferenceServiceKind, ServingRuntimeKind } from '~/k8sTypes'; +import { DeploymentMode, type InferenceServiceKind, type ServingRuntimeKind } from '~/k8sTypes'; import { ServingRuntimePlatform } from '~/types'; import { be } from '~/__tests__/cypress/cypress/utils/should'; import { asClusterAdminUser } from '~/__tests__/cypress/cypress/utils/mockUsers'; @@ -408,7 +408,7 @@ describe('Model Serving Global', () => { labels: { 'opendatahub.io/dashboard': 'true' }, annotations: { 'openshift.io/display-name': 'Test Name', - 'serving.kserve.io/deploymentMode': 'ModelMesh', + 'serving.kserve.io/deploymentMode': DeploymentMode.ModelMesh, }, }, spec: { @@ -473,7 +473,7 @@ describe('Model Serving Global', () => { labels: { 'opendatahub.io/dashboard': 'true' }, 
annotations: { 'openshift.io/display-name': 'trigger-error', - 'serving.kserve.io/deploymentMode': 'ModelMesh', + 'serving.kserve.io/deploymentMode': DeploymentMode.ModelMesh, }, }, spec: { diff --git a/frontend/src/__tests__/cypress/cypress/tests/mocked/modelServing/servingRuntimeList.cy.ts b/frontend/src/__tests__/cypress/cypress/tests/mocked/modelServing/servingRuntimeList.cy.ts index 29ef31f40b..91c2fbe549 100644 --- a/frontend/src/__tests__/cypress/cypress/tests/mocked/modelServing/servingRuntimeList.cy.ts +++ b/frontend/src/__tests__/cypress/cypress/tests/mocked/modelServing/servingRuntimeList.cy.ts @@ -34,7 +34,7 @@ import { } from '~/__tests__/cypress/cypress/pages/modelServing'; import { projectDetails } from '~/__tests__/cypress/cypress/pages/projects'; import { be } from '~/__tests__/cypress/cypress/utils/should'; -import type { InferenceServiceKind, ServingRuntimeKind } from '~/k8sTypes'; +import { DeploymentMode, type InferenceServiceKind, type ServingRuntimeKind } from '~/k8sTypes'; import { ServingRuntimePlatform } from '~/types'; import { deleteModal } from '~/__tests__/cypress/cypress/pages/components/DeleteModal'; import { StackCapability } from '~/concepts/areas/types'; @@ -486,7 +486,7 @@ describe('Serving Runtime List', () => { labels: { 'opendatahub.io/dashboard': 'true' }, annotations: { 'openshift.io/display-name': 'Test Name', - 'serving.kserve.io/deploymentMode': 'ModelMesh', + 'serving.kserve.io/deploymentMode': DeploymentMode.ModelMesh, }, }, spec: { @@ -1419,7 +1419,7 @@ describe('Serving Runtime List', () => { namespace: 'test-project', annotations: { 'openshift.io/display-name': 'Test Name', - 'serving.kserve.io/deploymentMode': 'RawDeployment', + 'serving.kserve.io/deploymentMode': DeploymentMode.RawDeployment, }, labels: { 'opendatahub.io/dashboard': 'true', diff --git a/frontend/src/api/k8s/__tests__/inferenceServices.spec.ts b/frontend/src/api/k8s/__tests__/inferenceServices.spec.ts index a87db96499..99f995e6a3 100644 --- 
a/frontend/src/api/k8s/__tests__/inferenceServices.spec.ts +++ b/frontend/src/api/k8s/__tests__/inferenceServices.spec.ts @@ -24,7 +24,7 @@ import { updateInferenceService, } from '~/api/k8s/inferenceServices'; import { InferenceServiceModel, ProjectModel } from '~/api/models'; -import { InferenceServiceKind, ProjectKind } from '~/k8sTypes'; +import { DeploymentMode, InferenceServiceKind, ProjectKind } from '~/k8sTypes'; import { ModelServingSize } from '~/pages/modelServing/screens/types'; import { AcceleratorProfileFormData } from '~/utilities/useAcceleratorProfileFormState'; import { AcceleratorProfileState } from '~/utilities/useReadAcceleratorState'; @@ -51,7 +51,7 @@ describe('assembleInferenceService', () => { expect(inferenceService.metadata.annotations).toBeDefined(); expect(inferenceService.metadata.annotations?.['serving.kserve.io/deploymentMode']).toBe( - undefined, + DeploymentMode.Serverless, ); expect(inferenceService.metadata.annotations?.['security.opendatahub.io/enable-auth']).toBe( undefined, @@ -72,7 +72,7 @@ describe('assembleInferenceService', () => { expect(inferenceService.metadata.annotations).toBeDefined(); expect(inferenceService.metadata.annotations?.['serving.kserve.io/deploymentMode']).toBe( - undefined, + DeploymentMode.Serverless, ); expect(inferenceService.metadata.annotations?.['security.opendatahub.io/enable-auth']).toBe( 'true', @@ -96,7 +96,7 @@ describe('assembleInferenceService', () => { expect(inferenceService.metadata.annotations).toBeDefined(); expect(inferenceService.metadata.annotations?.['serving.kserve.io/deploymentMode']).toBe( - 'ModelMesh', + DeploymentMode.ModelMesh, ); expect( inferenceService.metadata.annotations?.['serving.knative.openshift.io/enablePassthrough'], @@ -392,7 +392,7 @@ describe('assembleInferenceService', () => { ); const { annotations, labels } = inferenceService.metadata; - expect(annotations?.['serving.kserve.io/deploymentMode']).toBe('RawDeployment'); + 
expect(annotations?.['serving.kserve.io/deploymentMode']).toBe(DeploymentMode.RawDeployment); expect(annotations?.['serving.knative.openshift.io/enablePassthrough']).toBe(undefined); expect(annotations?.['sidecar.istio.io/inject']).toBe(undefined); @@ -407,7 +407,9 @@ describe('assembleInferenceService', () => { const ext = assembleInferenceService( mockInferenceServiceModalData({ isKServeRawDeployment: true, externalRoute: true }), ); - expect(ext.metadata.annotations?.['serving.kserve.io/deploymentMode']).toBe('RawDeployment'); + expect(ext.metadata.annotations?.['serving.kserve.io/deploymentMode']).toBe( + DeploymentMode.RawDeployment, + ); expect(ext.metadata.annotations?.['security.opendatahub.io/enable-auth']).toBe(undefined); expect(ext.metadata.labels?.['security.opendatahub.io/enable-auth']).toBe(undefined); expect(ext.metadata.labels?.['networking.kserve.io/visibility']).toBe('exposed'); @@ -416,7 +418,9 @@ describe('assembleInferenceService', () => { const auth = assembleInferenceService( mockInferenceServiceModalData({ isKServeRawDeployment: true, tokenAuth: true }), ); - expect(auth.metadata.annotations?.['serving.kserve.io/deploymentMode']).toBe('RawDeployment'); + expect(auth.metadata.annotations?.['serving.kserve.io/deploymentMode']).toBe( + DeploymentMode.RawDeployment, + ); expect(auth.metadata.annotations?.['security.opendatahub.io/enable-auth']).toBe(undefined); expect(auth.metadata.labels?.['security.opendatahub.io/enable-auth']).toBe('true'); expect(auth.metadata.labels?.['networking.kserve.io/visibility']).toBe(undefined); @@ -429,7 +433,9 @@ describe('assembleInferenceService', () => { tokenAuth: true, }), ); - expect(both.metadata.annotations?.['serving.kserve.io/deploymentMode']).toBe('RawDeployment'); + expect(both.metadata.annotations?.['serving.kserve.io/deploymentMode']).toBe( + DeploymentMode.RawDeployment, + ); expect(both.metadata.annotations?.['security.opendatahub.io/enable-auth']).toBe(undefined); 
expect(both.metadata.labels?.['security.opendatahub.io/enable-auth']).toBe('true'); expect(both.metadata.labels?.['networking.kserve.io/visibility']).toBe('exposed'); diff --git a/frontend/src/api/k8s/inferenceServices.ts b/frontend/src/api/k8s/inferenceServices.ts index a1f1d5340c..75b39383b7 100644 --- a/frontend/src/api/k8s/inferenceServices.ts +++ b/frontend/src/api/k8s/inferenceServices.ts @@ -14,6 +14,7 @@ import { applyK8sAPIOptions } from '~/api/apiMergeUtils'; import { ContainerResources } from '~/types'; import { AcceleratorProfileFormData } from '~/utilities/useAcceleratorProfileFormState'; import { AcceleratorProfileState } from '~/utilities/useReadAcceleratorState'; +import { getInferenceServiceDeploymentMode } from '~/pages/modelServing/screens/projects/utils'; import { getModelServingProjects } from './projects'; import { assemblePodSpecOptions, parseCommandLine } from './utils'; @@ -111,17 +112,16 @@ export const assembleInferenceService = ( ...inferenceService.metadata, annotations: { 'openshift.io/display-name': data.name.trim(), - ...(isModelMesh - ? { 'serving.kserve.io/deploymentMode': 'ModelMesh' } - : data.isKServeRawDeployment - ? { - 'serving.kserve.io/deploymentMode': 'RawDeployment', - } - : { - 'serving.knative.openshift.io/enablePassthrough': 'true', - 'sidecar.istio.io/inject': 'true', - 'sidecar.istio.io/rewriteAppHTTPProbers': 'true', - }), + 'serving.kserve.io/deploymentMode': getInferenceServiceDeploymentMode( + !!isModelMesh, + !!data.isKServeRawDeployment, + ), + ...(!isModelMesh && + !data.isKServeRawDeployment && { + 'serving.knative.openshift.io/enablePassthrough': 'true', + 'sidecar.istio.io/inject': 'true', + 'sidecar.istio.io/rewriteAppHTTPProbers': 'true', + }), }, labels: { ...inferenceService.metadata.labels, @@ -159,17 +159,16 @@ export const assembleInferenceService = ( namespace: project, annotations: { 'openshift.io/display-name': data.name.trim(), - ...(isModelMesh - ? 
{ 'serving.kserve.io/deploymentMode': 'ModelMesh' } - : data.isKServeRawDeployment - ? { - 'serving.kserve.io/deploymentMode': 'RawDeployment', - } - : { - 'serving.knative.openshift.io/enablePassthrough': 'true', - 'sidecar.istio.io/inject': 'true', - 'sidecar.istio.io/rewriteAppHTTPProbers': 'true', - }), + 'serving.kserve.io/deploymentMode': getInferenceServiceDeploymentMode( + !!isModelMesh, + !!data.isKServeRawDeployment, + ), + ...(!isModelMesh && + !data.isKServeRawDeployment && { + 'serving.knative.openshift.io/enablePassthrough': 'true', + 'sidecar.istio.io/inject': 'true', + 'sidecar.istio.io/rewriteAppHTTPProbers': 'true', + }), }, labels: { [KnownLabels.DASHBOARD_RESOURCE]: 'true', diff --git a/frontend/src/k8sTypes.ts b/frontend/src/k8sTypes.ts index af7f66f92b..eed5fb6c60 100644 --- a/frontend/src/k8sTypes.ts +++ b/frontend/src/k8sTypes.ts @@ -1,5 +1,5 @@ import { K8sResourceCommon, MatchExpression } from '@openshift/dynamic-plugin-sdk-utils'; -import { EitherNotBoth, EitherOrNone } from '@openshift/dynamic-plugin-sdk'; +import { EitherNotBoth } from '@openshift/dynamic-plugin-sdk'; import { AwsKeys } from '~/pages/projects/dataConnections/const'; import { StackComponent } from '~/concepts/areas/types'; import { @@ -453,6 +453,12 @@ export type SupportedModelFormats = { autoSelect?: boolean; }; +export enum DeploymentMode { + ModelMesh = 'ModelMesh', + RawDeployment = 'RawDeployment', + Serverless = 'Serverless', +} + export type InferenceServiceAnnotations = Partial<{ 'security.opendatahub.io/enable-auth': string; }>; @@ -469,16 +475,12 @@ export type InferenceServiceKind = K8sResourceCommon & { namespace: string; annotations?: InferenceServiceAnnotations & DisplayNameAnnotations & - EitherOrNone< - { - 'serving.kserve.io/deploymentMode': 'ModelMesh' | 'RawDeployment'; - }, - { - 'serving.knative.openshift.io/enablePassthrough': 'true'; - 'sidecar.istio.io/inject': 'true'; - 'sidecar.istio.io/rewriteAppHTTPProbers': 'true'; - } - >; + Partial<{ + 
'serving.kserve.io/deploymentMode': DeploymentMode; + 'serving.knative.openshift.io/enablePassthrough': 'true'; + 'sidecar.istio.io/inject': 'true'; + 'sidecar.istio.io/rewriteAppHTTPProbers': 'true'; + }>; labels?: InferenceServiceLabels; }; spec: { diff --git a/frontend/src/pages/modelServing/screens/projects/utils.ts b/frontend/src/pages/modelServing/screens/projects/utils.ts index 510c4f977a..b783708645 100644 --- a/frontend/src/pages/modelServing/screens/projects/utils.ts +++ b/frontend/src/pages/modelServing/screens/projects/utils.ts @@ -2,6 +2,7 @@ import * as React from 'react'; import { ConfigMapKind, DashboardConfigKind, + DeploymentMode, InferenceServiceKind, KnownLabels, PersistentVolumeClaimKind, @@ -66,8 +67,9 @@ export const isServingRuntimeTokenEnabled = (servingRuntime: ServingRuntimeKind) export const isServingRuntimeRouteEnabled = (servingRuntime: ServingRuntimeKind): boolean => servingRuntime.metadata.annotations?.['enable-route'] === 'true'; -const isInferenceServiceKServeRaw = (inferenceService: InferenceServiceKind): boolean => - inferenceService.metadata.annotations?.['serving.kserve.io/deploymentMode'] === 'RawDeployment'; +export const isInferenceServiceKServeRaw = (inferenceService: InferenceServiceKind): boolean => + inferenceService.metadata.annotations?.['serving.kserve.io/deploymentMode'] === + DeploymentMode.RawDeployment; export const isInferenceServiceTokenEnabled = (inferenceService: InferenceServiceKind): boolean => isInferenceServiceKServeRaw(inferenceService) @@ -82,6 +84,19 @@ export const isInferenceServiceRouteEnabled = (inferenceService: InferenceServic export const isGpuDisabled = (servingRuntime: ServingRuntimeKind): boolean => servingRuntime.metadata.annotations?.['opendatahub.io/disable-gpu'] === 'true'; +export const getInferenceServiceDeploymentMode = ( + modelMesh: boolean, + kserveRaw: boolean, +): DeploymentMode => { + if (modelMesh) { + return DeploymentMode.ModelMesh; + } + if (kserveRaw) { + return 
DeploymentMode.RawDeployment; + } + return DeploymentMode.Serverless; +}; + export const getInferenceServiceFromServingRuntime = ( inferenceServices: InferenceServiceKind[], servingRuntime: ServingRuntimeKind, diff --git a/frontend/src/pages/modelServing/utils.ts b/frontend/src/pages/modelServing/utils.ts index e2906df4b5..17ac664a08 100644 --- a/frontend/src/pages/modelServing/utils.ts +++ b/frontend/src/pages/modelServing/utils.ts @@ -31,6 +31,7 @@ import { ServiceAccountKind, RoleKind, ServingContainer, + DeploymentMode, } from '~/k8sTypes'; import { ContainerResources } from '~/types'; import { getDisplayNameFromK8sResource, translateDisplayNameForK8s } from '~/concepts/k8s/utils'; @@ -370,4 +371,5 @@ export const isModelServerEditInfoChanged = ( : true; export const isModelMesh = (inferenceService: InferenceServiceKind): boolean => - inferenceService.metadata.annotations?.['serving.kserve.io/deploymentMode'] === 'ModelMesh'; + inferenceService.metadata.annotations?.['serving.kserve.io/deploymentMode'] === + DeploymentMode.ModelMesh;