diff --git a/backend/src/utils/constants.ts b/backend/src/utils/constants.ts
index 4307ecb085..1f87eaae4b 100644
--- a/backend/src/utils/constants.ts
+++ b/backend/src/utils/constants.ts
@@ -70,7 +70,7 @@ export const blankDashboardCR: DashboardConfig = {
     disableServingRuntimeParams: false,
     disableConnectionTypes: false,
     disableStorageClasses: false,
-    disableNIMModelServing: false,
+    disableNIMModelServing: true,
   },
   notebookController: {
     enabled: true,
diff --git a/frontend/src/__mocks__/mockKserveMetricsConfigMap.ts b/frontend/src/__mocks__/mockKserveMetricsConfigMap.ts
index 06a64628fb..b8a9b99c17 100644
--- a/frontend/src/__mocks__/mockKserveMetricsConfigMap.ts
+++ b/frontend/src/__mocks__/mockKserveMetricsConfigMap.ts
@@ -284,6 +284,96 @@ export const MOCK_NIM_METRICS_CONFIG_MISSING_QUERY = `{
   ]
 }`;

+export const MOCK_NIM_METRICS_CONFIG_MISSING_QUERY_2 = `{
+  "config": [
+    {
+      "title": "GPU cache usage over time",
+      "type": "KV_CACHE",
+      "queries": [
+        {
+          "title": "GPU cache usage over time",
+          "query": "sum_over_time(gpu_cache_usage_perc{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}[24h])"
+        }
+      ]
+    },
+    {
+      "title": "Requests outcomes",
+      "type": "REQUEST_OUTCOMES",
+      "queries": [
+        {
+          "title": "Number of failed incoming requests",
+          "query": "round(sum(increase(request_failure_total{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}[5m])))"
+        }
+      ]
+    },
+    {
+      "title": "Current running, waiting, and max requests count",
+      "type": "CURRENT_REQUESTS",
+      "queries": [
+        {
+          "title": "Requests running",
+          "query": "num_requests_running{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}"
+        }
+      ]
+    },
+    {
+      "title": "Tokens count",
+      "type": "TOKENS_COUNT",
+      "queries": [
+        {
+          "title": "Total generation token",
+          "query": "round(rate(generation_tokens_total{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}[1m]))"
+        }
+      ]
+    }
+  ]
+}`;
+
+export const MOCK_NIM_METRICS_CONFIG_MISSING_QUERY_3 = `{
+  "config": [
+    {
+      "title": "GPU cache usage over time",
+      "type": "KV_CACHE",
+      "queries": [
+        {
+          "title": "GPU cache usage over time",
+          "query": "sum_over_time(gpu_cache_usage_perc{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}[24h])"
+        }
+      ]
+    },
+    {
+      "title": "Requests outcomes",
+      "type": "REQUEST_OUTCOMES",
+      "queries": [
+        {
+          "title": "Number of successful incoming requests",
+          "query": "round(sum(increase(request_success_total{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}[5m])))"
+        }
+      ]
+    },
+    {
+      "title": "Current running, waiting, and max requests count",
+      "type": "CURRENT_REQUESTS",
+      "queries": [
+        {
+          "title": "Max requests",
+          "query": "num_request_max{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}"
+        }
+      ]
+    },
+    {
+      "title": "Tokens count",
+      "type": "TOKENS_COUNT",
+      "queries": [
+        {
+          "title": "Total prompts token",
+          "query": "round(rate(prompt_tokens_total{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}[1m]))"
+        }
+      ]
+    }
+  ]
+}`;
+
 export const mockKserveMetricsConfigMap = ({
   namespace = 'test-project',
   modelName = 'test-inference-service',
diff --git a/frontend/src/__tests__/cypress/cypress/pages/modelMetrics.ts b/frontend/src/__tests__/cypress/cypress/pages/modelMetrics.ts
index c566def439..9e0a54c124 100644
--- a/frontend/src/__tests__/cypress/cypress/pages/modelMetrics.ts
+++ b/frontend/src/__tests__/cypress/cypress/pages/modelMetrics.ts
@@ -48,7 +48,7 @@ class ModelMetricsPerformance extends ModelMetricsGlobal {

 class ModelMetricsNim extends ModelMetricsGlobal {
   visit(project: string, model: string) {
-    cy.visitWithLogin(`/modelServing/${project}/metrics/${model}/nim`);
+    cy.visitWithLogin(`/modelServing/${project}/metrics/${model}/performance`);
     this.wait();
   }

diff --git a/frontend/src/__tests__/cypress/cypress/tests/mocked/modelServing/modelMetrics.cy.ts b/frontend/src/__tests__/cypress/cypress/tests/mocked/modelServing/modelMetrics.cy.ts
index a104f40cd3..7f640e6aeb 100644
--- a/frontend/src/__tests__/cypress/cypress/tests/mocked/modelServing/modelMetrics.cy.ts
+++ b/frontend/src/__tests__/cypress/cypress/tests/mocked/modelServing/modelMetrics.cy.ts
@@ -51,6 +51,8 @@ import {
   MOCK_KSERVE_METRICS_CONFIG_MISSING_QUERY,
   MOCK_NIM_METRICS_CONFIG_3,
   MOCK_NIM_METRICS_CONFIG_MISSING_QUERY,
+  MOCK_NIM_METRICS_CONFIG_MISSING_QUERY_2,
+  MOCK_NIM_METRICS_CONFIG_MISSING_QUERY_3,
   mockKserveMetricsConfigMap,
   mockNimMetricsConfigMap,
 } from '~/__mocks__/mockKserveMetricsConfigMap';
@@ -808,7 +810,7 @@ describe('KServe NIM metrics', () => {
       { statusCode: 404, body: mock404Error({}) },
     );

-    modelMetricsKserveNim.visit('test-project', 'test-inference-service');
+    modelMetricsKserveNim.visit('tomer-test-2', 'nim-deploy');
     modelMetricsKserveNim.findUnknownErrorCard().should('be.visible');
   });

@@ -914,6 +916,58 @@ describe('KServe NIM metrics', () => {
     modelMetricsKserveNim.getMetricsChart('Tokens count').shouldHaveNoData();
   });

+  it('charts should not error out if a query is missing and there is other data', () => {
+    initIntercepts({
+      disableTrustyBiasMetrics: false,
+      disablePerformanceMetrics: false,
+      disableKServeMetrics: false,
+      disableNIMModelServing: false,
+      hasServingData: true,
+      hasBiasData: false,
+      inferenceServices: [mockInferenceServiceK8sResource({ isModelMesh: false })],
+    });
+
+    cy.interceptK8s(
+      ConfigMapModel,
+      mockNimMetricsConfigMap({ config: MOCK_NIM_METRICS_CONFIG_MISSING_QUERY_2 }),
+    );
+
+    modelMetricsKserveNim.visit('test-project', 'test-inference-service');
+    modelMetricsKserveNim.getAllMetricsCharts().should('have.length', 4);
+    modelMetricsKserveNim.getMetricsChart('GPU cache usage over time').shouldHaveData();
+    modelMetricsKserveNim.getMetricsChart('Requests outcomes').shouldHaveData();
+    modelMetricsKserveNim
+      .getMetricsChart('Current running, waiting, and max requests count')
+      .shouldHaveData();
+    modelMetricsKserveNim.getMetricsChart('Tokens count').shouldHaveData();
+  });
+
+  it('charts should not error out if a different query is missing and there is other data', () => {
+    initIntercepts({
+      disableTrustyBiasMetrics: false,
+      disablePerformanceMetrics: false,
+      disableKServeMetrics: false,
+      disableNIMModelServing: false,
+      hasServingData: true,
+      hasBiasData: false,
+      inferenceServices: [mockInferenceServiceK8sResource({ isModelMesh: false })],
+    });
+
+    cy.interceptK8s(
+      ConfigMapModel,
+      mockNimMetricsConfigMap({ config: MOCK_NIM_METRICS_CONFIG_MISSING_QUERY_3 }),
+    );
+
+    modelMetricsKserveNim.visit('test-project', 'test-inference-service');
+    modelMetricsKserveNim.getAllMetricsCharts().should('have.length', 4);
+    modelMetricsKserveNim.getMetricsChart('GPU cache usage over time').shouldHaveData();
+    modelMetricsKserveNim.getMetricsChart('Requests outcomes').shouldHaveData();
+    modelMetricsKserveNim
+      .getMetricsChart('Current running, waiting, and max requests count')
+      .shouldHaveData();
+    modelMetricsKserveNim.getMetricsChart('Tokens count').shouldHaveData();
+  });
+
   it('charts should show data when serving data is available', () => {
     initIntercepts({
       disableTrustyBiasMetrics: false,
       disablePerformanceMetrics: false,
       disableKServeMetrics: false,
       hasServingData: true,
diff --git a/frontend/src/api/prometheus/kservePerformanceMetrics.ts b/frontend/src/api/prometheus/kservePerformanceMetrics.ts
index 014b2bce77..39480ea3e8 100644
--- a/frontend/src/api/prometheus/kservePerformanceMetrics.ts
+++ b/frontend/src/api/prometheus/kservePerformanceMetrics.ts
@@ -1,6 +1,8 @@
 import React from 'react';
-import { KserveMetricGraphDefinition } from '~/concepts/metrics/kserve/types';
-import { NimMetricGraphDefinition } from '~/concepts/metrics/kserve/types';
+import {
+  KserveMetricGraphDefinition,
+  NimMetricGraphDefinition,
+} from '~/concepts/metrics/kserve/types';
 import { defaultResponsePredicate } from '~/api/prometheus/usePrometheusQueryRange';
 import { SupportedArea, useIsAreaAvailable } from '~/concepts/areas';
 import { TimeframeTitle } from '~/concepts/metrics/types';
diff --git a/frontend/src/pages/modelServing/screens/metrics/MetricsPageTabs.tsx b/frontend/src/pages/modelServing/screens/metrics/MetricsPageTabs.tsx
index d3cce8472d..65d87decbb 100644
--- a/frontend/src/pages/modelServing/screens/metrics/MetricsPageTabs.tsx
+++ b/frontend/src/pages/modelServing/screens/metrics/MetricsPageTabs.tsx
@@ -8,15 +8,15 @@ import useDoesTrustyAICRExist from '~/concepts/trustyai/context/useDoesTrustyAIC
 import { SupportedArea, useIsAreaAvailable } from '~/concepts/areas';
 import { InferenceServiceKind } from '~/k8sTypes';
 import { TrustyInstallState } from '~/concepts/trustyai/types';
+import './MetricsPageTabs.scss';
+import useServingPlatformStatuses from '~/pages/modelServing/useServingPlatformStatuses';
+import { byName, ProjectsContext } from '~/concepts/projects/ProjectsContext';
+import { isProjectNIMSupported } from '~/pages/modelServing/screens/projects/nimUtils';
+import useMetricsPageEnabledTabs from './useMetricsPageEnabledTabs';
+import BiasConfigurationAlertPopover from './bias/BiasConfigurationPage/BiasConfigurationAlertPopover';
 import PerformanceTab from './performance/PerformanceTab';
 import BiasTab from './bias/BiasTab';
-import BiasConfigurationAlertPopover from './bias/BiasConfigurationPage/BiasConfigurationAlertPopover';
-import useMetricsPageEnabledTabs from './useMetricsPageEnabledTabs';
 import NIMTab from './nim/NimTab';
-import './MetricsPageTabs.scss';
-import useServingPlatformStatuses from '../../useServingPlatformStatuses';
-import { byName, ProjectsContext } from '~/concepts/projects/ProjectsContext';
-import { isProjectNIMSupported } from '../projects/nimUtils';

 type MetricsPageTabsProps = {
   model: InferenceServiceKind;
@@ -26,7 +26,7 @@ const MetricsPageTabs: React.FC<MetricsPageTabsProps> = ({ model }) => {
   const servingPlatformStatuses = useServingPlatformStatuses();
   const isNIMAvailable = servingPlatformStatuses.kServeNIM.enabled;
   const { projects } = React.useContext(ProjectsContext);
-  const project = projects.find(byName(model?.metadata.namespace)) ?? null;
+  const project = projects.find(byName(model.metadata.namespace)) ?? null;
   const enabledTabs = useMetricsPageEnabledTabs();
   const isKServeNIMEnabled = project ? isProjectNIMSupported(project) : false;
   const isNimEnabled = isNIMAvailable && isKServeNIMEnabled;
diff --git a/frontend/src/pages/modelServing/screens/metrics/nim/ModelGraphs.tsx b/frontend/src/pages/modelServing/screens/metrics/nim/ModelGraphs.tsx
deleted file mode 100644
index de3f12a082..0000000000
--- a/frontend/src/pages/modelServing/screens/metrics/nim/ModelGraphs.tsx
+++ /dev/null
@@ -1,14 +0,0 @@
-import * as React from 'react';
-import { InferenceServiceKind } from '~/k8sTypes';
-import { isModelMesh } from '~/pages/modelServing/utils';
-import ModelMeshMetrics from '~/pages/modelServing/screens/metrics/performance/ModelMeshMetrics';
-import NimMetrics from '~/pages/modelServing/screens/metrics/nim/NimMetrics';
-
-type ModelGraphProps = {
-  model: InferenceServiceKind;
-};
-
-const ModelGraphs: React.FC<ModelGraphProps> = ({ model }) =>
-  isModelMesh(model) ? <ModelMeshMetrics model={model} /> : <NimMetrics model={model} />;
-
-export default ModelGraphs;
diff --git a/frontend/src/pages/modelServing/screens/metrics/nim/NimTab.tsx b/frontend/src/pages/modelServing/screens/metrics/nim/NimTab.tsx
index a3f49fc77f..49b7f8ba6e 100644
--- a/frontend/src/pages/modelServing/screens/metrics/nim/NimTab.tsx
+++ b/frontend/src/pages/modelServing/screens/metrics/nim/NimTab.tsx
@@ -5,7 +5,7 @@ import { InferenceServiceKind } from '~/k8sTypes';
 import { SupportedArea, useIsAreaAvailable } from '~/concepts/areas';
 import { isModelMesh } from '~/pages/modelServing/utils';
 import MetricsPageToolbar from '~/concepts/metrics/MetricsPageToolbar';
-import ModelGraphs from '~/pages/modelServing/screens/metrics/nim/ModelGraphs';
+import NimMetrics from './NimMetrics';

 type NIMTabProps = {
   model: InferenceServiceKind;
@@ -37,7 +37,7 @@ const NIMTab: React.FC<NIMTabProps> = ({ model }) => {
-        <ModelGraphs model={model} />
+        <NimMetrics model={model} />
   );
diff --git a/frontend/src/pages/modelServing/useModelMetricsEnabled.ts b/frontend/src/pages/modelServing/useModelMetricsEnabled.ts
index e2d3257ee4..5834e92119 100644
--- a/frontend/src/pages/modelServing/useModelMetricsEnabled.ts
+++ b/frontend/src/pages/modelServing/useModelMetricsEnabled.ts
@@ -6,7 +6,7 @@ const useModelMetricsEnabled = (): [modelMetricsEnabled: boolean] => {
   ).status;
   const biasMetricsAreaAvailable = useIsAreaAvailable(SupportedArea.BIAS_METRICS).status;

-  const nimMetricsAreaAvailable = useIsAreaAvailable(SupportedArea.BIAS_METRICS).status;
+  const nimMetricsAreaAvailable = useIsAreaAvailable(SupportedArea.NIM_MODEL).status;

   const checkModelMetricsEnabled = () =>
     performanceMetricsAreaAvailable || biasMetricsAreaAvailable || nimMetricsAreaAvailable;
diff --git a/frontend/src/pages/projects/screens/detail/overview/serverModels/deployedModels/DeployedModelCard.tsx b/frontend/src/pages/projects/screens/detail/overview/serverModels/deployedModels/DeployedModelCard.tsx
index 5846fcd9db..97fc8692e6 100644
--- a/frontend/src/pages/projects/screens/detail/overview/serverModels/deployedModels/DeployedModelCard.tsx
+++ b/frontend/src/pages/projects/screens/detail/overview/serverModels/deployedModels/DeployedModelCard.tsx
@@ -23,8 +23,6 @@ import InferenceServiceEndpoint from '~/pages/modelServing/screens/global/Infere
 import TypeBorderedCard from '~/concepts/design/TypeBorderedCard';
 import { SupportedArea, useIsAreaAvailable } from '~/concepts/areas/';
 import { getDisplayNameFromK8sResource } from '~/concepts/k8s/utils';
-import { ProjectDetailsContext } from '~/pages/projects/ProjectDetailsContext';
-import { isProjectNIMSupported } from '~/pages/modelServing/screens/projects/nimUtils';

 interface DeployedModelCardProps {
   inferenceService: InferenceServiceKind;
@@ -38,9 +36,6 @@ const DeployedModelCard: React.FC<DeployedModelCardProps> = ({
   const navigate = useNavigate();
   const kserveMetricsEnabled = useIsAreaAvailable(SupportedArea.K_SERVE_METRICS).status;
   const modelMesh = isModelMesh(inferenceService);
-  const { currentProject } = React.useContext(ProjectDetailsContext);
-  const isKServeNIMEnabled = isProjectNIMSupported(currentProject);
-
   const modelMetricsSupported = modelMetricsEnabled && (modelMesh || kserveMetricsEnabled);

   const inferenceServiceDisplayName = getDisplayNameFromK8sResource(inferenceService);