From 859f097e6354ded2b85b5ee4f259eb15ec79b372 Mon Sep 17 00:00:00 2001 From: LinoyBitan1 Date: Wed, 15 Jan 2025 15:04:14 +0200 Subject: [PATCH] fixed all issues but testing --- .../__mocks__/mockKserveMetricsConfigMap.ts | 243 +++++++++++++++ .../cypress/cypress/pages/modelMetrics.ts | 34 ++ .../mocked/modelServing/modelMetrics.cy.ts | 238 ++++++++++++++ .../prometheus/kservePerformanceMetrics.ts | 292 +++++++++++++++++- .../metrics/kserve/NimMetricsContext.tsx | 119 +++++++ .../metrics/kserve/__tests__/utils.spec.ts | 67 +++- frontend/src/concepts/metrics/kserve/const.ts | 9 + .../content/NIMCurrentRequestsGraph.tsx | 70 +++++ .../kserve/content/NIMKVCacheUsageGraph.tsx | 39 +++ .../content/NIMRequestsOutcomesGraph.tsx | 53 ++++ .../content/NIMTimeForFirstTokenGraphs.tsx | 40 +++ .../content/NIMTimePerOutputTokenGraph.tsx | 39 +++ .../kserve/content/NIMTokensCountGraph.tsx | 59 ++++ .../kserve/content/NimMetricsContent.tsx | 19 ++ .../kserve/content/NimPerformanceGraphs.tsx | 115 +++++++ frontend/src/concepts/metrics/kserve/types.ts | 36 ++- .../kserve/useNimMetricsGraphDefinition.ts | 40 +++ frontend/src/concepts/metrics/kserve/utils.ts | 9 + .../global/InferenceServiceTableRow.tsx | 3 +- .../screens/metrics/MetricsChart.tsx | 28 +- .../screens/metrics/MetricsPageTabs.tsx | 44 ++- .../screens/metrics/nim/NimMetrics.tsx | 20 ++ .../screens/metrics/nim/NimTab.tsx | 46 +++ .../modelServing/screens/metrics/types.ts | 3 + .../metrics/useMetricsPageEnabledTabs.ts | 7 + .../modelServing/useModelMetricsEnabled.ts | 4 +- .../deployedModels/DeployedModelCard.tsx | 8 +- 27 files changed, 1649 insertions(+), 35 deletions(-) create mode 100644 frontend/src/concepts/metrics/kserve/NimMetricsContext.tsx create mode 100644 frontend/src/concepts/metrics/kserve/content/NIMCurrentRequestsGraph.tsx create mode 100644 frontend/src/concepts/metrics/kserve/content/NIMKVCacheUsageGraph.tsx create mode 100644 frontend/src/concepts/metrics/kserve/content/NIMRequestsOutcomesGraph.tsx 
create mode 100644 frontend/src/concepts/metrics/kserve/content/NIMTimeForFirstTokenGraphs.tsx create mode 100644 frontend/src/concepts/metrics/kserve/content/NIMTimePerOutputTokenGraph.tsx create mode 100644 frontend/src/concepts/metrics/kserve/content/NIMTokensCountGraph.tsx create mode 100644 frontend/src/concepts/metrics/kserve/content/NimMetricsContent.tsx create mode 100644 frontend/src/concepts/metrics/kserve/content/NimPerformanceGraphs.tsx create mode 100644 frontend/src/concepts/metrics/kserve/useNimMetricsGraphDefinition.ts create mode 100644 frontend/src/pages/modelServing/screens/metrics/nim/NimMetrics.tsx create mode 100644 frontend/src/pages/modelServing/screens/metrics/nim/NimTab.tsx diff --git a/frontend/src/__mocks__/mockKserveMetricsConfigMap.ts b/frontend/src/__mocks__/mockKserveMetricsConfigMap.ts index 02713dd556..b8a9b99c17 100644 --- a/frontend/src/__mocks__/mockKserveMetricsConfigMap.ts +++ b/frontend/src/__mocks__/mockKserveMetricsConfigMap.ts @@ -144,6 +144,236 @@ export const MOCK_KSERVE_METRICS_CONFIG_3 = ` ] }`; +// NVIDIA NIM +export const MOCK_NIM_METRICS_CONFIG_1 = `{ + "config": [ + { + "title": "GPU cache usage over time", + "type": "KV_CACHE", + "queries": [ + { + "title": "GPU cache usage over time", + "query": "sum_over_time(gpu_cache_usage_perc{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}[24h])" + } + ] + }, + { + "title": "Current running, waiting, and max requests count", + "type": "CURRENT_REQUESTS", + "queries": [ + { + "title": "Requests waiting", + "query": "num_requests_waiting{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}" + }, + { + "title": "Requests running", + "query": "num_requests_running{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}" + }, + { + "title": "Max requests", + "query": "num_request_max{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}" + } + ] + }, + { + "title": "Tokens count", + "type": "TOKENS_COUNT", + "queries": [ + { + "title": "Total prompts token", 
+ "query": "round(rate(prompt_tokens_total{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}[1m]))" + }, + { + "title": "Total generation token", + "query": "round(rate(generation_tokens_total{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}[1m]))" + } + ] + }, + { + "title": "Time to first token", + "type": "TIME_TO_FIRST_TOKEN", + "queries": [ + { + "title": "Time to first token", + "query": "rate(time_to_first_token_seconds_sum{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}[1m])" + } + ] + }, + { + "title": "Time per output token", + "type": "TIME_PER_OUTPUT_TOKEN", + "queries": [ + { + "title": "Time per output token", + "query": "rate(time_per_output_token_seconds_sum{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}[1m])" + } + ] + }, + { + "title": "Requests outcomes", + "type": "REQUEST_OUTCOMES", + "queries": [ + { + "title": "Number of successful incoming requests", + "query": "round(sum(increase(request_success_total{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}[5m])))" + }, + { + "title": "Number of failed incoming requests", + "query": "round(sum(increase(request_failure_total{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}[5m])))" + } + ] + } + ] +}`; + +export const MOCK_NIM_METRICS_CONFIG_3 = `{ + "config": [ + { + "title": "GPU cache usage over time", + "type": "KV_CACHE", + "queries": [ + { + "title": "GPU cache usage over time", + "query": "sum_over_time(gpu_cache_usage_perc{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}[24h])" + } + ] + }, + { + "title": "Current running, waiting, and max requests count", + "type": "CURRENT_REQUESTS", + "queries": [ + { + "title": "Requests waiting", + "query": "num_requests_waiting{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}" + }, + { + "title": "Requests running", + "query": "num_requests_running{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}" + }, + { + "title": "Max requests", + "query": 
"num_request_max{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}" + } + ] + } + ] +}`; + +export const MOCK_NIM_METRICS_CONFIG_MISSING_QUERY = `{ + "config": [ + { + "title": "GPU cache usage over time", + "type": "KV_CACHE", + "queries": [ + { + "title": "GPU cache usage over time", + "query": "sum_over_time(gpu_cache_usage_perc{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}[24h])" + } + ] + }, + { + "title": "Tokens count", + "type": "TOKENS_COUNT", + "queries": [ + { + "title": "Total prompts token", + "query": "round(rate(prompt_tokens_total{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}[1m]))" + } + ] + } + ] +}`; + +export const MOCK_NIM_METRICS_CONFIG_MISSING_QUERY_2 = `{ + "config": [ + { + "title": "GPU cache usage over time", + "type": "KV_CACHE", + "queries": [ + { + "title": "GPU cache usage over time", + "query": "sum_over_time(gpu_cache_usage_perc{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}[24h])" + } + ] + }, + { + "title": "Requests outcomes", + "type": "REQUEST_OUTCOMES", + "queries": [ + { + "title": "Number of failed incoming requests", + "query": "round(sum(increase(request_failure_total{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}[5m])))" + } + ] + }, + { + "title": "Current running, waiting, and max requests count", + "type": "CURRENT_REQUESTS", + "queries": [ + { + "title": "Requests running", + "query": "num_requests_running{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}" + } + ] + }, + { + "title": "Tokens count", + "type": "TOKENS_COUNT", + "queries": [ + { + "title": "Total generation token", + "query": "round(rate(generation_tokens_total{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}[1m]))" + } + ] + } + ] +}`; + +export const MOCK_NIM_METRICS_CONFIG_MISSING_QUERY_3 = `{ + "config": [ + { + "title": "GPU cache usage over time", + "type": "KV_CACHE", + "queries": [ + { + "title": "GPU cache usage over time", + "query": 
"sum_over_time(gpu_cache_usage_perc{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}[24h])" + } + ] + }, + { + "title": "Requests outcomes", + "type": "REQUEST_OUTCOMES", + "queries": [ + { + "title": "Number of successful incoming requests", + "query": "round(sum(increase(request_success_total{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}[5m])))" + } + ] + }, + { + "title": "Current running, waiting, and max requests count", + "type": "CURRENT_REQUESTS", + "queries": [ + { + "title": "Max requests", + "query": "num_request_max{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}" + } + ] + }, + { + "title": "Tokens count", + "type": "TOKENS_COUNT", + "queries": [ + { + "title": "Total prompts token", + "query": "round(rate(prompt_tokens_total{namespace='tomer-test-2', pod=~'nim-deploy-predictor-.*'}[1m]))" + } + ] + } + ] +}`; + export const mockKserveMetricsConfigMap = ({ namespace = 'test-project', modelName = 'test-inference-service', @@ -156,3 +386,16 @@ export const mockKserveMetricsConfigMap = ({ }; return mockConfigMap({ data, namespace, name: `${modelName}-metrics-dashboard` }); }; + +export const mockNimMetricsConfigMap = ({ + namespace = 'test-project', + modelName = 'test-inference-service', + supported = true, + config = MOCK_NIM_METRICS_CONFIG_1, +}: MockKserveMetricsConfigMapType): ConfigMapKind => { + const data = { + metrics: config, + supported: String(supported), + }; + return mockConfigMap({ data, namespace, name: `${modelName}-metrics-dashboard` }); +}; diff --git a/frontend/src/__tests__/cypress/cypress/pages/modelMetrics.ts b/frontend/src/__tests__/cypress/cypress/pages/modelMetrics.ts index 9b1ea5c601..9e0a54c124 100644 --- a/frontend/src/__tests__/cypress/cypress/pages/modelMetrics.ts +++ b/frontend/src/__tests__/cypress/cypress/pages/modelMetrics.ts @@ -46,6 +46,24 @@ class ModelMetricsPerformance extends ModelMetricsGlobal { } } +class ModelMetricsNim extends ModelMetricsGlobal { + visit(project: string, model: 
string) { + cy.visitWithLogin(`/modelServing/${project}/metrics/${model}/performance`); + this.wait(); + } + + protected wait() { + cy.findByTestId('nim-metrics-loaded'); + cy.testA11y(); + } + + findTab() { + return { + nimTab: cy.findByTestId('nim-tab'), + }; + } +} + class ModelMetricsKserve extends ModelMetricsPerformance { findKserveAreaDisabledCard() { return cy.findByTestId('kserve-metrics-disabled'); @@ -60,6 +78,20 @@ class ModelMetricsKserve extends ModelMetricsPerformance { } } +class ModelMetricsKserveNim extends ModelMetricsNim { + findKserveAreaDisabledCard() { + return cy.findByTestId('kserve-metrics-disabled'); + } + + findUnsupportedRuntimeCard() { + return cy.findByTestId('kserve-metrics-runtime-unsupported'); + } + + findUnknownErrorCard() { + return cy.findByTestId('kserve-unknown-error'); + } +} + class ModelMetricsBias extends ModelMetricsGlobal { visit(project: string, model: string, disableA11y = false) { cy.visitWithLogin(`/modelServing/${project}/metrics/${model}/bias`); @@ -199,8 +231,10 @@ class ConfigureBiasMetricModal extends Modal { } export const modelMetricsPerformance = new ModelMetricsPerformance(); +export const modelMetricsNim = new ModelMetricsNim(); export const modelMetricsBias = new ModelMetricsBias(); export const serverMetrics = new ServerMetrics(); export const modelMetricsConfigureSection = new ModelMetricsConfigureSection(); export const configureBiasMetricModal = new ConfigureBiasMetricModal(); export const modelMetricsKserve = new ModelMetricsKserve(); +export const modelMetricsKserveNim = new ModelMetricsKserveNim(); diff --git a/frontend/src/__tests__/cypress/cypress/tests/mocked/modelServing/modelMetrics.cy.ts b/frontend/src/__tests__/cypress/cypress/tests/mocked/modelServing/modelMetrics.cy.ts index dcd1784093..7f640e6aeb 100644 --- a/frontend/src/__tests__/cypress/cypress/tests/mocked/modelServing/modelMetrics.cy.ts +++ b/frontend/src/__tests__/cypress/cypress/tests/mocked/modelServing/modelMetrics.cy.ts @@ -12,6 
+12,7 @@ import { modelMetricsBias, modelMetricsConfigureSection, modelMetricsKserve, + modelMetricsKserveNim, modelMetricsPerformance, serverMetrics, } from '~/__tests__/cypress/cypress/pages/modelMetrics'; @@ -48,11 +49,17 @@ import { MOCK_KSERVE_METRICS_CONFIG_2, MOCK_KSERVE_METRICS_CONFIG_3, MOCK_KSERVE_METRICS_CONFIG_MISSING_QUERY, + MOCK_NIM_METRICS_CONFIG_3, + MOCK_NIM_METRICS_CONFIG_MISSING_QUERY, + MOCK_NIM_METRICS_CONFIG_MISSING_QUERY_2, + MOCK_NIM_METRICS_CONFIG_MISSING_QUERY_3, mockKserveMetricsConfigMap, + mockNimMetricsConfigMap, } from '~/__mocks__/mockKserveMetricsConfigMap'; type HandlersProps = { disablePerformanceMetrics?: boolean; + disableNIMModelServing?: boolean; disableTrustyBiasMetrics?: boolean; disableKServeMetrics?: boolean; servingRuntimes?: ServingRuntimeKind[]; @@ -81,6 +88,7 @@ const mockTrustyDBSecret = (): SecretKind => const initIntercepts = ({ disablePerformanceMetrics, + disableNIMModelServing = false, disableTrustyBiasMetrics, disableKServeMetrics, servingRuntimes = [mockServingRuntimeK8sResource({})], @@ -102,6 +110,7 @@ const initIntercepts = ({ mockDashboardConfig({ disableTrustyBiasMetrics, disablePerformanceMetrics, + disableNIMModelServing, disableKServeMetrics, }), ); @@ -778,3 +787,232 @@ describe('KServe performance metrics', () => { modelMetricsKserve.getMetricsChart('Memory usage').shouldHaveNoData(); }); }); + +//Nim Metrics Tests +describe('KServe NIM metrics', () => { + it('should show error when ConfigMap is missing', () => { + initIntercepts({ + disableTrustyBiasMetrics: false, + disablePerformanceMetrics: false, + disableNIMModelServing: false, + disableKServeMetrics: false, + hasServingData: true, + hasBiasData: false, + inferenceServices: [mockInferenceServiceK8sResource({ isModelMesh: false })], + }); + + cy.interceptK8s( + { + model: ConfigMapModel, + ns: 'test-project', + name: 'test-inference-service-metrics-dashboard', + }, + { statusCode: 404, body: mock404Error({}) }, + ); + + 
modelMetricsKserveNim.visit('tomer-test-2', 'nim-deploy'); + modelMetricsKserveNim.findUnknownErrorCard().should('be.visible'); + }); + + it('should inform user when serving runtime is unsupported', () => { + initIntercepts({ + disableTrustyBiasMetrics: false, + disablePerformanceMetrics: false, + disableNIMModelServing: false, + disableKServeMetrics: false, + hasServingData: true, + hasBiasData: false, + inferenceServices: [mockInferenceServiceK8sResource({ isModelMesh: false })], + }); + + cy.interceptK8s(ConfigMapModel, mockNimMetricsConfigMap({ supported: false })); + + modelMetricsKserveNim.visit('test-project', 'test-inference-service'); + modelMetricsKserveNim.findUnsupportedRuntimeCard().should('be.visible'); + }); + + it('should handle a malformed graph definition gracefully', () => { + initIntercepts({ + disableTrustyBiasMetrics: false, + disablePerformanceMetrics: false, + disableKServeMetrics: false, + disableNIMModelServing: false, + hasServingData: true, + hasBiasData: false, + inferenceServices: [mockInferenceServiceK8sResource({ isModelMesh: false })], + }); + + cy.interceptK8s( + ConfigMapModel, + mockNimMetricsConfigMap({ config: MOCK_KSERVE_METRICS_CONFIG_2 }), + ); + + modelMetricsKserveNim.visit('test-project', 'test-inference-service'); + modelMetricsKserveNim.findUnknownErrorCard().should('be.visible'); + }); + + it('should display only 2 graphs, when the config specifies', () => { + initIntercepts({ + disableTrustyBiasMetrics: false, + disablePerformanceMetrics: false, + disableKServeMetrics: false, + disableNIMModelServing: false, + hasServingData: true, + hasBiasData: false, + inferenceServices: [mockInferenceServiceK8sResource({ isModelMesh: false })], + }); + + cy.interceptK8s(ConfigMapModel, mockNimMetricsConfigMap({ config: MOCK_NIM_METRICS_CONFIG_3 })); + + modelMetricsKserveNim.visit('test-project', 'test-inference-service'); + modelMetricsKserveNim.getMetricsChart('GPU cache usage over time').shouldHaveData(); + 
modelMetricsKserveNim + .getMetricsChart('Current running, waiting, and max requests count') + .shouldHaveData(); + modelMetricsKserveNim.getAllMetricsCharts().should('have.length', 2); + }); + + it('charts should not error out if a query is missing and there is other data', () => { + initIntercepts({ + disableTrustyBiasMetrics: false, + disablePerformanceMetrics: false, + disableKServeMetrics: false, + disableNIMModelServing: false, + hasServingData: true, + hasBiasData: false, + inferenceServices: [mockInferenceServiceK8sResource({ isModelMesh: false })], + }); + + cy.interceptK8s( + ConfigMapModel, + mockNimMetricsConfigMap({ config: MOCK_NIM_METRICS_CONFIG_MISSING_QUERY }), + ); + + modelMetricsKserveNim.visit('test-project', 'test-inference-service'); + modelMetricsKserveNim.getAllMetricsCharts().should('have.length', 2); + modelMetricsKserveNim.getMetricsChart('GPU cache usage over time').shouldHaveData(); + modelMetricsKserveNim.getMetricsChart('Tokens count').shouldHaveData(); + }); + + it('charts should not error out if a query is missing and there is no data', () => { + initIntercepts({ + disableTrustyBiasMetrics: false, + disablePerformanceMetrics: false, + disableKServeMetrics: false, + disableNIMModelServing: false, + hasServingData: false, + hasBiasData: false, + inferenceServices: [mockInferenceServiceK8sResource({ isModelMesh: false })], + }); + + cy.interceptK8s( + ConfigMapModel, + mockNimMetricsConfigMap({ config: MOCK_NIM_METRICS_CONFIG_MISSING_QUERY }), + ); + + modelMetricsKserveNim.visit('test-project', 'test-inference-service'); + modelMetricsKserveNim.getAllMetricsCharts().should('have.length', 2); + modelMetricsKserveNim.getMetricsChart('GPU cache usage over time').shouldHaveNoData(); + modelMetricsKserveNim.getMetricsChart('Tokens count').shouldHaveNoData(); + }); + + it('charts should not error out if a query is missing and there is other data', () => { + initIntercepts({ + disableTrustyBiasMetrics: false, + disablePerformanceMetrics: 
false, + disableKServeMetrics: false, + disableNIMModelServing: false, + hasServingData: true, + hasBiasData: false, + inferenceServices: [mockInferenceServiceK8sResource({ isModelMesh: false })], + }); + + cy.interceptK8s( + ConfigMapModel, + mockNimMetricsConfigMap({ config: MOCK_NIM_METRICS_CONFIG_MISSING_QUERY_2 }), + ); + + modelMetricsKserveNim.visit('test-project', 'test-inference-service'); + modelMetricsKserveNim.getAllMetricsCharts().should('have.length', 4); + modelMetricsKserveNim.getMetricsChart('GPU cache usage over time').shouldHaveData(); + modelMetricsKserveNim.getMetricsChart('Requests outcomes').shouldHaveData(); + modelMetricsKserveNim + .getMetricsChart('Current running, waiting, and max requests count') + .shouldHaveData(); + modelMetricsKserveNim.getMetricsChart('Tokens count').shouldHaveData(); + }); + + it('charts should not error out if a query is missing and there is other data', () => { + initIntercepts({ + disableTrustyBiasMetrics: false, + disablePerformanceMetrics: false, + disableKServeMetrics: false, + disableNIMModelServing: false, + hasServingData: true, + hasBiasData: false, + inferenceServices: [mockInferenceServiceK8sResource({ isModelMesh: false })], + }); + + cy.interceptK8s( + ConfigMapModel, + mockNimMetricsConfigMap({ config: MOCK_NIM_METRICS_CONFIG_MISSING_QUERY_3 }), + ); + + modelMetricsKserveNim.visit('test-project', 'test-inference-service'); + modelMetricsKserveNim.getAllMetricsCharts().should('have.length', 4); + modelMetricsKserveNim.getMetricsChart('GPU cache usage over time').shouldHaveData(); + modelMetricsKserveNim.getMetricsChart('Requests outcomes').shouldHaveData(); + modelMetricsKserveNim + .getMetricsChart('Current running, waiting, and max requests count') + .shouldHaveData(); + modelMetricsKserveNim.getMetricsChart('Tokens count').shouldHaveData(); + }); + + it('charts should show data when serving data is available', () => { + initIntercepts({ + disableTrustyBiasMetrics: false, + 
disablePerformanceMetrics: false, + disableKServeMetrics: false, + disableNIMModelServing: false, + hasServingData: true, + hasBiasData: false, + inferenceServices: [mockInferenceServiceK8sResource({ isModelMesh: false })], + }); + + cy.interceptK8s(ConfigMapModel, mockNimMetricsConfigMap({ supported: true })); + modelMetricsKserveNim.visit('test-project', 'test-inference-service'); + modelMetricsKserveNim.getAllMetricsCharts().should('have.length', 6); + modelMetricsKserveNim.getMetricsChart('GPU cache usage over time').shouldHaveData(); + modelMetricsKserveNim + .getMetricsChart('Current running, waiting, and max requests count') + .shouldHaveData(); + modelMetricsKserveNim.getMetricsChart('Tokens count').shouldHaveData(); + modelMetricsKserveNim.getMetricsChart('Time to first token').shouldHaveData(); + modelMetricsKserveNim.getMetricsChart('Time per output token').shouldHaveData(); + modelMetricsKserveNim.getMetricsChart('Requests outcomes').shouldHaveData(); + }); + + it('charts should show empty state when no serving data is available', () => { + initIntercepts({ + disableTrustyBiasMetrics: false, + disablePerformanceMetrics: false, + disableKServeMetrics: false, + disableNIMModelServing: false, + hasServingData: false, + hasBiasData: false, + inferenceServices: [mockInferenceServiceK8sResource({ isModelMesh: false })], + }); + + cy.interceptK8s(ConfigMapModel, mockNimMetricsConfigMap({ supported: true })); + + modelMetricsKserveNim.visit('test-project', 'test-inference-service'); + modelMetricsKserveNim.getMetricsChart('GPU cache usage over time').shouldHaveNoData(); + modelMetricsKserveNim + .getMetricsChart('Current running, waiting, and max requests count') + .shouldHaveNoData(); + modelMetricsKserveNim.getMetricsChart('Tokens count').shouldHaveNoData(); + modelMetricsKserveNim.getMetricsChart('Time to first token').shouldHaveNoData(); + modelMetricsKserveNim.getMetricsChart('Time per output token').shouldHaveNoData(); + 
modelMetricsKserveNim.getMetricsChart('Requests outcomes').shouldHaveNoData(); + }); +}); diff --git a/frontend/src/api/prometheus/kservePerformanceMetrics.ts b/frontend/src/api/prometheus/kservePerformanceMetrics.ts index 129b3bd349..39480ea3e8 100644 --- a/frontend/src/api/prometheus/kservePerformanceMetrics.ts +++ b/frontend/src/api/prometheus/kservePerformanceMetrics.ts @@ -1,5 +1,8 @@ import React from 'react'; -import { KserveMetricGraphDefinition } from '~/concepts/metrics/kserve/types'; +import { + KserveMetricGraphDefinition, + NimMetricGraphDefinition, +} from '~/concepts/metrics/kserve/types'; import { defaultResponsePredicate } from '~/api/prometheus/usePrometheusQueryRange'; import { SupportedArea, useIsAreaAvailable } from '~/concepts/areas'; import { TimeframeTitle } from '~/concepts/metrics/types'; @@ -178,6 +181,293 @@ export const useFetchKserveMemoryUsageData = ( }); }; +// Nim Metrics graphs + +// Graph #1 - KV Cache usage over time +type KVCacheUsageData = { + data: { + kvCacheUsage: PendingContextResourceData; + }; + refreshAll: () => void; +}; + +export const useFetchNimKVCacheUsageData = ( + metricsDef: NimMetricGraphDefinition, + timeframe: TimeframeTitle, + endInMs: number, + namespace: string, +): KVCacheUsageData => { + const active = useIsAreaAvailable(SupportedArea.K_SERVE_METRICS).status; + + const kvCacheUsage = useQueryRangeResourceData( + active, + metricsDef.queries[0]?.query, + endInMs, + timeframe, + defaultResponsePredicate, + namespace, + ); + + const data = React.useMemo( + () => ({ + kvCacheUsage, + }), + [kvCacheUsage], + ); + + return useAllSettledContextResourceData(data, { + kvCacheUsage: DEFAULT_PENDING_CONTEXT_RESOURCE, + }); +}; + +// Graph #2 +type CurrentRequestsData = { + data: { + requestsWaiting: PendingContextResourceData; + requestsRunning: PendingContextResourceData; + maxRequests: PendingContextResourceData; + }; + refreshAll: () => void; +}; + +export const useFetchNimCurrentRequestsData = ( + metricsDef: 
NimMetricGraphDefinition, + timeframe: TimeframeTitle, + endInMs: number, + namespace: string, +): CurrentRequestsData => { + // Check if Nim metrics are active + const active = useIsAreaAvailable(SupportedArea.K_SERVE_METRICS).status; + + // Extract the queries for "Requests waiting", "Requests running", and "Max requests" + const requestsWaitingQuery = metricsDef.queries[0].query; + const requestsRunningQuery = metricsDef.queries[1].query; + const maxRequestsQuery = metricsDef.queries[2].query; + + // Fetch data using useQueryRangeResourceData + const requestsWaiting = useQueryRangeResourceData( + active, + requestsWaitingQuery, + endInMs, + timeframe, + defaultResponsePredicate, + namespace, + ); + + const requestsRunning = useQueryRangeResourceData( + active, + requestsRunningQuery, + endInMs, + timeframe, + defaultResponsePredicate, + namespace, + ); + + const maxRequests = useQueryRangeResourceData( + active, + maxRequestsQuery, + endInMs, + timeframe, + defaultResponsePredicate, + namespace, + ); + + // Combine the fetched data + const data = React.useMemo( + () => ({ + requestsWaiting, + requestsRunning, + maxRequests, + }), + [requestsWaiting, requestsRunning, maxRequests], + ); + + // Use helper to handle pending state and refresh functionality + return useAllSettledContextResourceData(data, { + requestsWaiting: DEFAULT_PENDING_CONTEXT_RESOURCE, + requestsRunning: DEFAULT_PENDING_CONTEXT_RESOURCE, + maxRequests: DEFAULT_PENDING_CONTEXT_RESOURCE, + }); +}; + +// Graph #3 - Total Prompt Token Count and Total Generation Token Count +type TokensCountData = { + data: { + totalPromptTokenCount: PendingContextResourceData; + totalGenerationTokenCount: PendingContextResourceData; + }; + refreshAll: () => void; +}; + +export const useFetchNimTokensCountData = ( + metricsDef: NimMetricGraphDefinition, + timeframe: TimeframeTitle, + endInMs: number, + namespace: string, +): TokensCountData => { + const active = 
useIsAreaAvailable(SupportedArea.K_SERVE_METRICS).status; + + // Extract the queries for "Total Prompt Token Count" and "Total Generation Token Count + const totalPromptTokenCount = useQueryRangeResourceData( + active, + metricsDef.queries[0]?.query, + endInMs, + timeframe, + defaultResponsePredicate, + namespace, + ); + + const totalGenerationTokenCount = useQueryRangeResourceData( + active, + metricsDef.queries[1]?.query, + endInMs, + timeframe, + defaultResponsePredicate, + namespace, + ); + + const data = React.useMemo( + () => ({ + totalPromptTokenCount, + totalGenerationTokenCount, + }), + [totalPromptTokenCount, totalGenerationTokenCount], + ); + + return useAllSettledContextResourceData(data, { + totalPromptTokenCount: DEFAULT_PENDING_CONTEXT_RESOURCE, + totalGenerationTokenCount: DEFAULT_PENDING_CONTEXT_RESOURCE, + }); +}; + +// Graph #4 - Time to First Token +type TimeToFirstTokenData = { + data: { + timeToFirstToken: PendingContextResourceData; + }; + refreshAll: () => void; +}; + +export const useFetchNimTimeToFirstTokenData = ( + metricsDef: NimMetricGraphDefinition, + timeframe: TimeframeTitle, + endInMs: number, + namespace: string, +): TimeToFirstTokenData => { + const active = useIsAreaAvailable(SupportedArea.K_SERVE_METRICS).status; + + const timeToFirstToken = useQueryRangeResourceData( + active, + metricsDef.queries[0]?.query, + endInMs, + timeframe, + defaultResponsePredicate, + namespace, + ); + + const data = React.useMemo( + () => ({ + timeToFirstToken, + }), + [timeToFirstToken], + ); + + return useAllSettledContextResourceData(data, { + timeToFirstToken: DEFAULT_PENDING_CONTEXT_RESOURCE, + }); +}; + +// Graph #5 +type TimePerOutputTokenData = { + data: { + timePerOutputToken: PendingContextResourceData; + }; + refreshAll: () => void; +}; +export const useFetchNimTimePerOutputTokenData = ( + metricsDef: NimMetricGraphDefinition, + timeframe: TimeframeTitle, + endInMs: number, + namespace: string, +): TimePerOutputTokenData => { + // Check 
if Nim metrics are active + const active = useIsAreaAvailable(SupportedArea.K_SERVE_METRICS).status; + // Extract the query for TIME_PER_OUTPUT_TOKEN + const timePerOutputTokenQuery = metricsDef.queries[0].query; // Assumes it's the first query in the metric definition + // Fetch data using useQueryRangeResourceData + const timePerOutputToken = useQueryRangeResourceData( + active, + timePerOutputTokenQuery, + endInMs, + timeframe, + defaultResponsePredicate, + namespace, + ); + // Memoize the fetched data + const data = React.useMemo( + () => ({ + timePerOutputToken, + }), + [timePerOutputToken], + ); + // Return all-settled context resource data + return useAllSettledContextResourceData(data, { + timePerOutputToken: DEFAULT_PENDING_CONTEXT_RESOURCE, + }); +}; + +// Graph #6 +type RequestsOutcomesData = { + data: { + successCount: PendingContextResourceData; + failedCount: PendingContextResourceData; + }; + refreshAll: () => void; +}; + +export const useFetchNimRequestsOutcomesData = ( + metricsDef: NimMetricGraphDefinition, + timeframe: TimeframeTitle, + endInMs: number, + namespace: string, +): RequestsOutcomesData => { + const active = useIsAreaAvailable(SupportedArea.K_SERVE_METRICS).status; + + const successQuery = metricsDef.queries[0]?.query; + const failedQuery = metricsDef.queries[1]?.query; + + const successCount = useQueryRangeResourceData( + active, + successQuery, + endInMs, + timeframe, + defaultResponsePredicate, + namespace, + ); + + const failedCount = useQueryRangeResourceData( + active, + failedQuery, + endInMs, + timeframe, + defaultResponsePredicate, + namespace, + ); + + const data = React.useMemo( + () => ({ + successCount, + failedCount, + }), + [failedCount, successCount], + ); + + return useAllSettledContextResourceData(data, { + successCount: DEFAULT_PENDING_CONTEXT_RESOURCE, + failedCount: DEFAULT_PENDING_CONTEXT_RESOURCE, + }); +}; + const useAllSettledContextResourceData = < T, U extends Record>, diff --git 
a/frontend/src/concepts/metrics/kserve/NimMetricsContext.tsx b/frontend/src/concepts/metrics/kserve/NimMetricsContext.tsx new file mode 100644 index 0000000000..be008dfcf9 --- /dev/null +++ b/frontend/src/concepts/metrics/kserve/NimMetricsContext.tsx @@ -0,0 +1,119 @@ +import * as React from 'react'; +import { + Bullseye, + EmptyState, + EmptyStateBody, + EmptyStateVariant, + Spinner, +} from '@patternfly/react-core'; +import { CubesIcon, ErrorCircleOIcon } from '@patternfly/react-icons'; +import { MetricsCommonContext } from '~/concepts/metrics/MetricsCommonContext'; +import useKserveMetricsConfigMap from '~/concepts/metrics/kserve/useKserveMetricsConfigMap'; +import useNimMetricsGraphDefinitions from '~/concepts/metrics/kserve/useNimMetricsGraphDefinition'; +import useRefreshInterval from '~/utilities/useRefreshInterval'; +import { RefreshIntervalValue } from '~/concepts/metrics/const'; +import { RefreshIntervalTitle, TimeframeTitle } from '~/concepts/metrics/types'; +import { NimMetricGraphDefinition } from '~/concepts/metrics/kserve/types'; +import { conditionalArea, SupportedArea } from '~/concepts/areas'; + +type NimMetricsContextProps = { + namespace: string; + timeframe: TimeframeTitle; + refreshInterval: RefreshIntervalTitle; + lastUpdateTime: number; + graphDefinitions: NimMetricGraphDefinition[]; +}; + +export const NimMetricsContext = React.createContext({ + namespace: '', + timeframe: TimeframeTitle.ONE_DAY, + refreshInterval: RefreshIntervalTitle.FIVE_MINUTES, + lastUpdateTime: 0, + graphDefinitions: [], +}); + +type NimMetricsContextProviderProps = { + children: React.ReactNode; + namespace: string; + modelName: string; +}; + +export const NimMetricsContextProvider = conditionalArea( + SupportedArea.K_SERVE_METRICS, + true, +)(({ children, namespace, modelName }) => { + const { currentTimeframe, currentRefreshInterval, lastUpdateTime, setLastUpdateTime } = + React.useContext(MetricsCommonContext); + const [configMap, configMapLoaded, configMapError] 
= useKserveMetricsConfigMap( + namespace, + modelName, + ); + const { + graphDefinitions, + error: graphDefinitionsError, + loaded: graphDefinitionsLoaded, + supported, + } = useNimMetricsGraphDefinitions(configMap); + + const loaded = configMapLoaded && graphDefinitionsLoaded; + + const error = graphDefinitionsError || configMapError; + + const refreshAllMetrics = React.useCallback(() => { + setLastUpdateTime(Date.now()); + }, [setLastUpdateTime]); + + useRefreshInterval(RefreshIntervalValue[currentRefreshInterval], refreshAllMetrics); + + const contextValue = React.useMemo( + () => ({ + namespace, + lastUpdateTime, + refreshInterval: currentRefreshInterval, + timeframe: currentTimeframe, + graphDefinitions, + }), + [currentRefreshInterval, currentTimeframe, graphDefinitions, lastUpdateTime, namespace], + ); + + if (error) { + return ( + + Error loading metrics configuration + + ); + } + + if (!loaded) { + return ( + + + + ); + } + + if (!supported) { + return ( + + + {modelName} is using a custom serving runtime. Metrics are only supported for models + served via a pre-installed runtime when the single-model serving platform is enabled for a + project. 
+ + + ); + } + + return {children}; +}); diff --git a/frontend/src/concepts/metrics/kserve/__tests__/utils.spec.ts b/frontend/src/concepts/metrics/kserve/__tests__/utils.spec.ts index d8429afc97..b3db2af0d4 100644 --- a/frontend/src/concepts/metrics/kserve/__tests__/utils.spec.ts +++ b/frontend/src/concepts/metrics/kserve/__tests__/utils.spec.ts @@ -1,7 +1,8 @@ -import { KserveMetricsGraphTypes } from '~/concepts/metrics/kserve/const'; +import { KserveMetricsGraphTypes, NimMetricsGraphTypes } from '~/concepts/metrics/kserve/const'; import { isKserveMetricsConfigMapKind, isValidKserveMetricsDataObject, + isValidNimMetricsDataObject, } from '~/concepts/metrics/kserve/utils'; import { mockConfigMap } from '~/__mocks__/mockConfigMap'; @@ -118,3 +119,67 @@ describe('isValidKserveMetricsDataObject', () => { expect(isValidKserveMetricsDataObject(1)).toBe(false); }); }); + +describe('isValidNimMetricsDataObject', () => { + it('should return true when given a valid value', () => { + expect( + isValidNimMetricsDataObject({ + config: [ + { + title: 'Requests outcomes', + type: NimMetricsGraphTypes.REQUEST_OUTCOMES, + queries: [ + { + title: 'Number of successful incoming requests', + query: 'prometheus query', + }, + { + title: 'Number of failed incoming requests', + query: 'prometheus query', + }, + ], + }, + ], + }), + ).toBe(true); + }); + + it('should return false when given an invalid value', () => { + expect( + isValidNimMetricsDataObject({ + cats: [ + { + title: 'Requests outcomes', + type: NimMetricsGraphTypes.REQUEST_OUTCOMES, + queries: [ + { + title: 'Number of successful incoming requests', + query: 'prometheus query', + }, + { + title: 'Number of failed incoming requests', + query: 'prometheus query', + }, + ], + }, + ], + }), + ).toBe(false); + + expect( + isValidNimMetricsDataObject({ + config: [], + }), + ).toBe(false); + }); + + it('should return false when given an insane value', () => { + expect(isValidNimMetricsDataObject(null)).toBe(false); + 
expect(isValidNimMetricsDataObject(undefined)).toBe(false); + expect(isValidNimMetricsDataObject({})).toBe(false); + expect(isValidNimMetricsDataObject([])).toBe(false); + expect(isValidNimMetricsDataObject(true)).toBe(false); + expect(isValidNimMetricsDataObject(false)).toBe(false); + expect(isValidNimMetricsDataObject(1)).toBe(false); + }); +}); diff --git a/frontend/src/concepts/metrics/kserve/const.ts b/frontend/src/concepts/metrics/kserve/const.ts index 674f29a2d8..b34d31ee87 100644 --- a/frontend/src/concepts/metrics/kserve/const.ts +++ b/frontend/src/concepts/metrics/kserve/const.ts @@ -6,3 +6,12 @@ export enum KserveMetricsGraphTypes { REQUEST_COUNT = 'REQUEST_COUNT', MEAN_LATENCY = 'MEAN_LATENCY', } + +export enum NimMetricsGraphTypes { + TIME_TO_FIRST_TOKEN = 'TIME_TO_FIRST_TOKEN', + TIME_PER_OUTPUT_TOKEN = 'TIME_PER_OUTPUT_TOKEN', + KV_CACHE = 'KV_CACHE', + CURRENT_REQUESTS = 'CURRENT_REQUESTS', + TOKENS_COUNT = 'TOKENS_COUNT', + REQUEST_OUTCOMES = 'REQUEST_OUTCOMES', +} diff --git a/frontend/src/concepts/metrics/kserve/content/NIMCurrentRequestsGraph.tsx b/frontend/src/concepts/metrics/kserve/content/NIMCurrentRequestsGraph.tsx new file mode 100644 index 0000000000..95924c8704 --- /dev/null +++ b/frontend/src/concepts/metrics/kserve/content/NIMCurrentRequestsGraph.tsx @@ -0,0 +1,70 @@ +import React from 'react'; +import { NimMetricGraphDefinition } from '~/concepts/metrics/kserve/types'; +import { TimeframeTitle } from '~/concepts/metrics/types'; +import MetricsChart from '~/pages/modelServing/screens/metrics/MetricsChart'; +import { useFetchNimCurrentRequestsData } from '~/api'; +import { convertPrometheusNaNToZero } from '~/pages/modelServing/screens/metrics/utils'; +import { MetricsChartTypes } from '~/pages/modelServing/screens/metrics/types'; + +type NimCurrentRequestsGraphProps = { + graphDefinition: NimMetricGraphDefinition; // Contains queries and title + timeframe: TimeframeTitle; // Time range + end: number; // End timestamp + namespace: 
string; // Namespace +}; + +const NimCurrentRequestsGraph: React.FC = ({ + graphDefinition, + timeframe, + end, + namespace, +}) => { + // Fetch the data for "Running", "Waiting", and "Max Requests" + const { + data: { requestsWaiting, requestsRunning, maxRequests }, + } = useFetchNimCurrentRequestsData(graphDefinition, timeframe, end, namespace); + + return ( + + ); +}; + +export default NimCurrentRequestsGraph; diff --git a/frontend/src/concepts/metrics/kserve/content/NIMKVCacheUsageGraph.tsx b/frontend/src/concepts/metrics/kserve/content/NIMKVCacheUsageGraph.tsx new file mode 100644 index 0000000000..65c417e9d6 --- /dev/null +++ b/frontend/src/concepts/metrics/kserve/content/NIMKVCacheUsageGraph.tsx @@ -0,0 +1,39 @@ +import React from 'react'; +import { NimMetricGraphDefinition } from '~/concepts/metrics/kserve/types'; +import { TimeframeTitle } from '~/concepts/metrics/types'; +import { useFetchNimKVCacheUsageData } from '~/api'; +import MetricsChart from '~/pages/modelServing/screens/metrics/MetricsChart'; +import { MetricsChartTypes } from '~/pages/modelServing/screens/metrics/types'; +import { toPercentage } from '~/pages/modelServing/screens/metrics/utils'; + +// Graph #1 - KV Cache usage over time +type NimKVCacheUsageGraphProps = { + graphDefinition: NimMetricGraphDefinition; + timeframe: TimeframeTitle; + end: number; + namespace: string; +}; + +const NimKVCacheUsageGraph: React.FC = ({ + graphDefinition, + timeframe, + end, + namespace, +}) => { + const { + data: { kvCacheUsage }, + } = useFetchNimKVCacheUsageData(graphDefinition, timeframe, end, namespace); + + return ( + ({ + y: [0, 100], + })} + /> + ); +}; + +export default NimKVCacheUsageGraph; diff --git a/frontend/src/concepts/metrics/kserve/content/NIMRequestsOutcomesGraph.tsx b/frontend/src/concepts/metrics/kserve/content/NIMRequestsOutcomesGraph.tsx new file mode 100644 index 0000000000..1b0bd91847 --- /dev/null +++ b/frontend/src/concepts/metrics/kserve/content/NIMRequestsOutcomesGraph.tsx @@ 
-0,0 +1,53 @@ +import React from 'react'; +import { NimMetricGraphDefinition } from '~/concepts/metrics/kserve/types'; +import { useFetchNimRequestsOutcomesData } from '~/api'; +import MetricsChart from '~/pages/modelServing/screens/metrics/MetricsChart'; +import { TimeframeTitle } from '~/concepts/metrics/types'; +import { MetricsChartTypes } from '~/pages/modelServing/screens/metrics/types'; + +type NimRequestsOutcomesGraphProps = { + graphDefinition: NimMetricGraphDefinition; + timeframe: TimeframeTitle; + end: number; + namespace: string; +}; + +const NimRequestsOutcomesGraph: React.FC = ({ + graphDefinition, + timeframe, + end, + namespace, +}) => { + const { + data: { successCount, failedCount }, + } = useFetchNimRequestsOutcomesData(graphDefinition, timeframe, end, namespace); + + return ( + + ); +}; + +export default NimRequestsOutcomesGraph; diff --git a/frontend/src/concepts/metrics/kserve/content/NIMTimeForFirstTokenGraphs.tsx b/frontend/src/concepts/metrics/kserve/content/NIMTimeForFirstTokenGraphs.tsx new file mode 100644 index 0000000000..7b362d519f --- /dev/null +++ b/frontend/src/concepts/metrics/kserve/content/NIMTimeForFirstTokenGraphs.tsx @@ -0,0 +1,40 @@ +import React from 'react'; +import { NimMetricGraphDefinition } from '~/concepts/metrics/kserve/types'; +import { TimeframeTitle } from '~/concepts/metrics/types'; +import { useFetchNimTimeToFirstTokenData } from '~/api'; +import MetricsChart from '~/pages/modelServing/screens/metrics/MetricsChart'; +import { convertPrometheusNaNToZero } from '~/pages/modelServing/screens/metrics/utils'; + +// Graph #4 - Time to First Token +type NimTimeToFirstTokenGraphProps = { + graphDefinition: NimMetricGraphDefinition; + timeframe: TimeframeTitle; + end: number; + namespace: string; +}; + +const NimTimeToFirstTokenGraph: React.FC = ({ + graphDefinition, + timeframe, + end, + namespace, +}) => { + const { + data: { timeToFirstToken }, + } = useFetchNimTimeToFirstTokenData(graphDefinition, timeframe, end, 
namespace); + + return ( + ({ + y: [0, 20], + })} + /> + ); +}; + +export default NimTimeToFirstTokenGraph; diff --git a/frontend/src/concepts/metrics/kserve/content/NIMTimePerOutputTokenGraph.tsx b/frontend/src/concepts/metrics/kserve/content/NIMTimePerOutputTokenGraph.tsx new file mode 100644 index 0000000000..d303871781 --- /dev/null +++ b/frontend/src/concepts/metrics/kserve/content/NIMTimePerOutputTokenGraph.tsx @@ -0,0 +1,39 @@ +import React from 'react'; +import { NimMetricGraphDefinition } from '~/concepts/metrics/kserve/types'; +import { TimeframeTitle } from '~/concepts/metrics/types'; +import MetricsChart from '~/pages/modelServing/screens/metrics/MetricsChart'; +import { useFetchNimTimePerOutputTokenData } from '~/api'; +import { convertPrometheusNaNToZero } from '~/pages/modelServing/screens/metrics/utils'; +import { MetricsChartTypes } from '~/pages/modelServing/screens/metrics/types'; + +type NimTimePerOutputTokenGraphProps = { + graphDefinition: NimMetricGraphDefinition; // Contains query and title + timeframe: TimeframeTitle; // Time range + end: number; // End timestamp + namespace: string; // Namespace +}; +const NimTimePerOutputTokenGraph: React.FC = ({ + graphDefinition, + timeframe, + end, + namespace, +}) => { + // Fetch the data for "Time per Output Token" + const { + data: { timePerOutputToken }, + } = useFetchNimTimePerOutputTokenData(graphDefinition, timeframe, end, namespace); + return ( + + ); +}; +export default NimTimePerOutputTokenGraph; diff --git a/frontend/src/concepts/metrics/kserve/content/NIMTokensCountGraph.tsx b/frontend/src/concepts/metrics/kserve/content/NIMTokensCountGraph.tsx new file mode 100644 index 0000000000..46638391de --- /dev/null +++ b/frontend/src/concepts/metrics/kserve/content/NIMTokensCountGraph.tsx @@ -0,0 +1,59 @@ +import React from 'react'; +import { NimMetricGraphDefinition } from '~/concepts/metrics/kserve/types'; +import { TimeframeTitle } from '~/concepts/metrics/types'; +import { 
useFetchNimTokensCountData } from '~/api'; +import MetricsChart from '~/pages/modelServing/screens/metrics/MetricsChart'; +import { MetricsChartTypes } from '~/pages/modelServing/screens/metrics/types'; +import { convertPrometheusNaNToZero } from '~/pages/modelServing/screens/metrics/utils'; + +// Graph #3 - Total Prompt Token Count and Total Generation Token Count +type NimTokensCountGraphProps = { + graphDefinition: NimMetricGraphDefinition; + timeframe: TimeframeTitle; + end: number; + namespace: string; +}; + +const NimTokensCountGraph: React.FC = ({ + graphDefinition, + timeframe, + end, + namespace, +}) => { + const { + data: { totalPromptTokenCount, totalGenerationTokenCount }, + } = useFetchNimTokensCountData(graphDefinition, timeframe, end, namespace); + + return ( + + ); +}; + +export default NimTokensCountGraph; diff --git a/frontend/src/concepts/metrics/kserve/content/NimMetricsContent.tsx b/frontend/src/concepts/metrics/kserve/content/NimMetricsContent.tsx new file mode 100644 index 0000000000..4b5c1704a3 --- /dev/null +++ b/frontend/src/concepts/metrics/kserve/content/NimMetricsContent.tsx @@ -0,0 +1,19 @@ +import React from 'react'; +import NimPerformanceGraphs from '~/concepts/metrics/kserve/content/NimPerformanceGraphs'; +import { NimMetricsContext } from '~/concepts/metrics/kserve/NimMetricsContext'; + +const NimMetricsContent: React.FC = () => { + const { namespace, graphDefinitions, timeframe, lastUpdateTime } = + React.useContext(NimMetricsContext); + + return ( + + ); +}; + +export default NimMetricsContent; diff --git a/frontend/src/concepts/metrics/kserve/content/NimPerformanceGraphs.tsx b/frontend/src/concepts/metrics/kserve/content/NimPerformanceGraphs.tsx new file mode 100644 index 0000000000..89fa9f2789 --- /dev/null +++ b/frontend/src/concepts/metrics/kserve/content/NimPerformanceGraphs.tsx @@ -0,0 +1,115 @@ +import { Stack, StackItem } from '@patternfly/react-core/dist/esm'; +import React from 'react'; +import { 
NimMetricGraphDefinition } from '~/concepts/metrics/kserve/types'; +import { NimMetricsGraphTypes } from '~/concepts/metrics/kserve/const'; +import { TimeframeTitle } from '~/concepts/metrics/types'; +import NIMTimeToFirstTokenGraph from './NIMTimeForFirstTokenGraphs'; +import NIMKVCacheUsageGraph from './NIMKVCacheUsageGraph'; +import NIMTokensCountGraph from './NIMTokensCountGraph'; +import NIMRequestsOutcomesGraph from './NIMRequestsOutcomesGraph'; +import NIMTimePerOutputTokenGraph from './NIMTimePerOutputTokenGraph'; +import NIMCurrentRequestsGraph from './NIMCurrentRequestsGraph'; + +type NimPerformanceGraphsProps = { + namespace: string; + graphDefinitions: NimMetricGraphDefinition[]; + timeframe: TimeframeTitle; + end: number; +}; + +const NimPerformanceGraphs: React.FC = ({ + namespace, + graphDefinitions, + timeframe, + end, +}) => { + const renderGraph = (graphDefinition: NimMetricGraphDefinition) => { + // Graph #1 - KV Cache usage over time + if (graphDefinition.type === NimMetricsGraphTypes.KV_CACHE) { + return ( + + ); + } + + // Graph #3 - Total Prompt Token Count and Total Generation Token Count + if (graphDefinition.type === NimMetricsGraphTypes.TOKENS_COUNT) { + return ( + + ); + } + + // Graph #4 - Time to First Token + if (graphDefinition.type === NimMetricsGraphTypes.TIME_TO_FIRST_TOKEN) { + return ( + + ); + } + + // Graph #5 - Time per Output Token + if (graphDefinition.type === NimMetricsGraphTypes.TIME_PER_OUTPUT_TOKEN) { + return ( + + ); + } + + // Graph #6- Requests Outcomes + if (graphDefinition.type === NimMetricsGraphTypes.REQUEST_OUTCOMES) { + return ( + + ); + } + + // Graph #2 Current Requests + // Condition IS necessary as graph types are provided by the backend. + // We need to guard against receiving an unknown value at runtime and fail gracefully. 
+ // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition + if (graphDefinition.type === NimMetricsGraphTypes.CURRENT_REQUESTS) { + return ( + + ); + } + + // TODO: add an unsupported graph type error state. + return null; + }; + + return ( + + {graphDefinitions.map((x) => ( + {renderGraph(x)} + ))} + + ); +}; + +export default NimPerformanceGraphs; diff --git a/frontend/src/concepts/metrics/kserve/types.ts b/frontend/src/concepts/metrics/kserve/types.ts index cd127327fd..9fc51378da 100644 --- a/frontend/src/concepts/metrics/kserve/types.ts +++ b/frontend/src/concepts/metrics/kserve/types.ts @@ -1,5 +1,5 @@ import { ConfigMapKind } from '~/k8sTypes'; -import { KserveMetricsGraphTypes } from '~/concepts/metrics/kserve/const'; +import { KserveMetricsGraphTypes, NimMetricsGraphTypes } from '~/concepts/metrics/kserve/const'; export type KserveMetricsConfigMapKind = ConfigMapKind & { data: { @@ -8,24 +8,44 @@ export type KserveMetricsConfigMapKind = ConfigMapKind & { }; }; -export type KserveMetricGraphDefinition = { - title: string; - type: KserveMetricsGraphTypes; - queries: KserveMetricQueryDefinition[]; -}; - -export type KserveMetricQueryDefinition = { +export type MetricQueryDefinition = { title: string; query: string; }; +//Kserve Data Type Definitions + export type KserveMetricsDataObject = { config: KserveMetricGraphDefinition[]; }; +export type KserveMetricGraphDefinition = { + title: string; + type: KserveMetricsGraphTypes; + queries: MetricQueryDefinition[]; +}; + export type KserveMetricsDefinition = { supported: boolean; loaded: boolean; error?: Error; graphDefinitions: KserveMetricGraphDefinition[]; }; + +//Nim Data Type Definitions +export type NimMetricsDataObject = { + config: NimMetricGraphDefinition[]; +}; + +export type NimMetricGraphDefinition = { + title: string; + type: NimMetricsGraphTypes; + queries: MetricQueryDefinition[]; +}; + +export type NimMetricsDefinition = { + supported: boolean; + loaded: boolean; + error?: Error; + 
graphDefinitions: NimMetricGraphDefinition[]; +}; diff --git a/frontend/src/concepts/metrics/kserve/useNimMetricsGraphDefinition.ts b/frontend/src/concepts/metrics/kserve/useNimMetricsGraphDefinition.ts new file mode 100644 index 0000000000..4119bfc02b --- /dev/null +++ b/frontend/src/concepts/metrics/kserve/useNimMetricsGraphDefinition.ts @@ -0,0 +1,40 @@ +import React from 'react'; +import { KserveMetricsConfigMapKind, NimMetricsDefinition } from '~/concepts/metrics/kserve/types'; +import { isValidNimMetricsDataObject } from '~/concepts/metrics/kserve/utils'; + +const useNimMetricsGraphDefinitions = ( + kserveMetricsConfigMap: KserveMetricsConfigMapKind | null, +): NimMetricsDefinition => + React.useMemo(() => { + const result: NimMetricsDefinition = { + supported: false, + loaded: !!kserveMetricsConfigMap, + graphDefinitions: [], + }; + + if (kserveMetricsConfigMap) { + result.supported = kserveMetricsConfigMap.data.supported === 'true'; + + let parsed: unknown; + if (result.supported) { + try { + parsed = JSON.parse(kserveMetricsConfigMap.data.metrics); + } catch (e) { + result.error = new Error('Error reading metrics configuration: malformed JSON'); + result.loaded = true; + } + + if (!result.error) { + if (isValidNimMetricsDataObject(parsed)) { + result.graphDefinitions = parsed.config; + } else { + result.error = new Error('Error reading metrics configuration: schema mismatch'); + result.loaded = true; + } + } + } + } + return result; + }, [kserveMetricsConfigMap]); + +export default useNimMetricsGraphDefinitions; diff --git a/frontend/src/concepts/metrics/kserve/utils.ts b/frontend/src/concepts/metrics/kserve/utils.ts index ad5e7ecdea..7fb59466d0 100644 --- a/frontend/src/concepts/metrics/kserve/utils.ts +++ b/frontend/src/concepts/metrics/kserve/utils.ts @@ -2,6 +2,7 @@ import { ConfigMapKind } from '~/k8sTypes'; import { KserveMetricsConfigMapKind, KserveMetricsDataObject, + NimMetricsDataObject, } from '~/concepts/metrics/kserve/types'; export const 
isKserveMetricsConfigMapKind = ( @@ -21,3 +22,11 @@ export const isValidKserveMetricsDataObject = (obj: unknown): obj is KserveMetri return 'config' in obj && Array.isArray(obj.config) && obj.config.length > 0; }; + +export const isValidNimMetricsDataObject = (obj: unknown): obj is NimMetricsDataObject => { + if (typeof obj !== 'object' || obj === null) { + return false; + } + + return 'config' in obj && Array.isArray(obj.config) && obj.config.length > 0; +}; diff --git a/frontend/src/pages/modelServing/screens/global/InferenceServiceTableRow.tsx b/frontend/src/pages/modelServing/screens/global/InferenceServiceTableRow.tsx index dee0176440..310abd0ab4 100644 --- a/frontend/src/pages/modelServing/screens/global/InferenceServiceTableRow.tsx +++ b/frontend/src/pages/modelServing/screens/global/InferenceServiceTableRow.tsx @@ -47,8 +47,7 @@ const InferenceServiceTableRow: React.FC = ({ const modelMesh = isModelMesh(inferenceService); const modelMeshMetricsSupported = modelMetricsEnabled && modelMesh; - const kserveMetricsSupported = - modelMetricsEnabled && kserveMetricsEnabled && !modelMesh && !isKServeNIMEnabled; + const kserveMetricsSupported = modelMetricsEnabled && kserveMetricsEnabled && !modelMesh; const displayName = getDisplayNameFromK8sResource(inferenceService); return ( diff --git a/frontend/src/pages/modelServing/screens/metrics/MetricsChart.tsx b/frontend/src/pages/modelServing/screens/metrics/MetricsChart.tsx index eaede96586..39a547b749 100644 --- a/frontend/src/pages/modelServing/screens/metrics/MetricsChart.tsx +++ b/frontend/src/pages/modelServing/screens/metrics/MetricsChart.tsx @@ -15,6 +15,7 @@ import { Chart, ChartArea, ChartAxis, + ChartDonut, ChartGroup, ChartLegendTooltip, ChartLine, @@ -77,7 +78,6 @@ const MetricsChart: React.FC = ({ const [tooltipTitle, setTooltipTitle] = React.useState( convertTimestamp(Date.now(), formatToShow(currentTimeframe)), ); - const { data: graphLines, maxYValue, @@ -93,7 +93,6 @@ const MetricsChart: React.FC = ({ 
const newMinYValue = Math.min(...lineValues.map((v) => v.y)); const newMaxXValue = Math.max(...lineValues.map((v) => v.x)); const newMinXValue = Math.min(...lineValues.map((v) => v.x)); - return { data: [...acc.data, { points: lineValues, name: metric.name }], maxYValue: Math.max(acc.maxYValue, newMaxYValue), @@ -106,13 +105,10 @@ const MetricsChart: React.FC = ({ ), [metrics], ); - const error = metrics.find((line) => line.metric.error)?.metric.error; const isAllLoaded = error || metrics.every((line) => line.metric.loaded); const hasSomeData = graphLines.some((line) => line.points.length > 0); - const ChartGroupWrapper = React.useMemo(() => (isStack ? ChartStack : ChartGroup), [isStack]); - React.useEffect(() => { const ref = bodyRef.current; let observer: ReturnType = () => undefined; @@ -125,7 +121,6 @@ const MetricsChart: React.FC = ({ } return () => observer(); }, []); - const handleCursorChange = React.useCallback( (xValue: number) => { if (!xValue) { @@ -140,7 +135,6 @@ const MetricsChart: React.FC = ({ }, [minXValue, currentTimeframe, maxXValue], ); - let legendProps: Partial> = {}; let containerComponent; if (metrics.length > 1 && metrics.every(({ name }) => !!name)) { @@ -170,7 +164,6 @@ const MetricsChart: React.FC = ({ /> ); } - return ( = ({ {hasSomeData ? 
( = ({ theme={theme} hasPatterns={hasPatterns} data-testid="metrics-chart-has-data" + showAxis={type !== MetricsChartTypes.DONUT} {...legendProps} > = ({ ); case MetricsChartTypes.LINE: return ; + case MetricsChartTypes.DONUT: + return ( + `${datum.name}: ${datum.y}`} + constrainToVisibleArea + themeColor={metrics[i]?.color} + /> + ); default: return null; } @@ -260,5 +269,4 @@ const MetricsChart: React.FC = ({ ); }; - export default MetricsChart; diff --git a/frontend/src/pages/modelServing/screens/metrics/MetricsPageTabs.tsx b/frontend/src/pages/modelServing/screens/metrics/MetricsPageTabs.tsx index 6486761d17..65d87decbb 100644 --- a/frontend/src/pages/modelServing/screens/metrics/MetricsPageTabs.tsx +++ b/frontend/src/pages/modelServing/screens/metrics/MetricsPageTabs.tsx @@ -8,24 +8,35 @@ import useDoesTrustyAICRExist from '~/concepts/trustyai/context/useDoesTrustyAIC import { SupportedArea, useIsAreaAvailable } from '~/concepts/areas'; import { InferenceServiceKind } from '~/k8sTypes'; import { TrustyInstallState } from '~/concepts/trustyai/types'; +import './MetricsPageTabs.scss'; +import useServingPlatformStatuses from '~/pages/modelServing/useServingPlatformStatuses'; +import { byName, ProjectsContext } from '~/concepts/projects/ProjectsContext'; +import { isProjectNIMSupported } from '~/pages/modelServing/screens/projects/nimUtils'; +import useMetricsPageEnabledTabs from './useMetricsPageEnabledTabs'; +import BiasConfigurationAlertPopover from './bias/BiasConfigurationPage/BiasConfigurationAlertPopover'; import PerformanceTab from './performance/PerformanceTab'; import BiasTab from './bias/BiasTab'; -import BiasConfigurationAlertPopover from './bias/BiasConfigurationPage/BiasConfigurationAlertPopover'; -import useMetricsPageEnabledTabs from './useMetricsPageEnabledTabs'; - -import './MetricsPageTabs.scss'; +import NIMTab from './nim/NimTab'; type MetricsPageTabsProps = { model: InferenceServiceKind; }; const MetricsPageTabs: React.FC = ({ model }) => { + 
const servingPlatformStatuses = useServingPlatformStatuses(); + const isNIMAvailable = servingPlatformStatuses.kServeNIM.enabled; + const { projects } = React.useContext(ProjectsContext); + const project = projects.find(byName(model.metadata.namespace)) ?? null; const enabledTabs = useMetricsPageEnabledTabs(); + const isKServeNIMEnabled = project ? isProjectNIMSupported(project) : false; + const isNimEnabled = isNIMAvailable && isKServeNIMEnabled; const { biasMetricConfigs, statusState } = useModelBiasData(); const [biasMetricsInstalled] = useDoesTrustyAICRExist(); const performanceMetricsAreaAvailable = useIsAreaAvailable( SupportedArea.PERFORMANCE_METRICS, ).status; + //check availability of NIM metrics + const nimMetricsAreaAvailable = useIsAreaAvailable(SupportedArea.NIM_MODEL).status; const { tab } = useParams<{ tab: MetricsTabKeys }>(); const navigate = useNavigate(); @@ -41,10 +52,19 @@ const MetricsPageTabs: React.FC = ({ model }) => { return ; } + //Display only one tab that is available if (enabledTabs.length === 1) { - return performanceMetricsAreaAvailable ? 
: ; + if (performanceMetricsAreaAvailable) { + return ; + } + if (nimMetricsAreaAvailable && isNimEnabled) { + return ; + } + + return ; } + //Display multiple available tabs return ( = ({ model }) => { )} + + {/* Add NIM metrics tab */} + {nimMetricsAreaAvailable && isNimEnabled && ( + NIM Metrics} + aria-label="Nim tab" + className="odh-metrics-page-tabs__content" + data-testid="nim-tab" + > + + + )} + {biasMetricsInstalled && ( = ({ modelName }) => { + const { namespace } = React.useContext(ModelServingMetricsContext); + + return ( + + + + ); +}; + +export default NimMetrics; diff --git a/frontend/src/pages/modelServing/screens/metrics/nim/NimTab.tsx b/frontend/src/pages/modelServing/screens/metrics/nim/NimTab.tsx new file mode 100644 index 0000000000..49b7f8ba6e --- /dev/null +++ b/frontend/src/pages/modelServing/screens/metrics/nim/NimTab.tsx @@ -0,0 +1,46 @@ +import React from 'react'; +import { EmptyState, PageSection, Stack, StackItem } from '@patternfly/react-core'; +import { WarningTriangleIcon } from '@patternfly/react-icons'; +import { InferenceServiceKind } from '~/k8sTypes'; +import { SupportedArea, useIsAreaAvailable } from '~/concepts/areas'; +import { isModelMesh } from '~/pages/modelServing/utils'; +import MetricsPageToolbar from '~/concepts/metrics/MetricsPageToolbar'; +import NimMetrics from './NimMetrics'; + +type NIMTabProps = { + model: InferenceServiceKind; +}; + +const NIMTab: React.FC = ({ model }) => { + const modelMesh = isModelMesh(model); + const NIMMetricsEnabled = useIsAreaAvailable(SupportedArea.NIM_MODEL).status; + + if (!modelMesh && !NIMMetricsEnabled) { + return ( + + + + + + ); + } + + return ( + + + + + + + + + ); +}; + +export default NIMTab; diff --git a/frontend/src/pages/modelServing/screens/metrics/types.ts b/frontend/src/pages/modelServing/screens/metrics/types.ts index 60e9a2c1d9..c543c21dd3 100644 --- a/frontend/src/pages/modelServing/screens/metrics/types.ts +++ 
b/frontend/src/pages/modelServing/screens/metrics/types.ts @@ -14,6 +14,7 @@ type MetricChartLineBase = { }; export type NamedMetricChartLine = MetricChartLineBase & { name: string; + color?: string; // Optional custom color for this chart line }; export type UnnamedMetricChartLine = MetricChartLineBase & { /** Assumes chart title */ @@ -50,11 +51,13 @@ export type DomainCalculator = ( export enum MetricsChartTypes { AREA, LINE, + DONUT, } export enum MetricsTabKeys { PERFORMANCE = 'performance', BIAS = 'bias', + NIM = 'nim', } export type BiasChartConfig = { diff --git a/frontend/src/pages/modelServing/screens/metrics/useMetricsPageEnabledTabs.ts b/frontend/src/pages/modelServing/screens/metrics/useMetricsPageEnabledTabs.ts index 6951ded38e..2c128fc628 100644 --- a/frontend/src/pages/modelServing/screens/metrics/useMetricsPageEnabledTabs.ts +++ b/frontend/src/pages/modelServing/screens/metrics/useMetricsPageEnabledTabs.ts @@ -3,16 +3,23 @@ import { MetricsTabKeys } from './types'; const useMetricsPageEnabledTabs = (): MetricsTabKeys[] => { const enabledTabs: MetricsTabKeys[] = []; + //check availability of Bias metrics const biasMetricsAreaAvailable = useIsAreaAvailable(SupportedArea.BIAS_METRICS).status; + //check availability of Performance metrics const performanceMetricsAreaAvailable = useIsAreaAvailable( SupportedArea.PERFORMANCE_METRICS, ).status; + //check availability of NIM metrics + const nimMetricsAreaAvailable = useIsAreaAvailable(SupportedArea.NIM_MODEL).status; if (performanceMetricsAreaAvailable) { enabledTabs.push(MetricsTabKeys.PERFORMANCE); } if (biasMetricsAreaAvailable) { enabledTabs.push(MetricsTabKeys.BIAS); } + if (nimMetricsAreaAvailable) { + enabledTabs.push(MetricsTabKeys.NIM); + } return enabledTabs; }; diff --git a/frontend/src/pages/modelServing/useModelMetricsEnabled.ts b/frontend/src/pages/modelServing/useModelMetricsEnabled.ts index 7322e4b41d..5834e92119 100644 --- a/frontend/src/pages/modelServing/useModelMetricsEnabled.ts +++ 
b/frontend/src/pages/modelServing/useModelMetricsEnabled.ts @@ -6,8 +6,10 @@ const useModelMetricsEnabled = (): [modelMetricsEnabled: boolean] => { ).status; const biasMetricsAreaAvailable = useIsAreaAvailable(SupportedArea.BIAS_METRICS).status; + const nimMetricsAreaAvailable = useIsAreaAvailable(SupportedArea.NIM_MODEL).status; + const checkModelMetricsEnabled = () => - performanceMetricsAreaAvailable || biasMetricsAreaAvailable; + performanceMetricsAreaAvailable || biasMetricsAreaAvailable || nimMetricsAreaAvailable; return [checkModelMetricsEnabled()]; }; diff --git a/frontend/src/pages/projects/screens/detail/overview/serverModels/deployedModels/DeployedModelCard.tsx b/frontend/src/pages/projects/screens/detail/overview/serverModels/deployedModels/DeployedModelCard.tsx index e2f7fc7d63..97fc8692e6 100644 --- a/frontend/src/pages/projects/screens/detail/overview/serverModels/deployedModels/DeployedModelCard.tsx +++ b/frontend/src/pages/projects/screens/detail/overview/serverModels/deployedModels/DeployedModelCard.tsx @@ -23,8 +23,6 @@ import InferenceServiceEndpoint from '~/pages/modelServing/screens/global/Infere import TypeBorderedCard from '~/concepts/design/TypeBorderedCard'; import { SupportedArea, useIsAreaAvailable } from '~/concepts/areas/'; import { getDisplayNameFromK8sResource } from '~/concepts/k8s/utils'; -import { ProjectDetailsContext } from '~/pages/projects/ProjectDetailsContext'; -import { isProjectNIMSupported } from '~/pages/modelServing/screens/projects/nimUtils'; interface DeployedModelCardProps { inferenceService: InferenceServiceKind; @@ -38,11 +36,7 @@ const DeployedModelCard: React.FC = ({ const navigate = useNavigate(); const kserveMetricsEnabled = useIsAreaAvailable(SupportedArea.K_SERVE_METRICS).status; const modelMesh = isModelMesh(inferenceService); - const { currentProject } = React.useContext(ProjectDetailsContext); - const isKServeNIMEnabled = isProjectNIMSupported(currentProject); - - const modelMetricsSupported = - 
modelMetricsEnabled && (modelMesh || kserveMetricsEnabled) && !isKServeNIMEnabled; + const modelMetricsSupported = modelMetricsEnabled && (modelMesh || kserveMetricsEnabled); const inferenceServiceDisplayName = getDisplayNameFromK8sResource(inferenceService);