diff --git a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml index 908dd95f659b..2281a9ea41e0 100644 --- a/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml +++ b/docker/images/pinot/etc/jmx_prometheus_javaagent/configs/controller.yml @@ -17,7 +17,7 @@ rules: tableType: "$6" partition: "$7" # Gauges that accept the controller taskType -- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" +- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<>(\\w+)" name: "pinot_controller_$1_$3" cache: true labels: diff --git a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java index cdb99f0f904d..a978219343ec 100644 --- a/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java +++ b/pinot-common/src/main/java/org/apache/pinot/common/metrics/ControllerGauge.java @@ -68,6 +68,7 @@ public enum ControllerGauge implements AbstractMetrics.Gauge { NUM_MINION_SUBTASKS_WAITING("NumMinionSubtasksWaiting", true), NUM_MINION_SUBTASKS_RUNNING("NumMinionSubtasksRunning", true), NUM_MINION_SUBTASKS_ERROR("NumMinionSubtasksError", true), + NUM_MINION_SUBTASKS_UNKNOWN("NumMinionSubtasksUnknown", true), PERCENT_MINION_SUBTASKS_IN_QUEUE("PercentMinionSubtasksInQueue", true), PERCENT_MINION_SUBTASKS_IN_ERROR("PercentMinionSubtasksInError", true), TIER_BACKEND_TABLE_COUNT("TierBackendTableCount", true), diff --git a/pinot-common/src/test/java/org/apache/pinot/common/metrics/prometheus/ControllerPrometheusMetricsTest.java b/pinot-common/src/test/java/org/apache/pinot/common/metrics/prometheus/ControllerPrometheusMetricsTest.java index 21645d201cbb..1f458a444829 100644 --- a/pinot-common/src/test/java/org/apache/pinot/common/metrics/prometheus/ControllerPrometheusMetricsTest.java +++ b/pinot-common/src/test/java/org/apache/pinot/common/metrics/prometheus/ControllerPrometheusMetricsTest.java @@ -40,6 +40,7 @@ public abstract class ControllerPrometheusMetricsTest extends PinotPrometheusMet private static final List GLOBAL_GAUGES_ACCEPTING_TASKTYPE = List.of(ControllerGauge.NUM_MINION_TASKS_IN_PROGRESS, ControllerGauge.NUM_MINION_SUBTASKS_RUNNING, ControllerGauge.NUM_MINION_SUBTASKS_WAITING, ControllerGauge.NUM_MINION_SUBTASKS_ERROR, + ControllerGauge.NUM_MINION_SUBTASKS_UNKNOWN, ControllerGauge.PERCENT_MINION_SUBTASKS_IN_QUEUE, ControllerGauge.PERCENT_MINION_SUBTASKS_IN_ERROR); //local gauges that accept partition diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/TaskMetricsEmitter.java b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/TaskMetricsEmitter.java index 48876dcb30c1..ace369448596 100644 --- a/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/TaskMetricsEmitter.java +++ b/pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/minion/TaskMetricsEmitter.java @@ -114,6 +114,8 @@ protected final void runTask(Properties periodicTaskProperties) { taskTypeAccumulatedCount.getWaiting()); _controllerMetrics.setValueOfGlobalGauge(ControllerGauge.NUM_MINION_SUBTASKS_ERROR, taskType, taskTypeAccumulatedCount.getError()); + _controllerMetrics.setValueOfGlobalGauge(ControllerGauge.NUM_MINION_SUBTASKS_UNKNOWN, taskType, + taskTypeAccumulatedCount.getUnknown()); int total = taskTypeAccumulatedCount.getTotal(); int percent = total != 0 ? (taskTypeAccumulatedCount.getWaiting() + taskTypeAccumulatedCount.getRunning()) * 100 / total : 0; @@ -129,6 +131,8 @@ protected final void runTask(Properties periodicTaskProperties) { ControllerGauge.NUM_MINION_SUBTASKS_WAITING, taskCount.getWaiting()); _controllerMetrics.setOrUpdateTableGauge(tableNameWithType, taskType, ControllerGauge.NUM_MINION_SUBTASKS_ERROR, taskCount.getError()); + _controllerMetrics.setOrUpdateTableGauge(tableNameWithType, taskType, + ControllerGauge.NUM_MINION_SUBTASKS_UNKNOWN, taskCount.getUnknown()); int tableTotal = taskCount.getTotal(); int tablePercent = tableTotal != 0 ? (taskCount.getWaiting() + taskCount.getRunning()) * 100 / tableTotal : 0; _controllerMetrics.setOrUpdateTableGauge(tableNameWithType, taskType, @@ -163,6 +167,7 @@ protected final void runTask(Properties periodicTaskProperties) { _controllerMetrics.removeGlobalGauge(taskType, ControllerGauge.NUM_MINION_SUBTASKS_RUNNING); _controllerMetrics.removeGlobalGauge(taskType, ControllerGauge.NUM_MINION_SUBTASKS_WAITING); _controllerMetrics.removeGlobalGauge(taskType, ControllerGauge.NUM_MINION_SUBTASKS_ERROR); + _controllerMetrics.removeGlobalGauge(taskType, ControllerGauge.NUM_MINION_SUBTASKS_UNKNOWN); _controllerMetrics.removeGlobalGauge(taskType, ControllerGauge.PERCENT_MINION_SUBTASKS_IN_QUEUE); _controllerMetrics.removeGlobalGauge(taskType, ControllerGauge.PERCENT_MINION_SUBTASKS_IN_ERROR); // remove table task type level gauges @@ -192,6 +197,7 @@ private void removeTableTaskTypeMetrics(Set tableNameWithTypeSet, String _controllerMetrics.removeTableGauge(tableNameWithType, taskType, ControllerGauge.NUM_MINION_SUBTASKS_RUNNING); _controllerMetrics.removeTableGauge(tableNameWithType, taskType, ControllerGauge.NUM_MINION_SUBTASKS_WAITING); _controllerMetrics.removeTableGauge(tableNameWithType, taskType, ControllerGauge.NUM_MINION_SUBTASKS_ERROR); + _controllerMetrics.removeTableGauge(tableNameWithType, taskType, ControllerGauge.NUM_MINION_SUBTASKS_UNKNOWN); _controllerMetrics.removeTableGauge(tableNameWithType, taskType, ControllerGauge.PERCENT_MINION_SUBTASKS_IN_QUEUE); _controllerMetrics.removeTableGauge(tableNameWithType, taskType, diff --git a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/minion/TaskMetricsEmitterTest.java b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/minion/TaskMetricsEmitterTest.java index 6fcb708c7177..bd88f2731cef 100644 --- a/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/minion/TaskMetricsEmitterTest.java +++ b/pinot-controller/src/test/java/org/apache/pinot/controller/helix/core/minion/TaskMetricsEmitterTest.java @@ -84,7 +84,7 @@ public void taskType1ButNoInProgressTask() { Mockito.when(_pinotHelixTaskResourceManager.getTasksInProgress(taskType)).thenReturn(ImmutableSet.of()); _taskMetricsEmitter.runTask(null); - Assert.assertEquals(metricsRegistry.allMetrics().size(), 7); + Assert.assertEquals(metricsRegistry.allMetrics().size(), 8); Assert.assertTrue(metricsRegistry.allMetrics().containsKey( new YammerMetricName(ControllerMetrics.class, "pinot.controller.onlineMinionInstances"))); Assert.assertEquals(((YammerSettableGauge) metricsRegistry.allMetrics().get( @@ -144,7 +144,7 @@ public void taskType1WithTwoTablesEmitMetricTwice() { private void runAndAssertForTaskType1WithTwoTables() { PinotMetricsRegistry metricsRegistry = _controllerMetrics.getMetricsRegistry(); _taskMetricsEmitter.runTask(null); - Assert.assertEquals(metricsRegistry.allMetrics().size(), 17); + Assert.assertEquals(metricsRegistry.allMetrics().size(), 20); Assert.assertTrue(metricsRegistry.allMetrics().containsKey( new YammerMetricName(ControllerMetrics.class, "pinot.controller.onlineMinionInstances"))); @@ -231,7 +231,7 @@ private void oneTaskTypeWithOneTable(String taskType, String taskName1, String t PinotMetricsRegistry metricsRegistry = _controllerMetrics.getMetricsRegistry(); _taskMetricsEmitter.runTask(null); - Assert.assertEquals(metricsRegistry.allMetrics().size(), 12); + Assert.assertEquals(metricsRegistry.allMetrics().size(), 14); Assert.assertTrue(metricsRegistry.allMetrics().containsKey( new YammerMetricName(ControllerMetrics.class, "pinot.controller.onlineMinionInstances")));