Skip to content

Commit

Permalink
Adding gauge for minion subtasks in unknown state (#14693)
Browse files (browse the repository at this point in the history)
  • Loading branch information
soumitra-st authored Dec 21, 2024
1 parent 0e9c30a commit 1ed25c0
Show file tree
Hide file tree
Showing 5 changed files with 12 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ rules:
tableType: "$6"
partition: "$7"
# Gauges that accept the controller taskType
- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<type=\"ControllerMetrics\", name=\"pinot\\.controller\\.(numMinionTasksInProgress|numMinionSubtasksRunning|numMinionSubtasksWaiting|numMinionSubtasksError|percentMinionSubtasksInQueue|percentMinionSubtasksInError)\\.(\\w+)\"><>(\\w+)"
- pattern: "\"org\\.apache\\.pinot\\.common\\.metrics\"<type=\"ControllerMetrics\", name=\"pinot\\.controller\\.(numMinionTasksInProgress|numMinionSubtasksRunning|numMinionSubtasksWaiting|numMinionSubtasksError|numMinionSubtasksUnknown|percentMinionSubtasksInQueue|percentMinionSubtasksInError)\\.(\\w+)\"><>(\\w+)"
name: "pinot_controller_$1_$3"
cache: true
labels:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ public enum ControllerGauge implements AbstractMetrics.Gauge {
NUM_MINION_SUBTASKS_WAITING("NumMinionSubtasksWaiting", true),
NUM_MINION_SUBTASKS_RUNNING("NumMinionSubtasksRunning", true),
NUM_MINION_SUBTASKS_ERROR("NumMinionSubtasksError", true),
NUM_MINION_SUBTASKS_UNKNOWN("NumMinionSubtasksUnknown", true),
PERCENT_MINION_SUBTASKS_IN_QUEUE("PercentMinionSubtasksInQueue", true),
PERCENT_MINION_SUBTASKS_IN_ERROR("PercentMinionSubtasksInError", true),
TIER_BACKEND_TABLE_COUNT("TierBackendTableCount", true),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ public abstract class ControllerPrometheusMetricsTest extends PinotPrometheusMet
//global gauges that accept taskType (going by the constant's name; where it is consumed is outside this excerpt)
private static final List<ControllerGauge> GLOBAL_GAUGES_ACCEPTING_TASKTYPE =
List.of(ControllerGauge.NUM_MINION_TASKS_IN_PROGRESS, ControllerGauge.NUM_MINION_SUBTASKS_RUNNING,
ControllerGauge.NUM_MINION_SUBTASKS_WAITING, ControllerGauge.NUM_MINION_SUBTASKS_ERROR,
ControllerGauge.NUM_MINION_SUBTASKS_UNKNOWN,
ControllerGauge.PERCENT_MINION_SUBTASKS_IN_QUEUE, ControllerGauge.PERCENT_MINION_SUBTASKS_IN_ERROR);

//local gauges that accept partition
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ protected final void runTask(Properties periodicTaskProperties) {
taskTypeAccumulatedCount.getWaiting());
_controllerMetrics.setValueOfGlobalGauge(ControllerGauge.NUM_MINION_SUBTASKS_ERROR, taskType,
taskTypeAccumulatedCount.getError());
_controllerMetrics.setValueOfGlobalGauge(ControllerGauge.NUM_MINION_SUBTASKS_UNKNOWN, taskType,
taskTypeAccumulatedCount.getUnknown());
int total = taskTypeAccumulatedCount.getTotal();
int percent = total != 0
? (taskTypeAccumulatedCount.getWaiting() + taskTypeAccumulatedCount.getRunning()) * 100 / total : 0;
Expand All @@ -129,6 +131,8 @@ protected final void runTask(Properties periodicTaskProperties) {
ControllerGauge.NUM_MINION_SUBTASKS_WAITING, taskCount.getWaiting());
_controllerMetrics.setOrUpdateTableGauge(tableNameWithType, taskType,
ControllerGauge.NUM_MINION_SUBTASKS_ERROR, taskCount.getError());
_controllerMetrics.setOrUpdateTableGauge(tableNameWithType, taskType,
ControllerGauge.NUM_MINION_SUBTASKS_UNKNOWN, taskCount.getUnknown());
int tableTotal = taskCount.getTotal();
int tablePercent = tableTotal != 0 ? (taskCount.getWaiting() + taskCount.getRunning()) * 100 / tableTotal : 0;
_controllerMetrics.setOrUpdateTableGauge(tableNameWithType, taskType,
Expand Down Expand Up @@ -163,6 +167,7 @@ protected final void runTask(Properties periodicTaskProperties) {
_controllerMetrics.removeGlobalGauge(taskType, ControllerGauge.NUM_MINION_SUBTASKS_RUNNING);
_controllerMetrics.removeGlobalGauge(taskType, ControllerGauge.NUM_MINION_SUBTASKS_WAITING);
_controllerMetrics.removeGlobalGauge(taskType, ControllerGauge.NUM_MINION_SUBTASKS_ERROR);
_controllerMetrics.removeGlobalGauge(taskType, ControllerGauge.NUM_MINION_SUBTASKS_UNKNOWN);
_controllerMetrics.removeGlobalGauge(taskType, ControllerGauge.PERCENT_MINION_SUBTASKS_IN_QUEUE);
_controllerMetrics.removeGlobalGauge(taskType, ControllerGauge.PERCENT_MINION_SUBTASKS_IN_ERROR);
// remove table task type level gauges
Expand Down Expand Up @@ -192,6 +197,7 @@ private void removeTableTaskTypeMetrics(Set<String> tableNameWithTypeSet, String
_controllerMetrics.removeTableGauge(tableNameWithType, taskType, ControllerGauge.NUM_MINION_SUBTASKS_RUNNING);
_controllerMetrics.removeTableGauge(tableNameWithType, taskType, ControllerGauge.NUM_MINION_SUBTASKS_WAITING);
_controllerMetrics.removeTableGauge(tableNameWithType, taskType, ControllerGauge.NUM_MINION_SUBTASKS_ERROR);
_controllerMetrics.removeTableGauge(tableNameWithType, taskType, ControllerGauge.NUM_MINION_SUBTASKS_UNKNOWN);
_controllerMetrics.removeTableGauge(tableNameWithType, taskType,
ControllerGauge.PERCENT_MINION_SUBTASKS_IN_QUEUE);
_controllerMetrics.removeTableGauge(tableNameWithType, taskType,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ public void taskType1ButNoInProgressTask() {
Mockito.when(_pinotHelixTaskResourceManager.getTasksInProgress(taskType)).thenReturn(ImmutableSet.of());
_taskMetricsEmitter.runTask(null);

Assert.assertEquals(metricsRegistry.allMetrics().size(), 7);
Assert.assertEquals(metricsRegistry.allMetrics().size(), 8);
Assert.assertTrue(metricsRegistry.allMetrics().containsKey(
new YammerMetricName(ControllerMetrics.class, "pinot.controller.onlineMinionInstances")));
Assert.assertEquals(((YammerSettableGauge<?>) metricsRegistry.allMetrics().get(
Expand Down Expand Up @@ -144,7 +144,7 @@ public void taskType1WithTwoTablesEmitMetricTwice() {
private void runAndAssertForTaskType1WithTwoTables() {
PinotMetricsRegistry metricsRegistry = _controllerMetrics.getMetricsRegistry();
_taskMetricsEmitter.runTask(null);
Assert.assertEquals(metricsRegistry.allMetrics().size(), 17);
Assert.assertEquals(metricsRegistry.allMetrics().size(), 20);

Assert.assertTrue(metricsRegistry.allMetrics().containsKey(
new YammerMetricName(ControllerMetrics.class, "pinot.controller.onlineMinionInstances")));
Expand Down Expand Up @@ -231,7 +231,7 @@ private void oneTaskTypeWithOneTable(String taskType, String taskName1, String t

PinotMetricsRegistry metricsRegistry = _controllerMetrics.getMetricsRegistry();
_taskMetricsEmitter.runTask(null);
Assert.assertEquals(metricsRegistry.allMetrics().size(), 12);
Assert.assertEquals(metricsRegistry.allMetrics().size(), 14);

Assert.assertTrue(metricsRegistry.allMetrics().containsKey(
new YammerMetricName(ControllerMetrics.class, "pinot.controller.onlineMinionInstances")));
Expand Down

0 comments on commit 1ed25c0

Please sign in to comment.