From f4ae292a457638d3226fb0491e5186fa52ae8518 Mon Sep 17 00:00:00 2001 From: Mark Payne Date: Thu, 12 Oct 2023 16:43:21 -0400 Subject: [PATCH] NIFI-12221: This closes #7876. Be more lenient about which Disconnection Codes we allow a node to be reconnected to a cluster vs. when we notify the node to disconnect again. Also updated the timeout for OffloadIT because it occasionally times out while running properly. Signed-off-by: Joseph Witt --- .../heartbeat/AbstractHeartbeatMonitor.java | 12 +++++++----- .../nifi/tests/system/clustering/OffloadIT.java | 5 +++++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-cluster/src/main/java/org/apache/nifi/cluster/coordination/heartbeat/AbstractHeartbeatMonitor.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-cluster/src/main/java/org/apache/nifi/cluster/coordination/heartbeat/AbstractHeartbeatMonitor.java index c8a77ed2f044..549b0308040a 100644 --- a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-cluster/src/main/java/org/apache/nifi/cluster/coordination/heartbeat/AbstractHeartbeatMonitor.java +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-cluster/src/main/java/org/apache/nifi/cluster/coordination/heartbeat/AbstractHeartbeatMonitor.java @@ -254,20 +254,22 @@ private void processHeartbeat(final NodeHeartbeat heartbeat) { case LACK_OF_HEARTBEAT: case UNABLE_TO_COMMUNICATE: case NOT_YET_CONNECTED: - case STARTUP_FAILURE: { + case MISMATCHED_FLOWS: + case MISSING_BUNDLE: + case NODE_SHUTDOWN: + case FAILED_TO_SERVICE_REQUEST: + case STARTUP_FAILURE: clusterCoordinator.reportEvent(nodeId, Severity.INFO, "Received heartbeat from node previously " + "disconnected due to " + disconnectionCode + ". 
Issuing reconnection request."); clusterCoordinator.requestNodeConnect(nodeId, null); break; - } - default: { + default: // disconnected nodes should not heartbeat, so we need to issue a disconnection request. - logger.info("Ignoring received heartbeat from disconnected node " + nodeId + ". Issuing disconnection request."); + logger.info("Ignoring received heartbeat from disconnected node {}. Node was disconnected due to [{}]. Issuing disconnection request.", nodeId, disconnectionCode); clusterCoordinator.requestNodeDisconnect(nodeId, disconnectionCode, connectionStatus.getReason()); removeHeartbeat(nodeId); break; - } } return; diff --git a/nifi-system-tests/nifi-system-test-suite/src/test/java/org/apache/nifi/tests/system/clustering/OffloadIT.java b/nifi-system-tests/nifi-system-test-suite/src/test/java/org/apache/nifi/tests/system/clustering/OffloadIT.java index 818bd0cb23e7..f2e2266d9dbb 100644 --- a/nifi-system-tests/nifi-system-test-suite/src/test/java/org/apache/nifi/tests/system/clustering/OffloadIT.java +++ b/nifi-system-tests/nifi-system-test-suite/src/test/java/org/apache/nifi/tests/system/clustering/OffloadIT.java @@ -25,11 +25,13 @@ import org.apache.nifi.web.api.entity.ConnectionEntity; import org.apache.nifi.web.api.entity.ProcessorEntity; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.Timeout; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.Collections; +import java.util.concurrent.TimeUnit; public class OffloadIT extends NiFiSystemIT { private static final Logger logger = LoggerFactory.getLogger(OffloadIT.class); @@ -40,6 +42,9 @@ public NiFiInstanceFactory getInstanceFactory() { } @Test + @Timeout(value = 10, unit = TimeUnit.MINUTES) + // Test to ensure that node can be offloaded, reconnected, offloaded several times. 
This test typically takes only about 1-2 minutes + // but can occasionally take 5-6 minutes on Github Actions so we set the timeout to 10 minutes to allow for these occasions public void testOffload() throws InterruptedException, IOException, NiFiClientException { for (int i=0; i < 5; i++) { logger.info("Running iteration {}", i);