Skip to content

Commit

Permalink
NIFI-12221: This closes #7876. Be more lenient about which Disconnect…
Browse files Browse the repository at this point in the history
…ion Codes we allow a node to be reconnected to a cluster vs. when we notify the node to disconnect again. Also updated the timeout for OffloadIT because it occasionally times ou out while running properly.

Signed-off-by: Joseph Witt <joewitt@apache.org>
  • Loading branch information
markap14 authored and joewitt committed Oct 13, 2023
1 parent 0eabbcd commit f4ae292
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -254,20 +254,22 @@ private void processHeartbeat(final NodeHeartbeat heartbeat) {
case LACK_OF_HEARTBEAT:
case UNABLE_TO_COMMUNICATE:
case NOT_YET_CONNECTED:
case STARTUP_FAILURE: {
case MISMATCHED_FLOWS:
case MISSING_BUNDLE:
case NODE_SHUTDOWN:
case FAILED_TO_SERVICE_REQUEST:
case STARTUP_FAILURE:
clusterCoordinator.reportEvent(nodeId, Severity.INFO, "Received heartbeat from node previously "
+ "disconnected due to " + disconnectionCode + ". Issuing reconnection request.");

clusterCoordinator.requestNodeConnect(nodeId, null);
break;
}
default: {
default:
// disconnected nodes should not heartbeat, so we need to issue a disconnection request.
logger.info("Ignoring received heartbeat from disconnected node " + nodeId + ". Issuing disconnection request.");
logger.info("Ignoring received heartbeat from disconnected node {}. Node was disconnected due to [{}]. Issuing disconnection request.", nodeId, disconnectionCode);
clusterCoordinator.requestNodeDisconnect(nodeId, disconnectionCode, connectionStatus.getReason());
removeHeartbeat(nodeId);
break;
}
}

return;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,13 @@
import org.apache.nifi.web.api.entity.ConnectionEntity;
import org.apache.nifi.web.api.entity.ProcessorEntity;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.Timeout;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Collections;
import java.util.concurrent.TimeUnit;

public class OffloadIT extends NiFiSystemIT {
private static final Logger logger = LoggerFactory.getLogger(OffloadIT.class);
Expand All @@ -40,6 +42,9 @@ public NiFiInstanceFactory getInstanceFactory() {
}

@Test
@Timeout(value = 10, unit = TimeUnit.MINUTES)
// Test to ensure that node can be offloaded, reconnected, offloaded several times. This test typically takes only about 1-2 minutes
// but can occasionally take 5-6 minutes on Github Actions so we set the timeout to 10 minutes to allow for these occasions
public void testOffload() throws InterruptedException, IOException, NiFiClientException {
for (int i=0; i < 5; i++) {
logger.info("Running iteration {}", i);
Expand Down

0 comments on commit f4ae292

Please sign in to comment.