From f64dce7db6c5460623b77ca2fbc7d296cb49736a Mon Sep 17 00:00:00 2001
From: Zhijian Li
Date: Thu, 23 Jan 2025 20:14:10 +0800
Subject: [PATCH] [sanity-check] Add IPv4 MGMT reachability check (#16645)

Summary:
Recently, we observed some DUT lost IPv4 MGMT reachability. However, since it's still IPv6 MGMT reachable, the issue is hard to be noticed.
To catch such issue, this PR introduces IPv4 MGMT sanity check. If the device has IPv4 MGMT IP assigned but it's unreachable, then sanity_check will fail the testcase.

What is the motivation for this PR?
Recently, we observed some DUT lost IPv4 MGMT reachability. However, since it's still IPv6 MGMT reachable, the issue is hard to be noticed.

How did you do it?
To catch such issue, this PR introduces IPv4 MGMT sanity check. If the device has IPv4 MGMT IP assigned but it's unreachable, then sanity_check will fail the testcase.

How did you verify/test it?
Verified by run test_bgp_fact with sanity_check.
---
 tests/common/plugins/sanity_check/checks.py | 43 +++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/tests/common/plugins/sanity_check/checks.py b/tests/common/plugins/sanity_check/checks.py
index 83299323bc9..5b69fc0f9ae 100644
--- a/tests/common/plugins/sanity_check/checks.py
+++ b/tests/common/plugins/sanity_check/checks.py
@@ -35,6 +35,7 @@
     'check_monit',
     'check_secureboot',
     'check_neighbor_macsec_empty',
+    'check_ipv4_mgmt',
     'check_ipv6_mgmt',
     'check_mux_simulator',
     'check_orchagent_usage',
@@ -1036,6 +1037,48 @@ def _check(*args, **kwargs):
     return _check
 
 
+# check ipv4 mgmt reachability
+@pytest.fixture(scope="module")
+def check_ipv4_mgmt(duthosts, localhost):
+    """Return a sanity-check callable that pings each DUT's IPv4 mgmt address from localhost.
+
+    A DUT is reported as failed when the ping command raises. DUTs whose
+    mgmt_ip is unset or empty are skipped and reported as not failed.
+    """
+    def _check(*args, **kwargs):
+        init_result = {"failed": False, "check_item": "ipv4_mgmt"}
+        result = parallel_run(_check_ipv4_mgmt_to_dut, args, kwargs, duthosts, timeout=30, init_result=init_result)
+        return list(result.values())
+
+    def _check_ipv4_mgmt_to_dut(*args, **kwargs):
+        """Ping kwargs['node'] over IPv4 and store the outcome in kwargs['results'][hostname]."""
+        dut = kwargs['node']
+        results = kwargs['results']
+
+        logger.info("Checking ipv4 mgmt interface reachability on %s...", dut.hostname)
+        check_result = {"failed": False, "check_item": "ipv4_mgmt", "host": dut.hostname}
+
+        if not dut.mgmt_ip:
+            logger.info("%s doesn't have ipv4 mgmt configured. Skip the ipv4 mgmt reachability check.", dut.hostname)
+            results[dut.hostname] = check_result
+            return
+
+        # Most testbeds reply within 10 ms; a 2-second reply timeout reduces the impact of occasional delay.
+        try:
+            shell_result = localhost.shell("ping -c 2 -W 2 " + dut.mgmt_ip)
+            logger.info("ping output: %s", shell_result["stdout"])
+        except RunAnsibleModuleFail as e:
+            check_result["failed"] = True
+            logger.error("Failed to ping ipv4 mgmt interface on %s, exception: %s", dut.hostname, repr(e))
+        except Exception as e:
+            check_result["failed"] = True
+            logger.error("Exception while checking ipv4_mgmt reachability for %s: %s", dut.hostname, repr(e))
+        finally:
+            logger.info("Done checking ipv4 management reachability on %s", dut.hostname)
+            results[dut.hostname] = check_result
+    return _check
+
+
 # check ipv6 neighbor reachability
 @pytest.fixture(scope="module")
 def check_ipv6_mgmt(duthosts, localhost):