From 61a6ea4adcc43ca6d381fbda5a487ff9f4d53291 Mon Sep 17 00:00:00 2001 From: Chenyang Wang <49756587+cyw233@users.noreply.github.com> Date: Fri, 17 Jan 2025 09:54:54 +1100 Subject: [PATCH] feat: add wait_for_bgp option to reboot (#16542) Description of PR The reboot process for chassis will need longer time to wait for the BGP to be established. For example, the acl/test_acl.py can be flaky if we are not waiting BGP long enough. Therefore, we are introducing wait_for_bgp option to the reboot() function. Summary: Fixes # (issue) Microsoft ADO 30862178 Type of change Bug fix Testbed and Framework(new/improvement) New Test case Skipped for non-supported platforms Test case improvement Back port request 202012 202205 202305 202311 202405 202411 Approach What is the motivation for this PR? We found that tests like acl/test_acl.py can be flaky if we are not waiting for BGP long enough on chassis after reboot. Therefore, we want to mimic what we have in config_reload() to also introduce the wait_for_bgp option to the reboot() function. How did you do it? How did you verify/test it? I ran the updated acl test code and can make sure it's working well. co-authorized by: jianquanye@microsoft.com --- tests/acl/test_acl.py | 6 +----- tests/common/reboot.py | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/tests/acl/test_acl.py b/tests/acl/test_acl.py index 39e1e6783f3..f69e28e9c21 100644 --- a/tests/acl/test_acl.py +++ b/tests/acl/test_acl.py @@ -1314,8 +1314,7 @@ def post_setup_hook(self, dut, localhost, populate_vlan_arp_entries, tbinfo, con """ dut.command("config save -y") - up_bgp_neighbors = dut.get_bgp_neighbors_per_asic("established") - reboot(dut, localhost, safe_reboot=True, check_intf_up_ports=True) + reboot(dut, localhost, safe_reboot=True, check_intf_up_ports=True, wait_for_bgp=True) # We need some additional delay on e1031 if dut.facts["platform"] == "x86_64-cel_e1031-r0": time.sleep(240) @@ -1331,9 +1330,6 @@ def post_setup_hook(self, dut, localhost, populate_vlan_arp_entries, tbinfo, con assert result, "Not all transceivers are detected or interfaces are up in {} seconds".format( MAX_WAIT_TIME_FOR_INTERFACES) - pytest_assert( - wait_until(300, 10, 0, dut.check_bgp_session_state_all_asics, up_bgp_neighbors, "established"), - "All BGP sessions are not up after reboot, no point in continuing the test") # Delay 10 seconds for route convergence time.sleep(10) diff --git a/tests/common/reboot.py b/tests/common/reboot.py index aabc9bf4280..04561a53174 100644 --- a/tests/common/reboot.py +++ b/tests/common/reboot.py @@ -223,7 +223,7 @@ def execute_reboot_helper(): def reboot(duthost, localhost, reboot_type='cold', delay=10, timeout=0, wait=0, wait_for_ssh=True, wait_warmboot_finalizer=False, warmboot_finalizer_timeout=0, reboot_helper=None, reboot_kwargs=None, plt_reboot_ctrl_overwrite=True, - safe_reboot=False, check_intf_up_ports=False): + safe_reboot=False, check_intf_up_ports=False, wait_for_bgp=False): """ reboots DUT :param duthost: DUT host object @@ -233,11 +233,14 @@ def reboot(duthost, localhost, reboot_type='cold', delay=10, :param timeout: timeout for waiting ssh port state change :param wait: time to wait for DUT to initialize :param wait_for_ssh: Wait for SSH startup - :param wait_warmboot_finalizer=True: Wait for WARMBOOT_FINALIZER done + :param wait_warmboot_finalizer: Wait for WARMBOOT_FINALIZER done + :param warmboot_finalizer_timeout: Timeout for waiting WARMBOOT_FINALIZER :param reboot_helper: helper function to execute the power toggling :param reboot_kwargs: arguments to pass to the reboot_helper + :param plt_reboot_ctrl_overwrite: arguments to overwrite plt reboot control :param safe_reboot: arguments to wait DUT ready after reboot :param check_intf_up_ports: arguments to check interface after reboot + :param wait_for_bgp: arguments to wait for BGP after reboot :return: """ pool = ThreadPool() @@ -325,6 +328,13 @@ def reboot(duthost, localhost, reboot_type='cold', delay=10, assert float(dut_uptime.strftime("%s")) > float(dut_datetime.strftime("%s")), "Device {} did not reboot". \ format(hostname) + if wait_for_bgp: + bgp_neighbors = duthost.get_bgp_neighbors_per_asic(state="all") + pytest_assert( + wait_until(wait + 300, 10, 0, duthost.check_bgp_session_state_all_asics, bgp_neighbors), + "Not all bgp sessions are established after reboot", + ) + def positive_uptime(duthost, dut_datetime): dut_uptime = duthost.get_up_time()