From c53e2c0aa5787662551bd808a8b449c737999624 Mon Sep 17 00:00:00 2001 From: Nathan Wolfe Date: Fri, 20 Dec 2024 18:43:16 +0000 Subject: [PATCH] Increase tolerance for 'ip-proto' hash_key on t2 topos 'ip-proto' hash_key only has 8-bits of entropy which won't result in good distributions on systems with large ECMP groups --- .../test/files/ptftests/py3/hash_test.py | 26 ++++++++++++------- tests/fib/test_fib.py | 15 +++++++---- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/ansible/roles/test/files/ptftests/py3/hash_test.py b/ansible/roles/test/files/ptftests/py3/hash_test.py index a4d5946ded9..90a9be9ca62 100644 --- a/ansible/roles/test/files/ptftests/py3/hash_test.py +++ b/ansible/roles/test/files/ptftests/py3/hash_test.py @@ -41,6 +41,7 @@ class HashTest(BaseTest): # Class variables # --------------------------------------------------------------------- DEFAULT_BALANCING_RANGE = 0.25 + RELAXED_BALANCING_RANGE = 0.80 BALANCING_TEST_TIMES = 250 DEFAULT_SWITCH_TYPE = 'voq' @@ -109,6 +110,8 @@ def setUp(self): self.ipver = self.test_params.get('ipver', 'ipv4') self.is_active_active_dualtor = self.test_params.get("is_active_active_dualtor", False) + self.topo_name = self.test_params.get('topo_name', '') + # set the base mac here to make it persistent across calls of check_ip_route self.base_mac = self.dataplane.get_mac( *random.choice(list(self.dataplane.ports.keys()))) @@ -223,7 +226,7 @@ def check_hash(self, hash_key): hash_key, hit_count_map)) for next_hop in next_hops: - self.check_balancing(next_hop.get_next_hop(), hit_count_map, src_port) + self.check_balancing(next_hop.get_next_hop(), hit_count_map, src_port, hash_key) def check_ip_route(self, hash_key, src_port, dst_ip, dst_port_lists): if ip_network(six.text_type(dst_ip)).version == 4: @@ -457,7 +460,7 @@ def check_ipv6_route(self, hash_key, src_port, dst_port_lists): format(ip_src, ip_dst, src_port, rcvd_port, exp_src_mac, actual_src_mac)) return (rcvd_port, rcvd_pkt) - def check_within_expected_range(self, actual, expected): + def check_within_expected_range(self, actual, expected, hash_key): ''' @summary: Check if the actual number is within the accepted range of the expected number @param actual : acutal number of recieved packets @@ -465,7 +468,12 @@ def check_within_expected_range(self, actual, expected): @return (percentage, bool) ''' percentage = (actual - expected) / float(expected) - return (percentage, abs(percentage) <= self.balancing_range) + balancing_range = self.balancing_range + if hash_key == 'ip-proto' and self.topo_name == 't2': + # ip-protocol only has 8-bits of entropy which results in poor hashing distributions on topologies with + # a large number of ecmp paths so relax the hashing requirements + balancing_range = self.RELAXED_BALANCING_RANGE + return (percentage, abs(percentage) <= balancing_range) def check_same_asic(self, src_port, exp_port_list): updated_exp_port_list = list() @@ -494,7 +502,7 @@ def check_same_asic(self, src_port, exp_port_list): exp_port_list = updated_exp_port_list return exp_port_list - def check_balancing(self, dest_port_list, port_hit_cnt, src_port): + def check_balancing(self, dest_port_list, port_hit_cnt, src_port, hash_key): ''' @summary: Check if the traffic is balanced across the ECMP groups and the LAG members @param dest_port_list : a list of ECMP entries and in each ECMP entry a list of ports @@ -543,7 +551,7 @@ def check_balancing(self, dest_port_list, port_hit_cnt, src_port): for member in ecmp_entry: total_entry_hit_cnt += port_hit_cnt.get(member, 0) (p, r) = self.check_within_expected_range( - total_entry_hit_cnt, float(total_hit_cnt)/len(asic_member)) + total_entry_hit_cnt, float(total_hit_cnt)/len(asic_member), hash_key) logging.info("%-10s \t %-10s \t %10d \t %10d \t %10s" % ("ECMP", str(ecmp_entry), total_hit_cnt//len(asic_member), total_entry_hit_cnt, str(round(p, 4)*100) + '%')) @@ -552,7 +560,7 @@ def check_balancing(self, dest_port_list, port_hit_cnt, src_port): continue for member in ecmp_entry: (p, r) = self.check_within_expected_range(port_hit_cnt.get( - member, 0), float(total_entry_hit_cnt)/len(ecmp_entry)) + member, 0), float(total_entry_hit_cnt)/len(ecmp_entry), hash_key) logging.info("%-10s \t %-10s \t %10d \t %10d \t %10s" % ("LAG", str(member), total_entry_hit_cnt//len(ecmp_entry), port_hit_cnt.get(member, 0), str(round(p, 4)*100) + '%')) @@ -846,7 +854,7 @@ def check_hash(self, hash_key): hash_key, hit_count_map)) for next_hop in next_hops: - self.check_balancing(next_hop.get_next_hop(), hit_count_map, src_port) + self.check_balancing(next_hop.get_next_hop(), hit_count_map, src_port, hash_key) def runTest(self): """ @@ -1133,7 +1141,7 @@ def check_hash(self, hash_key): hash_key, hit_count_map)) for next_hop in next_hops: - self.check_balancing(next_hop.get_next_hop(), hit_count_map, src_port) + self.check_balancing(next_hop.get_next_hop(), hit_count_map, src_port, hash_key) def runTest(self): """ @@ -1450,7 +1458,7 @@ def check_hash(self, hash_key): hash_key, hit_count_map)) for next_hop in next_hops: - self.check_balancing(next_hop.get_next_hop(), hit_count_map, src_port) + self.check_balancing(next_hop.get_next_hop(), hit_count_map, src_port, hash_key) def runTest(self): """ diff --git a/tests/fib/test_fib.py b/tests/fib/test_fib.py index 0a4e5fb4bf1..cafd0f5f746 100644 --- a/tests/fib/test_fib.py +++ b/tests/fib/test_fib.py @@ -350,7 +350,8 @@ def test_hash(add_default_route_to_dut, duthosts, fib_info_files_per_function, s "ignore_ttl": ignore_ttl, "single_fib_for_duts": single_fib_for_duts, "switch_type": switch_type, - "is_active_active_dualtor": is_active_active_dualtor + "is_active_active_dualtor": is_active_active_dualtor, + "topo_name": updated_tbinfo['topo']['name'] }, log_file=log_file, qlen=PTF_QLEN, @@ -392,7 +393,8 @@ def test_ipinip_hash(add_default_route_to_dut, duthost, duthosts, fib_info_files "vlan_ids": VLANIDS, "ignore_ttl": ignore_ttl, "single_fib_for_duts": single_fib_for_duts, - "ipver": ipver + "ipver": ipver, + "topo_name": tbinfo['topo']['name'] }, log_file=log_file, qlen=PTF_QLEN, @@ -433,7 +435,8 @@ def test_ipinip_hash_negative(add_default_route_to_dut, duthosts, fib_info_files "vlan_ids": VLANIDS, "ignore_ttl": ignore_ttl, "single_fib_for_duts": single_fib_for_duts, - "ipver": ipver + "ipver": ipver, + "topo_name": tbinfo['topo']['name'] }, log_file=log_file, qlen=PTF_QLEN, @@ -480,7 +483,8 @@ def test_vxlan_hash(add_default_route_to_dut, duthost, duthosts, fib_info_files_ "vlan_ids": VLANIDS, "ignore_ttl": ignore_ttl, "single_fib_for_duts": single_fib_for_duts, - "ipver": vxlan_ipver + "ipver": vxlan_ipver, + "topo_name": tbinfo['topo']['name'] }, log_file=log_file, qlen=PTF_QLEN, @@ -530,7 +534,8 @@ def test_nvgre_hash(add_default_route_to_dut, duthost, duthosts, fib_info_files_ "vlan_ids": VLANIDS, "ignore_ttl": ignore_ttl, "single_fib_for_duts": single_fib_for_duts, - "ipver": nvgre_ipver + "ipver": nvgre_ipver, + "topo_name": tbinfo['topo']['name'] }, log_file=log_file, qlen=PTF_QLEN,