Skip to content

Commit

Permalink
ECN probabilistic marking with PFC induced congestion (#15909)
Browse files Browse the repository at this point in the history
Description of PR
Summary:
Fixes # (issue)

Approach
What is the motivation for this PR?
Add IXIA based test case to demonstrate ECN marking. The test will inject XOFF frame(s) to congest a queue that results in ECN marking.

How did you do it?
Using Snappi infra

How did you verify/test it?
setup continous traffic at 99.98% line rate with 1350B packets
Test sends a single XOFF frame to the egress port to create congestion in the queue3.
A pfc frame is sent with quanta set to values between 500 and 65000, one at a time -
read the base ECN counter using serviceability CLI
inject XOFF-
read the ECN counters
At the end of the iteration, once the data is collected, ensure that for each case, with increase in quanta, the marked count at index (n+1) is >= count observed with collection at index (n). Further if the marked count at index (n) > 0, verify that once marking happens, the marking probability increases with an increase in queue occupancy caused by increase in quanta

co-authorized by: jianquanye@microsoft.com
  • Loading branch information
sreejithsreekumaran authored and mssonicbld committed Jan 3, 2025
1 parent a9a373d commit 015580e
Show file tree
Hide file tree
Showing 4 changed files with 282 additions and 7 deletions.
14 changes: 11 additions & 3 deletions tests/common/snappi_tests/common_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ def get_wred_profiles(host_ans, asic_value=None):
return None


def config_wred(host_ans, kmin, kmax, pmax, profile=None, asic_value=None):
def config_wred(host_ans, kmin, kmax, pmax, kdrop=None, profile=None, asic_value=None):
"""
Config a WRED/ECN profile of a SONiC switch
Args:
Expand All @@ -456,10 +456,11 @@ def config_wred(host_ans, kmin, kmax, pmax, profile=None, asic_value=None):
asic_type = str(host_ans.facts["asic_type"])
if not isinstance(kmin, int) or \
not isinstance(kmax, int) or \
not isinstance(pmax, int):
not isinstance(pmax, int) or \
(kdrop is not None and not isinstance(kdrop, int)):
return False

if kmin < 0 or kmax < 0 or pmax < 0 or pmax > 100 or kmin > kmax:
if kmin < 0 or kmax < 0 or pmax < 0 or pmax > 100 or kmin > kmax or (kdrop and (kdrop < 0 or kdrop > 100)):
return False
profiles = get_wred_profiles(host_ans, asic_value)
""" Cannot find any WRED/ECN profiles """
Expand All @@ -478,6 +479,7 @@ def config_wred(host_ans, kmin, kmax, pmax, profile=None, asic_value=None):

kmax_arg = '-{}max'.format(color[0])
kmin_arg = '-{}min'.format(color[0])
kdrop_arg = '-{}drop'.format(color[0])

for p in profiles:
""" This is not the profile to configure """
Expand All @@ -486,6 +488,7 @@ def config_wred(host_ans, kmin, kmax, pmax, profile=None, asic_value=None):

kmin_old = int(profiles[p]['{}_min_threshold'.format(color)])
kmax_old = int(profiles[p]['{}_max_threshold'.format(color)])
kdrop_old = int(profiles[p]['{}_drop_probability'.format(color)])

if kmin_old > kmax_old:
return False
Expand All @@ -494,10 +497,12 @@ def config_wred(host_ans, kmin, kmax, pmax, profile=None, asic_value=None):

kmax_cmd = ' '.join(['sudo ecnconfig -p {}', kmax_arg, '{}'])
kmin_cmd = ' '.join(['sudo ecnconfig -p {}', kmin_arg, '{}'])
kdrop_cmd = ' '.join(['sudo ecnconfig -p {}', kdrop_arg, '{}'])

if asic_value is not None:
kmax_cmd = ' '.join(['sudo ip netns exec', asic_value, 'ecnconfig -p {}', kmax_arg, '{}'])
kmin_cmd = ' '.join(['sudo ip netns exec', asic_value, 'ecnconfig -p {}', kmin_arg, '{}'])
kdrop_cmd = ' '.join(['sudo ip netns exec', asic_value, 'ecnconfig -p {}', kdrop_arg, '{}'])
if asic_type == 'broadcom':
disable_packet_aging(host_ans, asic_value)

Expand All @@ -508,6 +513,9 @@ def config_wred(host_ans, kmin, kmax, pmax, profile=None, asic_value=None):
host_ans.shell(kmin_cmd.format(p, kmin))
host_ans.shell(kmax_cmd.format(p, kmax))

if kdrop and kdrop != kdrop_old:
host_ans.shell(kdrop_cmd.format(p, kdrop))

return True


Expand Down
9 changes: 8 additions & 1 deletion tests/common/snappi_tests/traffic_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,10 @@ def generate_pause_flows(testbed_config,
pause_time = []
for x in range(8):
if x in pause_prio_list:
pause_time.append(int('ffff', 16))
if "flow_quanta" in pause_flow_config:
pause_time.append(pause_flow_config["flow_quanta"])
else:
pause_time.append(int('ffff', 16))
else:
pause_time.append(int('0000', 16))

Expand Down Expand Up @@ -286,6 +289,10 @@ def generate_pause_flows(testbed_config,
pause_flow.duration.fixed_seconds.seconds = pause_flow_config["flow_dur_sec"]
elif pause_flow_config["flow_traffic_type"] == traffic_flow_mode.CONTINUOUS:
pause_flow.duration.choice = pause_flow.duration.CONTINUOUS
elif pause_flow_config["flow_traffic_type"] == traffic_flow_mode.FIXED_PACKETS:
pause_flow.duration.fixed_packets.packets = pause_flow_config["flow_pkt_count"]
pause_flow.duration.fixed_packets.delay.nanoseconds = int(sec_to_nanosec
(pause_flow_config["flow_delay_sec"]))

pause_flow.metrics.enable = True
pause_flow.metrics.loss = True
Expand Down
191 changes: 188 additions & 3 deletions tests/snappi_tests/multidut/ecn/files/multidut_helper.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import logging
import time
import csv
import os
from tests.common.helpers.assertions import pytest_assert
from tests.common.fixtures.conn_graph_facts import conn_graph_facts, fanout_graph_facts # noqa: F401
from tests.common.snappi_tests.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port, \
Expand All @@ -23,16 +25,20 @@
DATA_FLOW_NAME = 'Data Flow'


def get_npu_voq_queue_counters(duthost, interface, priority):
def get_npu_voq_queue_counters(duthost, interface, priority, clear=False):

asic_namespace_string = ""
if duthost.is_multi_asic:
asic = duthost.get_port_asic_instance(interface)
asic_namespace_string = " -n " + asic.namespace

clear_cmd = ""
if clear:
clear_cmd = " -c"

full_line = "".join(duthost.shell(
"show platform npu voq queue_counters -t {} -i {} -d{}".
format(priority, interface, asic_namespace_string))['stdout_lines'])
"show platform npu voq queue_counters -t {} -i {} -d{}{}".
format(priority, interface, asic_namespace_string, clear_cmd))['stdout_lines'])
dict_output = json.loads(full_line)
for entry, value in zip(dict_output['stats_name'], dict_output['counters']):
dict_output[entry] = value
Expand Down Expand Up @@ -631,3 +637,182 @@ def run_ecn_marking_test(api,
]

verify_ecn_counters_for_flow_percent(ecn_counters, test_flow_percent)


def run_ecn_marking_with_pfc_quanta_variance(
api,
testbed_config,
port_config_list,
dut_port,
test_prio_list,
prio_dscp_map,
test_ecn_config,
log_dir=None,
snappi_extra_params=None):

pytest_assert(testbed_config is not None, 'Fail to get L2/3 testbed config')
pytest_assert(len(test_prio_list) >= 1, 'Must have atleast two lossless priorities')

DATA_FLOW_PKT_SIZE = 1350
DATA_FLOW_DURATION_SEC = 5
DATA_FLOW_DELAY_SEC = 0

if snappi_extra_params is None:
snappi_extra_params = SnappiTestParams()

# Traffic flow:
# tx_port (TGEN) --- ingress DUT --- egress DUT --- rx_port (TGEN)

rx_port = snappi_extra_params.multi_dut_params.multi_dut_ports[0]
egress_duthost = rx_port['duthost']

duthost = egress_duthost

port_id = 0
# Generate base traffic config
base_flow_config = setup_base_traffic_config(testbed_config=testbed_config,
port_config_list=port_config_list,
port_id=port_id)

snappi_extra_params.base_flow_config = base_flow_config

# Set default traffic flow configs if not set
if snappi_extra_params.traffic_flow_config.data_flow_config is None:
snappi_extra_params.traffic_flow_config.data_flow_config = {
"flow_name": DATA_FLOW_NAME,
"flow_dur_sec": DATA_FLOW_DURATION_SEC,
"flow_rate_percent": 50,
"flow_rate_pps": None,
"flow_rate_bps": None,
"flow_pkt_size": DATA_FLOW_PKT_SIZE,
"flow_pkt_count": None,
"flow_delay_sec": DATA_FLOW_DELAY_SEC,
"flow_traffic_type": traffic_flow_mode.FIXED_DURATION
}

generate_test_flows(testbed_config=testbed_config,
test_flow_prio_list=[test_prio_list[0]],
prio_dscp_map=prio_dscp_map,
snappi_extra_params=snappi_extra_params)

PAUSE_FLOW_NAME = "Pause flow"

# 10 PFC frames at 2 frames/sec.
# The pauses caused by each PFC frame do not overlap.

PAUSE_FLOW_PKT_COUNT = 10
PAUSE_FLOW_DELAY_SEC = 1

if snappi_extra_params.traffic_flow_config.pause_flow_config is None:
snappi_extra_params.traffic_flow_config.pause_flow_config = {
"flow_name": PAUSE_FLOW_NAME,
"flow_dur_sec": None,
"flow_rate_percent": None,
"flow_rate_pps": 2,
"flow_rate_bps": None,
"flow_pkt_size": 64,
"flow_pkt_count": PAUSE_FLOW_PKT_COUNT,
"flow_delay_sec": PAUSE_FLOW_DELAY_SEC,
"flow_traffic_type": traffic_flow_mode.FIXED_PACKETS
}

asic_namespace = None
if duthost.is_multi_asic:
asic = duthost.get_port_asic_instance(dut_port)
asic_namespace = asic.namespace
gmin, gmax, gdrop = test_ecn_config

# Configure WRED/ECN thresholds
logger.info("Configuring WRED and ECN thresholds gmin {}MB gmax {}MB gdrop {}%".format(gmin, gmax, gdrop))

config_result = config_wred(host_ans=duthost,
kmin=gmin * 1024 * 1024,
kmax=gmax * 1024 * 1024,
pmax=0,
kdrop=gdrop,
asic_value=asic_namespace)

pytest_assert(config_result is True, 'Failed to configure WRED/ECN at the DUT')

start_quanta = 500
end_quanta = 65000
n = 15 # Number of quanta values

step = (end_quanta - start_quanta) // (n - 1)
# Generate all but the last value
pause_quanta_list = [start_quanta + i * step for i in range(n - 1)]
# The last value is exactly `end_quanta`
pause_quanta_list.append(end_quanta)

logging.info("PFC quanta list: {}".format(pause_quanta_list))

_ = get_npu_voq_queue_counters(duthost, dut_port, test_prio_list[0], True)
results = []
for quanta in pause_quanta_list:
snappi_extra_params.traffic_flow_config.pause_flow_config["flow_quanta"] = quanta

# Remove any existing pause flow
for index, flow in enumerate(testbed_config.flows):
if PAUSE_FLOW_NAME in flow.name:
testbed_config.flows.remove(index)

# Generate pause flow config
generate_pause_flows(testbed_config=testbed_config,
pause_prio_list=[test_prio_list[0]],
global_pause=False,
snappi_extra_params=snappi_extra_params)

flows = testbed_config.flows

all_flow_names = [flow.name for flow in flows]
data_flow_names = [flow.name for flow in flows if PAUSE_FLOW_NAME not in flow.name]

""" Run traffic """
_tgen_flow_stats, _switch_flow_stats, _in_flight_flow_metrics = run_traffic(
duthost,
api=api,
config=testbed_config,
data_flow_names=data_flow_names,
all_flow_names=all_flow_names,
exp_dur_sec=DATA_FLOW_DURATION_SEC +
DATA_FLOW_DELAY_SEC,
snappi_extra_params=snappi_extra_params)

ctr_3 = get_npu_voq_queue_counters(duthost, dut_port, test_prio_list[0])
stats_only = {key: ctr_3[key] for key in ctr_3['stats_name']}
results.append((quanta, stats_only))

file_name = "xoff_quanta_variance_results_{}_{}_{}.csv".format(gmin, gmax, gdrop)
if log_dir:
file_name = os.path.join(log_dir, file_name)

with open(file_name, 'w', newline='') as csvfile:
if results:
first_ctr = results[0][1]
fieldnames = ['quanta'] + list(first_ctr.keys()) + ['AVERAGE_ECN_MARKING']

writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writeheader()

prev_ecn_marked = 0
for quanta, ctr in results:
row = {'quanta': quanta}
row.update(ctr)
current_ecn_marked = ctr.get('SAI_QUEUE_STAT_WRED_ECN_MARKED_PACKETS', 0)
average_ecn_marking = round((current_ecn_marked - prev_ecn_marked) / PAUSE_FLOW_PKT_COUNT)
row['AVERAGE_ECN_MARKING'] = average_ecn_marking
prev_ecn_marked = current_ecn_marked
writer.writerow(row)

for i in range(len(results) - 1):
ecn_i = results[i][1]['SAI_QUEUE_STAT_WRED_ECN_MARKED_PACKETS']
ecn_i_plus_1 = results[i + 1][1]['SAI_QUEUE_STAT_WRED_ECN_MARKED_PACKETS']

if ecn_i > 0:
pytest_assert(ecn_i_plus_1 > ecn_i,
"ecn marked {} at quanta {} should be less than ecn marked {} at quanta {}".
format(ecn_i, results[i][0], ecn_i_plus_1, results[i+1][0]))
else:
pytest_assert(ecn_i_plus_1 >= ecn_i,
"ecn marked {} at quanta {} should not be greater than ecn marked {} at quanta {}".
format(ecn_i, results[i][0], ecn_i_plus_1, results[i+1][0]))
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import pytest
import logging
import os
from tabulate import tabulate # noqa F401
from tests.common.helpers.assertions import pytest_assert # noqa: F401
from tests.common.fixtures.conn_graph_facts import conn_graph_facts, fanout_graph_facts, \
fanout_graph_facts_multidut # noqa: F401
from tests.common.snappi_tests.snappi_fixtures import snappi_api_serv_ip, snappi_api_serv_port, \
snappi_api, snappi_dut_base_config, get_snappi_ports, get_snappi_ports_for_rdma, cleanup_config, \
is_snappi_multidut, get_snappi_ports_multi_dut, get_snappi_ports_single_dut # noqa: F401
from tests.common.snappi_tests.qos_fixtures import prio_dscp_map, \
lossless_prio_list, disable_pfcwd # noqa F401
from tests.snappi_tests.files.helper import multidut_port_info, setup_ports_and_dut, enable_debug_shell # noqa: F401
from tests.snappi_tests.multidut.ecn.files.multidut_helper import run_ecn_marking_with_pfc_quanta_variance
from tests.common.snappi_tests.snappi_test_params import SnappiTestParams
logger = logging.getLogger(__name__)
pytestmark = [pytest.mark.topology('multidut-tgen', 'tgen')]


@pytest.fixture(autouse=True)
def number_of_tx_rx_ports():
yield (1, 1)


# tuple of -gmin in MB, -gmax in MB and -gdrop in percentage
test_ecn_config = [(1, 4, 5), (1, 4, 10), (2, 4, 5), (2, 4, 10)]


@pytest.mark.parametrize("test_ecn_config", test_ecn_config)
def test_ecn_marking_with_pfc_quanta_variance(
request,
snappi_api, # noqa: F811
conn_graph_facts, # noqa: F811
fanout_graph_facts_multidut, # noqa: F811
duthosts,
lossless_prio_list, # noqa: F811
tbinfo, # noqa: F811
test_ecn_config,
prio_dscp_map, # noqa: F811
setup_ports_and_dut): # noqa: F811

"""
Verify ECN marking on lossless prio with varying XOFF quanta
Args:
request (pytest fixture): pytest request object
snappi_api (pytest fixture): SNAPPI session
conn_graph_facts (pytest fixture): connection graph
fanout_graph_facts (pytest fixture): fanout graph
duthosts (pytest fixture): list of DUTs
lossless_prio_list (pytest fixture): list of all the lossless priorities
prio_dscp_map (pytest fixture): priority vs. DSCP map (key = priority).
tbinfo (pytest fixture): fixture provides information about testbed
test_flow_percent: Percentage of flow rate used for the two lossless prio
Returns:
N/A
"""

testbed_config, port_config_list, snappi_ports = setup_ports_and_dut
log_file_path = request.config.getoption("--log-file", default=None)

logger.info("Snappi Ports : {}".format(snappi_ports))
snappi_extra_params = SnappiTestParams()
snappi_extra_params.multi_dut_params.multi_dut_ports = snappi_ports

run_ecn_marking_with_pfc_quanta_variance(
api=snappi_api,
testbed_config=testbed_config,
port_config_list=port_config_list,
dut_port=snappi_ports[0]['peer_port'],
test_prio_list=lossless_prio_list,
prio_dscp_map=prio_dscp_map,
log_dir=os.path.dirname(log_file_path) if log_file_path else None,
test_ecn_config=test_ecn_config,
snappi_extra_params=snappi_extra_params)

0 comments on commit 015580e

Please sign in to comment.