Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Snappi] Adding Ungraceful Restart script for BGP Outbound cases #16359

Closed
wants to merge 20 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
9359465
Adding ungraceful restart test
selldinesh Jan 6, 2025
bf71095
reverting to v6_prefix import from common and removing hostname check
selldinesh Jan 6, 2025
6d17cc8
fixing import error, with email correction
selldinesh Jan 7, 2025
00a7aba
Support qos test in KVM testbed and skip fanout config and traffic te…
xwjiang-ms Jan 7, 2025
161bc21
Add a timeout for acquiring dpkg lock. (#16328)
yutongzhang-microsoft Jan 7, 2025
d6a2423
chore: add back port option 202411 (#16367)
cyw233 Jan 7, 2025
25c97fc
Updated JR2 tuning values for T2 systems (#15773)
arista-nwolfe Jan 7, 2025
7c74a0d
sonic-mgmt: Fix namespace issues for qos tests on T2 single ASIC (#15…
patrickmacarthur Jan 7, 2025
533c871
Create /etc/tacacs folder on PTF when it's missing (#16352)
liuh-80 Jan 7, 2025
58d69f6
Remove golden config file and revert config when load golden config f…
liuh-80 Jan 7, 2025
e7c576e
Check AN enabled port is in connection graph (#16368)
bingwang-ms Jan 7, 2025
08f2545
Add some tests to PR checker and skip some tests in PR test (#16376)
xwjiang-ms Jan 8, 2025
c8a7568
refactor: optimize reliable tsa test (#16366)
cyw233 Jan 8, 2025
8256c2a
[sanity_check][bgp] Enhance sanity check recover for bgp default rout…
yaqiangz Jan 8, 2025
9dbeb1e
Temporarily skipping test_arp_update_for_failed_standby_neighbor for …
mramezani95 Jan 8, 2025
b1d3af7
Correcting client arguments to dynamically_compensate_leakout (#16169)
arista-nwolfe Jan 8, 2025
34432df
Fix the test_nhop_group nexthop map for ld DUTs (#16166)
veronica-arista Jan 8, 2025
55aff2e
Temporarily skipped a failing test to let the sonic-swss submodule be…
mramezani95 Jan 9, 2025
fcc0ce3
[T0/T1/T2]: New ECMP Hashing test upon member flap trigger (#15199)
deepak-singhal0408 Jan 9, 2025
a4c553b
adding route range
selldinesh Jan 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 164 additions & 2 deletions tests/snappi_tests/multidut/bgp/files/bgp_outbound_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import json
import time
import math
import os
import pexpect
from ixnetwork_restpy import SessionAssistant
from ixnetwork_restpy.testplatform.testplatform import TestPlatform
Expand All @@ -20,8 +21,7 @@
snappi_portchannel_ipv4_list, snappi_portchannel_ipv6_list, AS_PATHS, \
BGP_TYPE, t1_side_interconnected_port, t2_side_interconnected_port, router_ids, \
snappi_community_for_t1, snappi_community_for_t1_drop, snappi_community_for_t2, num_regionalhubs, \
SNAPPI_TRIGGER, DUT_TRIGGER, fanout_presence, t2_uplink_fanout_info # noqa: F401
from tests.common.snappi_tests.variables import v6_prefix_length
SNAPPI_TRIGGER, DUT_TRIGGER, fanout_presence, t2_uplink_fanout_info, v6_prefix_length # noqa: F401
selldinesh marked this conversation as resolved.
Show resolved Hide resolved

logger = logging.getLogger(__name__)
total_routes = 0
Expand Down Expand Up @@ -1870,3 +1870,165 @@ def get_convergence_for_blackout(duthosts,
total_routes, mean(avg_pld)], [test_name+' (Link Up)', iteration,
traffic_type, portchannel_count, total_routes, mean(avg_pld2)]], headers=columns,
tablefmt="psql"))


def send_kernel_panic_command(duthost, creds):
    """
    Trigger a kernel panic on the DUT by writing 'c' to /proc/sysrq-trigger over SSH.

    The output of the command is not read.

    Args:
        duthost (pytest fixture): DUT whose mgmt_ip is used for the SSH connection
        creds (dict): credentials; 'sonicadmin_user' and 'sonicadmin_password' are read from it
    """
    username = creds.get('sonicadmin_user')
    password = creds.get('sonicadmin_password')
    command = 'echo c | sudo tee /proc/sysrq-trigger'
    grace_period = 5  # seconds the remote command is given to run before the session is torn down
    ssh = paramiko.SSHClient()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(duthost.mgmt_ip, port=22, username=username, password=password)
        _, stdout, _ = ssh.exec_command(command)
        # exec_command only starts the command. Keep the session open for a short, bounded time so that
        # closing it cannot cut the command off; presumably a DUT that panics never reports an exit
        # status, hence the deadline instead of a blocking wait.
        deadline = time.monotonic() + grace_period
        while time.monotonic() < deadline and not stdout.channel.exit_status_ready():
            time.sleep(0.1)
    finally:
        # Release the SSH transport explicitly instead of leaving it to garbage collection
        ssh.close()


def ping_device(duthost, timeout):
    """
    Poll the DUT management IP with single pings until one of them succeeds.

    A failed ping is followed by a one second pause before the next attempt. The time spent
    polling is checked with pytest_assert after every failed ping.

    Args:
        duthost (pytest fixture): DUT whose mgmt_ip is pinged
        timeout (int): Maximum number of seconds to keep polling
    """
    ping_cmd = f"ping -c 1 {duthost.mgmt_ip}"
    # Monotonic clock: the deadline must not move if the wall clock is adjusted while polling
    start_time = time.monotonic()
    # os.system returns 0 only when the ping command itself succeeded
    while os.system(ping_cmd) != 0:
        logger.info('Polling for {} to come UP.....'.format(duthost.hostname))
        elapsed_time = time.monotonic() - start_time
        pytest_assert(elapsed_time < timeout,
                      "Unable to ping {} within {} seconds".format(duthost.hostname, timeout))
        time.sleep(1)
    logger.info('PASS:PING SUCCESSFUL for {}'.format(duthost.hostname))


def _pkt_loss_duration_ms(flow_stats, flow_count, rx_frame_rate):
    """
    Return (delta_frames, packet loss duration) for the first flow_count entries of flow_stats.

    delta_frames is the sum of frames_tx - frames_rx over those flows. The duration is
    1000 * delta_frames / rx_frame_rate, which is milliseconds assuming rx_frame_rate is in
    frames per second (TODO confirm the unit reported by the port stats).
    """
    delta_frames = sum(flow_stats[idx].frames_tx - flow_stats[idx].frames_rx for idx in range(flow_count))
    return delta_frames, 1000 * (delta_frames / rx_frame_rate)


def get_convergence_for_ungraceful_restart(duthosts,
                                           api,
                                           snappi_bgp_config,
                                           traffic_type,
                                           iteration,
                                           device_name,
                                           route_range,
                                           test_name,
                                           creds,
                                           is_supervisor):
    """
    Measure packet loss duration when a DUT goes through an ungraceful restart (kernel panic).

    Every iteration starts protocols and traffic, verifies that there is no loss, triggers a
    kernel panic on device_name, checks that traffic has converged and records the packet loss
    duration. It then clears the stats, waits for every DUT to answer ping, records the packet
    loss duration again, issues TSB on device_name and stops traffic and protocols. The averages
    of both measurements are logged as a table at the end.

    Args:
        duthosts (pytest fixture): duthosts fixture
        api (pytest fixture): Snappi API
        snappi_bgp_config: __snappi_bgp_config
        traffic_type : IPv4 / IPv6 traffic type
        iteration : Number of iterations
        device_name: Device in which restart needs to be performed
        route_range: V4 and v6 routes
        test_name: Name of the test
        creds: Credentials passed on to the kernel panic and TSB helpers
        is_supervisor: When True, TSB is issued with exec_tsa_tsb_cmd_on_linecard instead of
                       duthost.command
    """
    api.set_config(snappi_bgp_config)
    avg_pld = []
    avg_pld2 = []
    flow_count = len(traffic_type)

    test_platform = TestPlatform(api._address)
    test_platform.Authenticate(api._username, api._password)
    session = SessionAssistant(IpAddress=api._address, UserName=api._username,
                               SessionId=test_platform.Sessions.find()[-1].Id, Password=api._password)
    ixnetwork = session.Ixnetwork
    for index, topology in enumerate(ixnetwork.Topology.find()):
        try:
            topology.DeviceGroup.find()[0].RouterData.find().RouterId.Single(router_ids[index])
            logger.info('Setting Router id {} for {}'.format(router_ids[index],
                                                             topology.DeviceGroup.find()[0].Name))
        except Exception:
            # Best effort: a device group for which the router id cannot be set is skipped
            logger.info('Skipping Router id for {}, Since bgp is not configured'.
                        format(topology.DeviceGroup.find()[0].Name))
    logger.info('\n')
    logger.info('Testing with Route Range: {}'.format(route_range))
    logger.info('\n')
    for iter_idx in range(0, iteration):
        logger.info(
            '|--------------------------- Iteration : {} -----------------------|'.format(iter_idx+1))
        logger.info("Starting all protocols ...")
        ps = api.protocol_state()
        ps.state = ps.START
        api.set_protocol_state(ps)
        wait(SNAPPI_TRIGGER, "For Protocols To start")
        logger.info('Verifying protocol sessions state')
        protocols_summary = StatViewAssistant(ixnetwork, 'Protocols Summary')
        protocols_summary.CheckCondition('Sessions Down', StatViewAssistant.EQUAL, 0)
        logger.info('Starting Traffic')
        ts = api.transmit_state()
        ts.state = ts.START
        api.set_transmit_state(ts)
        wait(SNAPPI_TRIGGER, "For Traffic To start")

        flow_stats = get_flow_stats(api)
        port_stats = get_port_stats(api)
        logger.info('\n')
        logger.info('Rx Snappi Port Name : Rx Frame Rate')
        for port_stat in port_stats:
            if 'Snappi_Tx_Port' not in port_stat.name:
                logger.info('{} : {}'.format(port_stat.name, port_stat.frames_rx_rate))
                pytest_assert(port_stat.frames_rx_rate > 0, '{} is not receiving any packet'.format(port_stat.name))
        logger.info('\n')
        for flow_idx in range(flow_count):
            logger.info('{} Loss %: {}'.format(flow_stats[flow_idx].name, int(flow_stats[flow_idx].loss)))
            pytest_assert(int(flow_stats[flow_idx].loss) == 0, f'Loss Observed in {flow_stats[flow_idx].name}')

        # Getting rx rate on uplink ports
        sum_t2_rx_frame_rate = sum(int(port_stat.frames_rx_rate) for port_stat in port_stats
                                   if 'Snappi_Uplink' in port_stat.name)
        # The rate is the divisor of the loss duration; fail with a clear message before crashing the DUT
        pytest_assert(sum_t2_rx_frame_rate > 0, 'No Rx frame rate observed on the Snappi_Uplink ports')

        logger.info('Issuing Ungraceful restart')
        for duthost in duthosts:
            if duthost.hostname == device_name:
                send_kernel_panic_command(duthost, creds)
        wait(DUT_TRIGGER, "Issued ungraceful restart on {}".format(device_name))

        # Convergence has to be judged on stats taken after the restart, not on the pre-restart snapshot
        flow_stats = get_flow_stats(api)
        for flow_idx in range(flow_count):
            tx_rate = int(flow_stats[flow_idx].frames_tx_rate)
            rx_rate = int(flow_stats[flow_idx].frames_rx_rate)
            pytest_assert(tx_rate > 0, '{} is not transmitting any packet'.format(flow_stats[flow_idx].name))
            pytest_assert(float((tx_rate - rx_rate) / tx_rate) < 0.005,
                          'Traffic has not converged after issuing kernel panic')
        logger.info('Traffic has converged after issuing kernel panic command in {}'.format(device_name))
        delta_frames, pkt_loss_duration = _pkt_loss_duration_ms(flow_stats, flow_count, sum_t2_rx_frame_rate)
        logger.info('Delta Frames : {}'.format(delta_frames))
        logger.info('PACKET LOSS DURATION After Device is DOWN (ms): {}'.format(pkt_loss_duration))
        avg_pld.append(pkt_loss_duration)

        logger.info('Clearing Stats')
        ixnetwork.ClearStats()
        for duthost in duthosts:
            ping_device(duthost, timeout=180)
        wait(DUT_TRIGGER, "Contaniers on the DUT to stabalize after restart")

        flow_stats = get_flow_stats(api)
        delta_frames, pkt_loss_duration = _pkt_loss_duration_ms(flow_stats, flow_count, sum_t2_rx_frame_rate)
        logger.info('Delta Frames : {}'.format(delta_frames))
        logger.info('PACKET LOSS DURATION After device is UP (ms): {}'.format(pkt_loss_duration))
        avg_pld2.append(pkt_loss_duration)

        for duthost in duthosts:
            if duthost.hostname == device_name:
                if is_supervisor is True:
                    exec_tsa_tsb_cmd_on_linecard(duthost, creds, "sudo TSB")
                else:
                    duthost.command('sudo TSB')
        logger.info('Stopping Traffic')
        ts = api.transmit_state()
        ts.state = ts.STOP
        api.set_transmit_state(ts)

        logger.info("Stopping all protocols ...")
        ps = api.protocol_state()
        ps.state = ps.STOP
        api.set_protocol_state(ps)
        logger.info('\n')

    columns = ['Test Name', 'Iterations', 'Traffic Type', 'Uplink ECMP Paths', 'Route Count',
               'Avg Calculated Packet Loss Duration (ms)']
    rows = [[test_name + ' (DOWN)', iteration, traffic_type, portchannel_count, total_routes, mean(avg_pld)],
            [test_name + ' (UP)', iteration, traffic_type, portchannel_count, total_routes, mean(avg_pld2)]]
    logger.info("\n%s" % tabulate(rows, headers=columns, tablefmt="psql"))
Loading
Loading