Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Redis memory check failure after link flap and intermittent high CPU usage failure #15732

Merged
merged 5 commits into from
Dec 9, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions tests/platform_tests/link_flap/link_flap_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""
import logging
import random
import time

from tests.common.platform.device_utils import fanout_switch_port_lookup, __get_dut_if_status

Expand Down Expand Up @@ -129,3 +130,25 @@ def check_bgp_routes(dut, start_time_ipv4_route_counts, start_time_ipv6_route_co
incr_ipv4_route_counts = abs(int(float(start_time_ipv4_route_counts)) - int(float(routesv4)))
incr_ipv6_route_counts = abs(int(float(start_time_ipv6_route_counts)) - int(float(routesv6)))
return incr_ipv4_route_counts < MAX_DIFF and incr_ipv6_route_counts < MAX_DIFF


def get_avg_redis_mem_usage(duthost, interval, num_times):
    """
    Return the average Redis memory usage sampled over a period of time.

    Redis memory usage is not a stable value. It fluctuates even when the
    device is in a stable state. 202205 has larger Redis memory usage
    (~5.5M), so a fluctuation of 0.2M is not an issue. With 202405 Redis
    memory usage is optimized (~2.5M), and a 0.2M fluctuation could make the
    test fail if the memory threshold is 5%. Averaging several samples
    smooths that fluctuation out.

    Args:
        duthost: DUT host object
        interval: time interval (seconds) to wait between two queries
        num_times: number of times to query; must be at least 1

    Returns:
        float: arithmetic mean of the sampled ``used_memory_human`` values.
        The shell command only strips a trailing ``M`` suffix, so the result
        is in the "M" unit reported by Redis.

    Raises:
        ValueError: if ``num_times`` is less than 1, or if the command output
            cannot be parsed as a number (for example when Redis reports the
            value with a suffix other than ``M``).
    """
    if num_times < 1:
        # Fail early with a clear message instead of a ZeroDivisionError (or
        # a meaningless -0.0 for negative counts) at the final division.
        raise ValueError("num_times must be at least 1, got {}".format(num_times))

    logger.info("Checking average redis memory usage")
    cmd = r"redis-cli info memory | grep used_memory_human | sed -e 's/.*:\(.*\)M/\1/'"
    total_memory = 0.0
    for attempt in range(num_times):
        # Wait only *between* samples; sleeping after the last query would
        # just delay the caller without contributing another data point.
        if attempt:
            time.sleep(interval)
        raw_value = duthost.shell(cmd)["stdout"]
        try:
            total_memory += float(raw_value)
        except ValueError:
            # Include the offending output so a unit other than "M" (which
            # the sed expression leaves untouched) is easy to diagnose.
            raise ValueError("Unexpected redis memory usage output: {!r}".format(raw_value))
    return total_memory / num_times
24 changes: 11 additions & 13 deletions tests/platform_tests/link_flap/test_cont_link_flap.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from tests.common.helpers.assertions import pytest_assert, pytest_require
from tests.common import port_toggle
from tests.platform_tests.link_flap.link_flap_utils import build_test_candidates,\
check_orch_cpu_utilization, check_bgp_routes
check_orch_cpu_utilization, check_bgp_routes, get_avg_redis_mem_usage
from tests.common.utilities import wait_until
from tests.common.devices.eos import EosHost
from tests.common.devices.sonic import SonicHost
Expand Down Expand Up @@ -77,9 +77,8 @@ def test_cont_link_flap(self, request, duthosts, nbrhosts, enum_rand_one_per_hws
logging.info("Memory Status at start: %s", memory_output)

# Record Redis Memory at start
start_time_redis_memory = duthost.shell(
r"redis-cli info memory | grep used_memory_human | sed -e 's/.*:\(.*\)M/\1/'")["stdout"]
logging.info("Redis Memory: %s M", start_time_redis_memory)
start_time_redis_memory = get_avg_redis_mem_usage(duthost, 5, 5)
logging.info("Redis Memory: %f M", start_time_redis_memory)

# Record ipv4 route counts at start
sumv4, sumv6 = duthost.get_ip_route_summary(skip_kernel_tunnel=True)
Expand Down Expand Up @@ -208,26 +207,25 @@ def test_cont_link_flap(self, request, duthosts, nbrhosts, enum_rand_one_per_hws
logging.info("Orchagent PID {0} CPU Util at end: {1}".format(pid, util))

# Record Redis Memory at end
end_time_redis_memory = duthost.shell(
r"redis-cli info memory | grep used_memory_human | sed -e 's/.*:\(.*\)M/\1/'")["stdout"]
logging.info("Redis Memory at start: %s M", start_time_redis_memory)
logging.info("Redis Memory at end: %s M", end_time_redis_memory)
end_time_redis_memory = get_avg_redis_mem_usage(duthost, 5, 5)
logging.info("Redis Memory at start: %f M", start_time_redis_memory)
logging.info("Redis Memory at end: %f M", end_time_redis_memory)

# Calculate diff in Redis memory
incr_redis_memory = float(end_time_redis_memory) - float(start_time_redis_memory)
logging.info("Redis absolute difference: %d", incr_redis_memory)
incr_redis_memory = end_time_redis_memory - start_time_redis_memory
logging.info("Redis absolute difference: %f", incr_redis_memory)
wumiaont marked this conversation as resolved.
Show resolved Hide resolved

# Check redis memory only if it is increased else default to pass
if incr_redis_memory > 0.0:
percent_incr_redis_memory = (incr_redis_memory / float(start_time_redis_memory)) * 100
percent_incr_redis_memory = (incr_redis_memory / start_time_redis_memory) * 100
logging.info("Redis Memory percentage Increase: %d", percent_incr_redis_memory)
incr_redis_memory_threshold = 10 if tbinfo["topo"]["type"] in ["m0", "mx"] else 5
incr_redis_memory_threshold = 15 if tbinfo["topo"]["type"] in ["m0", "mx"] else 10
wumiaont marked this conversation as resolved.
Show resolved Hide resolved
pytest_assert(percent_incr_redis_memory < incr_redis_memory_threshold,
"Redis Memory Increase more than expected: {}".format(percent_incr_redis_memory))

# Orchagent CPU should consume < orch_cpu_threshold at last.
logging.info("watch orchagent CPU utilization when it goes below %d", orch_cpu_threshold)
pytest_assert(wait_until(45, 2, 0, check_orch_cpu_utilization, duthost, orch_cpu_threshold),
pytest_assert(wait_until(120, 5, 0, check_orch_cpu_utilization, duthost, orch_cpu_threshold),
"Orch CPU utilization {} > orch cpu threshold {} after link flap"
.format(duthost.shell("show processes cpu | grep orchagent | awk '{print $9}'")["stdout"],
orch_cpu_threshold))
25 changes: 12 additions & 13 deletions tests/platform_tests/link_flap/test_link_flap.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
import logging
import pytest

from tests.platform_tests.link_flap.link_flap_utils import check_orch_cpu_utilization, build_test_candidates
from tests.platform_tests.link_flap.link_flap_utils import check_orch_cpu_utilization, build_test_candidates, \
get_avg_redis_mem_usage
from tests.common.platform.device_utils import toggle_one_link
from tests.common.helpers.assertions import pytest_assert, pytest_require
from tests.common.utilities import wait_until
Expand Down Expand Up @@ -35,9 +36,8 @@ def test_link_flap(request, duthosts, rand_one_dut_hostname, tbinfo, fanouthosts
logger.info("Memory Status at start: %s", memory_output)

# Record Redis Memory at start
start_time_redis_memory = duthost.shell(
r"redis-cli info memory | grep used_memory_human | sed -e 's/.*:\(.*\)M/\1/'")["stdout"]
logger.info("Redis Memory: %s M", start_time_redis_memory)
start_time_redis_memory = get_avg_redis_mem_usage(duthost, 5, 5)
logging.info("Redis Memory: %f M", start_time_redis_memory)

# Make Sure Orch CPU < orch_cpu_threshold before starting test.
logger.info("Make Sure orchagent CPU utilization is less that %d before link flap", orch_cpu_threshold)
Expand Down Expand Up @@ -70,26 +70,25 @@ def test_link_flap(request, duthosts, rand_one_dut_hostname, tbinfo, fanouthosts
logger.info("Orchagent CPU Util at end: %s", orch_cpu)

# Record Redis Memory at end
end_time_redis_memory = duthost.shell(
r"redis-cli info memory | grep used_memory_human | sed -e 's/.*:\(.*\)M/\1/'")["stdout"]
logger.info("Redis Memory at start: %s M", start_time_redis_memory)
logger.info("Redis Memory at end: %s M", end_time_redis_memory)
end_time_redis_memory = get_avg_redis_mem_usage(duthost, 5, 5)
logging.info("Redis Memory at start: %f M", start_time_redis_memory)
logging.info("Redis Memory at end: %f M", end_time_redis_memory)

# Calculate diff in Redis memory
incr_redis_memory = float(end_time_redis_memory) - float(start_time_redis_memory)
logger.info("Redis absolute difference: %d", incr_redis_memory)
incr_redis_memory = end_time_redis_memory - start_time_redis_memory
logging.info("Redis absolute difference: %f", incr_redis_memory)

# Check redis memory only if it is increased else default to pass
if incr_redis_memory > 0.0:
percent_incr_redis_memory = (incr_redis_memory / float(start_time_redis_memory)) * 100
percent_incr_redis_memory = (incr_redis_memory / start_time_redis_memory) * 100
logger.info("Redis Memory percentage Increase: %d", percent_incr_redis_memory)
incr_redis_memory_threshold = 10 if tbinfo["topo"]["type"] in ["m0", "mx"] else 5
incr_redis_memory_threshold = 15 if tbinfo["topo"]["type"] in ["m0", "mx"] else 10
wumiaont marked this conversation as resolved.
Show resolved Hide resolved
pytest_assert(percent_incr_redis_memory < incr_redis_memory_threshold,
"Redis Memory Increase more than expected: {}".format(percent_incr_redis_memory))

# Orchagent CPU should consume < orch_cpu_threshold at last.
logger.info("watch orchagent CPU utilization when it goes below %d", orch_cpu_threshold)
pytest_assert(wait_until(45, 2, 0, check_orch_cpu_utilization, duthost, orch_cpu_threshold),
pytest_assert(wait_until(120, 5, 0, check_orch_cpu_utilization, duthost, orch_cpu_threshold),
"Orch CPU utilization {} > orch cpu threshold {} before link flap"
.format(duthost.shell("show processes cpu | grep orchagent | awk '{print $9}'")["stdout"],
orch_cpu_threshold))
Loading