Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Write test log for deployment failures #3596

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion lisa/runners/lisa_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
deep_update_dict,
is_unittest,
)
from lisa.util.logger import add_handler, remove_handler
from lisa.util.parallel import Task, check_cancelled
from lisa.variable import VariableEntry

Expand Down Expand Up @@ -616,12 +617,15 @@ def _attach_failed_environment_to_result(
# so deployment failure can be tracked.
environment.platform = self.platform
result.environment = environment
result.handle_exception(exception=exception, log=self._log, phase="deployment")

result.handle_exception(exception=exception, log=result.log, phase="deployment")
self._log.info(
f"'{environment.name}' attached to test case "
f"'{result.runtime_data.metadata.full_name}({result.id_})': "
f"{exception}"
)
result.save_nodes_serial_console_logs()
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@squirrelsc , this doesn't work as environment.nodes is empty list

Shall I make _save_console_log_and_check_panic as a public method of Platform and then invoke platform.save_console_log(path=result.log_path)

def _save_console_log_and_check_panic(

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_attach_failed_environment_to_result is too late to attach log. What I mean is to attach log when this method is called in deploy/prepare methods. It can attach to the environment log, not the node log. env should be created already, and it includes all node logs.

result.close_log_file_handler()
# release environment reference to optimize memory.
result.environment = None

Expand Down
94 changes: 55 additions & 39 deletions lisa/testsuite.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from __future__ import annotations

import copy
import logging
import traceback
from dataclasses import dataclass, field
from functools import wraps
Expand Down Expand Up @@ -83,6 +84,7 @@ class TestResult:
check_results: Optional[search_space.ResultReason] = None
information: Dict[str, Any] = field(default_factory=dict)
log_file: str = ""
_log_file_handler: Optional[logging.FileHandler] = None
stacktrace: Optional[str] = None
retried_times: int = 0

Expand All @@ -91,6 +93,10 @@ def __post_init__(self, *args: Any, **kwargs: Any) -> None:
self._timer: Timer

self._environment_information: Dict[str, Any] = {}
parent_log = get_logger("suite", self.runtime_data.metadata.suite.name)
self.log = get_logger("case", self.name, parent=parent_log)
self.case_log_path = self.__create_case_log_path()
self._log_file_handler = self.get_log_file_handler()

@property
def is_queued(self) -> bool:
Expand Down Expand Up @@ -265,6 +271,32 @@ def check_platform(

return result

def get_log_file_handler(self) -> logging.FileHandler:
if not self._log_file_handler:
case_log_file = self.case_log_path / f"{self.case_log_path.name}.log"
self._log_file_handler = create_file_handler(case_log_file, self.log)
self.log_file = case_log_file.relative_to(
constants.RUN_LOCAL_LOG_PATH
).as_posix()
return self._log_file_handler

def close_log_file_handler(self) -> None:
if self._log_file_handler:
remove_handler(self._log_file_handler, self.log)
self._log_file_handler.close()
self._log_file_handler = None

def save_nodes_serial_console_logs(self) -> None:
if not self.environment:
raise LisaException("Environment is not set for saving environment logs")
nodes = self.environment.nodes
for node in nodes.list():
if hasattr(node, "features") and node.features.is_supported(SerialConsole):
serial_console = node.features[SerialConsole]
log_dir = self.case_log_path / Path(f"serial_console_{node.name}")
log_dir.mkdir(parents=True)
serial_console.get_console_log(log_dir, force_run=True)

def _send_result_message(self, stacktrace: Optional[str] = None) -> None:
self.elapsed = self.get_elapsed()

Expand Down Expand Up @@ -316,6 +348,23 @@ def _send_result_message(self, stacktrace: Optional[str] = None) -> None:

notifier.notify(result_message)

@retry(exceptions=FileExistsError, tries=30, delay=0.1)
def __create_case_log_path(self) -> Path:
case_name = self.runtime_data.name
while True:
path = (
constants.RUN_LOCAL_LOG_PATH
/ "tests"
/ f"{get_datetime_path()}-{case_name}"
)
if not path.exists():
break
sleep(0.1)
# avoid to create folder for UT
if not is_unittest():
path.mkdir(parents=True)
return path


@dataclass
class TestCaseRequirement:
Expand Down Expand Up @@ -636,18 +685,15 @@ def start(
raise LisaException("after_suite is not supported. Please use after_case")
# replace to case's logger temporarily
for case_result in case_results:
case_name = case_result.runtime_data.name

case_result.environment = environment
case_log = get_logger("case", case_name, parent=self.__log)
case_log = case_result.log
case_log_handler = case_result.get_log_file_handler()
add_handler(case_log_handler, environment.log)
case_log_path = case_result.case_log_path

case_log_path = self.__create_case_log_path(case_name)
case_part_path = self.__get_test_part_path(case_log_path)
case_working_path = self.__get_case_working_path(case_part_path)
case_unique_name = case_log_path.name
case_log_file = case_log_path / f"{case_log_path.name}.log"
case_log_handler = create_file_handler(case_log_file, case_log)
add_handler(case_log_handler, environment.log)

case_kwargs = test_kwargs.copy()
case_kwargs.update({"case_name": case_unique_name})
Expand All @@ -662,9 +708,6 @@ def start(
)
is_continue: bool = is_suite_continue
total_timer = create_timer()
case_result.log_file = case_log_file.relative_to(
constants.RUN_LOCAL_LOG_PATH
).as_posix()
case_result.set_status(TestStatus.RUNNING, "")

# check for positive value just to be clearer
Expand Down Expand Up @@ -708,7 +751,7 @@ def start(

if case_result.status == TestStatus.FAILED:
try:
self.__save_serial_log(environment, case_log_path)
case_result.save_nodes_serial_console_logs()
except Exception as e:
suite_log.debug(
f"exception thrown during serial console log read. [{e}]"
Expand All @@ -717,10 +760,8 @@ def start(
case_log.info(
f"result: {case_result.status.name}, " f"elapsed: {total_timer}"
)
remove_handler(case_log_handler, case_log)
remove_handler(case_log_handler, environment.log)
if case_log_handler:
case_log_handler.close()
case_result.close_log_file_handler()

if self._should_stop:
suite_log.info("received stop message, stop run")
Expand All @@ -729,31 +770,6 @@ def start(
def stop(self) -> None:
self._should_stop = True

def __save_serial_log(self, environment: Environment, log_path: Path) -> None:
nodes = environment.nodes
for node in nodes.list():
if hasattr(node, "features") and node.features.is_supported(SerialConsole):
serial_console = node.features[SerialConsole]
log_dir = log_path / Path(f"serial_console_{node.name}")
log_dir.mkdir(parents=True)
serial_console.get_console_log(log_dir, force_run=True)

@retry(exceptions=FileExistsError, tries=30, delay=0.1)
def __create_case_log_path(self, case_name: str) -> Path:
while True:
path = (
constants.RUN_LOCAL_LOG_PATH
/ "tests"
/ f"{get_datetime_path()}-{case_name}"
)
if not path.exists():
break
sleep(0.1)
# avoid to create folder for UT
if not is_unittest():
path.mkdir(parents=True)
return path

def __get_test_part_path(self, log_path: Path) -> Path:
if is_unittest():
return Path()
Expand Down
Loading