Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Write test log for deployment failures #3596

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lisa/platform_.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ def prepare_environment(self, environment: Environment) -> Environment:
def deploy_environment(self, environment: Environment) -> None:
platform_runbook = cast(schema.Platform, self.runbook)

log = get_logger(f"deploy[{environment.name}]", parent=self._log)
log = get_logger("deploy", parent=environment.log)
log.info(f"deploying environment: {environment.name}")
timer = create_timer()
environment.platform = self
Expand Down
3 changes: 3 additions & 0 deletions lisa/runners/lisa_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -616,12 +616,15 @@ def _attach_failed_environment_to_result(
# so deployment failure can be tracked.
environment.platform = self.platform
result.environment = environment

result.subscribe_log(self._log)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it need to subscribe runner here? The below line result.handle_exception should have enough log. The runner is called in multi threads, and may bring logs from other cases.

Copy link
Collaborator Author

@adityagesh adityagesh Jan 15, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That seems to be sufficient for failed deployment.
In case of successful deployment, the deployment logs wouldn't show up in test log. That should be fine, right?

Copy link
Member

@squirrelsc squirrelsc Jan 16, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

subscribe here is too late, it won't capture the log generated when deploying. In another word, it includes nothing except the exception is logged out again.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you recommending to subscribe and unsubscribe in _deploy_environment_task itself rather than _attach_failed_environment_to_result ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, subscribe in deploy and prepare, not in the attach failed.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am trying to find which test result to use in _prepare_environment

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can manually create an exception in prepare, so it can be tested easier.

result.handle_exception(exception=exception, log=self._log, phase="deployment")
self._log.info(
f"'{environment.name}' attached to test case "
f"'{result.runtime_data.metadata.full_name}({result.id_})': "
f"{exception}"
)
result.unsubscribe_log(self._log)
# release environment reference to optimize memory.
result.environment = None

Expand Down
106 changes: 66 additions & 40 deletions lisa/testsuite.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from __future__ import annotations

import copy
import logging
import traceback
from dataclasses import dataclass, field
from functools import wraps
Expand Down Expand Up @@ -85,12 +86,16 @@ class TestResult:
log_file: str = ""
stacktrace: Optional[str] = None
retried_times: int = 0
_log_file_handler: Optional[logging.FileHandler] = None
_case_log_path: Optional[Path] = None

def __post_init__(self, *args: Any, **kwargs: Any) -> None:
self._send_result_message()
self._timer: Timer

self._environment_information: Dict[str, Any] = {}
# parent_log = get_logger("suite", self.runtime_data.metadata.suite.name)
self.log = get_logger("case", self.name)

@property
def is_queued(self) -> bool:
Expand Down Expand Up @@ -265,6 +270,44 @@ def check_platform(

return result

def subscribe_log(self, log: Logger) -> None:
add_handler(self._get_log_file_handler(), log)

def unsubscribe_log(self, log: Logger) -> None:
remove_handler(self._get_log_file_handler(), log)

def get_case_log_path(self) -> Path:
if not self._case_log_path:
self._case_log_path = self.__create_case_log_path()
return self._case_log_path

def _get_log_file_handler(self) -> logging.FileHandler:
if not self._log_file_handler:
case_log_path = self.get_case_log_path()
case_log_file = case_log_path / f"{case_log_path.name}.log"
self._log_file_handler = create_file_handler(case_log_file, self.log)
self.log_file = case_log_file.relative_to(
constants.RUN_LOCAL_LOG_PATH
).as_posix()
return self._log_file_handler

def close_log_file_handler(self) -> None:
if self._log_file_handler:
remove_handler(self._log_file_handler, self.log)
self._log_file_handler.close()
self._log_file_handler = None

def capture_serial_console_log(self) -> None:
if not self.environment:
raise LisaException("Environment is not set for saving environment logs")
nodes = self.environment.nodes
for node in nodes.list():
if hasattr(node, "features") and node.features.is_supported(SerialConsole):
serial_console = node.features[SerialConsole]
log_dir = self.get_case_log_path() / Path(f"serial_console_{node.name}")
log_dir.mkdir(parents=True)
serial_console.get_console_log(log_dir, force_run=True)

def _send_result_message(self, stacktrace: Optional[str] = None) -> None:
self.elapsed = self.get_elapsed()

Expand Down Expand Up @@ -316,6 +359,23 @@ def _send_result_message(self, stacktrace: Optional[str] = None) -> None:

notifier.notify(result_message)

@retry(exceptions=FileExistsError, tries=30, delay=0.1)
def __create_case_log_path(self) -> Path:
case_name = self.runtime_data.name
while True:
path = (
constants.RUN_LOCAL_LOG_PATH
/ "tests"
/ f"{get_datetime_path()}-{case_name}"
)
if not path.exists():
break
sleep(0.1)
# avoid to create folder for UT
if not is_unittest():
path.mkdir(parents=True)
return path


@dataclass
class TestCaseRequirement:
Expand Down Expand Up @@ -636,18 +696,14 @@ def start(
raise LisaException("after_suite is not supported. Please use after_case")
# replace to case's logger temporarily
for case_result in case_results:
case_name = case_result.runtime_data.name

case_result.environment = environment
case_log = get_logger("case", case_name, parent=self.__log)
case_log = case_result.log
case_result.subscribe_log(environment.log)
case_log_path = case_result.get_case_log_path()

case_log_path = self.__create_case_log_path(case_name)
case_part_path = self.__get_test_part_path(case_log_path)
case_working_path = self.__get_case_working_path(case_part_path)
case_unique_name = case_log_path.name
case_log_file = case_log_path / f"{case_log_path.name}.log"
case_log_handler = create_file_handler(case_log_file, case_log)
add_handler(case_log_handler, environment.log)

case_kwargs = test_kwargs.copy()
case_kwargs.update({"case_name": case_unique_name})
Expand All @@ -662,9 +718,6 @@ def start(
)
is_continue: bool = is_suite_continue
total_timer = create_timer()
case_result.log_file = case_log_file.relative_to(
constants.RUN_LOCAL_LOG_PATH
).as_posix()
case_result.set_status(TestStatus.RUNNING, "")

# check for positive value just to be clearer
Expand Down Expand Up @@ -708,7 +761,7 @@ def start(

if case_result.status == TestStatus.FAILED:
try:
self.__save_serial_log(environment, case_log_path)
case_result.capture_serial_console_log()
except Exception as e:
suite_log.debug(
f"exception thrown during serial console log read. [{e}]"
Expand All @@ -717,10 +770,8 @@ def start(
case_log.info(
f"result: {case_result.status.name}, " f"elapsed: {total_timer}"
)
remove_handler(case_log_handler, case_log)
remove_handler(case_log_handler, environment.log)
if case_log_handler:
case_log_handler.close()
case_result.unsubscribe_log(environment.log)
case_result.close_log_file_handler()

if self._should_stop:
suite_log.info("received stop message, stop run")
Expand All @@ -729,31 +780,6 @@ def start(
def stop(self) -> None:
self._should_stop = True

def __save_serial_log(self, environment: Environment, log_path: Path) -> None:
nodes = environment.nodes
for node in nodes.list():
if hasattr(node, "features") and node.features.is_supported(SerialConsole):
serial_console = node.features[SerialConsole]
log_dir = log_path / Path(f"serial_console_{node.name}")
log_dir.mkdir(parents=True)
serial_console.get_console_log(log_dir, force_run=True)

@retry(exceptions=FileExistsError, tries=30, delay=0.1)
def __create_case_log_path(self, case_name: str) -> Path:
while True:
path = (
constants.RUN_LOCAL_LOG_PATH
/ "tests"
/ f"{get_datetime_path()}-{case_name}"
)
if not path.exists():
break
sleep(0.1)
# avoid to create folder for UT
if not is_unittest():
path.mkdir(parents=True)
return path

def __get_test_part_path(self, log_path: Path) -> Path:
if is_unittest():
return Path()
Expand Down
Loading