Skip to content

Commit 92b4344

Browse files
committed
Merge branch 'development' into alex_sku
2 parents 68d6894 + 4e49aca commit 92b4344

2 files changed

Lines changed: 76 additions & 30 deletions

File tree

nodescraper/plugins/inband/dmesg/dmesg_analyzer.py

Lines changed: 44 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -535,7 +535,32 @@ def _norm(s: str) -> str:
535535
return True
536536
return False
537537

538-
def resolve_priority(
538+
def update_error_regex_priorities(
    self,
    error_regexes: list[ErrorRegex],
    priority_override_rules: list[dict],
) -> list[ErrorRegex]:
    """Return the given ErrorRegex objects with priority override rules applied.

    Args:
        error_regexes (list[ErrorRegex]): ErrorRegex objects whose priorities should be re-evaluated
        priority_override_rules (list[dict]): Rules handed to ``_resolve_priority`` to determine each
            new priority. May be ``None`` or empty, in which case nothing is updated.

    Returns:
        list[ErrorRegex]: ``error_regexes`` itself when there are no rules; otherwise a new list
            holding a copy of each ErrorRegex whose ``event_priority`` is the resolved priority.
    """
    # No rules (None or an empty list) means there is nothing to override, so
    # skip the per-regex copies and hand the input back untouched.
    if not priority_override_rules:
        return error_regexes

    # model_copy builds a new object per regex, so the caller's originals keep
    # their priorities even when a rule matches.
    return [
        regex_obj.model_copy(
            update={
                "event_priority": self._resolve_priority(regex_obj, priority_override_rules)
            }
        )
        for regex_obj in error_regexes
    ]
562+
563+
def _resolve_priority(
539564
self,
540565
regex_obj: ErrorRegex,
541566
priority_override_rules: list[dict],
@@ -621,14 +646,9 @@ def analyze_data(
621646
args = DmesgAnalyzerArgs()
622647

623648
final_error_regex = self._convert_and_extend_error_regex(args.error_regex, self.ERROR_REGEX)
624-
625-
if args.priority_override_rules:
626-
updated_regex = []
627-
for regex_obj in final_error_regex:
628-
new_priority = self.resolve_priority(regex_obj, args.priority_override_rules)
629-
regex_obj = regex_obj.model_copy(update={"event_priority": new_priority})
630-
updated_regex.append(regex_obj)
631-
final_error_regex = updated_regex
649+
final_error_regex = self.update_error_regex_priorities(
650+
final_error_regex, args.priority_override_rules
651+
) # updates the priorities of the ErrorRegex objects using the given rules. makes no changes if no rules are provided.
632652

633653
if args.analysis_range_start or args.analysis_range_end:
634654
self.logger.info(
@@ -662,19 +682,24 @@ def analyze_data(
662682
self.result.events += known_err_events
663683

664684
if args.check_unknown_dmesg_errors:
685+
unknown_dmesg_error_regexes = [
686+
ErrorRegex(
687+
regex=re.compile(
688+
r"kern :(?:err|crit|alert|emerg)\s+: \d{4}-\d+-\d+T\d+:\d+:\d+,\d+[+-]\d+:\d+ (.*)"
689+
),
690+
message="Unknown dmesg error",
691+
event_category=EventCategory.UNKNOWN,
692+
event_priority=EventPriority.WARNING,
693+
)
694+
]
695+
unknown_dmesg_error_regexes = self.update_error_regex_priorities(
696+
unknown_dmesg_error_regexes, args.priority_override_rules
697+
) # updates the priorities of the ErrorRegex objects using the given rules. makes no changes if no rules are provided.
698+
665699
err_events = self.check_all_regexes(
666700
content=dmesg_content,
667701
source="dmesg",
668-
error_regex=[
669-
ErrorRegex(
670-
regex=re.compile(
671-
r"kern :(?:err|crit|alert|emerg)\s+: \d{4}-\d+-\d+T\d+:\d+:\d+,\d+[+-]\d+:\d+ (.*)"
672-
),
673-
message="Unknown dmesg error",
674-
event_category=EventCategory.UNKNOWN,
675-
event_priority=EventPriority.WARNING,
676-
)
677-
],
702+
error_regex=unknown_dmesg_error_regexes,
678703
num_timestamps=args.num_timestamps,
679704
interval_to_collapse_event=args.interval_to_collapse_event,
680705
)

test/unit/plugin/test_dmesg_analyzer.py

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -720,7 +720,7 @@ def test_resolve_priority_no_match(system_info):
720720
event_category=EventCategory.RAS,
721721
)
722722
rules = [{"event_category": "SW_DRIVER", "new_priority": "WARNING"}]
723-
assert analyzer.resolve_priority(regex_obj, rules) == EventPriority.ERROR
723+
assert analyzer._resolve_priority(regex_obj, rules) == EventPriority.ERROR
724724

725725

726726
def test_resolve_priority_match_by_category(system_info):
@@ -732,7 +732,7 @@ def test_resolve_priority_match_by_category(system_info):
732732
event_category=EventCategory.RAS,
733733
)
734734
rules = [{"event_category": "RAS", "new_priority": "WARNING"}]
735-
result = analyzer.resolve_priority(regex_obj, rules)
735+
result = analyzer._resolve_priority(regex_obj, rules)
736736
assert result == EventPriority.WARNING
737737

738738

@@ -750,7 +750,7 @@ def test_resolve_priority_match_by_message_list(system_info):
750750
"new_priority": "WARNING",
751751
}
752752
]
753-
result = analyzer.resolve_priority(regex_obj, rules)
753+
result = analyzer._resolve_priority(regex_obj, rules)
754754
assert result == EventPriority.WARNING
755755

756756

@@ -763,7 +763,7 @@ def test_resolve_priority_no_change(system_info):
763763
event_category=EventCategory.RAS,
764764
)
765765
rules = [{"event_category": "RAS", "new_priority": "NO_CHANGE"}]
766-
assert analyzer.resolve_priority(regex_obj, rules) == EventPriority.ERROR
766+
assert analyzer._resolve_priority(regex_obj, rules) == EventPriority.ERROR
767767

768768

769769
def test_resolve_priority_first_match_wins(system_info):
@@ -778,7 +778,7 @@ def test_resolve_priority_first_match_wins(system_info):
778778
{"event_category": "RAS", "new_priority": "WARNING"},
779779
{"event_category": "RAS", "new_priority": "ERROR"},
780780
]
781-
result = analyzer.resolve_priority(regex_obj, rules)
781+
result = analyzer._resolve_priority(regex_obj, rules)
782782
assert result == EventPriority.WARNING
783783

784784

@@ -794,13 +794,13 @@ def test_resolve_priority_multiple_filter_fields(system_info):
794794
rules = [
795795
{"event_category": "RAS", "message": "GPU reset failed", "new_priority": "WARNING"},
796796
]
797-
assert analyzer.resolve_priority(regex_obj, rules) == EventPriority.WARNING
797+
assert analyzer._resolve_priority(regex_obj, rules) == EventPriority.WARNING
798798

799799
# Does NOT match because message differs → returns original priority
800800
rules_mismatch = [
801801
{"event_category": "RAS", "message": "ACA Error", "new_priority": "WARNING"},
802802
]
803-
assert analyzer.resolve_priority(regex_obj, rules_mismatch) == EventPriority.ERROR
803+
assert analyzer._resolve_priority(regex_obj, rules_mismatch) == EventPriority.ERROR
804804

805805

806806
def test_resolve_priority_match_all_matches_any_regex(system_info):
@@ -818,7 +818,7 @@ def test_resolve_priority_match_all_matches_any_regex(system_info):
818818
event_category=EventCategory.SW_DRIVER,
819819
),
820820
]:
821-
result = analyzer.resolve_priority(
821+
result = analyzer._resolve_priority(
822822
regex_obj, [{"match_all": True, "new_priority": "WARNING"}]
823823
)
824824
assert (
@@ -836,7 +836,7 @@ def test_resolve_priority_match_all_ignores_non_matching_filters(system_info):
836836
)
837837
# event_category is RAS, but filter says SW_DRIVER — would normally NOT match.
838838
# match_all=True should bypass this check and still apply the rule.
839-
result = analyzer.resolve_priority(
839+
result = analyzer._resolve_priority(
840840
regex_obj,
841841
[{"match_all": True, "event_category": "SW_DRIVER", "new_priority": "WARNING"}],
842842
)
@@ -852,14 +852,14 @@ def test_resolve_priority_match_all_false_still_filters(system_info):
852852
event_category=EventCategory.RAS,
853853
)
854854
# match_all=False with a non-matching filter → returns original priority
855-
result = analyzer.resolve_priority(
855+
result = analyzer._resolve_priority(
856856
regex_obj,
857857
[{"match_all": False, "event_category": "SW_DRIVER", "new_priority": "WARNING"}],
858858
)
859859
assert result == EventPriority.ERROR
860860

861861
# match_all=False with a matching filter → should match
862-
result = analyzer.resolve_priority(
862+
result = analyzer._resolve_priority(
863863
regex_obj,
864864
[{"match_all": False, "event_category": "RAS", "new_priority": "WARNING"}],
865865
)
@@ -953,3 +953,24 @@ def test_custom_regex_with_multiline_pattern(system_info):
953953
assert len(res.events) >= 1
954954
start_events = [e for e in res.events if e.description == "Start Error Block"]
955955
assert len(start_events) == 1
956+
957+
958+
def test_priority_override_updates_unkown_dmesg_error(system_info):
    """A priority override rule keyed on the 'Unknown dmesg error' message is honoured.

    The 'Unknown dmesg error' regex is added after the base ErrorRegex list, so this
    checks that its default priority can still be overridden to ERROR.
    """
    raw_line = "kern :err : 2024-10-07T10:17:15,145363-04:00 UNKOWN DMESG ERROR"
    data = DmesgData(dmesg_content=raw_line)

    dmesg_analyzer = DmesgAnalyzer(system_info=system_info)
    override_rules = [
        {"message": "Unknown dmesg error", "new_priority": "ERROR"},
    ]
    analyzer_args = DmesgAnalyzerArgs(
        check_unknown_dmesg_errors=True,
        priority_override_rules=override_rules,
    )

    outcome = dmesg_analyzer.analyze_data(data, args=analyzer_args)

    # Exactly one event is expected: the single unknown kern:err line above.
    assert len(outcome.events) == 1
    only_event = outcome.events[0]
    assert only_event.priority == EventPriority.ERROR

0 commit comments

Comments
 (0)