diff --git a/backends/test/suite/flow.py b/backends/test/suite/flow.py index b7a126eaf35..9df3805444a 100644 --- a/backends/test/suite/flow.py +++ b/backends/test/suite/flow.py @@ -1,6 +1,6 @@ import logging -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Callable from executorch.backends.test.harness import Tester @@ -35,6 +35,12 @@ class TestFlow: is_delegated: bool = True """ Indicates whether the flow is expected to generate CALL_DELEGATE nodes. """ + skip_patterns: list[str] = field(default_factory=lambda: []) + """ Tests with names containing any substrings in this list are skipped. """ + + def should_skip_test(self, test_name: str) -> bool: + return any(pattern in test_name for pattern in self.skip_patterns) + def all_flows() -> dict[str, TestFlow]: flows = [] diff --git a/backends/test/suite/flows/coreml.py b/backends/test/suite/flows/coreml.py index fd956b64f05..8a532ff0003 100644 --- a/backends/test/suite/flows/coreml.py +++ b/backends/test/suite/flows/coreml.py @@ -19,6 +19,7 @@ def _create_coreml_flow( CoreMLTester, minimum_deployment_target=minimum_deployment_target ), quantize=quantize, + skip_patterns=["test_argmin", "test_argmax"], ) diff --git a/backends/test/suite/flows/vulkan.py b/backends/test/suite/flows/vulkan.py index 2a8c4e506fa..a3a4fb55aba 100644 --- a/backends/test/suite/flows/vulkan.py +++ b/backends/test/suite/flows/vulkan.py @@ -20,6 +20,7 @@ def _create_vulkan_flow_base( tester_factory=VulkanTester, quantize=quantize_stage_factory is not None, quantize_stage_factory=quantize_stage_factory, + skip_patterns=["float16", "float64"], # Not supported in swiftshader ) diff --git a/backends/test/suite/generate_markdown_summary.py b/backends/test/suite/generate_markdown_summary.py index 37bf758fed0..73da8fba678 100644 --- a/backends/test/suite/generate_markdown_summary.py +++ b/backends/test/suite/generate_markdown_summary.py @@ -12,6 +12,25 @@ # +def escape_for_markdown(text: str) -> str: + """ 
+ Modify a string to properly display in a markdown table cell. + """ + if not text: + return text + + # Replace newlines with <br> tags + escaped = text.replace("\n", "<br>") + + # Escape backslashes. + escaped = escaped.replace("\\", "\\\\") + + # Escape pipe characters that would break table structure + escaped = escaped.replace("|", "\\|") + + return escaped + + def generate_markdown(csv_path: str, exit_code: int = 0): # noqa (C901) # Print warning if exit code is non-zero if exit_code != 0: @@ -46,7 +65,7 @@ def generate_markdown(csv_path: str, exit_code: int = 0): # noqa (C901) for row in data_rows: # Make a copy of the row to avoid modifying the original - processed_row = row.copy() + processed_row = [escape_for_markdown(cell) for cell in row] # Count results and collect failed tests if result_column_index is not None and result_column_index < len(row): @@ -96,7 +115,8 @@ def generate_markdown(csv_path: str, exit_code: int = 0): # noqa (C901) # Generate Failed Tests section print("# Failed Tests\n") if failed_tests: - print("| " + " | ".join(header) + " |") + escaped_header = [escape_for_markdown(col) for col in header] + print("| " + " | ".join(escaped_header) + " |") print("|" + "|".join(["---"] * len(header)) + "|") for row in failed_tests: print("| " + " | ".join(row) + " |") diff --git a/backends/test/suite/models/__init__.py b/backends/test/suite/models/__init__.py index 65b546b0eb5..ea44275a463 100644 --- a/backends/test/suite/models/__init__.py +++ b/backends/test/suite/models/__init__.py @@ -52,6 +52,11 @@ def wrapped_test(self): "use_dynamic_shapes": use_dynamic_shapes, } with TestContext(test_name, test_func.__name__, flow.name, params): + if flow.should_skip_test(test_name): + raise unittest.SkipTest( + f"Skipping test due to matching flow {flow.name} skip patterns" + ) + test_func(self, flow, dtype, use_dynamic_shapes) wrapped_test._name = test_func.__name__ # type: ignore diff --git a/backends/test/suite/operators/__init__.py b/backends/test/suite/operators/__init__.py index 6ceb9086f71..9c550b3a49c 100644 --- a/backends/test/suite/operators/__init__.py +++ b/backends/test/suite/operators/__init__.py @@ -97,6
+97,11 @@ def _make_wrapped_test( ): def wrapped_test(self): with TestContext(test_name, test_base_name, flow.name, params): + if flow.should_skip_test(test_name): + raise unittest.SkipTest( + f"Skipping test due to matching flow {flow.name} skip patterns" + ) + test_kwargs = copy.copy(params) or {} test_kwargs["flow"] = flow diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py index ce8a48dcc12..cdf2ce870e1 100644 --- a/backends/test/suite/reporting.py +++ b/backends/test/suite/reporting.py @@ -45,6 +45,8 @@ ] ) +CSV_FIELD_NAMES.append("Error") + # Operators that are excluded from the counts returned by count_ops. These are used to # exclude operatations that are not logically relevant or delegatable to backends. @@ -365,6 +367,15 @@ def write_csv_header(output: TextIO): def write_csv_row(record: TestCaseSummary, output: TextIO): writer = csv.DictWriter(output, CSV_FIELD_NAMES) + # Truncate error message if it's too long, keeping first and last 200 characters + error_message = "" + if record.error is not None: + error_str = str(record.error) + if len(error_str) > 400: + error_message = error_str[:200] + "..." + error_str[-200:] + else: + error_message = error_str + row = { "Test ID": record.name, "Test Case": record.base_name, @@ -373,6 +384,7 @@ def write_csv_row(record: TestCaseSummary, output: TextIO): "Params": _serialize_params(record.params), "Result": record.result.to_short_str(), "Result Detail": record.result.to_detail_str(), + "Error": error_message, "Delegated": "True" if record.is_delegated() else "False", "Quantize Time (s)": ( f"{record.quantize_time.total_seconds():.3f}"