diff --git a/backends/test/suite/flow.py b/backends/test/suite/flow.py
index b7a126eaf35..9df3805444a 100644
--- a/backends/test/suite/flow.py
+++ b/backends/test/suite/flow.py
@@ -1,6 +1,6 @@
import logging
-from dataclasses import dataclass
+from dataclasses import dataclass, field
from typing import Callable
from executorch.backends.test.harness import Tester
@@ -35,6 +35,12 @@ class TestFlow:
is_delegated: bool = True
""" Indicates whether the flow is expected to generate CALL_DELEGATE nodes. """
+ skip_patterns: list[str] = field(default_factory=list)
+ """ Tests with names containing any substrings in this list are skipped. """
+
+ def should_skip_test(self, test_name: str) -> bool:
+ return any(pattern in test_name for pattern in self.skip_patterns)
+
def all_flows() -> dict[str, TestFlow]:
flows = []
diff --git a/backends/test/suite/flows/coreml.py b/backends/test/suite/flows/coreml.py
index fd956b64f05..8a532ff0003 100644
--- a/backends/test/suite/flows/coreml.py
+++ b/backends/test/suite/flows/coreml.py
@@ -19,6 +19,7 @@ def _create_coreml_flow(
CoreMLTester, minimum_deployment_target=minimum_deployment_target
),
quantize=quantize,
+ skip_patterns=["test_argmin", "test_argmax"],
)
diff --git a/backends/test/suite/flows/vulkan.py b/backends/test/suite/flows/vulkan.py
index 2a8c4e506fa..a3a4fb55aba 100644
--- a/backends/test/suite/flows/vulkan.py
+++ b/backends/test/suite/flows/vulkan.py
@@ -20,6 +20,7 @@ def _create_vulkan_flow_base(
tester_factory=VulkanTester,
quantize=quantize_stage_factory is not None,
quantize_stage_factory=quantize_stage_factory,
+ skip_patterns=["float16", "float64"], # Not supported in swiftshader
)
diff --git a/backends/test/suite/generate_markdown_summary.py b/backends/test/suite/generate_markdown_summary.py
index 37bf758fed0..73da8fba678 100644
--- a/backends/test/suite/generate_markdown_summary.py
+++ b/backends/test/suite/generate_markdown_summary.py
@@ -12,6 +12,25 @@
#
+def escape_for_markdown(text: str) -> str:
+ """
+ Modify a string to properly display in a markdown table cell.
+ """
+ if not text:
+ return text
+
+ # Replace newlines with <br /> tags
+ escaped = text.replace("\n", "<br />")
+
+ # Escape backslashes.
+ escaped = escaped.replace("\\", "\\\\")
+
+ # Escape pipe characters that would break table structure
+ escaped = escaped.replace("|", "\\|")
+
+ return escaped
+
+
def generate_markdown(csv_path: str, exit_code: int = 0): # noqa (C901)
# Print warning if exit code is non-zero
if exit_code != 0:
@@ -46,7 +65,7 @@ def generate_markdown(csv_path: str, exit_code: int = 0): # noqa (C901)
for row in data_rows:
# Make a copy of the row to avoid modifying the original
- processed_row = row.copy()
+ processed_row = [escape_for_markdown(cell) for cell in row]
# Count results and collect failed tests
if result_column_index is not None and result_column_index < len(row):
@@ -96,7 +115,8 @@ def generate_markdown(csv_path: str, exit_code: int = 0): # noqa (C901)
# Generate Failed Tests section
print("# Failed Tests\n")
if failed_tests:
- print("| " + " | ".join(header) + " |")
+ escaped_header = [escape_for_markdown(col) for col in header]
+ print("| " + " | ".join(escaped_header) + " |")
print("|" + "|".join(["---"] * len(header)) + "|")
for row in failed_tests:
print("| " + " | ".join(row) + " |")
diff --git a/backends/test/suite/models/__init__.py b/backends/test/suite/models/__init__.py
index 65b546b0eb5..ea44275a463 100644
--- a/backends/test/suite/models/__init__.py
+++ b/backends/test/suite/models/__init__.py
@@ -52,6 +52,11 @@ def wrapped_test(self):
"use_dynamic_shapes": use_dynamic_shapes,
}
with TestContext(test_name, test_func.__name__, flow.name, params):
+ if flow.should_skip_test(test_name):
+ raise unittest.SkipTest(
+ f"Skipping test due to matching flow {flow.name} skip patterns"
+ )
+
test_func(self, flow, dtype, use_dynamic_shapes)
wrapped_test._name = test_func.__name__ # type: ignore
diff --git a/backends/test/suite/operators/__init__.py b/backends/test/suite/operators/__init__.py
index 6ceb9086f71..9c550b3a49c 100644
--- a/backends/test/suite/operators/__init__.py
+++ b/backends/test/suite/operators/__init__.py
@@ -97,6 +97,11 @@ def _make_wrapped_test(
):
def wrapped_test(self):
with TestContext(test_name, test_base_name, flow.name, params):
+ if flow.should_skip_test(test_name):
+ raise unittest.SkipTest(
+ f"Skipping test due to matching flow {flow.name} skip patterns"
+ )
+
test_kwargs = copy.copy(params) or {}
test_kwargs["flow"] = flow
diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py
index ce8a48dcc12..cdf2ce870e1 100644
--- a/backends/test/suite/reporting.py
+++ b/backends/test/suite/reporting.py
@@ -45,6 +45,8 @@
]
)
+CSV_FIELD_NAMES.append("Error")
+
# Operators that are excluded from the counts returned by count_ops. These are used to
# exclude operatations that are not logically relevant or delegatable to backends.
@@ -365,6 +367,15 @@ def write_csv_header(output: TextIO):
def write_csv_row(record: TestCaseSummary, output: TextIO):
writer = csv.DictWriter(output, CSV_FIELD_NAMES)
+ # Truncate error message if it's too long, keeping first and last 200 characters
+ error_message = ""
+ if record.error is not None:
+ error_str = str(record.error)
+ if len(error_str) > 400:
+ error_message = error_str[:200] + "..." + error_str[-200:]
+ else:
+ error_message = error_str
+
row = {
"Test ID": record.name,
"Test Case": record.base_name,
@@ -373,6 +384,7 @@ def write_csv_row(record: TestCaseSummary, output: TextIO):
"Params": _serialize_params(record.params),
"Result": record.result.to_short_str(),
"Result Detail": record.result.to_detail_str(),
+ "Error": error_message,
"Delegated": "True" if record.is_delegated() else "False",
"Quantize Time (s)": (
f"{record.quantize_time.total_seconds():.3f}"