diff --git a/backends/test/suite/flow.py b/backends/test/suite/flow.py
index b7a126eaf35..9df3805444a 100644
--- a/backends/test/suite/flow.py
+++ b/backends/test/suite/flow.py
@@ -1,6 +1,6 @@
 import logging
 
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from typing import Callable
 
 from executorch.backends.test.harness import Tester
@@ -35,6 +35,12 @@ class TestFlow:
     is_delegated: bool = True
     """ Indicates whether the flow is expected to generate CALL_DELEGATE nodes. """
 
+    skip_patterns: list[str] = field(default_factory=list)
+    """ Tests whose names contain any substring in this list are skipped. """
+
+    def should_skip_test(self, test_name: str) -> bool:  # plain substring match
+        return any(skip in test_name for skip in self.skip_patterns)
+
 
 def all_flows() -> dict[str, TestFlow]:
     flows = []
diff --git a/backends/test/suite/flows/coreml.py b/backends/test/suite/flows/coreml.py
index fd956b64f05..8a532ff0003 100644
--- a/backends/test/suite/flows/coreml.py
+++ b/backends/test/suite/flows/coreml.py
@@ -19,6 +19,7 @@ def _create_coreml_flow(
             CoreMLTester, minimum_deployment_target=minimum_deployment_target
         ),
         quantize=quantize,
+        skip_patterns=["test_argmin", "test_argmax"],
     )
 
 
diff --git a/backends/test/suite/flows/vulkan.py b/backends/test/suite/flows/vulkan.py
index 2a8c4e506fa..a3a4fb55aba 100644
--- a/backends/test/suite/flows/vulkan.py
+++ b/backends/test/suite/flows/vulkan.py
@@ -20,6 +20,7 @@ def _create_vulkan_flow_base(
         tester_factory=VulkanTester,
         quantize=quantize_stage_factory is not None,
         quantize_stage_factory=quantize_stage_factory,
+        skip_patterns=["float16", "float64"],  # Not supported in swiftshader
     )
 
 
diff --git a/backends/test/suite/generate_markdown_summary.py b/backends/test/suite/generate_markdown_summary.py
index 37bf758fed0..73da8fba678 100644
--- a/backends/test/suite/generate_markdown_summary.py
+++ b/backends/test/suite/generate_markdown_summary.py
@@ -12,6 +12,25 @@
 #
 
 
+def escape_for_markdown(text: str) -> str:
+    """
+    Return `text` escaped so it renders inside a single markdown table cell.
+
+    CRLF, CR and LF line breaks become `<br />`; backslashes are doubled and
+    pipes are backslash-escaped. Falsy input (e.g. "") is returned unchanged.
+    """
+    if not text:
+        return text
+
+    # Fold CRLF / lone CR into LF first; a leftover "\r" still ends the table row.
+    escaped = text.replace("\r\n", "\n").replace("\r", "\n").replace("\n", "<br />")
+
+    # Double backslashes before escaping pipes, or the pipe escape gets doubled.
+    escaped = escaped.replace("\\", "\\\\")
+
+    return escaped.replace("|", "\\|")
+
+
 def generate_markdown(csv_path: str, exit_code: int = 0):  # noqa (C901)
     # Print warning if exit code is non-zero
     if exit_code != 0:
@@ -46,7 +65,7 @@ def generate_markdown(csv_path: str, exit_code: int = 0):  # noqa (C901)
 
     for row in data_rows:
         # Make a copy of the row to avoid modifying the original
-        processed_row = row.copy()
+        processed_row = [escape_for_markdown(cell) for cell in row]
 
         # Count results and collect failed tests
         if result_column_index is not None and result_column_index < len(row):
@@ -96,7 +115,8 @@ def generate_markdown(csv_path: str, exit_code: int = 0):  # noqa (C901)
     # Generate Failed Tests section
     print("# Failed Tests\n")
     if failed_tests:
-        print("| " + " | ".join(header) + " |")
+        escaped_header = [escape_for_markdown(col) for col in header]
+        print("| " + " | ".join(escaped_header) + " |")
         print("|" + "|".join(["---"] * len(header)) + "|")
         for row in failed_tests:
             print("| " + " | ".join(row) + " |")
diff --git a/backends/test/suite/models/__init__.py b/backends/test/suite/models/__init__.py
index 65b546b0eb5..ea44275a463 100644
--- a/backends/test/suite/models/__init__.py
+++ b/backends/test/suite/models/__init__.py
@@ -52,6 +52,11 @@ def wrapped_test(self):
             "use_dynamic_shapes": use_dynamic_shapes,
         }
         with TestContext(test_name, test_func.__name__, flow.name, params):
+            if flow.should_skip_test(test_name):
+                raise unittest.SkipTest(
+                    f"Skipping test due to matching flow {flow.name} skip patterns"
+                )
+
             test_func(self, flow, dtype, use_dynamic_shapes)
 
     wrapped_test._name = test_func.__name__  # type: ignore
diff --git a/backends/test/suite/operators/__init__.py b/backends/test/suite/operators/__init__.py
index 6ceb9086f71..9c550b3a49c 100644
--- a/backends/test/suite/operators/__init__.py
+++ b/backends/test/suite/operators/__init__.py
@@ -97,6 +97,11 @@ def _make_wrapped_test(
 ):
     def wrapped_test(self):
         with TestContext(test_name, test_base_name, flow.name, params):
+            if flow.should_skip_test(test_name):
+                raise unittest.SkipTest(
+                    f"Skipping test due to matching flow {flow.name} skip patterns"
+                )
+
             test_kwargs = copy.copy(params) or {}
             test_kwargs["flow"] = flow
 
diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py
index ce8a48dcc12..cdf2ce870e1 100644
--- a/backends/test/suite/reporting.py
+++ b/backends/test/suite/reporting.py
@@ -45,6 +45,8 @@
         ]
     )
 
+CSV_FIELD_NAMES.append("Error")  # error text column, filled by write_csv_row
+
 
 # Operators that are excluded from the counts returned by count_ops. These are used to
 # exclude operatations that are not logically relevant or delegatable to backends.
@@ -365,6 +367,15 @@ def write_csv_header(output: TextIO):
 def write_csv_row(record: TestCaseSummary, output: TextIO):
     writer = csv.DictWriter(output, CSV_FIELD_NAMES)
 
+    # Truncate error message if it's too long, keeping first and last 200 characters
+    error_message = ""
+    if record.error is not None:
+        error_str = str(record.error)
+        if len(error_str) > 400:
+            error_message = error_str[:200] + "..." + error_str[-200:]
+        else:
+            error_message = error_str
+
     row = {
         "Test ID": record.name,
         "Test Case": record.base_name,
@@ -373,6 +384,7 @@ def write_csv_row(record: TestCaseSummary, output: TextIO):
         "Params": _serialize_params(record.params),
         "Result": record.result.to_short_str(),
         "Result Detail": record.result.to_detail_str(),
+        "Error": error_message,
         "Delegated": "True" if record.is_delegated() else "False",
         "Quantize Time (s)": (
             f"{record.quantize_time.total_seconds():.3f}"