diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset/parser_benchmark_dataset.json b/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset/parser_benchmark_dataset.json
index fb07441..86fdcfb 100644
--- a/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset/parser_benchmark_dataset.json
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset/parser_benchmark_dataset.json
@@ -7,11 +7,12 @@
  "field_order": [
   "enabled",
   "column_break_title",
-  "section_break_file",
-  "file",
-  "file_type",
-  "column_break_gbap",
+  "files_section",
+  "files",
+  "is_multiple_files",
+  "section_break_txfs",
   "transaction_type",
+  "column_break_xusg",
   "country",
   "section_break_sobg",
   "company",
@@ -59,20 +60,12 @@
    "label": "Enabled"
   },
   {
-   "fieldname": "section_break_file",
-   "fieldtype": "Section Break"
-  },
-  {
-   "fieldname": "file",
-   "fieldtype": "Attach",
-   "label": "File",
-   "print_hide": 1,
-   "reqd": 1
-  },
-  {
-   "fieldname": "file_type",
-   "fieldtype": "Data",
-   "label": "File Type",
+   "allow_on_submit": 1,
+   "default": "0",
+   "fieldname": "is_multiple_files",
+   "fieldtype": "Check",
+   "in_standard_filter": 1,
+   "label": "Multiple Files",
    "read_only": 1
   },
   {
@@ -91,10 +84,6 @@
    "options": "India\nOther",
    "reqd": 1
   },
-  {
-   "fieldname": "column_break_gbap",
-   "fieldtype": "Column Break"
-  },
   {
    "fieldname": "company",
    "fieldtype": "Link",
@@ -115,6 +104,19 @@
    "label": "Party",
    "options": "party_type"
   },
+  {
+   "fieldname": "files_section",
+   "fieldtype": "Section Break",
+   "label": "Files"
+  },
+  {
+   "allow_on_submit": 1,
+   "fieldname": "files",
+   "fieldtype": "Table",
+   "label": "Files",
+   "options": "Parser Benchmark Dataset File",
+   "reqd": 1
+  },
   {
    "fieldname": "processing_section",
    "fieldtype": "Section Break",
@@ -181,7 +183,6 @@
    "label": "Google Gemini Flash-2.5"
   },
   {
-   "depends_on": "eval: doc.file_type === \"PDF\"",
    "fieldname": "pdf_processor_section",
    "fieldtype": "Section Break",
    "label": "PDF Processors"
@@ -273,6 +274,15 @@
   {
    "fieldname": "column_break_aoce",
    "fieldtype": "Column Break"
+  },
+  {
+   "fieldname": "section_break_txfs",
+   "fieldtype": "Section Break",
+   "label": "Transaction Details"
+  },
+  {
+   "fieldname": "column_break_xusg",
+   "fieldtype": "Column Break"
   }
  ],
  "index_web_pages_for_search": 1,
@@ -283,7 +293,7 @@
    "link_fieldname": "dataset"
   }
  ],
- "modified": "2026-03-27 10:32:46.620190",
+ "modified": "2026-03-30 15:01:03.977940",
  "modified_by": "Administrator",
  "module": "Parser Benchmark",
  "name": "Parser Benchmark Dataset",
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset/parser_benchmark_dataset.py b/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset/parser_benchmark_dataset.py
index 861a245..26feffa 100644
--- a/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset/parser_benchmark_dataset.py
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset/parser_benchmark_dataset.py
@@ -34,6 +34,9 @@ class ParserBenchmarkDataset(Document):
     if TYPE_CHECKING:
         from frappe.types import DF
 
+        from transaction_parser.parser_benchmark.doctype.parser_benchmark_dataset_file.parser_benchmark_dataset_file import (
+            ParserBenchmarkDatasetFile,
+        )
         from transaction_parser.parser_benchmark.doctype.parser_benchmark_expected_field.parser_benchmark_expected_field import (
             ParserBenchmarkExpectedField,
         )
@@ -46,10 +49,10 @@ class ParserBenchmarkDataset(Document):
         docling: DF.Check
         enabled: DF.Check
         expected_fields: DF.Table[ParserBenchmarkExpectedField]
-        file: DF.Attach
-        file_type: DF.Data | None
+        files: DF.Table[ParserBenchmarkDatasetFile]
         google_gemini_flash_25: DF.Check
         google_gemini_pro_25: DF.Check
+        is_multiple_files: DF.Check
         naming_series: DF.Literal["PAR-BM-DTS-"]
         ocrmypdf: DF.Check
         openai_gpt_4o: DF.Check
@@ -62,39 +65,27 @@ class ParserBenchmarkDataset(Document):
         transaction_type: DF.Literal["Sales Order", "Expense"]
     # end: auto-generated types
 
-    SUPPORTED_FILE_TYPES = ("PDF", "CSV", "XLSX", "XLS")
-
     def validate(self):
-        self.set_file_type()
-        self.validate_file_type()
+        self.validate_files()
         self.validate_selected_models()
-        self.validate_selected_processors()
         self.validate_expected_fields()
 
-    def set_file_type(self):
-        if self.file_type and not self.has_value_changed("file"):
-            return
+    def before_update_after_submit(self):
+        self.validate_files()  # `files` is allow_on_submit, so re-derive row data here too
 
-        file_doc = frappe.get_last_doc("File", filters={"file_url": self.file})
-        self.file_type = file_doc.file_type
+    def validate_files(self):
+        """Fill each row's file_type from its File doc, then set is_multiple_files."""
+        for row in self.files:
+            if row.file and (not row.file_type or row.has_value_changed("file")):
+                file_doc = frappe.get_last_doc("File", filters={"file_url": row.file})
+                row.file_type = file_doc.file_type
 
-    def validate_file_type(self):
-        if self.file_type not in self.SUPPORTED_FILE_TYPES:
-            frappe.throw(_("Unsupported file type: {0}").format(self.file_type))
+        self.is_multiple_files = len(self.files) > 1
 
     def validate_selected_models(self):
         if not self.get_selected_models():
             frappe.throw(_("Please select at least one AI Model."))
 
-    def validate_selected_processors(self):
-        if self.file_type != "PDF":
-            for field in PDF_PROCESSOR_FIELD_MAP:
-                self.set(field, 0)
-            return
-
-        if not self.get_selected_processors():
-            frappe.throw(_("Please select at least one PDF Processor."))
-
     def validate_expected_fields(self):
         if not self.expected_fields:
             return
@@ -129,6 +120,18 @@ def get_selected_processors(self) -> list[str]:
             label for field, label in PDF_PROCESSOR_FIELD_MAP.items() if self.get(field)
         ]
 
+    def has_pdf_file(self) -> bool:
+        """Return True when at least one row in ``files`` has file_type "PDF"."""
+        return "PDF" in (row.file_type for row in self.files)
+
+    def get_file_docs(self) -> list:
+        """Resolve every ``files`` row to its File document, in row order.
+
+        Each lookup goes through ``frappe.get_last_doc`` filtered on the row's URL.
+        """
+        urls = (row.file for row in self.files)
+        return [frappe.get_last_doc("File", filters={"file_url": url}) for url in urls]
+
 
 @frappe.whitelist()
 def run_benchmark(dataset_name: str):
@@ -156,7 +159,7 @@ def create_and_enqueue_benchmark_logs(dataset_name: str) -> list[str]:
     models = dataset.get_selected_models()
     processors = (
         (dataset.get_selected_processors() or [None])
-        if dataset.file_type == "PDF"
+        if dataset.has_pdf_file()
         else [None]
     )
 
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset_file/__init__.py b/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset_file/__init__.py
new file mode 100644
index 0000000..c4fea77
--- /dev/null
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset_file/__init__.py
@@ -0,0 +1,2 @@
+# Copyright (c) 2026, Resilient Tech and contributors
+# For license information, please see license.txt
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset_file/parser_benchmark_dataset_file.json b/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset_file/parser_benchmark_dataset_file.json
new file mode 100644
index 0000000..5f54a45
--- /dev/null
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset_file/parser_benchmark_dataset_file.json
@@ -0,0 +1,43 @@
+{
+ "actions": [],
+ "creation": "2026-03-30 00:00:00",
+ "doctype": "DocType",
+ "engine": "InnoDB",
+ "field_order": [
+  "file",
+  "column_break_yahx",
+  "file_type"
+ ],
+ "fields": [
+  {
+   "fieldname": "file",
+   "fieldtype": "Attach",
+   "in_list_view": 1,
+   "label": "File",
+   "reqd": 1
+  },
+  {
+   "fieldname": "file_type",
+   "fieldtype": "Data",
+   "in_list_view": 1,
+   "label": "File Type",
+   "read_only": 1
+  },
+  {
+   "fieldname": "column_break_yahx",
+   "fieldtype": "Column Break"
+  }
+ ],
+ "istable": 1,
+ "links": [],
+ "modified": "2026-03-30 15:01:27.752102",
+ "modified_by": "Administrator",
+ "module": "Parser Benchmark",
+ "name": "Parser Benchmark Dataset File",
+ "owner": "Administrator",
+ "permissions": [],
+ "row_format": "Dynamic",
+ "sort_field": "modified",
+ "sort_order": "DESC",
+ "states": []
+}
\ No newline at end of file
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset_file/parser_benchmark_dataset_file.py b/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset_file/parser_benchmark_dataset_file.py
new file mode 100644
index 0000000..78b5e42
--- /dev/null
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_dataset_file/parser_benchmark_dataset_file.py
@@ -0,0 +1,23 @@
+# Copyright (c) 2026, Resilient Tech and contributors
+# For license information, please see license.txt
+
+from frappe.model.document import Document
+
+
+class ParserBenchmarkDatasetFile(Document):
+    # begin: auto-generated types
+    # This code is auto-generated. Do not modify anything in this block.
+
+    from typing import TYPE_CHECKING
+
+    if TYPE_CHECKING:
+        from frappe.types import DF
+
+        file: DF.Attach
+        file_type: DF.Data | None
+        parent: DF.Data
+        parentfield: DF.Data
+        parenttype: DF.Data
+    # end: auto-generated types
+
+    pass  # child-table row; file_type is filled in by the parent's validate_files()
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_log/parser_benchmark_log.json b/transaction_parser/parser_benchmark/doctype/parser_benchmark_log/parser_benchmark_log.json
index 9ba294b..0bbe3c6 100644
--- a/transaction_parser/parser_benchmark/doctype/parser_benchmark_log/parser_benchmark_log.json
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_log/parser_benchmark_log.json
@@ -26,7 +26,6 @@
   "column_break_ubhs",
   "file_parsing_tab",
   "pdf_processor",
-  "file_type",
   "column_break_file_metrics",
   "page_limit",
   "section_break_umzr",
@@ -107,7 +106,6 @@
    "read_only": 1
   },
   {
-   "depends_on": "eval: doc.file_type === \"PDF\"",
    "fieldname": "pdf_processor",
    "fieldtype": "Select",
    "label": "PDF Processor",
@@ -121,13 +119,7 @@
    "label": "Total Time (s)",
    "read_only": 1
   },
-  {
-   "fieldname": "file_type",
-   "fieldtype": "Data",
-   "is_virtual": 1,
-   "label": "File Type",
-   "read_only": 1
-  },
+
   {
    "fieldname": "file_parsing_tab",
    "fieldtype": "Tab Break",
diff --git a/transaction_parser/parser_benchmark/doctype/parser_benchmark_log/parser_benchmark_log.py b/transaction_parser/parser_benchmark/doctype/parser_benchmark_log/parser_benchmark_log.py
index 0b9e29a..5cd5b7f 100644
--- a/transaction_parser/parser_benchmark/doctype/parser_benchmark_log/parser_benchmark_log.py
+++ b/transaction_parser/parser_benchmark/doctype/parser_benchmark_log/parser_benchmark_log.py
@@ -97,7 +97,3 @@ def party(self):
     @property
     def page_limit(self):
         return self.get_from_dataset("page_limit") or 0
-
-    @property
-    def file_type(self):
-        return self.get_from_dataset("file_type")
diff --git a/transaction_parser/parser_benchmark/report/transaction_parser_accuracy_analysis/transaction_parser_accuracy_analysis.js b/transaction_parser/parser_benchmark/report/transaction_parser_accuracy_analysis/transaction_parser_accuracy_analysis.js
index c22fab8..5c84d22 100644
--- a/transaction_parser/parser_benchmark/report/transaction_parser_accuracy_analysis/transaction_parser_accuracy_analysis.js
+++ b/transaction_parser/parser_benchmark/report/transaction_parser_accuracy_analysis/transaction_parser_accuracy_analysis.js
@@ -12,8 +12,6 @@ const AI_MODELS = [
 	"Google Gemini Flash-2.5",
 ];
 
-const FILE_TYPES = ["PDF", "CSV", "XLSX", "XLS"];
-
 const PDF_PROCESSORS = ["OCRMyPDF", "Docling"];
 
 const PARTY_TYPE_MAP = {
@@ -68,12 +66,6 @@ frappe.query_reports["Transaction Parser Accuracy Analysis"] = {
 			fieldtype: "Dynamic Link",
 			options: "party_type",
 		},
-		{
-			fieldname: "file_type",
-			label: __("File Type"),
-			fieldtype: "MultiSelectList",
-			get_data: (txt) => make_options(FILE_TYPES, txt),
-		},
 		{
 			fieldname: "ai_model",
 			label: __("AI Model"),
@@ -92,6 +84,12 @@ frappe.query_reports["Transaction Parser Accuracy Analysis"] = {
 			fieldtype: "Check",
 			default: 0,
 		},
+		{
+			fieldname: "is_multiple_files",
+			label: __("Multiple Files Only"),
+			fieldtype: "Check",
+			default: 0,
+		},
 	],
 };
 
diff --git a/transaction_parser/parser_benchmark/report/transaction_parser_accuracy_analysis/transaction_parser_accuracy_analysis.py b/transaction_parser/parser_benchmark/report/transaction_parser_accuracy_analysis/transaction_parser_accuracy_analysis.py
index 3d1e7ac..27283ad 100644
--- a/transaction_parser/parser_benchmark/report/transaction_parser_accuracy_analysis/transaction_parser_accuracy_analysis.py
+++ b/transaction_parser/parser_benchmark/report/transaction_parser_accuracy_analysis/transaction_parser_accuracy_analysis.py
@@ -34,13 +34,6 @@
     "Docling": 1,
 }
 
-_FILE_TYPE_ORDER = {
-    "PDF": 0,
-    "CSV": 1,
-    "XLSX": 2,
-    "XLS": 3,
-}
-
 
 class Col(StrEnum):
     """Column fieldnames — single source of truth for the report."""
@@ -50,7 +43,6 @@ class Col(StrEnum):
     ACCURACY_SCORE = "accuracy_score"
     AI_MODEL = "ai_model"
     PDF_PROCESSOR = "pdf_processor"
-    FILE_TYPE = "file_type"
     FILE_PARSE_TIME = "file_parse_time"
     FILE_PARSE_MEMORY = "file_parse_memory"
     AI_PARSE_TIME = "ai_parse_time"
@@ -155,12 +147,6 @@ def _get_columns(self):
                 "fieldtype": "Data",
                 "width": 110,
             },
-            {
-                "fieldname": Col.FILE_TYPE,
-                "label": _("File Type"),
-                "fieldtype": "Data",
-                "width": 90,
-            },
             {
                 "fieldname": Col.FILE_PARSE_TIME,
                 "label": _("File Parse (s)"),
@@ -254,19 +240,21 @@ def _fetch_logs(self):
                 log.currency,
                 log.dataset,
                 ds.party,
-                ds.file_type,
                 Coalesce(cust.customer_name, supp.supplier_name, ds.party).as_(
                     "party_name"
                 ),
             )
             .where(log.status == "Completed")
             .where(ds.docstatus == 1)
-            .orderby(ds.party, log.ai_model, ds.file_type)
+            .orderby(ds.party, log.ai_model)
         )
 
         if not self.filters.get("include_disabled_datasets"):
             query = query.where(ds.enabled == 1)
 
+        if self.filters.get("is_multiple_files"):
+            query = query.where(ds.is_multiple_files == 1)
+
         # exact-match filters
         for column, key in (
             (ds.company, "company"),
@@ -279,7 +267,6 @@ def _fetch_logs(self):
 
         # multi-select IN filters
         for column, key in (
-            (ds.file_type, "file_type"),
             (log.ai_model, "ai_model"),
             (log.pdf_processor, "pdf_processor"),
         ):
@@ -334,7 +321,6 @@ def _build_row(self, r, score_details_map):
             Col.ACCURACY_SCORE: r.accuracy_score,
             Col.AI_MODEL: r.ai_model,
             Col.PDF_PROCESSOR: r.pdf_processor,
-            Col.FILE_TYPE: r.file_type,
             Col.DATASET: r.dataset,
             Col.FILE_PARSE_TIME: r.file_parse_time,
             Col.FILE_PARSE_MEMORY: r.file_parse_memory,
@@ -362,7 +348,6 @@ def _aggregate_by_config(self):
                 row.get(Col.DATASET),
                 row.get(Col.AI_MODEL),
                 row.get(Col.PDF_PROCESSOR) or "",
-                row.get(Col.FILE_TYPE),
             )
             groups[key].append(row)
 
@@ -374,7 +359,6 @@ def _aggregate_by_config(self):
                 Col.PARTY_NAME: rows[0].get(Col.PARTY_NAME),
                 Col.AI_MODEL: rows[0].get(Col.AI_MODEL),
                 Col.PDF_PROCESSOR: rows[0].get(Col.PDF_PROCESSOR),
-                Col.FILE_TYPE: rows[0].get(Col.FILE_TYPE),
                 Col.CURRENCY: rows[0].get(Col.CURRENCY),
                 Col.RUN_COUNT: count,
             }
@@ -431,11 +415,10 @@ def _group_by_party(self):
 
     @staticmethod
     def _sort_key(row):
-        """Sort key for child rows: AI Model → PDF Processor → File Type."""
+        """Sort key for child rows: AI Model → PDF Processor."""
         return (
             _AI_MODEL_ORDER.get(row.get(Col.AI_MODEL), 99),
             _PDF_PROCESSOR_ORDER.get(row.get(Col.PDF_PROCESSOR), 99),
-            _FILE_TYPE_ORDER.get(row.get(Col.FILE_TYPE), 99),
         )
 
     def _group_row(self, party, rows):
diff --git a/transaction_parser/parser_benchmark/report/transaction_parser_version_comparison/transaction_parser_version_comparison.js b/transaction_parser/parser_benchmark/report/transaction_parser_version_comparison/transaction_parser_version_comparison.js
index 6061239..2c252e2 100644
--- a/transaction_parser/parser_benchmark/report/transaction_parser_version_comparison/transaction_parser_version_comparison.js
+++ b/transaction_parser/parser_benchmark/report/transaction_parser_version_comparison/transaction_parser_version_comparison.js
@@ -12,8 +12,6 @@ const AI_MODELS = [
 	"Google Gemini Flash-2.5",
 ];
 
-const FILE_TYPES = ["PDF", "CSV", "XLSX", "XLS"];
-
 const PDF_PROCESSORS = ["OCRMyPDF", "Docling"];
 
 const PARTY_TYPE_MAP = {
@@ -68,12 +66,6 @@ frappe.query_reports["Transaction Parser Version Comparison"] = {
 			fieldtype: "Dynamic Link",
 			options: "party_type",
 		},
-		{
-			fieldname: "file_type",
-			label: __("File Type"),
-			fieldtype: "MultiSelectList",
-			get_data: (txt) => make_options(FILE_TYPES, txt),
-		},
 		{
 			fieldname: "ai_model",
 			label: __("AI Model"),
@@ -92,6 +84,12 @@ frappe.query_reports["Transaction Parser Version Comparison"] = {
 			fieldtype: "Check",
 			default: 0,
 		},
+		{
+			fieldname: "is_multiple_files",
+			label: __("Multiple Files Only"),
+			fieldtype: "Check",
+			default: 0,
+		},
 	],
 };
 
diff --git a/transaction_parser/parser_benchmark/report/transaction_parser_version_comparison/transaction_parser_version_comparison.py b/transaction_parser/parser_benchmark/report/transaction_parser_version_comparison/transaction_parser_version_comparison.py
index 6773fb8..e20aae9 100644
--- a/transaction_parser/parser_benchmark/report/transaction_parser_version_comparison/transaction_parser_version_comparison.py
+++ b/transaction_parser/parser_benchmark/report/transaction_parser_version_comparison/transaction_parser_version_comparison.py
@@ -30,13 +30,6 @@
     "Docling": 1,
 }
 
-_FILE_TYPE_ORDER = {
-    "PDF": 0,
-    "CSV": 1,
-    "XLSX": 2,
-    "XLS": 3,
-}
-
 
 class Col(StrEnum):
     """Column fieldnames — single source of truth for the report."""
@@ -46,7 +39,6 @@ class Col(StrEnum):
     DATASET = "dataset"
     AI_MODEL = "ai_model"
     PDF_PROCESSOR = "pdf_processor"
-    FILE_TYPE = "file_type"
     COMMIT_HASH = "commit_hash"
     COMMIT_MESSAGE = "commit_message"
     ACCURACY_SCORE = "accuracy_score"
@@ -116,12 +108,6 @@ def _get_columns(self):
                 "fieldtype": "Data",
                 "width": 110,
             },
-            {
-                "fieldname": Col.FILE_TYPE,
-                "label": _("File Type"),
-                "fieldtype": "Data",
-                "width": 90,
-            },
             {
                 "fieldname": Col.COMMIT_HASH,
                 "label": _("Commit"),
@@ -179,7 +165,6 @@ def _fetch_logs(self):
                 log.commit_hash,
                 log.commit_message,
                 ds.party,
-                ds.file_type,
                 Coalesce(cust.customer_name, supp.supplier_name, ds.party).as_(
                     "party_name"
                 ),
@@ -187,12 +172,15 @@ def _fetch_logs(self):
             .where(log.status == "Completed")
             .where(ds.docstatus == 1)
             .where(Coalesce(log.commit_hash, "") != "")
-            .orderby(ds.party, log.ai_model, ds.file_type)
+            .orderby(ds.party, log.ai_model)
         )
 
         if not self.filters.get("include_disabled_datasets"):
             query = query.where(ds.enabled == 1)
 
+        if self.filters.get("is_multiple_files"):
+            query = query.where(ds.is_multiple_files == 1)
+
         # exact-match filters
         for column, key in (
             (ds.company, "company"),
@@ -205,7 +193,6 @@ def _fetch_logs(self):
 
         # multi-select IN filters
         for column, key in (
-            (ds.file_type, "file_type"),
             (log.ai_model, "ai_model"),
             (log.pdf_processor, "pdf_processor"),
         ):
@@ -264,7 +251,6 @@ def _build_row(self, r, score_details_map):
             Col.DATASET: r.dataset,
             Col.AI_MODEL: r.ai_model,
             Col.PDF_PROCESSOR: r.pdf_processor,
-            Col.FILE_TYPE: r.file_type,
             Col.COMMIT_HASH: short_hash,
             Col.COMMIT_MESSAGE: commit_msg,
             Col.ACCURACY_SCORE: r.accuracy_score,
@@ -286,7 +272,6 @@ def _aggregate_by_config(self):
                 row.get(Col.DATASET),
                 row.get(Col.AI_MODEL),
                 row.get(Col.PDF_PROCESSOR) or "",
-                row.get(Col.FILE_TYPE),
                 row.get(Col.COMMIT_HASH),
             )
             groups[key].append(row)
@@ -300,7 +285,6 @@ def _aggregate_by_config(self):
                 Col.DATASET: rows[0].get(Col.DATASET),
                 Col.AI_MODEL: rows[0].get(Col.AI_MODEL),
                 Col.PDF_PROCESSOR: rows[0].get(Col.PDF_PROCESSOR),
-                Col.FILE_TYPE: rows[0].get(Col.FILE_TYPE),
                 Col.COMMIT_HASH: rows[0].get(Col.COMMIT_HASH),
                 Col.COMMIT_MESSAGE: rows[0].get(Col.COMMIT_MESSAGE),
                 Col.RUN_COUNT: count,
@@ -353,11 +337,10 @@ def _group_by_party(self):
 
     @staticmethod
     def _sort_key(row):
-        """Sort: AI Model → PDF Processor → File Type → Commit Hash."""
+        """Sort: AI Model → PDF Processor → Commit Hash."""
         return (
             _AI_MODEL_ORDER.get(row.get(Col.AI_MODEL), 99),
             _PDF_PROCESSOR_ORDER.get(row.get(Col.PDF_PROCESSOR), 99),
-            _FILE_TYPE_ORDER.get(row.get(Col.FILE_TYPE), 99),
             row.get(Col.COMMIT_HASH) or "",
         )
 
diff --git a/transaction_parser/parser_benchmark/runner.py b/transaction_parser/parser_benchmark/runner.py
index 716fffe..8f949d7 100644
--- a/transaction_parser/parser_benchmark/runner.py
+++ b/transaction_parser/parser_benchmark/runner.py
@@ -2,6 +2,7 @@
 from timeit import default_timer
 
 import frappe
+from frappe import _
 from frappe.core.doctype.file.file import File
 from frappe.utils import cint, flt
 
@@ -57,11 +58,11 @@ def run(self):
         total_start = default_timer()
 
         try:
-            file_doc: File = self._get_file_doc()
-            self.controller: Transaction = self._get_controller(file_doc)
+            file_docs: list[File] = self._get_file_docs()
+            self.controller: Transaction = self._get_controller(file_docs[0])
 
-            file_content = self._run_file_parsing(file_doc)
-            ai_content = self._run_ai_parsing(file_content, file_doc.name)
+            file_content = self._run_file_parsing(file_docs)
+            ai_content = self._run_ai_parsing(file_content, file_docs[0].name)
             self._calculate_cost()
             self._score_response(ai_content)
 
@@ -80,8 +81,11 @@ def run(self):
 
     # ── helpers ──────────────────────────────────────────────
 
-    def _get_file_doc(self):
-        return frappe.get_last_doc("File", filters={"file_url": self.dataset.file})
+    def _get_file_docs(self) -> list[File]:
+        """Fetch the dataset's File docs; throw when the dataset has none."""
+        if not (docs := self.dataset.get_file_docs()):
+            frappe.throw(_("No files in dataset {0}").format(self.dataset.name))
+        return docs
 
     def _get_controller(self, file_doc: File) -> Transaction:
         ds = self.dataset
@@ -103,11 +107,7 @@ def _get_cost_row(self):
 
     # ── step 1: file parsing ────────────────────────────────
 
-    def _run_file_parsing(self, file_doc: File) -> str:
-        pdf_processor = None
-        if self.log.file_type == "PDF" and self.log.pdf_processor:
-            pdf_processor = get_pdf_processor(self.log.pdf_processor)
-
+    def _run_file_parsing(self, file_docs: list[File]) -> str:
         # to prevent stopping an already running tracemalloc instance
         was_tracing = tracemalloc.is_tracing()
         if not was_tracing:
@@ -115,10 +115,23 @@ def _run_file_parsing(self, file_doc: File) -> str:
 
         start = default_timer()
         try:
-            content = FileProcessor().get_content(
-                file_doc,
-                self.dataset.page_limit or None,
-                pdf_processor,
+            contents = []
+            for file_doc in file_docs:
+                pdf_processor = None
+                if file_doc.file_type == "PDF" and self.log.pdf_processor:
+                    pdf_processor = get_pdf_processor(self.log.pdf_processor)
+
+                content = FileProcessor().get_content(
+                    file_doc,
+                    self.dataset.page_limit or None,
+                    pdf_processor,
+                )
+                contents.append(content)
+
+            combined = (
+                "\n\n--- Document Separator ---\n\n".join(contents)
+                if len(contents) > 1
+                else contents[0]
             )
         finally:
             self.log.file_parse_time = flt(default_timer() - start, self.precision)
@@ -129,8 +142,8 @@ def _run_file_parsing(self, file_doc: File) -> str:
                 peak / 1024 / 1024, self.precision
             )  # bytes → MB
 
-        self.log.file_content = content
-        return content
+        self.log.file_content = combined
+        return combined
 
     # ── step 2: AI parsing ──────────────────────────────────
 
diff --git a/transaction_parser/patches.txt b/transaction_parser/patches.txt
index d88f879..9107383 100644
--- a/transaction_parser/patches.txt
+++ b/transaction_parser/patches.txt
@@ -2,9 +2,11 @@
 # Patches added in this section will be executed before doctypes are migrated
 # Read docs to understand patches: https://frappeframework.com/docs/v14/user/en/database-migrations
 transaction_parser.patches.rename_gemini_models
+transaction_parser.patches.remove_dataset_file_field
 
 [post_model_sync]
 # Patches added in this section will be executed after doctypes are migrated
 execute:from transaction_parser.install import after_install; after_install() #2
 transaction_parser.patches.set_default_pdf_processor #1
 transaction_parser.patches.recalculate_accuracy
+transaction_parser.patches.populate_dataset_files_table
diff --git a/transaction_parser/patches/populate_dataset_files_table.py b/transaction_parser/patches/populate_dataset_files_table.py
new file mode 100644
index 0000000..07a415f
--- /dev/null
+++ b/transaction_parser/patches/populate_dataset_files_table.py
@@ -0,0 +1,78 @@
+"""
+Populate the new ``files`` child table on Parser Benchmark Dataset.
+
+Runs post_model_sync, once the ``Parser Benchmark Dataset File`` child table
+exists. Every dataset that has no child rows yet gets its original file back:
+
+* If the legacy ``file`` column still exists (model sync removes the field from
+  the DocType but does not drop the database column), its URL becomes the only
+  row, so unrelated attachments are never mistaken for benchmark input files.
+* Otherwise every File doc attached to the dataset (the pre_model_sync patch
+  ``remove_dataset_file_field`` guarantees one) becomes a row, oldest first so
+  that row order is stable.
+"""
+
+import frappe
+
+DATASET = "Parser Benchmark Dataset"
+DATASET_FILE = "Parser Benchmark Dataset File"
+
+
+def execute():
+    """Backfill ``files`` rows and ``is_multiple_files`` on existing datasets."""
+    legacy_urls = _get_legacy_file_urls()
+
+    for name in frappe.get_all(DATASET, pluck="name"):
+        # Skip datasets that already have rows so the patch can be re-run safely.
+        if frappe.db.count(DATASET_FILE, {"parent": name}):
+            continue
+
+        files = _get_files_for_dataset(name, legacy_urls.get(name))
+        if not files:
+            continue
+
+        for idx, f in enumerate(files, 1):
+            child = frappe.new_doc(DATASET_FILE)
+            child.update(
+                {
+                    "parent": name,
+                    "parenttype": DATASET,
+                    "parentfield": "files",
+                    "idx": idx,
+                    "file": f.file_url,
+                    "file_type": f.file_type or "",
+                }
+            )
+            child.db_insert()
+
+        frappe.db.set_value(
+            DATASET, name, "is_multiple_files", 1 if len(files) > 1 else 0
+        )
+
+
+def _get_legacy_file_urls() -> dict:
+    """Return ``{dataset name: file URL}`` read from the legacy ``file`` column."""
+    if not frappe.db.has_column(DATASET, "file"):
+        return {}
+
+    # Raw SQL because ``file`` is no longer part of the DocType meta after sync.
+    rows = frappe.db.sql(
+        "select `name`, `file` from `tabParser Benchmark Dataset`"
+        " where coalesce(`file`, '') != ''",
+        as_dict=True,
+    )
+    return {row.name: row.file for row in rows}
+
+
+def _get_files_for_dataset(name: str, legacy_url: str | None) -> list:
+    """Return the ``file_url``/``file_type`` records to insert for one dataset."""
+    if legacy_url:
+        file_type = frappe.db.get_value("File", {"file_url": legacy_url}, "file_type")
+        return [frappe._dict(file_url=legacy_url, file_type=file_type)]
+
+    return frappe.get_all(
+        "File",
+        filters={"attached_to_doctype": DATASET, "attached_to_name": name},
+        fields=["file_url", "file_type"],
+        order_by="creation asc",
+    )
diff --git a/transaction_parser/patches/remove_dataset_file_field.py b/transaction_parser/patches/remove_dataset_file_field.py
new file mode 100644
index 0000000..87e2b48
--- /dev/null
+++ b/transaction_parser/patches/remove_dataset_file_field.py
@@ -0,0 +1,55 @@
+"""
+Make sure every Parser Benchmark Dataset file is a proper Frappe attachment.
+
+Runs pre_model_sync, while the ``file`` field is still part of the DocType.
+For every dataset with a URL stored in ``file``, guarantee that a File doc with
+that URL is linked to the dataset via ``attached_to_doctype`` /
+``attached_to_name`` so the ``populate_dataset_files_table`` patch can find it.
+"""
+
+import frappe
+
+DATASET = "Parser Benchmark Dataset"
+
+
+def execute():
+    """Link (or create) a File attachment for each dataset's legacy ``file`` URL."""
+    if not frappe.db.has_column(DATASET, "file"):
+        return
+
+    datasets = frappe.get_all(
+        DATASET,
+        filters={"file": ("is", "set")},
+        fields=["name", "file"],
+    )
+
+    for ds in datasets:
+        attachment = {"attached_to_doctype": DATASET, "attached_to_name": ds.name}
+
+        # Nothing to do when a File doc is already linked to this dataset.
+        if frappe.db.exists("File", {"file_url": ds.file, **attachment}):
+            continue
+
+        # Look specifically for an orphan: several File docs can share one URL,
+        # and picking an arbitrary one could miss it and create a needless copy.
+        orphans = frappe.get_all(
+            "File",
+            filters={"file_url": ds.file, "attached_to_doctype": ("is", "not set")},
+            pluck="name",
+            limit=1,
+        )
+
+        if orphans:
+            frappe.db.set_value("File", orphans[0], attachment)
+        else:
+            # Missing, or attached to something else: add a File doc of our own.
+            _create_attachment(ds.name, ds.file)
+
+
+def _create_attachment(dataset_name: str, file_url: str):
+    """Create a new File doc attached to the given dataset."""
+    f = frappe.new_doc("File")
+    f.file_url = file_url
+    f.attached_to_doctype = DATASET
+    f.attached_to_name = dataset_name
+    f.insert(ignore_permissions=True)