Skip to content
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
105 commits
Select commit Hold shift + click to select a range
2d8dfd2
feat: add Parser Benchmark module
Abdeali099 Mar 16, 2026
23983f7
feat: add Parser Benchmark module with dataset, log, and settings fun…
Abdeali099 Mar 16, 2026
f41abe5
chore: Minor changes
Abdeali099 Mar 16, 2026
0450241
chore: update Parser Benchmark Dataset naming
Abdeali099 Mar 17, 2026
66cdbdc
Merge branch 'use-docling-to-extract-data' into test-suite
Abdeali099 Mar 17, 2026
f3fb150
fix: update autoname format and add naming series field in parser ben…
Abdeali099 Mar 17, 2026
5bf6d8a
Merge branch 'use-docling-to-extract-data' into test-suite
Abdeali099 Mar 17, 2026
957eaff
fix: replace resource usage tracking with tracemalloc for memory meas…
Abdeali099 Mar 17, 2026
c5e309b
fix: add parse_with_response method to AIParser for enhanced diagnost…
Abdeali099 Mar 17, 2026
7ecb89c
Merge branch 'use-docling-to-extract-data' into test-suite
Abdeali099 Mar 17, 2026
402325f
Merge branch 'use-docling-to-extract-data' into test-suite
Abdeali099 Mar 17, 2026
b592529
fix: add 'is_created_by_benchmark' field to custom fields for Sales O…
Abdeali099 Mar 17, 2026
fa1446f
fix: ensure file parsing time is logged correctly with error handling
Abdeali099 Mar 17, 2026
2edea6e
Merge branch 'use-docling-to-extract-data' into test-suite
Abdeali099 Mar 17, 2026
060905b
fix: streamline document creation process in BenchmarkRunner and Tran…
Abdeali099 Mar 17, 2026
83dbb2e
Merge branch 'use-docling-to-extract-data' into test-suite
Abdeali099 Mar 17, 2026
2f8a2bb
Merge branch 'use-docling-to-extract-data' into test-suite
Abdeali099 Mar 17, 2026
1bc6ceb
Merge branch 'use-docling-to-extract-data' into test-suite
Abdeali099 Mar 17, 2026
58f4ff9
fix: some fixes in parser benchmark dataset
Abdeali099 Mar 17, 2026
d5ca47b
Merge branch 'use-docling-to-extract-data' into test-suite
Abdeali099 Mar 17, 2026
164f518
refactor: enhance parser benchmark dataset and log with currency and …
Abdeali099 Mar 17, 2026
a35bcaf
Merge branch 'use-docling-to-extract-data' into test-suite
Abdeali099 Mar 17, 2026
a35f60e
refactor: refactor parser benchmark settings and token cost configura…
Abdeali099 Mar 17, 2026
5834e2e
fix: update patch number
Abdeali099 Mar 17, 2026
b1e4672
Merge branch 'use-docling-to-extract-data' into test-suite
Abdeali099 Mar 17, 2026
b59ab2f
refactor: simplify AIParser's parse method and remove parse_with_resp…
Abdeali099 Mar 17, 2026
3beb555
refactor: enhance BenchmarkRunner for improved precision and clarity …
Abdeali099 Mar 17, 2026
32d3d1b
fix: update modified timestamps and correct field types in benchmark …
Abdeali099 Mar 18, 2026
0f7eb14
revert: remove redundant benchmark fields from CUSTOM_FIELDS and upda…
Abdeali099 Mar 18, 2026
22a12ed
refactor: remove unused fields from ParserBenchmarkLog and update fie…
Abdeali099 Mar 18, 2026
e234932
refactor: update field types and modify structure in benchmark datase…
Abdeali099 Mar 18, 2026
ca02fd7
refactor: reorder fields in parser benchmark dataset and add column b…
Abdeali099 Mar 18, 2026
a0e9f83
refactor: update AI model options and modify timestamps in benchmark …
Abdeali099 Mar 18, 2026
5344529
refactor: enhance parser benchmark dataset structure with additional …
Abdeali099 Mar 18, 2026
c5a601d
refactor: remove unnecessary label from "Run Benchmark" button in Par…
Abdeali099 Mar 18, 2026
a05f8d5
refactor: add standard filter options for status, transaction type, a…
Abdeali099 Mar 18, 2026
316180a
refactor: update AI model and PDF processor fields to use select opti…
Abdeali099 Mar 18, 2026
4dcc6f2
refactor: update parser benchmark settings and dataset structure by r…
Abdeali099 Mar 18, 2026
20943df
refactor: enhance parser benchmark functionality with scheduling and …
Abdeali099 Mar 18, 2026
02b2744
refactor: add party type and party fields to parser benchmark dataset…
Abdeali099 Mar 18, 2026
2dcfb60
refactor: update parser benchmark dataset and log to include party ty…
Abdeali099 Mar 18, 2026
cd4922a
chore: minor fixes
Abdeali099 Mar 18, 2026
db21a51
refactor: update benchmark log to handle dynamic processor selection …
Abdeali099 Mar 18, 2026
a6b54a8
chore: minor fixes
Abdeali099 Mar 18, 2026
b0a2632
refactor: add file_type field to parser benchmark dataset and log, up…
Abdeali099 Mar 18, 2026
e6a749b
refactor: update parser benchmark dataset and log to include file_typ…
Abdeali099 Mar 18, 2026
595267a
refactor: enhance processor validation and error handling in benchmar…
Abdeali099 Mar 18, 2026
5c3abb8
refactor: remove title field from ParserBenchmarkDataset class and ad…
Abdeali099 Mar 19, 2026
1a5f1d3
refactor: update scheduler_events to use cron format for daily benchm…
Abdeali099 Mar 19, 2026
1877ef7
chore: temp party patch
Abdeali099 Mar 20, 2026
6989ec1
refactor: enhance parser benchmark dataset and log with accuracy scor…
Abdeali099 Mar 20, 2026
651c3a0
chore: minor changes
Abdeali099 Mar 20, 2026
4b91704
chore: minor changes
Abdeali099 Mar 20, 2026
3818205
refactor: remove response_hash from ParserBenchmarkLog and related sc…
Abdeali099 Mar 21, 2026
8c77061
fix: correct default distance value in score_response function
Abdeali099 Mar 21, 2026
c2e7bb3
refactor: enhance mismatch formatting to handle unhandled DeepDiff ch…
Abdeali099 Mar 21, 2026
e0e8558
refactor: add validation for expected result to ensure valid JSON format
Abdeali099 Mar 21, 2026
8e90ef7
refactor: update conditions and add fields for Parser Benchmark Datas…
Abdeali099 Mar 21, 2026
019f946
refactor: update validate_selected_processors to reset processor fiel…
Abdeali099 Mar 21, 2026
ef37aaf
refactor: streamline validation logic by consolidating expected resul…
Abdeali099 Mar 24, 2026
1198a3b
refactor: add dataset check in _get_dataset and enhance _normalize_em…
Abdeali099 Mar 24, 2026
f32b985
refactor: add currency field to log creation for better financial tra…
Abdeali099 Mar 24, 2026
2da7ff2
feat: add Transaction Parser Accuracy Analysis report files and imple…
Abdeali099 Mar 24, 2026
1cd7f65
feat: enhance Transaction Parser Accuracy Analysis with new filters a…
Abdeali099 Mar 25, 2026
f5b048b
feat: enhance Accuracy Analysis Report with new metrics and aggregati…
Abdeali099 Mar 25, 2026
299e95e
fix: remove ignore_order parameter from DeepDiff in score_response fu…
Abdeali099 Mar 25, 2026
e870e00
refactor: optimize SQL query construction in AccuracyAnalysisReport
Abdeali099 Mar 25, 2026
0f1fe55
fix: update default value for enabled field in Parser Benchmark Settings
Abdeali099 Mar 25, 2026
a842310
feat: add commit information fields to Parser Benchmark Log and Settings
Abdeali099 Mar 25, 2026
87403dc
refactor: rename and update log creation functions for benchmark proc…
Abdeali099 Mar 25, 2026
f7cf22e
feat: enhance score_response function to accept significant_digits pa…
Abdeali099 Mar 25, 2026
e7781d9
chore: linter fix with db-commit msg
Abdeali099 Mar 26, 2026
bd7b3a3
fix: add error handling for benchmark log enqueueing and improve trac…
Abdeali099 Mar 26, 2026
41f0d23
feat: proper accuracy measuring
Abdeali099 Mar 27, 2026
32810fa
refactor: add validation for key weights to prevent duplicates
Abdeali099 Mar 30, 2026
61964e7
refactor: replace hardcoded DOCTYPE with constant and improve Benchma…
Abdeali099 Mar 30, 2026
70a54ff
feat: add Transaction Parser Version Comparison report
Abdeali099 Mar 30, 2026
cd3affb
feat: add option to include disabled datasets in accuracy analysis an…
Abdeali099 Mar 30, 2026
afd74c7
feat: implement multiple file support in Parser Benchmark Dataset and…
Abdeali099 Mar 30, 2026
cbefeb9
fix: add recalculation of accuracy scores for completed Parser Benchm…
Abdeali099 Mar 30, 2026
80c5e60
Merge branch 'test-suite' into multifiles-dataset
Abdeali099 Mar 30, 2026
b891982
fix: filter completed logs by document status in accuracy analysis an…
Abdeali099 Mar 30, 2026
2ec73f4
Merge branch 'test-suite' into multifiles-dataset
Abdeali099 Mar 30, 2026
494aa65
refactor: enhance Parser Benchmark Dataset with multiple file support…
Abdeali099 Mar 30, 2026
51ccd69
Merge pull request #81 from resilient-tech/multifiles-dataset
Abdeali099 Mar 30, 2026
7471bc5
fix: add file type validation for Parser Benchmark Dataset
Abdeali099 Mar 31, 2026
ed3e5f5
feat: enhance transaction parsing to support multiple file attachments
karm1000 Mar 31, 2026
edb56d0
feat: add log names column and formatter to accuracy analysis and ver…
Abdeali099 Mar 31, 2026
995852f
feat: refactor file processing to support multiple attachments and im…
karm1000 Mar 31, 2026
1906196
feat: enhance AI parsing and prompts with company details integration
Abdeali099 Apr 1, 2026
df95e1d
fix: enhance prompts with company details and improve email account m…
Abdeali099 Apr 1, 2026
5d45c3d
fix: ensure company address is checked before accessing display prope…
Abdeali099 Apr 1, 2026
6480774
fix: ensure company address is only appended if it is not empty in th…
Abdeali099 Apr 1, 2026
a7d0194
feat: enhance response merging and attachment processing for improved…
karm1000 Apr 1, 2026
b9a2eb1
Merge pull request #83 from resilient-tech/enhance-prompt-with-compan…
Abdeali099 Apr 1, 2026
bac88e8
Merge branch 'multiple-files-attachments' into test-suite
karm1000 Apr 1, 2026
6b760f0
fix: update file name reference and enhance AI model options in settings
karm1000 Apr 1, 2026
d62f670
refactor: add seller document type and update user prompt role hint l…
Abdeali099 Apr 1, 2026
a596978
refactor: enhance file handling and AI parsing logic to support multi…
Abdeali099 Apr 1, 2026
21ec294
refactor: patch for process one document per communication
Abdeali099 Apr 1, 2026
f35a05d
refactor: add dependency for process_one_document_per_communication s…
Abdeali099 Apr 1, 2026
7a2e974
fix: correct file_url to file_urls in parse function and handle None …
Abdeali099 Apr 1, 2026
7110c5e
feat: implement custom exception handling for file processing errors …
karm1000 Apr 3, 2026
2268689
Merge branch 'multiple-files-attachments' into test-suite
karm1000 Apr 3, 2026
84d8258
fix: update parameter name in _create_log_entry method for consistency
karm1000 Apr 3, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion transaction_parser/modules.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
Transaction Parser
Transaction Parser
Parser Benchmark
Empty file.
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Copyright (c) 2026, Resilient Tech and contributors
// For license information, please see license.txt

// Form events for the "Parser Benchmark Dataset" DocType.
const parser_benchmark_dataset_events = {
	// On every refresh: offer the "Run Benchmark" action for saved documents
	// and reload the selectable PDF processors.
	refresh: function (frm) {
		const is_saved = !frm.is_new();

		if (is_saved) {
			// An unsaved document has no name yet to send to the server.
			frm.add_custom_button(
				__("Run Benchmark"),
				function () {
					return run_benchmark(frm);
				},
				__("Actions")
			);
		}

		set_pdf_processor_options(frm);
	},
};

frappe.ui.form.on("Parser Benchmark Dataset", parser_benchmark_dataset_events);

/**
 * Ask the server to queue a benchmark run for the current dataset and, when
 * the server replies with a value, open the matching "Parser Benchmark Log".
 *
 * @param {object} frm - Form object of the Parser Benchmark Dataset.
 */
function run_benchmark(frm) {
	const method_path =
		"transaction_parser.parser_benchmark.doctype.parser_benchmark_dataset.parser_benchmark_dataset.run_benchmark";

	// `r.message` is used as the name of the log document to route to.
	const on_response = function (r) {
		const log_name = r.message;
		if (!log_name) {
			return;
		}

		frappe.msgprint({
			message: __("Benchmark queued. Redirecting to log..."),
			alert: true,
		});
		frappe.set_route("Form", "Parser Benchmark Log", log_name);
	};

	frappe.call({
		method: method_path,
		args: { dataset_name: frm.doc.name },
		freeze: true,
		freeze_message: __("Queuing benchmark..."),
		callback: on_response,
	});
}

/**
 * Fetch the PDF processors from Transaction Parser Settings and use the
 * reply as the options of the `pdf_processor` field.
 *
 * @param {object} frm - Form object whose `pdf_processor` field is updated.
 */
function set_pdf_processor_options(frm) {
	const method_path =
		"transaction_parser.transaction_parser.doctype.transaction_parser_settings.transaction_parser_settings.get_pdf_processors";

	const apply_options = function (r) {
		const processors = r.message;
		// Keep the field's current options when the server returns nothing.
		if (!processors) {
			return;
		}
		frm.set_df_property("pdf_processor", "options", processors);
	};

	frappe.call({
		method: method_path,
		callback: apply_options,
	});
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
{
"actions": [],
"autoname": "format:PBD-{#####}",
"creation": "2026-03-16 00:00:00.000000",
"doctype": "DocType",
"engine": "InnoDB",
"field_order": [
"title",
"column_break_title",
"enabled",
"section_break_file",
"file",
"column_break_file",
"transaction_type",
"country",
"processing_section",
"ai_model",
"pdf_processor",
"column_break_processing",
"company",
"page_limit"
],
"fields": [
{
"fieldname": "title",
"fieldtype": "Data",
"label": "Title",
"reqd": 1
},
{
"fieldname": "column_break_title",
"fieldtype": "Column Break"
},
{
"default": "1",
"fieldname": "enabled",
"fieldtype": "Check",
"label": "Enabled"
},
{
"fieldname": "section_break_file",
"fieldtype": "Section Break"
},
{
"fieldname": "file",
"fieldtype": "Attach",
"label": "File",
"reqd": 1
},
{
"fieldname": "column_break_file",
"fieldtype": "Column Break"
},
{
"fieldname": "transaction_type",
"fieldtype": "Select",
"in_list_view": 1,
"label": "Transaction Type",
"options": "Sales Order\nExpense",
"reqd": 1
},
{
"default": "Other",
"fieldname": "country",
"fieldtype": "Select",
"label": "Country",
"options": "India\nOther",
"reqd": 1
},
{
"fieldname": "processing_section",
"fieldtype": "Section Break",
"label": "Processing Configuration"
},
{
"fieldname": "ai_model",
"fieldtype": "Select",
"in_list_view": 1,
"label": "AI Model",
"options": "DeepSeek Chat\nDeepSeek Reasoner\nOpenAI gpt-4o\nOpenAI gpt-4o-mini\nOpenAI gpt-5\nOpenAI gpt-5-mini\nGoogle Gemini Pro\nGoogle Gemini Flash",
"reqd": 1
},
{
"fieldname": "pdf_processor",
"fieldtype": "Select",
"label": "PDF Processor",
"options": "OCRMyPDF\nDocling"
},
{
"fieldname": "column_break_processing",
"fieldtype": "Column Break"
},
{
"fieldname": "company",
"fieldtype": "Link",
"label": "Company",
"options": "Company",
"reqd": 1
},
{
"fieldname": "page_limit",
"fieldtype": "Int",
"label": "Page Limit"
}
],
"index_web_pages_for_search": 1,
"links": [],
"modified": "2026-03-16 00:00:00.000000",
"modified_by": "Administrator",
"module": "Parser Benchmark",
"name": "Parser Benchmark Dataset",
"naming_rule": "Expression",
"owner": "Administrator",
"permissions": [
{
"create": 1,
"delete": 1,
"email": 1,
"export": 1,
"print": 1,
"read": 1,
"report": 1,
"role": "System Manager",
"share": 1,
"write": 1
}
],
"sort_field": "modified",
"sort_order": "DESC",
"states": [],
"title_field": "title",
"show_title_field_in_link": 1
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Copyright (c) 2026, Resilient Tech and contributors
# For license information, please see license.txt

import frappe
from frappe.model.document import Document


class ParserBenchmarkDataset(Document):
	"""Controller for the "Parser Benchmark Dataset" DocType.

	The class defines no custom behaviour of its own; it only carries the
	auto-generated field type hints below. Benchmark runs for a dataset are
	started through the module-level ``run_benchmark`` function.
	"""

	# begin: auto-generated types
	# This code is auto-generated. Do not modify anything in this block.

	from typing import TYPE_CHECKING

	if TYPE_CHECKING:
		from frappe.types import DF

		ai_model: DF.Select
		company: DF.Link
		country: DF.Select
		enabled: DF.Check
		file: DF.Attach
		page_limit: DF.Int
		pdf_processor: DF.Select | None
		title: DF.Data
		transaction_type: DF.Select
	# end: auto-generated types

	pass


@frappe.whitelist()
def run_benchmark(dataset_name: str):
	"""Create a Parser Benchmark Log for a dataset and enqueue the benchmark run.

	Args:
		dataset_name: Name of the Parser Benchmark Dataset to benchmark.

	Returns:
		The name of the newly inserted Parser Benchmark Log (status "Queued").

	Raises:
		frappe.PermissionError: If the current user may not write to the
			given dataset (``has_permission`` is called with ``throw=True``).
	"""
	# Check permission on the specific dataset document, not only on the
	# DocType: the log below is inserted with ignore_permissions=True, so this
	# is the only gate. Passing the name makes Frappe load the document, which
	# also rejects a dataset that does not exist before a log is created.
	frappe.has_permission(
		"Parser Benchmark Dataset",
		"write",
		doc=dataset_name,
		throw=True,
	)

	log = frappe.get_doc(
		{
			"doctype": "Parser Benchmark Log",
			"dataset": dataset_name,
			"status": "Queued",
		}
	).insert(ignore_permissions=True)

	# Persist the log before handing its name to the job.
	# NOTE(review): presumably needed so the background worker can read the
	# row in its own transaction - confirm.
	frappe.db.commit()

	frappe.enqueue(
		_run_benchmark,
		log_name=log.name,
		queue="long",
		# Execute inline instead of queuing when developer mode is enabled.
		now=frappe.conf.developer_mode,
	)

	return log.name


def _run_benchmark(log_name: str):
	"""Run the benchmark recorded by the given Parser Benchmark Log.

	This is the callable handed to ``frappe.enqueue`` by ``run_benchmark``.
	"""
	# Imported here rather than at module level, so the runner module is only
	# loaded when a benchmark actually executes.
	from transaction_parser.parser_benchmark.runner import BenchmarkRunner

	benchmark = BenchmarkRunner(log_name)
	benchmark.run()
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Copyright (c) 2026, Resilient Tech and contributors
// For license information, please see license.txt

// frappe.ui.form.on("Parser Benchmark Log", {
// refresh(frm) {

// },
// });
Loading
Loading