Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions transaction_parser/parser_benchmark/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ def _run_ai_parsing(self, file_content: str, file_name: str) -> dict:
document_schema=self.controller.get_schema(),
document_data=file_content,
file_doc_name=file_name,
company=self.dataset.company,
)
self.log.ai_parse_time = flt(default_timer() - start, self.precision)

Expand Down
11 changes: 11 additions & 0 deletions transaction_parser/public/js/transaction_parser_dialog.js
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,17 @@ async function create_transaction_parser_dialog(transaction_type, list_view) {
default: get_default_country(),
reqd: 1,
},
{
fieldtype: "Section Break",
},
{
fieldname: "company",
label: __("Company"),
fieldtype: "Link",
options: "Company",
default: frappe.defaults.get_user_default("Company"),
reqd: 1,
},
],
primary_action_label: __("Submit"),
primary_action(values) {
Expand Down
3 changes: 2 additions & 1 deletion transaction_parser/transaction_parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@


@frappe.whitelist()
def parse(transaction, country, file_url, ai_model=None, page_limit=None):
def parse(transaction, country, file_url, ai_model=None, page_limit=None, company=None):
is_enabled()

frappe.has_permission(TRANSACTION_MAP[transaction], "create", throw=True)
Expand All @@ -27,6 +27,7 @@ def parse(transaction, country, file_url, ai_model=None, page_limit=None):
file_url=cstr(file_url),
ai_model=cstr(ai_model),
page_limit=cint(page_limit),
company=cstr(company) if company else None,
queue="long",
now=frappe.conf.developer_mode,
)
Expand Down
31 changes: 28 additions & 3 deletions transaction_parser/transaction_parser/ai_integration/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,19 +36,27 @@ def parse(
document_schema: dict,
document_data: str,
file_doc_name: str | None = None,
company: str | None = None,
) -> dict:
messages = self._build_messages(document_type, document_schema, document_data)
messages = self._build_messages(
document_type, document_schema, document_data, company
)
self.ai_response = self.send_message(
messages=messages, file_doc_name=file_doc_name
)
return self.get_content(self.ai_response)

def _build_messages(
self, document_type: str, document_schema: dict, document_data: str
self,
document_type: str,
document_schema: dict,
document_data: str,
company: str | None = None,
) -> tuple:
"""Build the message structure for AI API call."""
company_info = self._get_company_info(company) if company else ""
system_prompt = get_system_prompt(document_schema)
user_prompt = get_user_prompt(document_type, document_data)
user_prompt = get_user_prompt(document_type, document_data, company_info)

return (
{
Expand All @@ -61,6 +69,23 @@ def _build_messages(
},
)

@staticmethod
def _get_company_info(company: str) -> str:
    """Build a company context string with name and address if available.

    Args:
        company: Company name, passed to ``get_company_address``.

    Returns:
        ``"Company: <company>"``, followed by a ``"Located at: <address>"``
        line when the company's address display yields non-empty plain text.
    """
    import html

    from frappe.contacts.doctype.address.address import get_company_address
    from frappe.utils import strip_html

    info = f"Company: {company}"

    address = get_company_address(company)
    if address and address.company_address_display:
        # strip_html removes tags but leaves HTML entities (&amp;, &nbsp;, ...)
        # in place; decode them so they do not reach the AI prompt verbatim.
        # Strip last so whitespace produced by decoding (e.g. a non-breaking
        # space from &nbsp;) is trimmed as well.
        address_text = html.unescape(
            strip_html(address.company_address_display)
        ).strip()

        if address_text:
            info += f"\nLocated at: {address_text}"

    return info

def send_message(self, messages: tuple, file_doc_name: str | None = None) -> dict:
"""Send messages to AI API and handle the response."""
log = self._create_log_entry(file_doc_name)
Comment on lines 71 to 91
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 company_address_display may contain unresolved HTML entities

strip_html removes HTML tags, but it does not decode HTML entities such as &amp;, &nbsp;, &#39;, etc. If the address stored in Frappe contains these entities (which is common for addresses stored as rich text), they will appear verbatim in the AI prompt, potentially confusing the model.

Consider adding an HTML entity decode step:

import html

address_text = html.unescape(strip_html(address.company_address_display).strip())

Expand Down
21 changes: 19 additions & 2 deletions transaction_parser/transaction_parser/ai_integration/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,27 @@ def get_system_prompt(document_schema: dict) -> str:
{document_schema}"""


def get_user_prompt(document_type: str, document_data: str) -> str:
def get_user_prompt(
document_type: str, document_data: str, company_info: str = ""
) -> str:
input_doc_type = INPUT_DOCUMENTS.get(document_type, "document")

return f"""Generate {document_type} for given {input_doc_type} according to above JSON schema.
company_context = ""
if company_info:
if document_type == "Sales Order":
role_hint = "Use this to correctly identify the company as the seller/vendor and the other party as the customer/buyer."
else:
role_hint = "Use this to correctly identify the company as the buyer/recipient and the other party as the vendor/supplier."
Comment on lines +44 to +47
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Role hint only checks for "Sales Order" — may be incomplete for future types

The role hint currently branches on exactly "Sales Order" and defaults to "buyer/recipient" for everything else. If a new selling-side document type is added in the future (e.g. a "Quotation"), it would silently get the wrong role hint.

Consider aligning this with the INPUT_DOCUMENTS dict so the selling-side types are enumerated in one place:

SELLER_DOCUMENT_TYPES = {"Sales Order"}

...

if document_type in SELLER_DOCUMENT_TYPES:
    role_hint = "Use this to correctly identify the company as the seller/vendor and the other party as the customer/buyer."
else:
    role_hint = "Use this to correctly identify the company as the buyer/recipient and the other party as the vendor/supplier."


company_context = f"""

This {input_doc_type} is received by the following company:
{company_info}

{role_hint}
"""

return f"""Generate {document_type} for the given {input_doc_type} according to above JSON schema.{company_context}
Document data is given below:
{document_data}"""

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ def _parse_file_content(
document_schema=schema,
document_data=content,
file_doc_name=self.file.name,
company=self.company,
)

###################################
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,4 @@
"sort_field": "modified",
"sort_order": "DESC",
"states": []
}
}
25 changes: 13 additions & 12 deletions transaction_parser/transaction_parser/overrides/communication.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,18 @@ def on_update(doc, method=None):
if not (settings.enabled and settings.parse_incoming_emails):
return

matched_account = next(
(
row
for row in settings.incoming_email_accounts
if row.to_email in doc.recipients
),
None,
)

if not matched_account:
return
Comment on lines +16 to +26
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Behavioral change: matched_account now gates party-email processing

Moving the matched_account lookup and early-return to the top of on_update means that emails from known party senders are no longer processed unless the destination address also matches an entry in incoming_email_accounts. Previously the parse_party_emails path ran independently of matched_account.

This is a meaningful breaking change for any installation that has parse_party_emails configured with party-email entries but without a corresponding incoming_email_accounts row covering the same destination address. Those emails will now silently be dropped.

If this change is intentional (i.e. a matched account is always required in order to obtain matched_account.company), consider adding a code comment explaining the dependency to prevent future confusion:

# matched_account must be resolved before any processing path so we can
# derive the correct company for both the general and party-email flows.
matched_account = next(
    (
        row
        for row in settings.incoming_email_accounts
        if row.to_email in doc.recipients
    ),
    None,
)

if not matched_account:
    return


if settings.parse_party_emails:
matched_party_config = next(
(row for row in settings.party_emails if row.party_email == doc.sender),
Expand Down Expand Up @@ -41,21 +53,10 @@ def on_update(doc, method=None):
settings,
default_user,
matched_party_config.party,
matched_account.company,
)
return

matched_account = next(
(
row
for row in settings.incoming_email_accounts
if row.to_email in doc.recipients
),
None,
)

if not matched_account:
return

# Attachments are not available when the Communication doc is created.
# Next time the doc is updated, we will check for attachments,
# and update the flag `is_processed_by_transaction_parser` accordingly.
Expand Down
Loading