diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py index d1689155..5ebcd0a4 100644 --- a/docling_core/types/doc/document.py +++ b/docling_core/types/doc/document.py @@ -2160,6 +2160,10 @@ def export_to_markdown( # noqa: C901 text = f"{list_indent}{marker} {item.text}" mdtexts.append(text) + elif isinstance(item, TextItem) and item.label in [DocItemLabel.FORMULA]: + in_list = False + mdtexts.append(f"$${item.text}$$") + elif isinstance(item, TextItem) and item.label in labels: in_list = False if len(item.text) and text_width > 0: @@ -2208,10 +2212,14 @@ def escape_underscores(text): """Escape underscores but leave them intact in the URL..""" # Firstly, identify all the URL patterns. url_pattern = r"!\[.*?\]\((.*?)\)" + # Matches both inline ($...$) and block ($$...$$) LaTeX equations: + latex_pattern = r"\$\$?(?:\\.|[^$\\])*\$\$?" + combined_pattern = f"({url_pattern})|({latex_pattern})" + parts = [] last_end = 0 - for match in re.finditer(url_pattern, text): + for match in re.finditer(combined_pattern, text): # Text to add before the URL (needs to be escaped) before_url = text[last_end : match.start()] parts.append(re.sub(r"(?