Skip to content

Commit

Permalink
fallback showing equations as image
Browse files Browse the repository at this point in the history
Signed-off-by: Michele Dolfi <[email protected]>
  • Loading branch information
dolfim-ibm committed Jan 31, 2025
1 parent eb0cac4 commit deabcec
Showing 1 changed file with 28 additions and 7 deletions.
35 changes: 28 additions & 7 deletions docling_core/types/doc/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -2288,7 +2288,7 @@ def save_as_html(
to_element: int = sys.maxsize,
labels: set[DocItemLabel] = DEFAULT_EXPORT_LABELS,
image_mode: ImageRefMode = ImageRefMode.PLACEHOLDER,
formula_to_mathml: bool = False,
formula_to_mathml: bool = True,
page_no: Optional[int] = None,
html_lang: str = "en",
html_head: str = _HTML_DEFAULT_HEAD,
Expand Down Expand Up @@ -2355,7 +2355,7 @@ def export_to_html( # noqa: C901
to_element: int = sys.maxsize,
labels: set[DocItemLabel] = DEFAULT_EXPORT_LABELS,
image_mode: ImageRefMode = ImageRefMode.PLACEHOLDER,
formula_to_mathml: bool = False,
formula_to_mathml: bool = True,
page_no: Optional[int] = None,
html_lang: str = "en",
html_head: str = _HTML_DEFAULT_HEAD,
Expand Down Expand Up @@ -2467,9 +2467,27 @@ def _prepare_tag_content(
math_formula = _prepare_tag_content(
item.text, do_escape_html=False, do_replace_newline=False
)
if formula_to_mathml:
# Building a math equation in MathML format
# ref https://www.w3.org/TR/wai-aria-1.1/#math
text = ""

# If the formula is not processed correctly, use its image
if (
item.text == ""
and item.orig != ""
and image_mode == ImageRefMode.EMBEDDED
and len(item.prov) > 0
):
item_image = item.get_image(doc=self)
if item_image is not None:
img_ref = ImageRef.from_pil(item_image, dpi=72)
text = (
"<figure>"
f'<img src="{img_ref.uri}" alt="{item.orig}" />'
"</figure>"
)

# Building a math equation in MathML format
# ref https://www.w3.org/TR/wai-aria-1.1/#math
elif formula_to_mathml:
mathml_element = latex2mathml.converter.convert_to_element(
math_formula, display="block"
)
Expand All @@ -2479,9 +2497,12 @@ def _prepare_tag_content(
annotation.text = math_formula
mathml = unescape(tostring(mathml_element, encoding="unicode"))
text = f"<div>{mathml}</div>"
else:

elif math_formula != "":
text = f"<pre>{math_formula}</pre>"
html_texts.append(text)

if text != "":
html_texts.append(text)

elif isinstance(item, ListItem):

Expand Down

0 comments on commit deabcec

Please sign in to comment.