From c8f23087fa67f503859938652a3a1037c74eedb6 Mon Sep 17 00:00:00 2001
From: S1MS4 <arijussimasius@gmail.com>
Date: Fri, 3 Jul 2026 13:30:11 +0300
Subject: [PATCH] fix: handle math run with no text child in OMML->LaTeX
 conversion

do_r() called elm.findtext("./m:t") and iterated over the result
directly. When a math run (<m:r>) has no <m:t> text child (e.g. a
run that only carries formatting properties, produced by some Word
equation editors), findtext() returns None and iterating over it
raises TypeError: 'NoneType' object is not iterable.

Because equation pre-processing runs over the whole document.xml inside
a single blanket try/except, this one malformed run aborts LaTeX
conversion for every equation in the document, silently dropping all
native Word equations from the output.

Fall back to an empty string when findtext() returns None, so a run
without text converts to empty LaTeX instead of raising.
---
 .../converter_utils/docx/math/omml.py         |  2 +-
 packages/markitdown/tests/test_docx_omml.py   | 48 +++++++++++++++++++
 2 files changed, 49 insertions(+), 1 deletion(-)
 create mode 100644 packages/markitdown/tests/test_docx_omml.py

diff --git a/packages/markitdown/src/markitdown/converter_utils/docx/math/omml.py b/packages/markitdown/src/markitdown/converter_utils/docx/math/omml.py
index dfa734cdc..97ccdf0c8 100644
--- a/packages/markitdown/src/markitdown/converter_utils/docx/math/omml.py
+++ b/packages/markitdown/src/markitdown/converter_utils/docx/math/omml.py
@@ -373,7 +373,7 @@ def do_r(self, elm):
         @todo \text (latex pure text support)
         """
         _str = []
-        for s in elm.findtext("./{0}t".format(OMML_NS)):
+        for s in elm.findtext("./{0}t".format(OMML_NS)) or "":
             # s = s if isinstance(s,unicode) else unicode(s,'utf-8')
             _str.append(self._t_dict.get(s, s))
         return escape_latex(BLANK.join(_str))
diff --git a/packages/markitdown/tests/test_docx_omml.py b/packages/markitdown/tests/test_docx_omml.py
new file mode 100644
index 000000000..98574108c
--- /dev/null
+++ b/packages/markitdown/tests/test_docx_omml.py
@@ -0,0 +1,48 @@
+# -*- coding: utf-8 -*-
+"""
+Regression test for a crash in the OMML -> LaTeX converter when a math run
+(<m:r>) has no <m:t> text child (e.g. a run that only carries formatting
+properties). Previously `do_r()` called `elm.findtext(...)` directly and
+iterated over the result, which raised `TypeError: 'NoneType' object is not
+iterable` when the run had no text, aborting equation conversion for the
+entire document.
+"""
+
+from xml.etree import ElementTree as ET
+
+from markitdown.converter_utils.docx.math.omml import OMML_NS, oMath2Latex
+
+MATH_NS_DECL = f'xmlns:m="{OMML_NS[1:-1]}"'
+
+
+def _parse_omath(xml_fragment: str):
+    """Parse *xml_fragment* inside an <m:oMath> root that declares ``m``."""
+    return ET.fromstring(f"<m:oMath {MATH_NS_DECL}>{xml_fragment}</m:oMath>")
+
+
+def test_run_without_text_child_does_not_crash():
+    """A run holding only <m:rPr> (no <m:t>) converts to an empty string."""
+    # Before the fix this raised TypeError: 'NoneType' object is not iterable.
+    properties_only_run = _parse_omath("<m:r><m:rPr/></m:r>")
+    converted = oMath2Latex(properties_only_run)
+    assert converted.latex == ""
+
+
+def test_run_with_text_still_converts():
+    """A run whose <m:t> child is ``x`` still converts to ``x``."""
+    plain_run = _parse_omath("<m:r><m:t>x</m:t></m:r>")
+    assert oMath2Latex(plain_run).latex == "x"
+
+
+def test_subscript_with_missing_text_run_does_not_crash():
+    """A subscript whose <m:sub> holds a text-less run keeps both operands."""
+    # Mirrors a real-world document (e.g. from some Word equation editors).
+    base = "<m:e><m:r><m:t>l</m:t></m:r></m:e>"
+    subscript = (
+        "<m:sub>"
+        "<m:r><m:rPr/></m:r>"
+        "<m:r><m:t>1</m:t></m:r>"
+        "</m:sub>"
+    )
+    latex = oMath2Latex(_parse_omath(f"<m:sSub>{base}{subscript}</m:sSub>")).latex
+    assert "l" in latex and "1" in latex