def handle_code(self, element, idx, doc):
    """Add the text of a ``pre`` element to the document as a code item.

    The element's text is stripped of leading and trailing whitespace
    before it is added. Nothing is added when the element has no text
    or when the stripped text is empty.

    Args:
        element: The ``pre`` element being processed; only its ``text``
            attribute is read.
        idx: Unused here; accepted so the signature matches the sibling
            ``handle_*`` methods.
        doc: The document being built; receives the item via ``add_text``.
    """
    raw = element.text
    if raw is None:
        return

    snippet = raw.strip()
    if not snippet:
        # Whitespace-only blocks produce no document item.
        return

    doc.add_text(
        parent=self.parents[self.level],
        label=DocItemLabel.CODE,
        text=snippet,
    )