From 08a45fa4bdb063b92631acaadbea59779ea0b7fb Mon Sep 17 00:00:00 2001 From: ZeyuTeng96 <96521059+ZeyuTeng96@users.noreply.github.com> Date: Mon, 6 Jan 2025 09:59:46 +0800 Subject: [PATCH] remove leading and trailing \n for HtmlConverter --- src/markitdown/_markitdown.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py index b6acfe8..3ce0a0b 100644 --- a/src/markitdown/_markitdown.py +++ b/src/markitdown/_markitdown.py @@ -223,6 +223,9 @@ def _convert(self, html_content: str) -> Union[None, DocumentConverterResult]: assert isinstance(webpage_text, str) + # remove leading and trailing whitespace (str.strip() trims more than just \n) + webpage_text = webpage_text.strip() + return DocumentConverterResult( title=None if soup.title is None else soup.title.string, text_content=webpage_text,