Skip to content

Commit 0b22332

Browse files
committed
Optimize several regexes from quadratic time to linear time
Part of the discussion in Python-Markdown#798.

Signed-off-by: Anders Kaseorg <[email protected]>
1 parent 4b11593 commit 0b22332

File tree

1 file changed

+6 -6 lines changed

markdown/inlinepatterns.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -147,13 +147,13 @@ def build_inlinepatterns(md, **kwargs):
147147
NOT_STRONG_RE = r'((^|\s)(\*|_)(\s|$))'
148148

149149
# <http://www.123.com>
150-
AUTOLINK_RE = r'<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^>]*)>'
150+
AUTOLINK_RE = r'<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^<>]*)>'
151151

152152
153-
AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>'
153+
AUTOMAIL_RE = r'<([^<> !]*@[^@<> ]*)>'
154154

155155
# <...>
156-
HTML_RE = r'(\<([a-zA-Z/][^\>]*?|\!--.*?--)\>)'
156+
HTML_RE = r'(<([a-zA-Z/][^<>]*|!--[^<>]*--)>)'
157157

158158
# "&#38;" (decimal) or "&#x26;" (hex) or "&amp;" (named)
159159
ENTITY_RE = r'(&(?:\#[0-9]+|\#x[0-9a-fA-F]+|[a-zA-Z0-9]+);)'
@@ -433,7 +433,7 @@ def get_stash(m):
433433

434434
class LinkInlineProcessor(InlineProcessor):
435435
""" Return a link element from the given match. """
436-
RE_LINK = re.compile(r'''\(\s*(?:(<.*?>)\s*(?:(['"])(.*?)\2\s*)?\))?''', re.DOTALL | re.UNICODE)
436+
RE_LINK = re.compile(r'''\(\s*(?:(<[^<>]*>)\s*(?:('[^']*'|"[^"]*")\s*)?\))?''', re.DOTALL | re.UNICODE)
437437
RE_TITLE_CLEAN = re.compile(r'\s')
438438

439439
def handleMatch(self, m, data):
@@ -467,8 +467,8 @@ def getLink(self, data, index):
467467
if m and m.group(1):
468468
# Matches [Text](<link> "title")
469469
href = m.group(1)[1:-1].strip()
470-
if m.group(3):
471-
title = m.group(3)
470+
if m.group(2):
471+
title = m.group(2)[1:-1]
472472
index = m.end(0)
473473
handled = True
474474
elif m:

0 commit comments

Comments
 (0)