Skip to content

Commit cb47805

Browse files
andersk authored and waylan committed
Optimize several regexes from quadratic time to linear time
Part of the discussion in #798. Signed-off-by: Anders Kaseorg <[email protected]>
1 parent 4b11593 commit cb47805

File tree

2 files changed

+6
-6
lines changed

2 files changed

+6
-6
lines changed

Diff for: markdown/inlinepatterns.py

+5-5
Original file line number | Diff line number | Diff line change
@@ -147,10 +147,10 @@ def build_inlinepatterns(md, **kwargs):
147147
NOT_STRONG_RE = r'((^|\s)(\*|_)(\s|$))'
148148

149149
# <http://www.123.com>
150-
AUTOLINK_RE = r'<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^>]*)>'
150+
AUTOLINK_RE = r'<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^<>]*)>'
151151

152152
153-
AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>'
153+
AUTOMAIL_RE = r'<([^<> !]*@[^@<> ]*)>'
154154

155155
# <...>
156156
HTML_RE = r'(\<([a-zA-Z/][^\>]*?|\!--.*?--)\>)'
@@ -433,7 +433,7 @@ def get_stash(m):
433433

434434
class LinkInlineProcessor(InlineProcessor):
435435
""" Return a link element from the given match. """
436-
RE_LINK = re.compile(r'''\(\s*(?:(<.*?>)\s*(?:(['"])(.*?)\2\s*)?\))?''', re.DOTALL | re.UNICODE)
436+
RE_LINK = re.compile(r'''\(\s*(?:(<[^<>]*>)\s*(?:('[^']*'|"[^"]*")\s*)?\))?''', re.DOTALL | re.UNICODE)
437437
RE_TITLE_CLEAN = re.compile(r'\s')
438438

439439
def handleMatch(self, m, data):
@@ -467,8 +467,8 @@ def getLink(self, data, index):
467467
if m and m.group(1):
468468
# Matches [Text](<link> "title")
469469
href = m.group(1)[1:-1].strip()
470-
if m.group(3):
471-
title = m.group(3)
470+
if m.group(2):
471+
title = m.group(2)[1:-1]
472472
index = m.end(0)
473473
handled = True
474474
elif m:

Diff for: tests/misc/html.html

+1-1
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@ <h1>Block level html</h1>
2020
</div>
2121

2222
<p>And of course <script>blah</script>.</p>
23-
<p><a href="script&gt;stuff&lt;/script">this <script>link</a></p>
23+
<p><a href="&lt;script&gt;stuff&lt;/script&gt;">this <script>link</a></p>
2424
<p>Some funky <x\]> inline stuff with markdown escaping syntax.</p>
2525
<p><img scr="foo.png" title="Only one inline element on a line." /></p>
2626
<p>And now a line with only an opening bracket:</p>

0 commit comments

Comments
 (0)