Skip to content

Commit 17e6c67

Browse files
committed
simplifies quotation in the comment parser
This commit makes the comment parser more robust. Instead of trying to support both kinds of quotes (single and double), we now use double quotes as the only way to delimit a token that contains non-word characters. A single quote is no longer considered a special symbol and should not be escaped. For the future pioneers of comment parsing, I added a debug option that is passed to the underlying shlex lexer (where all the problems usually happen). If `debug=3`, it will print every single state. In case even this robust version fails, I've added a final line of defense: if a comment cannot be parsed, the BAP comments View will not fail, but will just ignore the failed comment (of course, a diagnostic message will be printed to the message box).
1 parent 07ad00c commit 17e6c67

File tree

3 files changed

+23
-17
lines changed

3 files changed

+23
-17
lines changed

plugins/bap/plugins/bap_comments.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,14 @@ def run(self, arg):
4545
for addr in ida.addresses():
4646
comm = idaapi.get_cmt(addr, 0)
4747
if comm:
48-
parsed = bap_comment.parse(comm)
49-
if parsed:
50-
for (name, data) in parsed.items():
51-
comms[(addr, name)] = data
48+
try:
49+
parsed = bap_comment.parse(comm)
50+
if parsed:
51+
for (name, data) in parsed.items():
52+
comms[(addr, name)] = data
53+
except:
54+
idc.Message("BAP> failed to parse string {0}\n{1}".
55+
format(comm, str(sys.exc_info()[1])))
5256
comms = [(name, addr, data)
5357
for ((addr, name), data) in comms.items()]
5458
attrs = Attributes(comms)

plugins/bap/utils/bap_comment.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
Basically, the comment string includes an arbitrary amount of
1212
key=value pairs. If a value contains whitespaces, punctuation or any
1313
non-word character, then it should be delimited with double quotes. If
14-
a value contains quote character, then it should be escaped with the
14+
a value contains a quote character, then it should be escaped with the
1515
backslash character (the backslash character can escape
1616
itself). Properties that doesn't have values (or basically has a
1717
property of a unit type, so called boolean properties) are represented
@@ -96,15 +96,17 @@
9696
WORDCHARS = ''.join(['-:', string.ascii_letters, string.digits])
9797

9898

99-
def parse(comment):
99+
def parse(comment, debug=0):
100100
""" Parse comment string.
101101
102102
Returns a dictionary that maps properties to their values.
103103
Raises SyntaxError if the comment is syntactically incorrect.
104104
Returns None if comment doesn't start with the `BAP:` prefix.
105105
"""
106-
lexer = shlex(comment)
106+
lexer = shlex(comment, posix=True)
107107
lexer.wordchars = WORDCHARS
108+
lexer.debug = debug
109+
lexer.quotes = '"'
108110
result = {}
109111
key = ''
110112
values = []
@@ -193,14 +195,9 @@ def quote(token):
193195
>>> quote('hello, world')
194196
'"hello, world"'
195197
"""
196-
if set(token) - set(WORDCHARS):
197-
if "'" not in token:
198-
return "'{}'".format(token)
199-
elif '"' not in token:
200-
return '"{}"'.format(token)
201-
else: # we ran out of quotes, so we need
202-
return "'{}'".format(''.join('\\'+c if c == "'" else c
203-
for c in token))
198+
if not token.startswith('"') and set(token) - set(WORDCHARS):
199+
return '"{}"'.format(''.join('\\'+c if c == '"' else c
200+
for c in token))
204201
else:
205202
return token
206203

tests/test_bap_comment.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def test_dumps():
1818
assert 'BAP:' in dumps({'hello': []})
1919
assert dumps({'hello': ['cruel', 'world'], 'nice': [], 'thing': []}) == \
2020
'BAP: nice,thing hello=cruel,world'
21-
assert dumps({'hello': ["world\'"]}) == 'BAP: hello="world\'"'
21+
assert dumps({'hello': ["world'"]}) == 'BAP: hello="world\'"'
2222

2323

2424
def test_is_valid():
@@ -39,6 +39,11 @@ def test_roundup():
3939

4040

4141
def test_quotation():
42-
data = 'BAP: chars=\'{"a", "b", "c"}\''
42+
data = 'BAP: chars="{\\\"a\\\", \\\"b\\\", \\\"c\\\"}"'
4343
assert parse(data) == {'chars': ['{"a", "b", "c"}']}
4444
assert parse(data) == parse(dumps(parse(data)))
45+
46+
47+
def test_single_quote():
48+
data = 'BAP: key="{can\\\'t do}"'
49+
assert parse(data) == {'key': ["{can\\'t do}"]}

0 commit comments

Comments
 (0)