Skip to content

Commit f9a2255

Browse files
authored
Merge pull request #27 from ivg/robust-comment-parser
simplifies quotation in the comment parser
2 parents 07ad00c + 17e6c67 commit f9a2255

File tree

3 files changed

+23
-17
lines changed

3 files changed

+23
-17
lines changed

plugins/bap/plugins/bap_comments.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,14 @@ def run(self, arg):
4545
for addr in ida.addresses():
4646
comm = idaapi.get_cmt(addr, 0)
4747
if comm:
48-
parsed = bap_comment.parse(comm)
49-
if parsed:
50-
for (name, data) in parsed.items():
51-
comms[(addr, name)] = data
48+
try:
49+
parsed = bap_comment.parse(comm)
50+
if parsed:
51+
for (name, data) in parsed.items():
52+
comms[(addr, name)] = data
53+
except:
54+
idc.Message("BAP> failed to parse string {0}\n{1}".
55+
format(comm, str(sys.exc_info()[1])))
5256
comms = [(name, addr, data)
5357
for ((addr, name), data) in comms.items()]
5458
attrs = Attributes(comms)

plugins/bap/utils/bap_comment.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
Basically, the comment string includes an arbitrary amount of
1212
key=value pairs. If a value contains whitespaces, punctuation or any
1313
non-word character, then it should be delimited with double quotes. If
14-
a value contains quote character, then it should be escaped with the
14+
a value contains a quote character, then it should be escaped with the
1515
backslash character (the backslash character can escape
1616
itself). Properties that don't have values (or basically have a
1717
property of a unit type, so-called boolean properties) are represented
@@ -96,15 +96,17 @@
9696
WORDCHARS = ''.join(['-:', string.ascii_letters, string.digits])
9797

9898

99-
def parse(comment):
99+
def parse(comment, debug=0):
100100
""" Parse comment string.
101101
102102
Returns a dictionary that maps properties to their values.
103103
Raises SyntaxError if the comment is syntactically incorrect.
104104
Returns None if comment doesn't start with the `BAP:` prefix.
105105
"""
106-
lexer = shlex(comment)
106+
lexer = shlex(comment, posix=True)
107107
lexer.wordchars = WORDCHARS
108+
lexer.debug = debug
109+
lexer.quotes = '"'
108110
result = {}
109111
key = ''
110112
values = []
@@ -193,14 +195,9 @@ def quote(token):
193195
>>> quote('hello, world')
194196
'"hello, world"'
195197
"""
196-
if set(token) - set(WORDCHARS):
197-
if "'" not in token:
198-
return "'{}'".format(token)
199-
elif '"' not in token:
200-
return '"{}"'.format(token)
201-
else: # we ran out of quotes, so we need
202-
return "'{}'".format(''.join('\\'+c if c == "'" else c
203-
for c in token))
198+
if not token.startswith('"') and set(token) - set(WORDCHARS):
199+
return '"{}"'.format(''.join('\\'+c if c == '"' else c
200+
for c in token))
204201
else:
205202
return token
206203

tests/test_bap_comment.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def test_dumps():
1818
assert 'BAP:' in dumps({'hello': []})
1919
assert dumps({'hello': ['cruel', 'world'], 'nice': [], 'thing': []}) == \
2020
'BAP: nice,thing hello=cruel,world'
21-
assert dumps({'hello': ["world\'"]}) == 'BAP: hello="world\'"'
21+
assert dumps({'hello': ["world'"]}) == 'BAP: hello="world\'"'
2222

2323

2424
def test_is_valid():
@@ -39,6 +39,11 @@ def test_roundup():
3939

4040

4141
def test_quotation():
42-
data = 'BAP: chars=\'{"a", "b", "c"}\''
42+
data = 'BAP: chars="{\\\"a\\\", \\\"b\\\", \\\"c\\\"}"'
4343
assert parse(data) == {'chars': ['{"a", "b", "c"}']}
4444
assert parse(data) == parse(dumps(parse(data)))
45+
46+
47+
def test_single_quote():
48+
data = 'BAP: key="{can\\\'t do}"'
49+
assert parse(data) == {'key': ["{can\\'t do}"]}

0 commit comments

Comments
 (0)