Skip to content

Commit b69f4ce

Browse files
committed
parser: Accept bytes as input
In addition to (Unicode) strings, also accept "bytes" (and corresponding iterators) as input to the parser. This allows skipping the decode/encode step when reading raw data from a file or socket, e.g. with os.read(). This introduces a small but measurable performance increase in such cases.
1 parent efdfee0 commit b69f4ce

File tree

2 files changed

+34
-9
lines changed

2 files changed

+34
-9
lines changed

jq.pyx

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -112,15 +112,15 @@ class JSONParseError(Exception):
112112

113113
cdef class _Parser(object):
114114
cdef jv_parser* _parser
115-
cdef object _text_iter
115+
cdef object _bytes_iter
116116
cdef object _bytes
117117

118118
def __dealloc__(self):
119119
jv_parser_free(self._parser)
120120

121-
def __cinit__(self, text_iter):
121+
def __cinit__(self, bytes_iter):
122122
self._parser = jv_parser_new(0)
123-
self._text_iter = text_iter
123+
self._bytes_iter = bytes_iter
124124
self._bytes = None
125125

126126
def __iter__(self):
@@ -163,7 +163,7 @@ cdef class _Parser(object):
163163
cdef char* cbytes
164164
cdef ssize_t clen
165165
try:
166-
self._bytes = next(self._text_iter).encode("utf8")
166+
self._bytes = next(self._bytes_iter)
167167
PyBytes_AsStringAndSize(self._bytes, &cbytes, &clen)
168168
jv_parser_set_buf(self._parser, cbytes, clen, 1)
169169
except StopIteration:
@@ -413,10 +413,33 @@ def text(program, value=_NO_VALUE, text=_NO_VALUE):
413413
return compile(program).input(value, text=text).text()
414414

415415

416-
def parse(text=_NO_VALUE, text_iter=_NO_VALUE):
417-
if (text is _NO_VALUE) == (text_iter is _NO_VALUE):
418-
raise ValueError("Either the text or text_iter argument should be set")
419-
return _Parser(text_iter if text_iter is not _NO_VALUE else _iter((text,)))
416+
def parse(text=_NO_VALUE, text_iter=_NO_VALUE,
417+
bytes=_NO_VALUE, bytes_iter=_NO_VALUE):
418+
"""
419+
Parse a text/bytes stream into JSON. Only one of "text", "text_iter",
420+
"bytes", "bytes_iter" arguments is accepted.
421+
422+
Args:
423+
text: The text to parse.
424+
text_iter: An iterator returning pieces of the text to parse.
425+
bytes: The bytes to parse.
426+
bytes_iter: An iterator returning pieces of the bytes to parse.
427+
428+
Returns:
429+
An iterator yielding the parsed JSON values.
430+
"""
431+
if (text, text_iter, bytes, bytes_iter).count(_NO_VALUE) != 3:
432+
raise ValueError("Exactly one argument should be set")
433+
if text is not _NO_VALUE:
434+
bytes = text.encode("utf8")
435+
if text_iter is not _NO_VALUE:
436+
def encode_text_iter():
437+
for text in text_iter:
438+
yield text.encode("utf8")
439+
bytes_iter = encode_text_iter()
440+
if bytes is not _NO_VALUE:
441+
bytes_iter = _iter((bytes,))
442+
return _Parser(bytes_iter)
420443

421444

422445
# Support the 0.1.x API for backwards compatibility

tests/jq_tests.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,9 +197,11 @@ def program_string_can_be_retrieved_from_program():
197197
assert_equal(".", program.program_string)
198198

199199
@istest
200-
def parse_both_text_and_text_iter_accepted():
200+
def parse_all_inputs_accepted():
201201
assert_equal(True, next(jq.parse(text="true")))
202202
assert_equal(True, next(jq.parse(text_iter=iter(["true"]))))
203+
assert_equal(True, next(jq.parse(bytes=b"true")))
204+
assert_equal(True, next(jq.parse(bytes_iter=iter([b"true"]))))
203205

204206
@istest
205207
def parse_empty_text_iter_stops():

0 commit comments

Comments
 (0)