Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch _ResultIterator to using _Parser #52

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
200 changes: 177 additions & 23 deletions jq.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import json
import threading

from cpython.bytes cimport PyBytes_AsString
from cpython.bytes cimport PyBytes_AsStringAndSize


cdef extern from "jv.h":
Expand Down Expand Up @@ -35,13 +36,15 @@ cdef extern from "jv.h":
int jv_object_iter_valid(jv, int)
jv jv_object_iter_key(jv, int)
jv jv_object_iter_value(jv, int)
jv jv_invalid()

cdef struct jv_parser:
pass

jv_parser* jv_parser_new(int)
void jv_parser_free(jv_parser*)
void jv_parser_set_buf(jv_parser*, const char*, int, int)
int jv_parser_remaining(jv_parser*)
jv jv_parser_next(jv_parser*)

jv jv_parse(const char*)
Expand Down Expand Up @@ -109,6 +112,110 @@ cdef object _jv_to_python(jv value):
return python_value


class JSONParseError(ValueError):
    """
    A failure to parse JSON.

    Derives from ValueError (as the standard library's json.JSONDecodeError
    does) so that callers which catch ValueError around JSON parsing also
    catch this exception.
    """


cdef class _JV(object):
    """Wrapper object holding a jq-native JSON value."""
    cdef jv _value

    def __cinit__(self):
        # Begin with an invalid value so that __dealloc__ always has
        # something well-defined to free.
        self._value = jv_invalid()

    def __dealloc__(self):
        # Give up the value held by this wrapper.
        jv_free(self._value)

    def unpack(self):
        """
        Convert the held JSON value into standard Python objects.

        Returns:
            The Python representation built from a copy of the held value.
        """
        cdef jv duplicate = jv_copy(self._value)
        return _jv_to_python(duplicate)


cdef class _JSONParser(object):
    """Iterator turning pieces of JSON stream text into parsed JSON values."""
    cdef jv_parser* _parser
    cdef object _text_iter
    # The bytes object most recently supplied to the parser, or None once the
    # text iterator has been exhausted. Holding it here keeps the buffer
    # alive; presumably the parser reads from it without copying -- TODO
    # confirm against jv_parser_set_buf.
    cdef object _bytes
    cdef int _packed

    def __dealloc__(self):
        # __dealloc__ runs even when __cinit__ did not (e.g. bad arguments),
        # in which case _parser is still NULL and must not be freed.
        if self._parser != NULL:
            jv_parser_free(self._parser)
            self._parser = NULL

    def __cinit__(self, text_iter, packed):
        """
        Initialize the parser.

        Args:
            text_iter:  An iterator producing pieces of the JSON stream text
                        (strings or bytes) to parse.
            packed:     Make the iterator return jq-native packed values,
                        if true, and standard Python values, if false.
        """
        self._parser = jv_parser_new(0)
        self._text_iter = text_iter
        self._bytes = None
        self._packed = bool(packed)

    def __iter__(self):
        return self

    def __next__(self):
        """
        Retrieve next parsed JSON value.

        Returns:
            The next parsed JSON value: a _JV instance if the parser was
            created with a true "packed", a standard Python value otherwise.

        Raises:
            JSONParseError: failed parsing the input JSON.
            StopIteration: no more values available.
        """
        cdef jv value
        cdef jv error_message
        cdef _JV packed
        while True:
            # If the parser has no buffer set/left, supply it with some bytes
            if not jv_parser_remaining(self._parser):
                self._ready_next_bytes()
            # Get next value from the parser
            value = jv_parser_next(self._parser)
            if jv_is_valid(value):
                if self._packed:
                    # Hand the value over to the wrapper, which frees it
                    packed = _JV()
                    packed._value = value
                    return packed
                return _jv_to_python(value)
            # The check is done on a copy, as "value" is still needed below
            if jv_invalid_has_msg(jv_copy(value)):
                error_message = jv_invalid_get_msg(value)
                try:
                    message = jv_string_value(error_message).decode("utf8")
                finally:
                    # Free the message even if decoding fails
                    jv_free(error_message)
                raise JSONParseError(message)
            # Invalid value without a message: nothing parsed this round
            jv_free(value)
            # If we supplied no bytes last time, the input is exhausted
            if self._bytes is None:
                raise StopIteration

    cdef bint _ready_next_bytes(self) except 1:
        # Supply the parser with the next piece of text from the text
        # iterator (encoding strings as UTF-8), or, if the iterator is
        # exhausted, with an empty final buffer. Returns 0 on success;
        # Python exceptions are propagated via the "except 1" convention.
        cdef char* cbytes
        cdef Py_ssize_t clen
        # Only next() is guarded, so a StopIteration raised by anything else
        # is not mistaken for the end of input.
        try:
            text = next(self._text_iter)
        except StopIteration:
            self._bytes = None
            jv_parser_set_buf(self._parser, "", 0, 0)
            return 0
        if isinstance(text, bytes):
            data = text
        else:
            data = text.encode("utf8")
        PyBytes_AsStringAndSize(data, &cbytes, &clen)
        # jv_parser_set_buf takes the length as a C int; refuse pieces whose
        # length would not survive the narrowing.
        if <Py_ssize_t>(<int>clen) != clen:
            raise OverflowError("JSON text piece is too large to parse")
        self._bytes = data
        jv_parser_set_buf(self._parser, cbytes, <int>clen, 1)
        return 0


def compile(object program, args=None):
    """
    Create a program object from jq program text.

    Args:
        program:    The program text; it is encoded as UTF-8 before use.
        args:       Passed on to _Program as its "args" keyword argument.

    Returns:
        The _Program built from the encoded program text.
    """
    return _Program(program.encode("utf8"), args=args)
Expand Down Expand Up @@ -251,18 +358,27 @@ cdef class _Program(object):


cdef class _ProgramWithInput(object):
"""Input-supplied program"""
cdef _JqStatePool _jq_state_pool
cdef object _bytes_input

def __cinit__(self, jq_state_pool, bytes_input):
"""
Initialize the input-supplied program.

Args:
jq_state_pool: The JQ state pool to acquire program state from.
bytes_input: The bytes containing input JSON.
"""
self._jq_state_pool = jq_state_pool
self._bytes_input = bytes_input

def __iter__(self):
return self._make_iterator()

cdef _ResultIterator _make_iterator(self):
return _ResultIterator(self._jq_state_pool, self._bytes_input)
return _ResultIterator(self._jq_state_pool,
parse_json(text=self._bytes_input, packed=True))

def text(self):
# Performance testing suggests that using _jv_to_python (within the
Expand All @@ -279,31 +395,32 @@ cdef class _ProgramWithInput(object):


cdef class _ResultIterator(object):
"""Program result iterator"""
cdef _JqStatePool _jq_state_pool
cdef jq_state* _jq
cdef jv_parser* _parser
cdef object _bytes_input
cdef _JSONParser _parser_input
cdef bint _ready

def __dealloc__(self):
self._jq_state_pool.release(self._jq)
jv_parser_free(self._parser)

def __cinit__(self, _JqStatePool jq_state_pool, object bytes_input):
def __cinit__(self, _JqStatePool jq_state_pool, _JSONParser parser_input):
"""
Initialize the result iterator.

Args:
jq_state_pool: The JQ state pool to acquire program state from.
parser_input: The parser to receive packed input values from.
"""
self._jq_state_pool = jq_state_pool
self._jq = jq_state_pool.acquire()
self._bytes_input = bytes_input
self._ready = False
cdef jv_parser* parser = jv_parser_new(0)
cdef char* cbytes_input = PyBytes_AsString(bytes_input)
jv_parser_set_buf(parser, cbytes_input, len(cbytes_input), 0)
self._parser = parser
self._parser_input = parser_input

def __iter__(self):
return self

def __next__(self):
cdef int dumpopts = 0
while True:
if not self._ready:
self._ready_next_input()
Expand All @@ -323,18 +440,10 @@ cdef class _ResultIterator(object):

cdef bint _ready_next_input(self) except 1:
cdef int jq_flags = 0
cdef jv value = jv_parser_next(self._parser)
if jv_is_valid(value):
jq_start(self._jq, value, jq_flags)
return 0
elif jv_invalid_has_msg(jv_copy(value)):
error_message = jv_invalid_get_msg(value)
message = jv_string_value(error_message).decode("utf8")
jv_free(error_message)
raise ValueError(u"parse error: " + message)
else:
jv_free(value)
raise StopIteration()
cdef _JV packed = next(self._parser_input)
jq_start(self._jq, packed._value, jq_flags)
packed._value = jv_invalid()
return 0


def all(program, value=_NO_VALUE, text=_NO_VALUE):
Expand All @@ -356,6 +465,51 @@ def text(program, value=_NO_VALUE, text=_NO_VALUE):
return compile(program).input(value, text=text).text()


def parse_json(text=_NO_VALUE, text_iter=_NO_VALUE, packed=False):
    """
    Parse a JSON stream.
    Exactly one of "text" and "text_iter" must be specified.

    Args:
        text:       A string or bytes object containing the JSON stream to
                    parse.
        text_iter:  An iterator returning strings or bytes - pieces of the
                    JSON stream to parse.
        packed:     If true, return packed, jq-native JSON values.
                    If false, return standard Python JSON values.

    Returns:
        An iterator returning parsed values.

    Raises:
        ValueError: both or neither of "text" and "text_iter" were given.
        JSONParseError: failed parsing the input JSON stream.
    """
    have_text = text is not _NO_VALUE
    have_text_iter = text_iter is not _NO_VALUE
    # Reject both "nothing supplied" and "both supplied"
    if have_text == have_text_iter:
        raise ValueError("Either the text or text_iter argument should be set")
    if have_text_iter:
        pieces = text_iter
    else:
        # A single text is parsed as a one-piece stream
        pieces = _iter((text,))
    return _JSONParser(pieces, packed)


def parse_json_file(fp, packed=False):
    """
    Parse a JSON stream read from a file-like object.

    The whole content is read with a single fp.read() call before parsing.

    Args:
        fp:         The file-like object to read the JSON stream from.
        packed:     If true, return packed, jq-native JSON values.
                    If false, return standard Python JSON values.

    Returns:
        An iterator returning parsed values.

    Raises:
        JSONParseError: failed parsing the JSON stream.
    """
    contents = fp.read()
    return parse_json(text=contents, packed=packed)


# Support the 0.1.x API for backwards compatibility
def jq(object program):
    """
    Compile a jq program; an alias of compile() taking only the program.

    Args:
        program: The program text, passed to compile() unchanged.

    Returns:
        Whatever compile() returns for the program.
    """
    compiled = compile(program)
    return compiled
6 changes: 3 additions & 3 deletions tests/jq_old_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from nose.tools import istest, assert_equal, assert_raises

from jq import jq
from jq import jq, JSONParseError


@istest
Expand Down Expand Up @@ -118,8 +118,8 @@ def value_error_is_raised_if_input_is_not_valid_json():
try:
program.transform(text="!!")
assert False, "Expected error"
except ValueError as error:
expected_error_str = "parse error: Invalid numeric literal at EOF at line 1, column 2"
except JSONParseError as error:
expected_error_str = "Invalid numeric literal at EOF at line 1, column 2"
assert_equal(str(error), expected_error_str)


Expand Down
Loading