Skip to content

Commit 538f77a

Browse files
lapp0 authored and rlouf committed
Fix invalid regex in unconstrained arrays for json_schema.py
1 parent e5c39e2 commit 538f77a

File tree

2 files changed

+55
-8
lines changed

2 files changed

+55
-8
lines changed

outlines/fsm/json_schema.py

+15-8
Original file line numberDiff line numberDiff line change
@@ -297,15 +297,22 @@ def to_regex(
297297
# Here we need to make the choice to exclude generating list of objects
298298
# if the specification of the object is not given, even though a JSON
299299
# object that contains an object here would be valid under the specification.
300-
types = [
300+
legal_types = [
301301
{"type": "boolean"},
302302
{"type": "null"},
303303
{"type": "number"},
304304
{"type": "integer"},
305305
{"type": "string"},
306306
]
307-
regexes = [to_regex(resolver, t, whitespace_pattern) for t in types]
308-
return rf"\[{whitespace_pattern}({'|'.join(regexes)})(,{whitespace_pattern}({'|'.join(regexes)})){num_repeats}){allow_empty}{whitespace_pattern}\]"
307+
depth = instance.get("depth", 2)
308+
if depth > 0:
309+
legal_types.append({"type": "object", "depth": depth - 1})
310+
legal_types.append({"type": "array", "depth": depth - 1})
311+
312+
regexes = [
313+
to_regex(resolver, t, whitespace_pattern) for t in legal_types
314+
]
315+
return rf"\[{whitespace_pattern}({'|'.join(regexes)})(,{whitespace_pattern}({'|'.join(regexes)})){num_repeats}{allow_empty}{whitespace_pattern}\]"
309316

310317
elif instance_type == "object":
311318
# pattern for json object with values defined by instance["additionalProperties"]
@@ -328,20 +335,20 @@ def to_regex(
328335
# unset or True, it is unconstrained object.
329336
# We handle this by setting additionalProperties to anyOf: {all types}
330337

331-
legal_values = [
338+
legal_types = [
332339
{"type": "string"},
333340
{"type": "number"},
334341
{"type": "boolean"},
335-
{"type": "null"}
336-
# { "type": "array" }, # TODO: enable arrays within object-types
342+
{"type": "null"},
337343
]
338344

339345
# We set the object depth to 2 to keep the expression finite, but the "depth"
340346
# key is not a true component of the JSON Schema specification.
341347
depth = instance.get("depth", 2)
342348
if depth > 0:
343-
legal_values.append({"type": "object", "depth": depth - 1})
344-
additional_properties = {"anyOf": legal_values}
349+
legal_types.append({"type": "object", "depth": depth - 1})
350+
legal_types.append({"type": "array", "depth": depth - 1})
351+
additional_properties = {"anyOf": legal_types}
345352

346353
value_pattern = to_regex(
347354
resolver, additional_properties, whitespace_pattern

tests/fsm/test_json_schema.py

+40
Original file line numberDiff line numberDiff line change
@@ -719,6 +719,46 @@ def test_format(schema, regex, examples):
719719
('{"time":20:20:39Z}', False), # missing quotes for value
720720
],
721721
),
722+
# Unconstrained Object
723+
(
724+
{
725+
"title": "Foo",
726+
"type": "object",
727+
},
728+
[
729+
("{}", True),
730+
('{"a": 1, "b": null}', True),
731+
('{"a": {"z": {"g": 4}}, "b": null}', True),
732+
("1234", False), # not an object
733+
('["a", "a"]', False), # an array, not an object
734+
],
735+
),
736+
# Unconstrained Array
737+
(
738+
{
739+
"type": "array",
740+
},
741+
[
742+
("[1, {}, false]", True),
743+
("[{}]", True),
744+
('[{"a": {"z": "q"}, "b": null}]', True),
745+
('[{"a": [1, 2, true], "b": null}]', True),
746+
('[{"a": [1, 2, true], "b": {"a": "b"}}, 1, true, [1, [2]]]', True),
747+
# too deep, default unconstrained depth limit = 2
748+
(
749+
'[{"a": [1, 2, true], "b": {"a": "b"}}, 1, true, [1, [2, [3]]]]',
750+
False,
751+
),
752+
('[{"a": {"z": {"g": 4}}, "b": null}]', False),
753+
("[[[[1]]]]", False),
754+
# not an array
755+
("{}", False),
756+
('{"a": 1, "b": null}', False),
757+
('{"a": {"z": {"g": 4}}, "b": null}', False),
758+
("1234", False), # not an array
759+
('{"a": "a"}', False), # not an array
760+
],
761+
),
722762
],
723763
)
724764
def test_format_without_regex(schema, examples):

0 commit comments

Comments
 (0)