From eeaa627f56bba74ac7a5b8bd4b43889dc4320089 Mon Sep 17 00:00:00 2001 From: Adithya Balaji Date: Tue, 18 Mar 2025 14:37:31 -0700 Subject: [PATCH] [pretty_format_json] Add compact array feature - Find non-nested numeric arrays using JSON spec: https://www.json.org/json-en.html --- pre_commit_hooks/pretty_format_json.py | 57 ++++++++++++++++ tests/pretty_format_json_test.py | 93 ++++++++++++++++++++++++++ 2 files changed, 150 insertions(+) diff --git a/pre_commit_hooks/pretty_format_json.py b/pre_commit_hooks/pretty_format_json.py index 501f37f7..30294c2f 100644 --- a/pre_commit_hooks/pretty_format_json.py +++ b/pre_commit_hooks/pretty_format_json.py @@ -2,6 +2,7 @@ import argparse import json +import re import sys from collections.abc import Mapping from collections.abc import Sequence @@ -14,6 +15,7 @@ def _get_pretty_format( ensure_ascii: bool = True, sort_keys: bool = True, top_keys: Sequence[str] = (), + compact_arrays: bool = False, ) -> str: def pairs_first(pairs: Sequence[tuple[str, str]]) -> Mapping[str, str]: before = [pair for pair in pairs if pair[0] in top_keys] @@ -22,14 +24,58 @@ def pairs_first(pairs: Sequence[tuple[str, str]]) -> Mapping[str, str]: if sort_keys: after.sort() return dict(before + after) + json_pretty = json.dumps( json.loads(contents, object_pairs_hook=pairs_first), indent=indent, ensure_ascii=ensure_ascii, ) + + if compact_arrays: + json_pretty = _compact_arrays(json_pretty) + return f'{json_pretty}\n' +def _compact_arrays(json_text: str) -> str: + """Convert arrays with simple values to a single line format.""" + pattern = re.compile( + r''' + ( # Capturing group for the entire array + \[ # Opening bracket + \s* # Optional whitespace + (?: # Non-capturing group for array elements + (?: # Non-capturing group for each value type + "[^"]*" # String: anything in quotes + | + -? # Optional negative sign + (?: + 0|[1-9]\d* # Integer part: 0 or non-zero digit + # followed by digits + ) + (?:\.\d+)? # Optional fractional part + (?:[eE][+-]?\d+)? # Optional exponent part + | + true|false # Boolean + | + null # Null + ) + (?:\s*,\s*)? # Optional comma and whitespace + )++ # One or more elements + \s* # Optional whitespace + \] # Closing bracket + ) + ''', re.VERBOSE, + ) + + def compact_match(match: re.Match[str]) -> str: + array_content = match.group(0) + compact = re.sub(r'\s*\n\s*', ' ', array_content) + return compact + + return re.sub(pattern, compact_match, json_text) + + def _autofix(filename: str, new_contents: str) -> None: print(f'Fixing file {filename}') with open(filename, 'w', encoding='UTF-8') as f: @@ -96,6 +142,16 @@ def main(argv: Sequence[str] | None = None) -> int: default=[], help='Ordered list of keys to keep at the top of JSON hashes', ) + parser.add_argument( + '--compact-arrays', + action='store_true', + dest='compact_arrays', + default=False, + help=( + 'Format simple arrays on a single line for more ' + 'compact representation' + ), + ) parser.add_argument('filenames', nargs='*', help='Filenames to fix') args = parser.parse_args(argv) @@ -109,6 +165,7 @@ def main(argv: Sequence[str] | None = None) -> int: pretty_contents = _get_pretty_format( contents, args.indent, ensure_ascii=not args.no_ensure_ascii, sort_keys=not args.no_sort_keys, top_keys=args.top_keys, + compact_arrays=args.compact_arrays, ) except ValueError: print( diff --git a/tests/pretty_format_json_test.py b/tests/pretty_format_json_test.py index 68b6d7a1..07b4ca51 100644 --- a/tests/pretty_format_json_test.py +++ b/tests/pretty_format_json_test.py @@ -155,3 +155,96 @@ def test_diffing_output(capsys): assert actual_retval == expected_retval assert actual_out == expected_out assert actual_err == '' + + +def test_compact_arrays_main(tmpdir): + # TODO: Intentionally don't address round trip bug caused by + # using `json.loads(json.dumps(data))`. This will need to be + # resolved separately. + srcfile = tmpdir.join('to_be_compacted.json') + srcfile.write( + '{\n' + ' "simple_array": [\n' + ' 1,\n' + ' 2,\n' + ' 3\n' + ' ],\n' + ' "string_array": [\n' + ' "a",\n' + ' "b",\n' + ' "c"\n' + ' ],\n' + ' "mixed_array": [\n' + ' 1,\n' + ' "string",\n' + ' true,\n' + ' null\n' + ' ],\n' + ' "nested_objects": [\n' + ' {\n' + ' "a": 1\n' + ' },\n' + ' {\n' + ' "b": 2\n' + ' }\n' + ' ]\n' + '}', + ) + + ret = main(['--compact-arrays', '--autofix', str(srcfile)]) + assert ret == 1 + + with open(str(srcfile), encoding='UTF-8') as f: + contents = f.read() + + # Simple arrays should be compacted + assert '"simple_array": [ 1, 2, 3 ]' in contents + assert '"string_array": [ "a", "b", "c" ]' in contents + assert '"mixed_array": [ 1, "string", true, null ]' in contents + + # Nested array objects should remain expanded + assert ' "nested_objects": [\n' in contents + assert ' "a": 1\n' in contents + + +def test_compact_arrays_diff_output(tmpdir, capsys): + srcfile = tmpdir.join('expanded_arrays.json') + srcfile.write( + '{\n' + ' "array": [\n' + ' 1,\n' + ' 2,\n' + ' 3\n' + ' ]\n' + '}', + ) + + ret = main(['--compact-arrays', str(srcfile)]) + assert ret == 1 + + out, _ = capsys.readouterr() + assert '+ "array": [ 1, 2, 3 ]' in out + + # Validate diff output + assert '- 1,' in out + assert '- 2,' in out + assert '- 3' in out + assert '- "array": [' in out + assert '- ]' in out + + +def test_compact_arrays_disabled(tmpdir): + """Test that compacting arrays does not impact default formatting.""" + srcfile = tmpdir.join('already_compact.json') + srcfile.write('{\n "array": [ 1, 2, 3 ]\n}') + + ret = main(['--autofix', str(srcfile)]) + assert ret == 1 + + with open(str(srcfile), encoding='UTF-8') as f: + contents = f.read() + + assert '"array": [\n' in contents + assert ' 1,' in contents + assert ' 2,' in contents + assert ' 3\n ]' in contents