From 862e33e0ad77cf1489a2f761956265da9f1ca444 Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Thu, 19 Jun 2025 10:02:24 +0100 Subject: [PATCH 1/5] Add functionality to validate data section only --- __init__.py | 25 +++++++++++++++++++++---- __main__.py | 25 +++++++++++++++++++++---- test_parser.py | 42 +++++++++++++++++++++++++++++++++++++++++- 3 files changed, 83 insertions(+), 9 deletions(-) diff --git a/__init__.py b/__init__.py index 133dca6..deced79 100644 --- a/__init__.py +++ b/__init__.py @@ -333,14 +333,19 @@ def parse( with_progress=False, with_tree=True, with_header=False, - only_header=False + only_header=False, + validate_data_only=False ): + if validate_data_only: # Used by the Validation Service to validate only the data section of an IFC file, ignoring the header. + only_header = False + with_header = False + with_tree= False if filename: assert not filecontent filecontent = builtins.open(filename, encoding=None).read() if only_header: - assert with_header, "'only_header=True' requires 'with_header=True'" + with_header = True # Match and remove the comments p = r"/\*[\s\S]*?\*/" @@ -407,8 +412,20 @@ def replace_fn(match): NT = type("NullTransformer", (Transformer,), methods) transformer = {"transformer": NT()} - - parser = Lark(grammar, parser="lalr", start="file", **transformer) + + if validate_data_only: + match = re.search( + r"DATA\s*;(.*?)ENDSEC\s*;", + filecontent_wo_comments, + flags=re.DOTALL | re.IGNORECASE, + ) + if not match: + raise ValidationError("No DATA section found in file") + filecontent_wo_comments = f"DATA;{match.group(1)}ENDSEC;" + start_rule = "data_section" + else:# Parse entire file (header + data) + start_rule = "file" + parser = Lark(grammar, parser="lalr", start=start_rule, **transformer) try: ast = parser.parse(filecontent_wo_comments) diff --git a/__main__.py b/__main__.py index 15c98cb..0b2cd66 100644 --- a/__main__.py +++ b/__main__.py @@ -6,16 +6,33 @@ args = [x for x in sys.argv[1:] if not x.startswith("-")] 
flags = [x for x in sys.argv[1:] if x.startswith("-")] - fn = args[0] + filename = args[0] start_time = time.time() + + with_progress = "--progress" in flags + json_output = "--json" in flags + only_header = "--header-only" in flags + validate_data_only = "--data-only" in flags + + + # Sanity check: can't use both at once + if only_header and validate_data_only: + print("Cannot use both --header-only and --data-only at the same time", file=sys.stderr) + sys.exit(2) try: - parse(filename=fn, with_progress="--progress" in flags, with_tree=False) - if "--json" not in flags: + parse( + filename=filename, + with_progress=with_progress, + with_tree=False, + only_header=only_header, + validate_data_only=validate_data_only, + ) + if not json_output: print("Valid", file=sys.stderr) exit(0) except ValidationError as exc: - if "--json" not in flags: + if not json_output: print(exc, file=sys.stderr) else: import sys diff --git a/test_parser.py b/test_parser.py index fb2565a..0953ce6 100644 --- a/test_parser.py +++ b/test_parser.py @@ -105,4 +105,44 @@ def test_file_mvd_attr(): assert f.mvd.Remark['SomeKey'] == 'SomeValue' assert len(f.mvd.comments) == 2 assert all(v in vars(f.header).keys() for v in ['file_description', 'file_name', 'file_schema']) - assert len(f.header.file_name) == 7 \ No newline at end of file + assert len(f.header.file_name) == 7 + + +@pytest.mark.parametrize("filename", [ + 'fixtures/fail_invalid_header_entity.ifc', + 'fixtures/fail_no_header.ifc', +]) +def test_invalid_headers_(filename): + # error in header; with_header should raise an error + with pytest.raises(ValidationError): + parse(filename=filename, with_tree=False, only_header=True, with_header=True) + +@pytest.mark.parametrize("filename", [ + 'fixtures/fail_duplicate_id.ifc', + 'fixtures/fail_double_comma.ifc', + 'fixtures/fail_double_semi.ifc' +]) +def test_valid_headers(filename): + # error in body; with_header should not raise an error + with nullcontext(): + parse(filename=filename, 
with_tree=False, only_header=True, with_header=True) + + +@pytest.mark.parametrize("filename", [ + 'fixtures/fail_invalid_header_entity.ifc', + 'fixtures/fail_no_header.ifc', +]) +def test_invalid_headers_(filename): + # error in header; validate_data_only should not raise an error + with nullcontext(): + parse(filename=filename, validate_data_only=True) + +@pytest.mark.parametrize("filename", [ + 'fixtures/fail_duplicate_id.ifc', + 'fixtures/fail_double_comma.ifc', + 'fixtures/fail_double_semi.ifc' +]) +def test_valid_headers(filename): + # error in body; validate_data_only should raise an error + with pytest.raises(ValidationError): + parse(filename=filename, validate_data_only=True) \ No newline at end of file From be15b717ef1631cf0351caf75705c4b70c1e3a2f Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Fri, 20 Jun 2025 16:25:35 +0100 Subject: [PATCH 2/5] use namespaces instead of kw args --- __main__.py | 50 +++++++++++++++++++++++--------------------------- 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/__main__.py b/__main__.py index 0b2cd66..91ce153 100644 --- a/__main__.py +++ b/__main__.py @@ -1,42 +1,38 @@ import sys -import time +import json +import argparse from . 
import parse, ValidationError -if __name__ == "__main__": - args = [x for x in sys.argv[1:] if not x.startswith("-")] - flags = [x for x in sys.argv[1:] if x.startswith("-")] - - filename = args[0] - start_time = time.time() - - with_progress = "--progress" in flags - json_output = "--json" in flags - only_header = "--header-only" in flags - validate_data_only = "--data-only" in flags - +def main(): + parser = argparse.ArgumentParser(description="Parse and validate STEP file.") + parser.add_argument("filename", help="The STEP file to validate.") + parser.add_argument("--progress", action="store_true", help="Show progress during validation.") + parser.add_argument("--json", action="store_true", help="Output errors in JSON format.") + parser.add_argument("--only-header", action="store_true", help="Validate only the header section.") + parser.add_argument("--only-data", action="store_true", help="Validate only the data section.") - # Sanity check: can't use both at once - if only_header and validate_data_only: - print("Cannot use both --header-only and --data-only at the same time", file=sys.stderr) + args = parser.parse_args() + if args.only_header and args.only_data: + print("Cannot use both --only-header and --only-data at the same time", file=sys.stderr) sys.exit(2) - + try: parse( - filename=filename, - with_progress=with_progress, - with_tree=False, - only_header=only_header, - validate_data_only=validate_data_only, + filename=args.filename, + with_progress = args.progress, + with_tree = False, + only_header=args.only_header, + validate_data_only = args.only_data ) - if not json_output: + if not args.json: print("Valid", file=sys.stderr) exit(0) except ValidationError as exc: - if not json_output: + if not args.json: print(exc, file=sys.stderr) else: - import sys - import json - json.dump(exc.asdict(), sys.stdout) exit(1) + +if __name__ == '__main__': + main() From f816e79ec27b6d97521a01d7b5bb1a237f8320b1 Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Tue, 
24 Jun 2025 13:10:47 +0100 Subject: [PATCH 3/5] remove data_only validation (header is preq for syntax check) --- __init__.py | 21 ++------------------- __main__.py | 2 -- test_parser.py | 20 -------------------- 3 files changed, 2 insertions(+), 41 deletions(-) diff --git a/__init__.py b/__init__.py index deced79..02a5d1e 100644 --- a/__init__.py +++ b/__init__.py @@ -334,12 +334,7 @@ def parse( with_tree=True, with_header=False, only_header=False, - validate_data_only=False ): - if validate_data_only: # Used by the Validation Service to validate only the data section of an IFC file, ignoring the header. - only_header = False - with_header = False - with_tree= False if filename: assert not filecontent filecontent = builtins.open(filename, encoding=None).read() @@ -412,20 +407,8 @@ def replace_fn(match): NT = type("NullTransformer", (Transformer,), methods) transformer = {"transformer": NT()} - - if validate_data_only: - match = re.search( - r"DATA\s*;(.*?)ENDSEC\s*;", - filecontent_wo_comments, - flags=re.DOTALL | re.IGNORECASE, - ) - if not match: - raise ValidationError("No DATA section found in file") - filecontent_wo_comments = f"DATA;{match.group(1)}ENDSEC;" - start_rule = "data_section" - else:# Parse entire file (header + data) - start_rule = "file" - parser = Lark(grammar, parser="lalr", start=start_rule, **transformer) + + parser = Lark(grammar, parser="lalr", start="file", **transformer) try: ast = parser.parse(filecontent_wo_comments) diff --git a/__main__.py b/__main__.py index 91ce153..3b34976 100644 --- a/__main__.py +++ b/__main__.py @@ -9,7 +9,6 @@ def main(): parser.add_argument("--progress", action="store_true", help="Show progress during validation.") parser.add_argument("--json", action="store_true", help="Output errors in JSON format.") parser.add_argument("--only-header", action="store_true", help="Validate only the header section.") - parser.add_argument("--only-data", action="store_true", help="Validate only the data section.") args = 
parser.parse_args() if args.only_header and args.only_data: @@ -22,7 +21,6 @@ def main(): with_progress = args.progress, with_tree = False, only_header=args.only_header, - validate_data_only = args.only_data ) if not args.json: print("Valid", file=sys.stderr) diff --git a/test_parser.py b/test_parser.py index 0953ce6..48f3185 100644 --- a/test_parser.py +++ b/test_parser.py @@ -126,23 +126,3 @@ def test_valid_headers(filename): # error in body; with_header should not raise an error with nullcontext(): parse(filename=filename, with_tree=False, only_header=True, with_header=True) - - -@pytest.mark.parametrize("filename", [ - 'fixtures/fail_invalid_header_entity.ifc', - 'fixtures/fail_no_header.ifc', -]) -def test_invalid_headers_(filename): - # error in header; validate_data_only should not raise an error - with nullcontext(): - parse(filename=filename, validate_data_only=True) - -@pytest.mark.parametrize("filename", [ - 'fixtures/fail_duplicate_id.ifc', - 'fixtures/fail_double_comma.ifc', - 'fixtures/fail_double_semi.ifc' -]) -def test_valid_headers(filename): - # error in body; validate_data_only should raise an error - with pytest.raises(ValidationError): - parse(filename=filename, validate_data_only=True) \ No newline at end of file From 3552f12dde0a86971effa4219ccaea48e8f188a0 Mon Sep 17 00:00:00 2001 From: Geert Hesselink Date: Tue, 24 Jun 2025 13:25:32 +0100 Subject: [PATCH 4/5] rm only_data --- __main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/__main__.py b/__main__.py index 3b34976..7dd33e4 100644 --- a/__main__.py +++ b/__main__.py @@ -11,7 +11,7 @@ def main(): parser.add_argument("--only-header", action="store_true", help="Validate only the header section.") args = parser.parse_args() - if args.only_header and args.only_data: + if args.only_header: print("Cannot use both --only-header and --only-data at the same time", file=sys.stderr) sys.exit(2) From 941e03a6e3ed053e532ea672745df6446e5a6f18 Mon Sep 17 00:00:00 2001 From: Geert 
Hesselink Date: Tue, 24 Jun 2025 13:26:51 +0100 Subject: [PATCH 5/5] rm simultaneous only-data & only-header check --- __main__.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/__main__.py b/__main__.py index 7dd33e4..7230888 100644 --- a/__main__.py +++ b/__main__.py @@ -11,9 +11,6 @@ def main(): parser.add_argument("--only-header", action="store_true", help="Validate only the header section.") args = parser.parse_args() - if args.only_header: - print("Cannot use both --only-header and --only-data at the same time", file=sys.stderr) - sys.exit(2) try: parse(