Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 21 additions & 4 deletions __init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,14 +333,19 @@ def parse(
with_progress=False,
with_tree=True,
with_header=False,
only_header=False
only_header=False,
validate_data_only=False
):
if validate_data_only: # Used by the Validation Service to validate only the data section of an IFC file, ignoring the header.
only_header = False
with_header = False
with_tree= False
if filename:
assert not filecontent
filecontent = builtins.open(filename, encoding=None).read()

if only_header:
assert with_header, "'only_header=True' requires 'with_header=True'"
with_header = True

# Match and remove the comments
p = r"/\*[\s\S]*?\*/"
Expand Down Expand Up @@ -407,8 +412,20 @@ def replace_fn(match):

NT = type("NullTransformer", (Transformer,), methods)
transformer = {"transformer": NT()}

parser = Lark(grammar, parser="lalr", start="file", **transformer)

if validate_data_only:
match = re.search(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know... as always, the risk with these kinds of regexes is that they don't follow the grammar: there can be an ENDSEC inside a string or a comment somewhere. Not likely, I know, but still, can we do this in another way?

Parsing the header is not time-consuming, and I guess that once we have arrived at this point in the VS (Validation Service) we have also established that the header is valid. Can't we do this in another way, e.g. use the grammar to parse the header (again) to find the exact start offset of the data section? Also, don't search the file for ENDSEC; instead, using the grammar, just see what the parser tells us the end of the data section is.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe I'm making it too complicated for myself; the idea was that we can still check the data section of a file with an invalid header. However, as you mention, by the time we check the data section we have already established that the header section is valid.

Wouldn't this imply that we can just run simple_spf as before in this case, i.e. without any data-only section? So:

  • For syntax_validation_header, run ifcopenshell.simple_spf file.ifc --header-only --json
  • For syntax_validation_data, run ifcopenshell.simple_spf file.ifc --json

Since an error in the header will block the syntax_validation_data task, we'll know for sure that the error is in the data_section if an error occurs there.

r"DATA\s*;(.*?)ENDSEC\s*;",
filecontent_wo_comments,
flags=re.DOTALL | re.IGNORECASE,
)
if not match:
raise ValidationError("No DATA section found in file")
filecontent_wo_comments = f"DATA;{match.group(1)}ENDSEC;"
start_rule = "data_section"
else:# Parse entire file (header + data)
start_rule = "file"
parser = Lark(grammar, parser="lalr", start=start_rule, **transformer)

try:
ast = parser.parse(filecontent_wo_comments)
Expand Down
25 changes: 21 additions & 4 deletions __main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,33 @@
args = [x for x in sys.argv[1:] if not x.startswith("-")]
flags = [x for x in sys.argv[1:] if x.startswith("-")]

fn = args[0]
filename = args[0]
start_time = time.time()

with_progress = "--progress" in flags
json_output = "--json" in flags
only_header = "--header-only" in flags
validate_data_only = "--data-only" in flags


# Sanity check: can't use both at once
if only_header and validate_data_only:
print("Cannot use both --header-only and --data-only at the same time", file=sys.stderr)
sys.exit(2)

try:
parse(filename=fn, with_progress="--progress" in flags, with_tree=False)
if "--json" not in flags:
parse(
filename=filename,
with_progress=with_progress,
with_tree=False,
only_header=only_header,
validate_data_only=validate_data_only,
)
if not json_output:
print("Valid", file=sys.stderr)
exit(0)
except ValidationError as exc:
if "--json" not in flags:
if not json_output:
print(exc, file=sys.stderr)
else:
import sys
Expand Down
42 changes: 41 additions & 1 deletion test_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,4 +105,44 @@ def test_file_mvd_attr():
assert f.mvd.Remark['SomeKey'] == 'SomeValue'
assert len(f.mvd.comments) == 2
assert all(v in vars(f.header).keys() for v in ['file_description', 'file_name', 'file_schema'])
assert len(f.header.file_name) == 7
assert len(f.header.file_name) == 7


@pytest.mark.parametrize(
    "filename",
    [
        'fixtures/fail_invalid_header_entity.ifc',
        'fixtures/fail_no_header.ifc',
    ],
)
def test_invalid_headers_(filename):
    """Header-only parsing must raise ``ValidationError`` for these fixtures.

    The fixtures are expected to contain an error in the header section
    (per their names), so parsing with ``only_header=True`` should fail.
    """
    header_only_options = dict(with_tree=False, only_header=True, with_header=True)
    with pytest.raises(ValidationError):
        parse(filename=filename, **header_only_options)

@pytest.mark.parametrize(
    "filename",
    [
        'fixtures/fail_duplicate_id.ifc',
        'fixtures/fail_double_comma.ifc',
        'fixtures/fail_double_semi.ifc',
    ],
)
def test_valid_headers(filename):
    """Header-only parsing must not raise for fixtures whose error is in the body.

    With ``only_header=True`` the error located in the data section of these
    fixtures is expected to go unnoticed, so the call should complete.
    """
    header_only_options = dict(with_tree=False, only_header=True, with_header=True)
    # nullcontext() spells out "no exception expected" symmetrically with
    # the pytest.raises() used by the failing-header test.
    with nullcontext():
        parse(filename=filename, **header_only_options)


@pytest.mark.parametrize(
    "filename",
    [
        'fixtures/fail_invalid_header_entity.ifc',
        'fixtures/fail_no_header.ifc',
    ],
)
def test_data_only_ignores_invalid_headers(filename):
    """Data-only validation must not raise when only the header is broken.

    The fixtures are expected to contain an error in the header section;
    with ``validate_data_only=True`` the header is skipped, so parsing
    should complete without a ``ValidationError``.

    This test deliberately has its own name: it used to be called
    ``test_invalid_headers_``, the same as the header-only test earlier in
    this module. A second module-level ``def`` with an identical name
    rebinds it, so pytest collected only this function and the header-only
    test silently never ran.
    """
    # nullcontext() documents that no exception is expected here.
    with nullcontext():
        parse(filename=filename, validate_data_only=True)

@pytest.mark.parametrize(
    "filename",
    [
        'fixtures/fail_duplicate_id.ifc',
        'fixtures/fail_double_comma.ifc',
        'fixtures/fail_double_semi.ifc',
    ],
)
def test_data_only_detects_body_errors(filename):
    """Data-only validation must raise ``ValidationError`` for body errors.

    The fixtures are expected to contain an error in the data section, which
    is exactly what ``validate_data_only=True`` is meant to check.

    This test deliberately has its own name: it used to be called
    ``test_valid_headers``, which both contradicted what it checks and
    duplicated the name of the header-only test earlier in this module.
    The duplicate ``def`` rebound the name, so pytest collected only this
    function and the header-only test silently never ran.
    """
    with pytest.raises(ValidationError):
        parse(filename=filename, validate_data_only=True)