Skip to content

Commit d98b22f

Browse files
committed
use getter for inner crate
also prevents directly accessing items listed in the subcrate under hasPart, e.g. subcrate.get("subfile.txt")
1 parent 21556c8 commit d98b22f

File tree

3 files changed

+49
-21
lines changed

3 files changed

+49
-21
lines changed

rocrate/model/entity.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ def __init__(self, crate, identifier=None, properties=None):
4343
if name.startswith("@"):
4444
self._jsonld[name] = value
4545
else:
46+
# this will call the __setitem__ method defined below
4647
self[name] = value
4748

4849
@property

rocrate/rocrate.py

Lines changed: 43 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -433,7 +433,7 @@ def dereference(self, entity_id, default=None):
433433
# don't use replace, as it could replace in the middle of the id
434434
entity_id_in_subcrate = entity_id[len(subcrate_entity.id):]
435435

436-
return subcrate_entity.get(entity_id_in_subcrate, default=default)
436+
return subcrate_entity.get_crate().get(entity_id_in_subcrate, default=default)
437437

438438
# fallback
439439
return default
@@ -865,44 +865,69 @@ def __init__(self, crate, source=None, dest_path=None, fetch_remote=False,
865865
super().__init__(crate, source, dest_path, fetch_remote,
866866
validate_url, properties=properties, record_size=record_size)
867867

868-
self._subcrate = None
868+
self._crate = None
869869
"""
870870
A ROCrate instance allowing access to the nested RO-Crate.
871871
The nested RO-Crate is loaded on first access to any of its attributes.
872+
This attribute should not be confused with the crate attribute, which is a reference to the parent crate.
873+
Callers should instead use the get_crate() method to access the nested RO-Crate.
872874
"""
873875

876+
def get_crate(self):
877+
"""
878+
Return the RO-Crate object referenced by this subcrate.
879+
"""
880+
if self._crate is None:
881+
self._load_subcrate()
882+
883+
return self._crate
884+
874885
def _load_subcrate(self):
875886
"""
876887
Load the nested RO-Crate from the source path or URL.
877888
878-
This adds an attribute "hasPart" to the `subcrate` with the entities from the nested RO-Crate,
879-
updating the JSON-LD representation accordingly.
889+
This populates the attribute `hasPart` of the `Subcrate` entity,
890+
with the data entities listed under the `root_dataset["hasPart"]` of the nested RO-Crate.
891+
If the nested RO-Crate does not list any parts under `hasPart`,
892+
then the `hasPart` attribute of the `Subcrate` entity will be an empty list.
893+
"""
894+
if self._crate is None:
895+
# parse_subcrate=True to load further nested RO-Crate (on-demand / lazily too)
896+
self._crate = ROCrate(self.source, parse_subcrate=True)
897+
898+
# Note: assigning to hasPart keeps only the dict with id:entity, not the actual entities
899+
# such that when retrieving something from hasPart one was getting a string not an entity
900+
self["hasPart"] = self._crate.root_dataset.get("hasPart", [])
901+
902+
def _get_parts_subcrate_root(self):
880903
"""
881-
if self._subcrate is None:
882-
self._subcrate = ROCrate(self.source, parse_subcrate=True) # would load further nested RO-Crate
883-
if list_parts := self._subcrate.root_dataset.get("hasPart"):
884-
self._jsonld["hasPart"] = list_parts
904+
Get the list of data entities listed under the `root_dataset["hasPart"]` of the nested RO-Crate.
905+
906+
This will load the nested RO-Crate if not already loaded.
907+
908+
:return: A list of data entities of the nested RO-Crate,
909+
or an empty list if the nested RO-Crate does not list any part.
910+
"""
911+
912+
return self.get_crate().root_dataset.get("hasPart", [])
885913

886914
def __getitem__(self, key):
887-
if self._subcrate is None:
888-
self._load_subcrate()
889915

890-
if key in self._jsonld:
891-
# e.g the "original" entity keys such as id or type
892-
return super().__getitem__(key)
916+
if key == "hasPart":
917+
return self._get_parts_subcrate_root()
893918

894-
# look into the subcrate entities
895-
return self._subcrate.get(key)
919+
else:
920+
return super().__getitem__(key)
896921

897922
def as_jsonld(self):
898-
if self._subcrate is None:
923+
if self._crate is None:
899924
self._load_subcrate()
900925
return super().as_jsonld()
901926

902927
def get_entities(self):
903-
if self._subcrate is None:
928+
if self._crate is None:
904929
self._load_subcrate()
905-
return self._subcrate.get_entities()
930+
return self._crate.get_entities()
906931

907932

908933
def make_workflow_rocrate(workflow_path, wf_type, include_files=[],

test/test_read.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -217,11 +217,13 @@ def test_crate_with_subcrate(test_data_dir):
217217
assert "hasPart" not in subcrate
218218

219219
# check lazy loading by accessing an entity from the subcrate
220-
subfile_entity = subcrate.get("subfile.txt")
221-
assert isinstance(subfile_entity, model.file.File)
220+
list_subcrate_parts = subcrate.get("hasPart", [])
221+
assert len(list_subcrate_parts) == 2 # subfile.txt and subsubcrate/
222+
assert isinstance(list_subcrate_parts[0], DataEntity)
223+
assert "subfile.txt" in [e.id for e in list_subcrate_parts]
222224

223225
# check access from the top-level crate works too
224-
assert main_crate.get("subcrate/subfile.txt") is subfile_entity
226+
assert main_crate.get("subcrate/subfile.txt") in list_subcrate_parts
225227

226228
# check with another nested rocrate
227229
assert isinstance(main_crate.get("subcrate/subsubcrate/deepfile.txt"), model.file.File)

0 commit comments

Comments
 (0)