
Commit bacaf30

Update to work with new array schema and LoL pydantic generator (#14)
1 parent 2659925 commit bacaf30


71 files changed: +2,042 −3,806 lines changed

Diff for: .gitignore

+3-2
@@ -129,5 +129,6 @@ dmypy.json
 .pyre/
 
 .DS_Store
-/my_temperature.zarr
-/my_temperature.h5
+/out/*
+/my_container.h5
+/my_container.zarr

Diff for: README.md

+1-3
@@ -5,9 +5,7 @@ Support for loading and dumping N-dimensional arrays in LinkML.
 # Quick reference for common commands
 
 ```bash
-cd linkml-model
-poetry run gen-json-schema tests/input/examples/schema_definition-array-2.yaml
-poetry run gen-pydantic tests/input/examples/schema_definition-array-2.yaml
+poetry run gen-pydantic tests/input/temperature_schema.yaml > tests/array_classes_lol.py
 ```
 
 # Acknowledgements
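For context, the "LoL pydantic generator" in the commit title emits classes whose array slots are plain nested Python lists (lists of lists). Below is a minimal illustrative sketch of that shape; the class and field names are assumptions and are not code generated by this commit.

```python
# Illustrative sketch only: names are assumed, not taken from tests/array_classes_lol.py.
from typing import List

from pydantic import BaseModel


class TemperatureMatrixSketch(BaseModel):
    """A 2-D array slot represented as a list of lists of floats."""

    values: List[List[float]]


matrix = TemperatureMatrixSketch(values=[[10.0, 11.5], [12.3, 13.1]])
print(matrix.values[0])  # [10.0, 11.5]
```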

Diff for: docs/conf.py

+2-1
@@ -5,6 +5,7 @@
 
 import os
 from datetime import date
+
 from linkml_arrays import __version__
 
 # -- Project information -----------------------------------------------------
@@ -24,7 +25,7 @@
     "sphinx_rtd_theme",
     "sphinx_click",
     # "sphinx_autodoc_typehints",
-    "myst_parser"
+    "myst_parser",
 ]
 
 # generate autosummary pages

Diff for: my_temperature.DaySeries.values.h5

-2.02 KB
Binary file not shown.

Diff for: my_temperature.DaySeries.values.npy

-152 Bytes
Binary file not shown.

Diff for: my_temperature.LatitudeSeries.values.h5

-2.02 KB
Binary file not shown.

Diff for: my_temperature.LatitudeSeries.values.npy

-152 Bytes
Binary file not shown.

Diff for: my_temperature.LongitudeSeries.values.h5

-2.02 KB
Binary file not shown.

Diff for: my_temperature.LongitudeSeries.values.npy

-152 Bytes
Binary file not shown.

Diff for: my_temperature.TemperatureMatrix.values.h5

-2.21 KB
Binary file not shown.

Diff for: my_temperature.TemperatureMatrix.values.npy

-344 Bytes
Binary file not shown.

Diff for: poetry.lock

+780-2,847
Some generated files are not rendered by default.

Diff for: pyproject.toml

+31-31
@@ -1,49 +1,50 @@
 [tool.poetry]
 name = "linkml-arrays"
-version = "0.0.0"
+version = "0.1.0"
 description = "linkml-arrays"
-authors = ["Ryan Ly <[email protected]>"]
+authors = [
+    "Ryan Ly <[email protected]>",
+    "Chris Mungall <[email protected]>",
+]
 license = "BSD-3"
 readme = "README.md"
 
 [tool.poetry.dependencies]
 python = "^3.9"
-linkml-runtime = ">=1.7.0"
+linkml-runtime = ">=1.8.0"
 numpy = ">=1.24.3"
 h5py = ">=3.9.0"
 zarr = ">=2.16.1"
-nptyping = ">=2.5.0"
 xarray = "^2024.1.1"
-tox = "^3.25.1" # TODO move out of main deps
+ruamel-yaml = "^0.18.6"
+importlib_metadata = "*"
 
 [tool.poetry.dev-dependencies]
-pytest = "^7.1.2"
-sphinx = {version = "^5.3.0", extras = ["docs"]}
-sphinx-rtd-theme = {version = "^1.0.0", extras = ["docs"]}
-# sphinx-autodoc-typehints = {version = "^1.19.4", extras = ["docs"]}
-sphinx-click = {version = "^4.3.0", extras = ["docs"]}
-myst-parser = {version = "^0.18.1", extras = ["docs"]}
-jupyter = {version = "*", extras = ["jupyter"]}
-
-[tool.poetry.scripts]
-linkml-arrays = "linkml_arrays.cli:main"
+pytest = "*"
+tox = "*"
+# sphinx = {version = "*", extras = ["docs"]}
+# sphinx-rtd-theme = {version = "^1.0.0", extras = ["docs"]}
+# # sphinx-autodoc-typehints = {version = "^1.19.4", extras = ["docs"]}
+# sphinx-click = {version = "^4.3.0", extras = ["docs"]}
+# myst-parser = {version = "*", extras = ["docs"]}
+# jupyter = {version = "*", extras = ["jupyter"]}
 
-[tool.poetry.extras]
-docs = [
-"sphinx",
-"sphinx-rtd-theme",
-# "sphinx-autodoc-typehints",
-"sphinx-click",
-"myst-parser"
-]
-jupyter = [
-"jupyter"
-]
+# [tool.poetry.extras]
+# docs = [
+# "sphinx",
+# "sphinx-rtd-theme",
+# "sphinx-autodoc-typehints",
+# "sphinx-click",
+# "myst-parser"
+# ]
+# jupyter = [
+# "jupyter"
+# ]
 
-[tool.poetry.group.dev.dependencies]
-black = "^24.1.1"
-pytest = "^7.1.2"
-mypy = "^1.8.0"
+# [tool.poetry.group.dev.dependencies]
+# black = "^24.1.1"
+# pytest = "^7.1.2"
+# mypy = "^1.8.0"
 
 [tool.poetry-dynamic-versioning]
 enable = true
@@ -52,7 +53,6 @@ style = "pep440"
 
 [tool.black]
 line-length = 100
-target-version = ["py38", "py39", "py310"]
 
 [tool.isort]
 profile = "black"

Diff for: src/linkml_arrays/cli.py

-44
This file was deleted.

Diff for: src/linkml_arrays/dumpers/hdf5_dumper.py

+11-13
@@ -1,5 +1,6 @@
 """Class for dumping a LinkML model to an HDF5 file."""
 
+from pathlib import Path
 from typing import Union
 
 import h5py
@@ -14,15 +15,15 @@ def _iterate_element(
 ):
     """Recursively iterate through the elements of a LinkML model and save them.
 
-    Writes Pydantic BaseModel objects as groups, slots that implement "linkml:elements"
+    Write Pydantic BaseModel objects as groups, slots with the "array" element
     as datasets, and other slots as attributes.
     """
     # get the type of the element
    element_type = type(element).__name__
 
     for k, v in vars(element).items():
         found_slot = schemaview.induced_slot(k, element_type)
-        if "linkml:elements" in found_slot.implements:
+        if found_slot.array:
             # save the numpy array to an hdf5 dataset
             group.create_dataset(found_slot.name, data=v)
         else:
@@ -39,16 +40,13 @@ class Hdf5Dumper(Dumper):
     """Dumper class for LinkML models to HDF5 files."""
 
     # TODO is this the right method to overwrite? it does not dump a string
-    def dumps(self, element: Union[YAMLRoot, BaseModel], schemaview: SchemaView, **kwargs):
-        """Dump the element to an HDF5 file.
-
-        Raises:
-            ValueError: If the class requires an identifier and it is not provided.
-        """
-        id_slot = schemaview.get_identifier_slot(element.__class__.__name__)
-        if id_slot is None:
-            raise ValueError("The class requires an identifier.")
-        id_value = getattr(element, id_slot.name)
-        output_file_path = f"{id_value}.h5"
+    def dumps(
+        self,
+        element: Union[YAMLRoot, BaseModel],
+        schemaview: SchemaView,
+        output_file_path: Union[str, Path],
+        **kwargs,
+    ):
+        """Dump the element to an HDF5 file."""
        with h5py.File(output_file_path, "w") as f:
             _iterate_element(element, schemaview, f)
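As a usage note, here is a minimal sketch of how the updated `dumps` signature might be called, now that the caller supplies the output path instead of it being derived from the identifier slot. The wrapper function, schema path, and `container` argument are illustrative assumptions, not code from this commit.

```python
# Sketch only: `container` is assumed to be an instance of a Pydantic class generated
# from the schema (e.g. something from tests/array_classes_lol.py).
from pathlib import Path
from typing import Union

from linkml_runtime import SchemaView

from linkml_arrays.dumpers.hdf5_dumper import Hdf5Dumper


def dump_container_to_hdf5(container, schema_path: str, out_path: Union[str, Path]) -> None:
    """Dump a generated Pydantic model instance to an HDF5 file at an explicit path."""
    schemaview = SchemaView(schema_path)
    Hdf5Dumper().dumps(container, schemaview, output_file_path=out_path)


# Example call (paths assumed):
# dump_container_to_hdf5(container, "tests/input/temperature_schema.yaml", "out/my_container.h5")
```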

Diff for: src/linkml_arrays/dumpers/yaml_array_file_dumper.py

+123
@@ -0,0 +1,123 @@
+"""Base class for dumping a LinkML model to YAML with paths to files containing arrays."""
+
+import os
+from abc import ABCMeta, abstractmethod
+from collections.abc import Callable
+from pathlib import Path
+from typing import List, Optional, Union
+
+import numpy as np
+import yaml
+from linkml_runtime import SchemaView
+from linkml_runtime.dumpers.dumper_root import Dumper
+from linkml_runtime.utils.yamlutils import YAMLRoot
+from pydantic import BaseModel
+
+
+def _iterate_element(
+    element: Union[YAMLRoot, BaseModel],
+    schemaview: SchemaView,
+    output_dir: Path,
+    write_array: Callable,
+    format: str,
+    parent_identifier=None,
+    inlined_name=None,
+):
+    """Recursively iterate through the elements of a LinkML model and save them.
+
+    Return a dictionary with the same structure as the input element, but where the slots
+    with the "array" element are written to an array file and the paths to these
+    files are returned in the dictionary. The paths are relative to the output directory.
+
+    Raises:
+        ValueError: If the class requires an identifier and it is not provided.
+    """
+    # get the type of the element
+    element_type = type(element).__name__
+
+    # ask schemaview whether it has a class by this name
+    found_class = schemaview.get_class(element_type)
+
+    id_slot = schemaview.get_identifier_slot(found_class.name)
+    if id_slot is not None:
+        id_value = getattr(element, id_slot.name)
+    else:
+        id_value = None
+
+    ret_dict = dict()
+    for k, v in vars(element).items():
+        found_slot = schemaview.induced_slot(k, element_type)
+        if found_slot.array:
+            if id_slot is None and parent_identifier is None:
+                raise ValueError("The class requires an identifier.")
+
+            # determine the output file name without the suffix
+            if id_slot is not None:
+                output_file_name = f"{id_value}.{found_slot.name}"
+            elif inlined_name is not None:
+                output_file_name = f"{parent_identifier}.{inlined_name}.{found_slot.name}"
+            elif parent_identifier is not None:
+                output_file_name = f"{parent_identifier}.{found_slot.name}"
+            else:
+                output_file_name = f"{found_slot.name}"
+
+            # if output_dir is absolute, make it relative to current working directory
+            # and create the directory if it does not exist
+            if output_dir.is_absolute():
+                output_dir = Path(os.path.relpath(output_dir, start=os.getcwd()))
+            output_dir.mkdir(exist_ok=True)
+            output_file_path_no_suffix = output_dir / output_file_name
+
+            # save the numpy array to file and write the file path to the dictionary
+            output_file_path = write_array(v, output_file_path_no_suffix)
+            ret_dict[k] = {
+                "source": [
+                    {
+                        "file": f"./{output_file_path}",
+                        "format": format,
+                    }
+                ]
+            }
+        else:
+            if isinstance(v, BaseModel):
+                v2 = _iterate_element(
+                    v,
+                    schemaview,
+                    output_dir,
+                    write_array,
+                    format,
+                    id_value,
+                    inlined_name=found_slot.name,
+                )
+                ret_dict[k] = v2
+            else:
+                ret_dict[k] = v
+    return ret_dict
+
+
+class YamlArrayFileDumper(Dumper, metaclass=ABCMeta):
+    """Base dumper class for LinkML models to YAML files with paths to array files."""
+
+    # FORMAT is a class attribute that must be set by subclasses
+
+    def dumps(
+        self,
+        element: Union[YAMLRoot, BaseModel],
+        schemaview: SchemaView,
+        output_dir: Optional[Union[str, Path]] = None,
+        **kwargs,
+    ) -> str:
+        """Return element formatted as a YAML string."""
+        if output_dir is None:
+            output_dir = "."
+        input = _iterate_element(
+            element, schemaview, Path(output_dir), self.write_array, self.FORMAT
+        )
+
+        return yaml.dump(input)
+
+    @classmethod
+    @abstractmethod
+    def write_array(cls, array: Union[List, np.ndarray], output_file_path: Union[str, Path]):
+        """Write an array to a file."""
+        raise NotImplementedError("Subclasses must implement this method.")
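To illustrate the extension point, here is a hedged sketch of a concrete subclass that provides the `FORMAT` class attribute and the abstract `write_array` hook by saving arrays as `.npy` files. The class name and format label are assumptions; this subclass is not part of the commit.

```python
# Illustrative sketch only: one possible concrete subclass of YamlArrayFileDumper.
from pathlib import Path
from typing import List, Union

import numpy as np

from linkml_arrays.dumpers.yaml_array_file_dumper import YamlArrayFileDumper


class YamlNumpyFileDumperSketch(YamlArrayFileDumper):
    """Dump LinkML models to YAML, saving array slots to .npy files."""

    FORMAT = "numpy"  # assumed label written into the YAML "format" field

    @classmethod
    def write_array(cls, array: Union[List, np.ndarray], output_file_path: Union[str, Path]):
        """Write the array to `<output_file_path>.npy` and return the path recorded in the YAML."""
        path = Path(f"{output_file_path}.npy")  # append suffix to the suffix-less base path
        np.save(path, np.asarray(array))
        return path
```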

Diff for: src/linkml_arrays/dumpers/yaml_dumper.py

+6-5
@@ -1,4 +1,4 @@
-"""Class for dumpling a LinkML model to a YAML file."""
+"""Class for dumping a LinkML model to YAML."""
 
 from typing import Union
 
@@ -14,8 +14,8 @@ def _iterate_element(
 ):
     """Recursively iterate through the elements of a LinkML model and save them.
 
-    Returns a dictionary with the same structure as the input element, but with the slots
-    that implement "linkml:elements" (arrays) are written as lists or lists of lists.
+    Returns a dictionary with the same structure as the input element, but where the slots
+    with the "array" element are written as lists of lists in YAML.
 
     Raises:
         ValueError: If the class requires an identifier and it is not provided.
@@ -35,10 +35,11 @@ def _iterate_element(
     ret_dict = dict()
     for k, v in vars(element).items():
         found_slot = schemaview.induced_slot(k, element_type)
-        if "linkml:elements" in found_slot.implements:
+        if found_slot.array:
             if id_slot is None and parent_identifier is None:
                 raise ValueError("The class requires an identifier.")
-            ret_dict[k] = v.tolist()
+            assert isinstance(v, list)
+            ret_dict[k] = v
         else:
             if isinstance(v, BaseModel):
                 v2 = _iterate_element(v, schemaview, id_value)
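Because the LoL-generated classes already store array slots as nested lists, the value can be placed in the output dictionary unchanged. A rough sketch of how such a dictionary serializes (slot name and numbers are assumed, not from this commit):

```python
# Sketch only: demonstrates that a list-of-lists array slot passes straight through yaml.dump.
import yaml

ret_dict = {"values": [[10.0, 11.5], [12.3, 13.1]]}  # shape of what _iterate_element builds
print(yaml.dump(ret_dict))
# values:
# - - 10.0
#   - 11.5
# - - 12.3
#   - 13.1
```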
