Skip to content

Commit 46d473c

Browse files
committed
Refactor serialization: Replace UsedLanguage with LanguageVersion, rename SerializedChunk to SerializationChunk, and implement support for Protocol Buffer serialization
1 parent b400b3f commit 46d473c

File tree

12 files changed

+401
-38
lines changed

12 files changed

+401
-38
lines changed

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,4 +60,10 @@ And then generate a report with:
6060
```
6161
coverage html
6262
# report generated under htmlcov/index.html
63+
```
64+
65+
## Update Protocol Buffer classes
66+
67+
```
68+
protoc --proto_path=./src --python_out=./src ./src/lionweb/serialization/proto/Chunk.proto
6369
```

requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ platformdirs==4.3.6
5656
pluggy==1.5.0
5757
pre_commit==4.1.0
5858
prompt_toolkit==3.0.50
59+
protobuf==6.32.1
5960
ptyprocess==0.7.0
6061
pure_eval==0.2.3
6162
pydantic==2.10.6
@@ -84,6 +85,7 @@ tqdm==4.67.1
8485
traitlets==5.14.3
8586
trove-classifiers==2025.3.13.13
8687
twine==6.1.0
88+
types-protobuf==6.30.2.20250822
8789
types-requests==2.32.0.20241016
8890
typing_extensions==4.12.2
8991
urllib3==2.3.0
Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from .abstract_serialization import AbstractSerialization
2-
from .data import (MetaPointer, SerializedChunk, SerializedClassifierInstance,
2+
from .data import (MetaPointer, SerializationChunk, SerializedClassifierInstance,
33
SerializedContainmentValue, SerializedPropertyValue,
44
SerializedReferenceValue)
55
from .instantiator import InstantiationError
@@ -8,6 +8,7 @@
88
from .serialization_provider import (create_standard_json_serialization,
99
setup_standard_initialization)
1010
from .serialized_json_comparison_utils import SerializedJsonComparisonUtils
11+
#from .protobuf_serialization import ProtobufSerialization
1112

1213
__all__ = [
1314
"AbstractSerialization",
@@ -17,10 +18,10 @@
1718
"setup_standard_initialization",
1819
"SerializedJsonComparisonUtils",
1920
"MetaPointer",
20-
"SerializedChunk",
21+
"SerializationChunk",
2122
"SerializedClassifierInstance",
2223
"SerializedContainmentValue",
2324
"SerializedPropertyValue",
2425
"SerializedReferenceValue",
25-
"LowLevelJsonSerialization",
26+
"LowLevelJsonSerialization"
2627
]

src/lionweb/serialization/abstract_serialization.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from lionweb.model.has_settable_parent import HasSettableParent
77
from lionweb.serialization.classifier_resolver import ClassifierResolver
88
from lionweb.serialization.data.metapointer import MetaPointer
9-
from lionweb.serialization.data.serialized_chunk import SerializedChunk
9+
from lionweb.serialization.data.serialized_chunk import SerializationChunk
1010
from lionweb.serialization.data.serialized_classifier_instance import \
1111
SerializedClassifierInstance
1212
from lionweb.serialization.data.serialized_containment_value import \
@@ -15,7 +15,7 @@
1515
SerializedPropertyValue
1616
from lionweb.serialization.data.serialized_reference_value import (
1717
SerializedReferenceValue, SerializedReferenceValueEntry)
18-
from lionweb.serialization.data.used_language import UsedLanguage
18+
from lionweb.serialization.data.language_version import LanguageVersion
1919
from lionweb.serialization.deserialization_exception import \
2020
DeserializationException
2121
from lionweb.serialization.deserialization_status import DeserializationStatus
@@ -77,7 +77,7 @@ def collect_self_and_descendants(
7777
return collection
7878

7979
def serialize_nodes_to_serialization_chunk(self, classifier_instances):
80-
serialized_chunk = SerializedChunk()
80+
serialized_chunk = SerializationChunk()
8181
serialized_chunk.serialization_format_version = self.lion_web_version.value
8282

8383
for classifier_instance in classifier_instances:
@@ -135,7 +135,7 @@ def serialize_nodes_to_serialization_chunk(self, classifier_instances):
135135

136136
def _consider_language_during_serialization(self, serialized_chunk, language):
137137
self.register_language(language)
138-
used_language = UsedLanguage(language.get_key(), language.get_version())
138+
used_language = LanguageVersion(language.get_key(), language.get_version())
139139
if used_language not in serialized_chunk.languages:
140140
serialized_chunk.languages.append(used_language)
141141

@@ -281,7 +281,7 @@ def _serialize_annotations(
281281
annotation.id for annotation in classifier_instance.get_annotations()
282282
]
283283

284-
def deserialize_serialization_chunk(self, serialized_chunk: SerializedChunk):
284+
def deserialize_serialization_chunk(self, serialized_chunk: SerializationChunk):
285285
serialized_instances = serialized_chunk.classifier_instances
286286
return self._deserialize_classifier_instances(
287287
self.lion_web_version, serialized_instances
@@ -388,7 +388,7 @@ def _deserialize_classifier_instances(
388388
return nodes_with_original_sorting
389389

390390
def _validate_serialization_chunk(
391-
self, serialization_chunk: SerializedChunk
391+
self, serialization_chunk: SerializationChunk
392392
) -> None:
393393
if serialization_chunk is None:
394394
raise ValueError("serialization_chunk should not be null")

src/lionweb/serialization/data/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
from .metapointer import MetaPointer
2-
from .serialized_chunk import SerializedChunk
2+
from .serialized_chunk import SerializationChunk
33
from .serialized_classifier_instance import SerializedClassifierInstance
44
from .serialized_containment_value import SerializedContainmentValue
55
from .serialized_property_value import SerializedPropertyValue
66
from .serialized_reference_value import SerializedReferenceValue
7+
from .language_version import LanguageVersion
78

89
__all__ = [
10+
"LanguageVersion",
911
"MetaPointer",
10-
"SerializedChunk",
12+
"SerializationChunk",
1113
"SerializedClassifierInstance",
1214
"SerializedContainmentValue",
1315
"SerializedPropertyValue",

src/lionweb/serialization/data/used_language.py renamed to src/lionweb/serialization/data/language_version.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from typing import Optional
22

33

4-
class UsedLanguage:
4+
class LanguageVersion:
55
"""
66
The pair Language Key and Language Version identify a specific version of a language.
77
Corresponds to the Java class 'UsedLanguage'.
@@ -20,7 +20,7 @@ def from_language(language):
2020
language: An object with `key` and `version` attributes.
2121
2222
Returns:
23-
UsedLanguage: An instance of UsedLanguage.
23+
LanguageVersion: An instance of LanguageVersion.
2424
2525
Raises:
2626
ValueError: If language or its attributes are None.
@@ -29,7 +29,7 @@ def from_language(language):
2929
raise ValueError("Language parameter should not be null")
3030
if language.version is None:
3131
raise ValueError("Language version should not be null")
32-
return UsedLanguage(language.key, language.version)
32+
return LanguageVersion(language.key, language.version)
3333

3434
@staticmethod
3535
def from_meta_pointer(meta_pointer):
@@ -40,7 +40,7 @@ def from_meta_pointer(meta_pointer):
4040
meta_pointer: An object with `language` and `version` attributes.
4141
4242
Returns:
43-
UsedLanguage: An instance of UsedLanguage.
43+
LanguageVersion: An instance of LanguageVersion.
4444
4545
Raises:
4646
ValueError: If meta_pointer or its attributes are None.
@@ -51,7 +51,7 @@ def from_meta_pointer(meta_pointer):
5151
raise ValueError("meta_pointer language should not be null")
5252
if meta_pointer.version is None:
5353
raise ValueError("meta_pointer version should not be null")
54-
return UsedLanguage(meta_pointer.language, meta_pointer.version)
54+
return LanguageVersion(meta_pointer.language, meta_pointer.version)
5555

5656
def get_key(self) -> Optional[str]:
5757
return self.key
@@ -66,7 +66,7 @@ def set_version(self, version: str):
6666
self.version = version
6767

6868
def __eq__(self, other):
69-
if not isinstance(other, UsedLanguage):
69+
if not isinstance(other, LanguageVersion):
7070
return False
7171
return self.key == other.key and self.version == other.version
7272

src/lionweb/serialization/data/metapointer.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from typing import TYPE_CHECKING, Optional
33

44

5+
# TODO adopt interning
56
@dataclass(frozen=True, eq=True)
67
class MetaPointer:
78
if TYPE_CHECKING:

src/lionweb/serialization/data/serialized_chunk.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,13 @@
33

44
from lionweb.serialization.data.serialized_classifier_instance import \
55
SerializedClassifierInstance
6-
from lionweb.serialization.data.used_language import UsedLanguage
6+
from lionweb.serialization.data.language_version import LanguageVersion
77

88

99
@dataclass
10-
class SerializedChunk:
10+
class SerializationChunk:
1111
serialization_format_version: str = ""
12-
languages: List[UsedLanguage] = field(default_factory=list)
12+
languages: List[LanguageVersion] = field(default_factory=list)
1313
classifier_instances: List[SerializedClassifierInstance] = field(
1414
default_factory=list
1515
)
@@ -37,7 +37,7 @@ def __str__(self):
3737
)
3838

3939
def __eq__(self, other):
40-
if not isinstance(other, SerializedChunk):
40+
if not isinstance(other, SerializationChunk):
4141
return False
4242
return (
4343
self.serialization_format_version == other.serialization_format_version
@@ -81,6 +81,6 @@ def populate_used_languages(self) -> None:
8181
self._consider_meta_pointer(property_value.get_meta_pointer())
8282

8383
def _consider_meta_pointer(self, meta_pointer):
84-
used_language = UsedLanguage.from_meta_pointer(meta_pointer)
84+
used_language = LanguageVersion.from_meta_pointer(meta_pointer)
8585
if used_language not in self.languages:
8686
self.languages.append(used_language)

src/lionweb/serialization/data/serialized_property_value.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from lionweb.serialization.data.metapointer import MetaPointer
44

5-
5+
# TODO intern values
66
class SerializedPropertyValue:
77
def __init__(self, meta_pointer: MetaPointer, value: Optional[str]):
88
self.meta_pointer = meta_pointer

src/lionweb/serialization/low_level_json_serialization.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
from lionweb import LionWebVersion
55
from lionweb.serialization.data.metapointer import MetaPointer
6-
from lionweb.serialization.data.serialized_chunk import SerializedChunk
6+
from lionweb.serialization.data.serialized_chunk import SerializationChunk
77
from lionweb.serialization.data.serialized_classifier_instance import \
88
SerializedClassifierInstance
99
from lionweb.serialization.data.serialized_containment_value import \
@@ -12,7 +12,7 @@
1212
SerializedPropertyValue
1313
from lionweb.serialization.data.serialized_reference_value import \
1414
SerializedReferenceValue
15-
from lionweb.serialization.data.used_language import UsedLanguage
15+
from lionweb.serialization.data.language_version import LanguageVersion
1616
from lionweb.serialization.deserialization_exception import \
1717
DeserializationException
1818
from lionweb.serialization.json_utils import JsonArray, JsonElement, JsonObject
@@ -22,8 +22,8 @@
2222
class LowLevelJsonSerialization:
2323
def deserialize_serialization_block(
2424
self, json_element: JsonElement
25-
) -> SerializedChunk:
26-
serialized_chunk = SerializedChunk()
25+
) -> SerializationChunk:
26+
serialized_chunk = SerializationChunk()
2727
if isinstance(json_element, dict):
2828
self._check_no_extra_keys(
2929
json_element, ["nodes", "serializationFormatVersion", "languages"]
@@ -38,7 +38,7 @@ def deserialize_serialization_block(
3838
)
3939

4040
def serialize_to_json_element(
41-
self, serialized_chunk: SerializedChunk
41+
self, serialized_chunk: SerializationChunk
4242
) -> JsonObject:
4343
serialized_nodes = []
4444
for node in serialized_chunk.get_classifier_instances():
@@ -97,7 +97,7 @@ def serialize_to_json_element(
9797
}
9898

9999
def _serialize_language_to_json_element(
100-
self, language_key_version: UsedLanguage
100+
self, language_key_version: LanguageVersion
101101
) -> JsonObject:
102102
json_object = {
103103
"key": language_key_version.get_key(),
@@ -114,7 +114,7 @@ def _serialize_metapointer_to_json_element(
114114
"key": meta_pointer.key,
115115
}
116116

117-
def serialize_to_json_string(self, serialized_chunk: SerializedChunk) -> str:
117+
def serialize_to_json_string(self, serialized_chunk: SerializationChunk) -> str:
118118
return json.dumps(
119119
self.serialize_to_json_element(serialized_chunk),
120120
indent=2,
@@ -123,7 +123,7 @@ def serialize_to_json_string(self, serialized_chunk: SerializedChunk) -> str:
123123
def deserialize_serialization_block_from_string(
124124
self,
125125
json_string: str,
126-
) -> SerializedChunk:
126+
) -> SerializationChunk:
127127
try:
128128
json_element = json.loads(json_string)
129129
return self.deserialize_serialization_block(json_element)
@@ -132,7 +132,7 @@ def deserialize_serialization_block_from_string(
132132

133133
def deserialize_serialization_block_from_file(
134134
self, file_path: str
135-
) -> SerializedChunk:
135+
) -> SerializationChunk:
136136
try:
137137
with open(file_path, "r") as file:
138138
json_element = json.load(file)
@@ -153,7 +153,7 @@ def _check_no_extra_keys(
153153
)
154154

155155
def _read_serialization_format_version(
156-
self, serialized_chunk: SerializedChunk, top_level: JsonObject
156+
self, serialized_chunk: SerializationChunk, top_level: JsonObject
157157
) -> None:
158158
if "serializationFormatVersion" not in top_level:
159159
raise ValueError("serializationFormatVersion not specified")
@@ -173,24 +173,24 @@ def require_is_string(value, desc: str):
173173
def group_nodes_into_serialization_block(
174174
serialized_classifier_instances: Iterable[SerializedClassifierInstance],
175175
lion_web_version: LionWebVersion,
176-
) -> SerializedChunk:
177-
serialized_chunk = SerializedChunk()
176+
) -> SerializationChunk:
177+
serialized_chunk = SerializationChunk()
178178
serialized_chunk.serialization_format_version = lion_web_version.value
179179
for sci in serialized_classifier_instances:
180180
serialized_chunk.add_classifier_instance(sci)
181181
serialized_chunk.populate_used_languages()
182182
return serialized_chunk
183183

184184
def _read_languages(
185-
self, serialized_chunk: SerializedChunk, top_level: JsonObject
185+
self, serialized_chunk: SerializationChunk, top_level: JsonObject
186186
) -> None:
187187
if "languages" not in top_level:
188188
raise ValueError("languages not specified")
189189
languages = top_level.get("languages")
190190
if isinstance(languages, list):
191191
for element in languages:
192192
try:
193-
language_key_version = UsedLanguage()
193+
language_key_version = LanguageVersion()
194194
if isinstance(element, dict):
195195
extra_keys = set(element.keys()) - {"key", "version"}
196196
if extra_keys:
@@ -220,7 +220,7 @@ def _read_languages(
220220
raise ValueError(f"We expected a list, we got instead: {languages}")
221221

222222
def _deserialize_classifier_instances(
223-
self, serialized_chunk: SerializedChunk, top_level: JsonObject
223+
self, serialized_chunk: SerializationChunk, top_level: JsonObject
224224
) -> None:
225225
if "nodes" not in top_level:
226226
raise ValueError("nodes not specified")

0 commit comments

Comments
 (0)