Skip to content

Commit 4f8a3c7

Browse files
updating comments
1 parent 2fea419 commit 4f8a3c7

File tree

1 file changed

+8
-11
lines changed

1 file changed

+8
-11
lines changed

orion/merging.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,7 @@
2525
# properties where two entities had different values that could not be reconciled.
2626
_dropped_properties = set()
2727

28-
# use this flush pattern so we can emit warnings after each merge, even if there are many
29-
# (ie after each set of node/edge files)
28+
# Emit collected warnings, clear them, and return the collected lists for metadata capture.
3029
def flush_merge_warnings():
3130
mismatched = sorted(_mismatched_dict_properties)
3231
dropped = sorted(_dropped_properties)
@@ -96,19 +95,18 @@ def edge_key_function(edge, custom_key_attributes=None, edge_id_type=None):
9695
def entity_merging_function(entity_1, entity_2):
9796
# for every property of entity 2
9897
for key, entity_2_value in entity_2.items():
99-
# if entity 1 also has the property and entity_2_value is not null/empty:
98+
# if entity 1 also has the property and entity_2_value is not null:
10099
if (key in entity_1) and (entity_2_value is not None):
101100
entity_1_value = entity_1[key]
102101

103-
# check if one or both of them are lists so we can combine them
102+
# classify both values so we can pick the right merge strategy
104103
entity_1_is_list = isinstance(entity_1_value, list)
105104
entity_2_is_list = isinstance(entity_2_value, list)
106105
entity_1_is_dict = isinstance(entity_1_value, dict)
107106
entity_2_is_dict = isinstance(entity_2_value, dict)
108107
if entity_1_is_dict and entity_2_is_dict:
109-
# Dict-shaped biolink slots are id-keyed by construction, so merging by key is
110-
# lossless at the key level. For colliding keys whose values are themselves
111-
# schema-shaped dicts, recurse so nested list/dict/scalar slots merge correctly.
108+
# merge by key: for colliding keys, recursively merge dict values as entities;
109+
# prefer the truthy value on scalars; otherwise keep entity 1's value
112110
for sub_key, sub_value in entity_2_value.items():
113111
if sub_key in entity_1_value:
114112
existing_sub_value = entity_1_value[sub_key]
@@ -132,11 +130,10 @@ def entity_merging_function(entity_1, entity_2):
132130
# if 1 is a list and 2 isn't, append the value of 2 to the list from 1
133131
entity_1_value.append(entity_2_value)
134132
elif entity_2_is_list:
133+
# if 2 is a list: when 1 has a non-null value, prepend it to the list from 2; otherwise just use the list from 2
135134
if entity_1_value is not None:
136-
# if 2 is a list and 1 has a value, add the value of 1 to the list from 2
137135
entity_1[key] = [entity_1_value] + entity_2_value
138136
else:
139-
# if 2 is a list and 1 doesn't have a value, just use the list from 2
140137
entity_1[key] = entity_2_value
141138
else:
142139
# scalar/scalar: prefer the truthy value. Falsy values (None, 0, "", False)
@@ -170,8 +167,8 @@ def entity_merging_function(entity_1, entity_2):
170167
entity_1[key] = list(grouped.values())
171168
else:
172169
entity_1[key] = sorted(set(entity_1[key]))
173-
else:
174-
# if entity 1 doesn't have the property, add the property from entity 2
170+
elif key not in entity_1:
171+
# entity 1 doesn't have the property, so copy it from entity 2 (a None value from entity 2 never overwrites a property entity 1 already has)
175172
entity_1[key] = entity_2_value
176173
return entity_1
177174

0 commit comments

Comments
 (0)