Skip to content

Commit

Permalink
Allow the specification of aggregate copy properties for equal name a…
Browse files Browse the repository at this point in the history
…gg creation and make them more efficient.

Improve efficiency by accumulating the new properties in an in-memory map instead of writing to the node every time. I am actually not sure that this helps, since transactions are held in memory anyway, but it seemed to improve things a bit. The actual issue, however, was a missing index: since the copy-property process accesses the id property of AGGREGATE nodes, the corresponding index is tremendously helpful.
  • Loading branch information
khituras committed Nov 29, 2022
1 parent 42950b9 commit 56dd818
Show file tree
Hide file tree
Showing 7 changed files with 145 additions and 59 deletions.
1 change: 1 addition & 0 deletions julielab-neo4j-plugins-concepts-representation/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
<groupId>de.julielab</groupId>
<artifactId>julielab-neo4j-server-plugins</artifactId>
<version>3.1.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<artifactId>julielab-neo4j-plugins-concepts-representation</artifactId>
<dependencies>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,13 +173,13 @@ public static Node mergeConceptNodesWithUniqueSourceId(Transaction tx, String sr

// ----- merging of synonyms
String[] conceptSynonyms = (String[]) conceptNode.getProperty(PROP_SYNONYMS);
mergeArrayProperty(firstNode, ConceptConstants.PROP_SYNONYMS, conceptSynonyms, true);
mergeArrayProperty(firstNode, ConceptConstants.PROP_SYNONYMS, conceptSynonyms);
if (addConceptPrefToSynonyms)
addToArrayProperty(firstNode, PROP_SYNONYMS, conceptPrefName);

// ----- merging of facets
String[] conceptFacets = (String[]) conceptNode.getProperty(PROP_FACETS);
mergeArrayProperty(firstNode, PROP_FACETS, conceptFacets, true);
mergeArrayProperty(firstNode, PROP_FACETS, conceptFacets);

// ----- merging labels
Iterable<Label> labels = conceptNode.getLabels();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

import static de.julielab.neo4j.plugins.auxiliaries.PropertyUtilities.*;
import static de.julielab.neo4j.plugins.auxiliaries.PropertyUtilities.getNonNullNodeProperty;
import static de.julielab.neo4j.plugins.auxiliaries.PropertyUtilities.mergeArrayValue;
import static de.julielab.neo4j.plugins.concepts.ConceptInsertion.registerNewHollowConceptNode;
import static de.julielab.neo4j.plugins.concepts.ConceptLabel.AGGREGATE;
import static de.julielab.neo4j.plugins.concepts.ConceptLabel.*;
Expand All @@ -54,6 +55,7 @@ public class ConceptAggregateManager {
public static final String KEY_AGGREGATED_LABELS = "aggregated_labels";
public static final String KEY_SKIP_EXISTING_PROPERTIES = "skip_existing_properties";
public static final String KEY_ALLOWED_MAPPING_TYPES = "allowedMappingTypes";
public static final String KEY_COPY_PROPERTIES = "copy_properties";
public static final String RET_KEY_NUM_AGGREGATES = "numAggregates";
public static final String RET_KEY_NUM_ELEMENTS = "numElements";
public static final String RET_KEY_NUM_PROPERTIES = "numProperties";
Expand Down Expand Up @@ -188,9 +190,10 @@ static void insertAggregateConcept(Transaction tx, ImportConcept jsonConcept,
* @param tx The graph database to work on.
* @param nodeLabels
* @param aggregatedLabels
* @param copyProperties
* @return
*/
public static int buildAggregatesForEqualNames(Transaction tx, List<Label> nodeLabels, List<Label> aggregatedLabels, Log log) {
public static int buildAggregatesForEqualNames(Transaction tx, List<Label> nodeLabels, List<Label> aggregatedLabels, String[] copyProperties, Log log) {
int createdAggregates = 0;
Comparator<Node> nodeNameComparator = Comparator.comparing(n -> ((String) n.getProperty(PROP_PREF_NAME)).toLowerCase().replaceAll("\\s+", ""));

Expand All @@ -209,7 +212,7 @@ public static int buildAggregatesForEqualNames(Transaction tx, List<Label> nodeL
nodes.sort(nodeNameComparator);
log.info("Sorting of nodes by name is done.");

String[] copyProperties = new String[]{PROP_PREF_NAME, PROP_SYNONYMS};

List<Node> equalNameNodes = new ArrayList<>();
log.info("Creating equal-name aggregates for labels %s with labels %s", nodeLabels, aggregatedLabels);
Label[] aggregatedLabelsArray = aggregatedLabels.toArray(Label[]::new);
Expand Down Expand Up @@ -535,14 +538,14 @@ private static void createAggregate(Transaction tx, String[] copyProperties, Set
*
* @param aggregate The aggregate node to assemble element properties to.
* @param skipExistingProperties
* @param copyProperties The properties that should be copied into the aggregate.
* @param copyProperties The properties that should be copied into the aggregate.
* @param copyStats An object to collect statistics over the copy process.
*/
public static void copyAggregateProperties(Node aggregate, boolean skipExistingProperties, String[] copyProperties,
CopyAggregatePropertiesStatistics copyStats) {
String[] unskippedProperties = copyProperties;
// first, clear the properties be copied in case we make a refresh
if (!skipExistingProperties) {
if (skipExistingProperties) {
unskippedProperties = Arrays.stream(copyProperties).filter(Predicate.not(aggregate::hasProperty)).toArray(String[]::new);
}
for (String copyProperty : unskippedProperties) {
Expand All @@ -554,6 +557,12 @@ public static void copyAggregateProperties(Node aggregate, boolean skipExistingP
// values.
Set<String> divergentProperties = new HashSet<>();
// For each element...
Map<String, Object> newAggregateProperties = new HashMap<>();
// Collect existing property values on the aggregate
for (String copyProperty : unskippedProperties) {
if (aggregate.hasProperty(copyProperty))
newAggregateProperties.put(copyProperty, aggregate.getProperties(copyProperty));
}
for (Relationship elementRel : elementRels) {
Node term = elementRel.getEndNode();
if (null != copyStats)
Expand All @@ -570,10 +579,15 @@ public static void copyAggregateProperties(Node aggregate, boolean skipExistingP
copyStats.numProperties++;
Object property = term.getProperty(copyProperty);
if (property.getClass().isArray()) {
mergeArrayProperty(aggregate, copyProperty, JulieNeo4jUtilities.convertArray(property), true);
final Object[] mergedValue = mergeArrayValue(newAggregateProperties.getOrDefault(copyProperty, null), JulieNeo4jUtilities.convertArray(property));
// TODO
// aggregate.setProperty(copyProperty, mergedValue);
newAggregateProperties.put(copyProperty, mergedValue);
} else {
setNonNullNodeProperty(aggregate, copyProperty, property);
Object aggregateProperty = getNonNullNodeProperty(aggregate, copyProperty);
if (!newAggregateProperties.containsKey(copyProperty))
newAggregateProperties.put(copyProperty, property);
// setNonNullNodeProperty(aggregate, copyProperty, property);
Object aggregateProperty = newAggregateProperties.get(copyProperty);
if (!aggregateProperty.equals(property)) {
divergentProperties.add(copyProperty);
}
Expand Down Expand Up @@ -606,14 +620,18 @@ public static void copyAggregateProperties(Node aggregate, boolean skipExistingP
}

// Set the majority value to the aggregate.
aggregate.setProperty(divergentProperty, majorityValue);
// aggregate.setProperty(divergentProperty, majorityValue);
newAggregateProperties.put(divergentProperty, majorityValue);
// Set the minority values to the aggregate as a special property.
for (Object propertyValue : propertyValues.elementSet()) {
if (!propertyValue.equals(majorityValue)) {
Object[] convert = JulieNeo4jUtilities.convertElementsIntoArray(propertyValue.getClass(),
propertyValue);
mergeArrayProperty(aggregate,
divergentProperty + AggregateConstants.SUFFIX_DIVERGENT_ELEMENT_ROPERTY, convert, true);
final String divergentKey = divergentProperty + AggregateConstants.SUFFIX_DIVERGENT_ELEMENT_ROPERTY;
final Object[] mergedValue = mergeArrayValue(newAggregateProperties.getOrDefault(divergentKey, null), convert);
newAggregateProperties.put(divergentKey, mergedValue);
// mergeArrayProperty(aggregate,
// divergentKey, convert);
}
}
}
Expand All @@ -622,13 +640,17 @@ public static void copyAggregateProperties(Node aggregate, boolean skipExistingP
// already resolved by a majority
// vote above. We now additionally merge the minority names to the
// synonyms.
mergeArrayProperty(aggregate, PROP_SYNONYMS,
(Object[]) getNonNullNodeProperty(aggregate,
PROP_PREF_NAME + AggregateConstants.SUFFIX_DIVERGENT_ELEMENT_ROPERTY), true);
final String divergentPrefnameKey = PROP_PREF_NAME + AggregateConstants.SUFFIX_DIVERGENT_ELEMENT_ROPERTY;
final Object[] synonymsWithDivergentPrefNames = mergeArrayValue(newAggregateProperties.getOrDefault(PROP_SYNONYMS, null), (Object[]) newAggregateProperties.get(divergentPrefnameKey));
if (synonymsWithDivergentPrefNames != null)
newAggregateProperties.put(PROP_SYNONYMS, synonymsWithDivergentPrefNames);
// mergeArrayProperty(aggregate, PROP_SYNONYMS,
// (Object[]) getNonNullNodeProperty(aggregate,
// divergentPrefnameKey));

// As a last step, remove duplicate synonyms, case ignored
if (aggregate.hasProperty(PROP_SYNONYMS)) {
String[] synonyms = (String[]) aggregate.getProperty(PROP_SYNONYMS);
if (newAggregateProperties.containsKey(PROP_SYNONYMS)) {
String[] synonyms = (String[]) newAggregateProperties.get(PROP_SYNONYMS);
Set<String> lowerCaseSynonyms = new HashSet<>();
List<String> acceptedSynonyms = new ArrayList<>();
for (String synonym : synonyms) {
Expand All @@ -639,9 +661,13 @@ public static void copyAggregateProperties(Node aggregate, boolean skipExistingP
}
}
Collections.sort(acceptedSynonyms);
aggregate.setProperty(PROP_SYNONYMS,
newAggregateProperties.put(PROP_SYNONYMS,
acceptedSynonyms.toArray(new String[0]));
}

for (String copyProperty : newAggregateProperties.keySet()) {
aggregate.setProperty(copyProperty, newAggregateProperties.get(copyProperty));
}
}

/**
Expand Down Expand Up @@ -698,13 +724,16 @@ public Response buildAggregatesByPreferredName(String jsonParameterObject, @Cont
List<Label> aggregatedLabels = List.of(AGGREGATE_EQUAL_NAMES);
if (parameterMap.containsKey(KEY_AGGREGATED_LABELS))
aggregatedLabels = ((List<String>) parameterMap.get(KEY_AGGREGATED_LABELS)).stream().map(Label::label).collect(Collectors.toList());
List<String> copyProperties = List.of(PROP_PREF_NAME, PROP_SYNONYMS);
if (parameterMap.containsKey(KEY_COPY_PROPERTIES))
copyProperties = ((List<String>) parameterMap.get(KEY_COPY_PROPERTIES));
List<Label> targetLabels = ((List<String>) parameterMap.get(KEY_LABELS)).stream().map(Label::label).collect(Collectors.toList());
log.info("Creating equal-name-aggregates for concepts with label %s and assigning them label", targetLabels, aggregatedLabels);
GraphDatabaseService graphDb = dbms.database(DEFAULT_DATABASE_NAME);
int createdAggregates;
log.info("Beginning transaction for the creation of equal-name aggregates.");
try (Transaction tx = graphDb.beginTx()) {
createdAggregates = ConceptAggregateManager.buildAggregatesForEqualNames(tx, targetLabels, aggregatedLabels, log);
createdAggregates = ConceptAggregateManager.buildAggregatesForEqualNames(tx, targetLabels, aggregatedLabels, copyProperties.toArray(String[]::new), log);
log.info("Committing transaction for the creation of equal-name aggregates.");
tx.commit();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ else if (concept == null)
mergeArrayProperty(concept, PROP_DESCRIPTIONS, () -> jsonConcept.descriptions.toArray(new String[0]));
mergeArrayProperty(concept, PROP_WRITING_VARIANTS, () -> jsonConcept.writingVariants.toArray(new String[0]));
mergeArrayProperty(concept, PROP_COPY_PROPERTIES, () -> jsonConcept.copyProperties.toArray(new String[0]));
mergeArrayProperty(concept, PROP_SYNONYMS, synonyms.stream().filter(s -> !s.equals(prefName)).toArray(), true);
mergeArrayProperty(concept, PROP_SYNONYMS, synonyms.stream().filter(s -> !s.equals(prefName)).toArray());
addToArrayProperty(concept, PROP_FACETS, facetId);
if (jsonConcept.additionalProperties != null) {
for (String property : jsonConcept.additionalProperties.keySet()) {
Expand Down
2 changes: 1 addition & 1 deletion julielab-neo4j-plugins-utilities/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<groupId>de.julielab</groupId>
<artifactId>julielab-neo4j-server-plugins</artifactId>
<version>3.1.0-SNAPSHOT</version>
<relativePath>..</relativePath>
<relativePath>../pom.xml</relativePath>
</parent>
<artifactId>julielab-neo4j-plugins-utilities</artifactId>
<name>JULIE Lab Neo4j Server Plugin Utilities</name>
Expand Down
Loading

0 comments on commit 56dd818

Please sign in to comment.