From bbfdb2a5a4218793155af37e66f8d515382c3d91 Mon Sep 17 00:00:00 2001 From: khituras Date: Sat, 10 Dec 2022 17:38:43 +0100 Subject: [PATCH] Release 3.1.0. Update dependency versions. --- .../pom.xml | 2 +- .../datarepresentation/ImportConcept.java | 16 ++- .../constants/ConceptConstants.java | 6 + julielab-neo4j-plugins-concepts/pom.xml | 2 +- .../de/julielab/neo4j/plugins/Export.java | 71 ++++++----- .../concepts/ConceptAggregateManager.java | 112 +++++++++++------- .../plugins/concepts/ConceptInsertion.java | 71 ++++++++--- .../plugins/concepts/ConceptManager.java | 2 +- julielab-neo4j-plugins-utilities/pom.xml | 2 +- pom.xml | 10 +- 10 files changed, 188 insertions(+), 106 deletions(-) diff --git a/julielab-neo4j-plugins-concepts-representation/pom.xml b/julielab-neo4j-plugins-concepts-representation/pom.xml index bb83ff0..b176b6a 100644 --- a/julielab-neo4j-plugins-concepts-representation/pom.xml +++ b/julielab-neo4j-plugins-concepts-representation/pom.xml @@ -4,7 +4,7 @@ de.julielab julielab-neo4j-server-plugins - 3.1.0-SNAPSHOT + 3.1.0 ../pom.xml julielab-neo4j-plugins-concepts-representation diff --git a/julielab-neo4j-plugins-concepts-representation/src/main/java/de/julielab/neo4j/plugins/datarepresentation/ImportConcept.java b/julielab-neo4j-plugins-concepts-representation/src/main/java/de/julielab/neo4j/plugins/datarepresentation/ImportConcept.java index 960fab0..1c884f5 100644 --- a/julielab-neo4j-plugins-concepts-representation/src/main/java/de/julielab/neo4j/plugins/datarepresentation/ImportConcept.java +++ b/julielab-neo4j-plugins-concepts-representation/src/main/java/de/julielab/neo4j/plugins/datarepresentation/ImportConcept.java @@ -18,6 +18,8 @@ public class ImportConcept { public List writingVariants = Collections.emptyList(); @JsonProperty(ConceptConstants.COORDINATES) public ConceptCoordinates coordinates; + @JsonProperty(ConceptConstants.ADDITIONAL_COORDINATES) + public List additionalCoordinates = Collections.emptyList(); 
@JsonProperty(ConceptConstants.PARENT_COORDINATES) public List parentCoordinates = Collections.emptyList(); @JsonProperty(ConceptConstants.RELATIONSHIPS) @@ -38,7 +40,9 @@ public class ImportConcept { @JsonProperty(ConceptConstants.AGGREGATE_INCLUDE_IN_HIERARCHY) public boolean aggregateIncludeInHierarchy; @JsonProperty(ConceptConstants.ADDITIONAL_PROPERTIES) - public Map additionalProperties; + public Map additionalProperties = Collections.emptyMap(); + @JsonProperty(ConceptConstants.ELIGIBLE_FOR_FACET_ROOT) + public boolean eligibleForFacetRoot = true; /** @@ -211,5 +215,15 @@ public boolean hasParents() { return !parentCoordinates.isEmpty(); } + public void addAdditionalCoordinates(ConceptCoordinates additionalCoordinates) { + if (this.additionalCoordinates.isEmpty()) + this.additionalCoordinates = new ArrayList<>(); + this.additionalCoordinates.add(additionalCoordinates); + } + public void addAdditionalProperty(String property, Object value) { + if (this.additionalProperties.isEmpty()) + this.additionalProperties = new HashMap<>(); + this.additionalProperties.put(property, value); + } } diff --git a/julielab-neo4j-plugins-concepts-representation/src/main/java/de/julielab/neo4j/plugins/datarepresentation/constants/ConceptConstants.java b/julielab-neo4j-plugins-concepts-representation/src/main/java/de/julielab/neo4j/plugins/datarepresentation/constants/ConceptConstants.java index f25f675..5dbfcb6 100644 --- a/julielab-neo4j-plugins-concepts-representation/src/main/java/de/julielab/neo4j/plugins/datarepresentation/constants/ConceptConstants.java +++ b/julielab-neo4j-plugins-concepts-representation/src/main/java/de/julielab/neo4j/plugins/datarepresentation/constants/ConceptConstants.java @@ -107,4 +107,10 @@ public class ConceptConstants extends NodeConstants { public static final String COORDINATES = "coordinates"; + public static final String ADDITIONAL_COORDINATES = "additional_coordinates"; + + /** + * Whether a ImportConcept can be connected to its ImportFacet 
via a HAS_ROOT_CONCEPT relationship. Useful to switch off when importing concepts with one facet that would rather belong to another one but that is not imported at all or later. + */ + public static final String ELIGIBLE_FOR_FACET_ROOT = "eligible_for_facet_root"; } diff --git a/julielab-neo4j-plugins-concepts/pom.xml b/julielab-neo4j-plugins-concepts/pom.xml index 9617d45..7e31696 100644 --- a/julielab-neo4j-plugins-concepts/pom.xml +++ b/julielab-neo4j-plugins-concepts/pom.xml @@ -4,7 +4,7 @@ de.julielab julielab-neo4j-server-plugins - 3.1.0-SNAPSHOT + 3.1.0 ../pom.xml julielab-neo4j-plugins-concepts diff --git a/julielab-neo4j-plugins-concepts/src/main/java/de/julielab/neo4j/plugins/Export.java b/julielab-neo4j-plugins-concepts/src/main/java/de/julielab/neo4j/plugins/Export.java index cf6f533..87db870 100644 --- a/julielab-neo4j-plugins-concepts/src/main/java/de/julielab/neo4j/plugins/Export.java +++ b/julielab-neo4j-plugins-concepts/src/main/java/de/julielab/neo4j/plugins/Export.java @@ -48,8 +48,10 @@ public class Export { public static final String LINGPIPE_DICT = "lingpipe_dictionary"; public static final String CONCEPT_TO_FACET = "concept_facet_map"; public static final String CONCEPT_ID_MAPPING = "concept_id_mapping"; + public static final String PARAM_UNIQUE_KEYS = "unique_keys"; public static final String PARAM_SOURCE_ID_PROPERTY = "source_id_property"; public static final String PARAM_TARGET_ID_PROPERTY = "target_id_property"; + public static final String PARAM_FACET_NAMES = "facet_names"; public static final String PARAM_LABELS = "labels"; public static final String PARAM_LABEL = "label"; public static final String PARAM_EXCLUSION_LABEL = "exclusion_label"; @@ -153,18 +155,18 @@ private void createIdMapping(OutputStream os, String sourceIdProperty, String ta @Produces(MediaType.APPLICATION_JSON) @javax.ws.rs.Path(HYPERNYMS) public Object exportHypernyms( - @QueryParam(PARAM_LABELS) String facetLabelStrings, + @QueryParam(PARAM_FACET_NAMES) String 
facetNames, @QueryParam(PARAM_LABEL) String conceptLabel, @Context Log log) throws Exception { ObjectMapper om = new ObjectMapper(); - String[] labelsArray = null != facetLabelStrings ? om.readValue(facetLabelStrings, String[].class) : null; - if (null == labelsArray) + String[] facetNameArray = null != facetNames ? om.readValue(facetNames, String[].class) : null; + if (null == facetNameArray) log.info("Exporting hypernyms dictionary data for all facets."); else - log.info("Exporting hypernyms dictionary data for the facets with labels " + Arrays.toString(labelsArray) + "."); + log.info("Exporting hypernyms dictionary data for the facets with names " + Arrays.toString(facetNameArray) + "."); return (StreamingOutput) output -> { try { - writeHypernymList(labelsArray, conceptLabel, output); + writeHypernymList(facetNameArray, conceptLabel, output); } catch (Exception e) { log.error("Exception occurred during concept ID output streaming.", e); e.printStackTrace(); @@ -172,16 +174,12 @@ public Object exportHypernyms( }; } - private void writeHypernymList(String[] labelsArray, - String termLabelString, OutputStream output) throws IOException { + private void writeHypernymList(String[] facetNames, + String conceptLabelString, OutputStream output) throws IOException { - String[] labels = labelsArray; - if (null == labels) { - labels = new String[]{FacetManager.FacetLabel.FACET.name()}; - } - Label termLabel = null; - if (!StringUtils.isBlank(termLabelString)) - termLabel = Label.label(termLabelString); + Label conceptLabel = null; + if (!StringUtils.isBlank(conceptLabelString)) + conceptLabel = Label.label(conceptLabelString); Map> cache = new HashMap<>(Export.HYPERNYMS_CACHE_SIZE); @@ -197,15 +195,10 @@ private void writeHypernymList(String[] labelsArray, List relationshipTypeList = new ArrayList<>(); // Only create the specific facet IDs set when we have not just // all facets - if (labels.length > 1 || !labels[0].equals(FacetManager.FacetLabel.FACET.name())) { - for 
(String labelString : labels) { - Label label = Label.label(labelString); - ResourceIterable facets = () -> tx.findNodes(label); + if (facetNames != null && facetNames.length > 1 || !facetNames[0].equals("all")) { + for (String facetName : facetNames) { + ResourceIterable facets = () -> tx.findNodes(FacetManager.FacetLabel.FACET, FacetConstants.PROP_NAME, facetName); for (Node facet : facets) { - if (!facet.hasLabel(FacetManager.FacetLabel.FACET)) - throw new IllegalArgumentException("Label node " + facet + " with the label " + label - + " is no facet since it does not have the " + FacetManager.FacetLabel.FACET - + " label."); String facetId = (String) facet.getProperty(FacetConstants.PROP_ID); RelationshipType reltype = RelationshipType .withName(ConceptEdgeTypes.IS_BROADER_THAN + "_" + facetId); @@ -216,17 +209,16 @@ private void writeHypernymList(String[] labelsArray, relationshipTypeList.add(ConceptEdgeTypes.IS_BROADER_THAN); } - for (String labelString : labels) { - Label label = Label.label(labelString); - log.info("Now creating hypernyms for facets with label " + label); - ResourceIterable facets = () -> tx.findNodes(label); + for (String facetName : facetNames) { + log.info("Now creating hypernyms for facet with name " + facetName); + ResourceIterable facets = () -> tx.findNodes(FacetManager.FacetLabel.FACET, FacetConstants.PROP_NAME, facetName); Set visitedNodes = new HashSet<>(); for (Node facet : facets) { Iterable rels = facet.getRelationships(Direction.OUTGOING, ConceptEdgeTypes.HAS_ROOT_CONCEPT); for (Relationship rel : rels) { Node rootTerm = rel.getEndNode(); - if (null != termLabel && !rootTerm.hasLabel(termLabel)) + if (null != conceptLabel && !rootTerm.hasLabel(conceptLabel)) continue; writeHypernyms(rootTerm, visitedNodes, cache, output, relationshipTypeList.toArray(new RelationshipType[0])); @@ -292,6 +284,18 @@ private void writeHypernyms(Node n, Set visitedNodes, MapProduces a dictionary/mapping from concept node names - preferred name, 
synonyms and, if added to the database, connected acronym node names - to their concept ID ([at]id[0-9]+).

+ *

The mapping is a text string that consists of one entry per line; name and conceptId are separated by a tab character. While this format can be used for a number of purposes, + * it specifically fits the format used by the JCoRe Lingpipe Gazetteer component.

+ * @param labelsString One or multiple labels that identify the sets of nodes to process for dictionary creation. Lists of labels must be in JSON format. The labels are processed in the specified order. This is important if uniqueKeys is enabled. + * @param exclusionLabelString One or multiple labels that serve as a node filter. Nodes having one of those labels will be skipped from dictionary creation. + * @param nodeCategories The node properties that should be the keys of the dictionary. Separate multiple properties with commas. There are restrictions regarding the types of the properties. They must either all be non-array values or all are arrays of the same length. For multiple properties, a single mapping-target string is created with "||" as a value separator. In case of array values, the string first lists all first elements, then the second elements, then the third elements etc. + * @param uniqueKeys Determines if keys may occur multiple times or should be unique. In case of uniqueness, the labelsString becomes important: the first occurrence of a key will be included in the output, subsequent occurrences will be discarded. + * @param log + * @return The dictionary text string GZIP-compressed and Base64-ASCII-encoded. + * @throws IOException + */ @GET @Produces(MediaType.TEXT_PLAIN) @javax.ws.rs.Path(LINGPIPE_DICT) @@ -299,6 +303,7 @@ public String exportLingpipeDictionary( @QueryParam(PARAM_LABELS) String labelsString, @QueryParam(PARAM_EXCLUSION_LABEL) String exclusionLabelString, @QueryParam(PARAM_SOURCE_ID_PROPERTY) String nodeCategories, + @QueryParam(PARAM_UNIQUE_KEYS) boolean uniqueKeys, @Context Log log) throws IOException { final ObjectMapper om = new ObjectMapper(); @@ -333,6 +338,7 @@ public String exportLingpipeDictionary( ByteArrayOutputStream baos = new ByteArrayOutputStream(OUTPUTSTREAM_INIT_SIZE); GraphDatabaseService graphDb = dbms.database(DEFAULT_DATABASE_NAME); + Set writtenKeys = uniqueKeys ? 
new HashSet<>() : null; try (GZIPOutputStream os = new GZIPOutputStream(baos)) { for (Label label : labels) { try (Transaction tx = graphDb.beginTx()) { @@ -409,14 +415,14 @@ public String exportLingpipeDictionary( // writingVariants = (String[]) term // .getProperty(PROP_WRITING_VARIANTS); - writeNormalizedDictionaryEntry(preferredName, categoryString, os); + writeNormalizedDictionaryEntry(preferredName, categoryString, writtenKeys, os); for (String synonString : synonyms) - writeNormalizedDictionaryEntry(synonString, categoryString, os); + writeNormalizedDictionaryEntry(synonString, categoryString, writtenKeys, os); TraversalDescription acronymsTraversal = PredefinedTraversals.getAcronymsTraversal(tx); Traverser traverse = acronymsTraversal.traverse(term); for (Node acronymNode : traverse.nodes()) { String acronym = (String) acronymNode.getProperty(MorphoConstants.PROP_NAME); - writeNormalizedDictionaryEntry(acronym, categoryString, os); + writeNormalizedDictionaryEntry(acronym, categoryString, writtenKeys, os); } // for (String variant : writingVariants) // writeNormalizedDictionaryEntry(variant, @@ -434,10 +440,11 @@ public String exportLingpipeDictionary( return Base64.getEncoder().encodeToString(bytes); } - private void writeNormalizedDictionaryEntry(String name, String termId, OutputStream os) throws IOException { + private void writeNormalizedDictionaryEntry(String name, String termId, Set writtenKeys, OutputStream os) throws IOException { String normalizedName = StringUtils.normalizeSpace(name); - if (normalizedName.length() > 2) + if (normalizedName.length() > 2 && (writtenKeys == null || writtenKeys.add(normalizedName))) { IOUtils.write(normalizedName + "\t" + termId + "\n", os, "UTF-8"); + } } @GET diff --git a/julielab-neo4j-plugins-concepts/src/main/java/de/julielab/neo4j/plugins/concepts/ConceptAggregateManager.java b/julielab-neo4j-plugins-concepts/src/main/java/de/julielab/neo4j/plugins/concepts/ConceptAggregateManager.java index a3a3e31..f579acc 
100644 --- a/julielab-neo4j-plugins-concepts/src/main/java/de/julielab/neo4j/plugins/concepts/ConceptAggregateManager.java +++ b/julielab-neo4j-plugins-concepts/src/main/java/de/julielab/neo4j/plugins/concepts/ConceptAggregateManager.java @@ -25,6 +25,7 @@ import javax.ws.rs.core.Context; import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; +import java.io.IOException; import java.util.*; import java.util.function.Predicate; import java.util.stream.Collectors; @@ -42,6 +43,8 @@ import static de.julielab.neo4j.plugins.datarepresentation.constants.ConceptConstants.*; import static org.neo4j.configuration.GraphDatabaseSettings.DEFAULT_DATABASE_NAME; + + @Path("/" + ConceptAggregateManager.CAM_REST_ENDPOINT) public class ConceptAggregateManager { @@ -86,7 +89,7 @@ public ConceptAggregateManager(@Context DatabaseManagementService dbms) { * aggregate and does not have to be present in which case nothing will be * copied. The copy process will NOT be done in this method call but must be * triggered manually via - * {@link #copyAggregateProperties(Log)}. + * {@link #copyAggregateProperties()}. * * @param tx The current transaction. * @param jsonConcept The aggregate encoded into JSON format. @@ -187,60 +190,84 @@ static void insertAggregateConcept(Transaction tx, ImportConcept jsonConcept, * Aggregates terms that have equal preferred name and synonyms, after some * minor normalization. * - * @param tx The graph database to work on. + * @param graphDb The graph database to work on. * @param nodeLabels * @param aggregatedLabels * @param copyProperties * @return */ - public static int buildAggregatesForEqualNames(Transaction tx, List