From 6c5e8c8bf9d6d1f650c8df81d769d610599fc77f Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Sat, 30 May 2020 11:09:52 +0200
Subject: [PATCH 001/269] Cord19 reader: Adding html element to `TabFigRef`.
 Seems to be new in Cord19.

---
 .../jcore/reader/cord19/jsonformat/TabFigRef.java        | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/jcore-cord19-reader/src/main/java/de/julielab/jcore/reader/cord19/jsonformat/TabFigRef.java b/jcore-cord19-reader/src/main/java/de/julielab/jcore/reader/cord19/jsonformat/TabFigRef.java
index bfe873c48..0e7794322 100644
--- a/jcore-cord19-reader/src/main/java/de/julielab/jcore/reader/cord19/jsonformat/TabFigRef.java
+++ b/jcore-cord19-reader/src/main/java/de/julielab/jcore/reader/cord19/jsonformat/TabFigRef.java
@@ -19,6 +19,15 @@ public class TabFigRef {
     private String text;
     private String type;
     private String latex;
+    private String html;
+
+    public String getHtml() {
+        return html;
+    }
+
+    public void setHtml(String html) {
+        this.html = html;
+    }
 
     public String getLatex() {
         return latex;

From 2d227284d91cbc7cff473e59f44ffdd506b04708 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 8 Jun 2020 17:07:58 +0200
Subject: [PATCH 002/269] Added first untested version of the Neo4j to
 relations consumer.

---
 .../reader/cord19/jsonformat/TabFigRef.java   |   1 +
 jcore-neo4j-relations-consumer/LICENSE        |  26 ++
 jcore-neo4j-relations-consumer/README.md      |  34 ++
 jcore-neo4j-relations-consumer/pom.xml        |  58 ++++
 .../Neo4jRelationsConsumer.java               | 254 ++++++++++++++
 .../consumer/neo4jrelations/desc/PLACEHOLDER  |   1 +
 .../desc/jcore-neo4j-relations-consumer.xml   |  21 ++
 ...Neo4jRelationsConsumerIntegrationTest.java |  31 ++
 .../Neo4jRelationsConsumerTest.java           |  28 ++
 pom.xml                                       | 327 ++++++++++++------
 10 files changed, 672 insertions(+), 109 deletions(-)
 create mode 100644 jcore-neo4j-relations-consumer/LICENSE
 create mode 100644 jcore-neo4j-relations-consumer/README.md
 create mode 100644 jcore-neo4j-relations-consumer/pom.xml
 create mode 100644 jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java
 create mode 100644 jcore-neo4j-relations-consumer/src/main/resources/de/julielab/jcore/consumer/neo4jrelations/desc/PLACEHOLDER
 create mode 100644 jcore-neo4j-relations-consumer/src/main/resources/de/julielab/jcore/consumer/neo4jrelations/desc/jcore-neo4j-relations-consumer.xml
 create mode 100644 jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerIntegrationTest.java
 create mode 100644 jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerTest.java

diff --git a/jcore-cord19-reader/src/main/java/de/julielab/jcore/reader/cord19/jsonformat/TabFigRef.java b/jcore-cord19-reader/src/main/java/de/julielab/jcore/reader/cord19/jsonformat/TabFigRef.java
index 0e7794322..d35bc534e 100644
--- a/jcore-cord19-reader/src/main/java/de/julielab/jcore/reader/cord19/jsonformat/TabFigRef.java
+++ b/jcore-cord19-reader/src/main/java/de/julielab/jcore/reader/cord19/jsonformat/TabFigRef.java
@@ -22,6 +22,7 @@ public class TabFigRef {
     private String html;
 
     public String getHtml() {
+
         return html;
     }
 
diff --git a/jcore-neo4j-relations-consumer/LICENSE b/jcore-neo4j-relations-consumer/LICENSE
new file mode 100644
index 000000000..fbbd41e05
--- /dev/null
+++ b/jcore-neo4j-relations-consumer/LICENSE
@@ -0,0 +1,26 @@
+BSD 2-Clause License
+
+Copyright (c) 2017, JULIE Lab
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/jcore-neo4j-relations-consumer/README.md b/jcore-neo4j-relations-consumer/README.md
new file mode 100644
index 000000000..7b8a2a0a9
--- /dev/null
+++ b/jcore-neo4j-relations-consumer/README.md
@@ -0,0 +1,34 @@
+# JCoRe Neo4j Relations Consumer
+
+**Descriptor Path**:
+```
+de.julielab.jcore.consumer.neo4jrelations.desc.jcore-neo4j-relations-consumer
+```
+
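+Sends `FlattenedRelation` annotations that have more than one argument with a `ResourceEntry` concept ID to a Neo4j server with the julielab-neo4j-plugins-concepts plugin installed.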
+
+
+
+**1. Parameters**
+
+| Parameter Name | Parameter Type | Mandatory | Multivalued | Description |
+|----------------|----------------|-----------|-------------|-------------|
+| param1 | UIMA-Type | Boolean | Boolean | Description |
+| param2 | UIMA-Type | Boolean | Boolean | Description |
+
+**2. Predefined Settings**
+
+| Parameter Name | Parameter Syntax | Example |
+|----------------|------------------|---------|
+| param1 | Syntax-Description | `Example` |
+| param2 | Syntax-Description | `Example` |
+
+**3. Capabilities**
+
+| Type | Input | Output |
+|------|:-----:|:------:|
+| de.julielab.jcore.types.TYPE |  | `+` |
+| de.julielab.jcore.types.ace.TYPE | `+` |  |
+
+
+[1] Some Literature?
diff --git a/jcore-neo4j-relations-consumer/pom.xml b/jcore-neo4j-relations-consumer/pom.xml
new file mode 100644
index 000000000..e83c89ced
--- /dev/null
+++ b/jcore-neo4j-relations-consumer/pom.xml
@@ -0,0 +1,58 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>jcore-neo4j-relations-consumer</artifactId>
+    <packaging>jar</packaging>
+    <groupId>de.julielab</groupId>
+
+    <parent>
+        <groupId>de.julielab</groupId>
+        <artifactId>jcore-base</artifactId>
+        <version>2.3.0-SNAPSHOT</version>
+    </parent>
+
+    <version>2.3.0-SNAPSHOT</version>
+
+    <dependencies>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-types</artifactId>
+            <version>${jcore-version}</version>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>julielab-neo4j-plugins-concepts-representation</artifactId>
+            <version>3.0.0-SNAPSHOT</version>
+        </dependency>
+        <dependency>
+            <groupId>org.neo4j.test</groupId>
+            <artifactId>neo4j-harness</artifactId>
+            <version>4.0.4</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>julielab-neo4j-plugins-concepts</artifactId>
+            <version>3.0.0-SNAPSHOT</version>
+            <scope>test</scope>
+        </dependency>
+    </dependencies>
+    <name>JCoRe Neo4j Relations Consumer</name>
+    <organization>
+        <name>JULIE Lab Jena, Germany</name>
+        <url>http://www.julielab.de</url>
+    </organization>
+    <url>https://github.com/JULIELab/jcore-base/tree/master/jcore-neo4j-relations-consumer</url>
+    <description>Writes EventMentions to Neo4j.</description>
+</project>
diff --git a/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java b/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java
new file mode 100644
index 000000000..4c4670d97
--- /dev/null
+++ b/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java
@@ -0,0 +1,254 @@
+package de.julielab.jcore.consumer.neo4jrelations;
+
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.core.JsonFactory;
+import com.fasterxml.jackson.core.JsonGenerator;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.collect.HashMultiset;
+import com.google.common.collect.Multiset;
+import de.julielab.jcore.types.ConceptMention;
+import de.julielab.jcore.types.ResourceEntry;
+import de.julielab.jcore.types.ext.FlattenedRelation;
+import de.julielab.neo4j.plugins.datarepresentation.ImportIERelation;
+import de.julielab.neo4j.plugins.datarepresentation.ImportIERelationArgument;
+import de.julielab.neo4j.plugins.datarepresentation.ImportIERelationDocument;
+import de.julielab.neo4j.plugins.datarepresentation.ImportIETypedRelations;
+import de.julielab.neo4j.plugins.datarepresentation.constants.ImportIERelations;
+import org.apache.commons.io.IOUtils;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.descriptor.ResourceMetaData;
+import org.apache.uima.fit.descriptor.TypeCapability;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSArray;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.ws.rs.HttpMethod;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.HttpURLConnection;
+import java.net.URI;
+import java.net.URL;
+import java.util.*;
+import java.util.stream.StreamSupport;
+
+@ResourceMetaData(name = "JCoRe Neo4j Relations Consumer", description = "This component assumes that a Neo4j server with an installed julieliab-neo4j-plugins-concepts plugin installed. It then sends FlattenedRelation instances with more then one arguments to Neo4j. Note that this requires the event arguments to have a ResourceEntry list to obtain database concept IDs from.", vendor = "JULIE Lab, Germany", copyright = "JULIE Lab", version = "2.6.0-SNAPSHOT")
+@TypeCapability(inputs = {"de.julielab.jcore.types.EventMention"})
+public class Neo4jRelationsConsumer extends JCasAnnotator_ImplBase {
+
+    public static final String PARAM_URL = "URL";
+    public static final String PARAM_ID_PROPERTY = "IdProperty";
+    public static final String PARAM_SOURCE = "ConceptSource";
+    private final static Logger log = LoggerFactory.getLogger(Neo4jRelationsConsumer.class);
+    @ConfigurationParameter(name = PARAM_URL, description = "The complete URL to the endpoint of the Neo4j server for relation insertion.")
+    private String url;
+    @ConfigurationParameter(name = PARAM_ID_PROPERTY, description = "The ID property to look up concept nodes in the Neo4j graph. Common options are 'id', 'sourceIds' and 'originalId'. You must know to which ID type the ResourceEntry objects of the relation arguments refer to.")
+    private String idProperty;
+    @ConfigurationParameter(name = PARAM_SOURCE, mandatory = false, description = "Optional. Sets the global source for the concept IDs taken from the ResourceEntry instances of the relation arguments. This causes the 'source' feature of the ResourceEntry objects to be omitted and to globally use the specified source instead. This causes the Neo4j database plugin to resolve the provided argument IDs against the source specified here.")
+    private String globalSource;
+
+    private ImportIERelations importIERelations;
+    private ObjectMapper om;
+
+    /**
+     * Called once by the framework at component creation: reads the descriptor parameters and sets up the relation buffer and the JSON mapper.
+     */
+    @Override
+    public void initialize(final UimaContext aContext) throws ResourceInitializationException {
+        super.initialize(aContext);
+        url = (String) aContext.getConfigParameterValue(PARAM_URL);
+        idProperty = (String) aContext.getConfigParameterValue(PARAM_ID_PROPERTY);
+        globalSource = (String) aContext.getConfigParameterValue(PARAM_SOURCE); // optional parameter, may be null
+        om = new ObjectMapper();
+        om.setSerializationInclusion(JsonInclude.Include.NON_EMPTY); // NON_EMPTY also omits null values
+        initImportRelations(); // process() would otherwise dereference a null importIERelations
+    }
+
+    private void initImportRelations() { // creates a fresh relation buffer, passing the global source when one is configured
+        importIERelations = globalSource == null ? new ImportIERelations(idProperty) : new ImportIERelations(idProperty, globalSource);
+    }
+
+    /**
+     * Called by the framework for each document. Converts the document's relations into an
+     * import document and adds it to the buffer that sendRelationsToNeo4j() later sends and clears.
+     */
+    @Override
+    public void process(final JCas aJCas) {
+        importIERelations.addRelationDocument(convertRelations(aJCas));
+    }
+
+    /** Builds the import document for {@code aJCas}: per relation type, each distinct relation once with its occurrence count. */
+    private ImportIERelationDocument convertRelations(JCas aJCas) {
+        ImportIETypedRelations typedRelations = new ImportIETypedRelations();
+        for (Map.Entry<String, Multiset<UnificationRelation>> group : getEquivalentRelationGroups(aJCas).entrySet()) {
+            List<ImportIERelation> ieRelations4relationType = new ArrayList<>();
+            for (UnificationRelation rel : group.getValue().elementSet()) { // elementSet(): no repeats when count > 1
+                ieRelations4relationType.add(rel.toImportRelation(group.getValue().count(rel)));
+            }
+            typedRelations.put(group.getKey(), ieRelations4relationType); // NOTE(review): assumes Map-style put(type, relations) - confirm
+        }
+        ImportIERelationDocument relDoc = new ImportIERelationDocument();
+        relDoc.setRelations(typedRelations);
+        return relDoc;
+    }
+
+    @Override
+    public void batchProcessComplete() throws AnalysisEngineProcessException {
+        super.batchProcessComplete();
+        sendRelationsToNeo4j(); // send the relation documents buffered so far; the buffer is cleared afterwards
+    }
+
+    @Override
+    public void collectionProcessComplete() throws AnalysisEngineProcessException {
+        super.collectionProcessComplete();
+        sendRelationsToNeo4j(); // send whatever is still buffered at the end of the collection
+    }
+
+    /** POSTs all buffered relation documents as one JSON object to the configured URL and clears the buffer once the response was read. */
+    private void sendRelationsToNeo4j() throws AnalysisEngineProcessException {
+        try {
+            URL target = URI.create(this.url).toURL();
+            HttpURLConnection urlConnection = (HttpURLConnection) target.openConnection();
+            urlConnection.setRequestMethod(HttpMethod.POST);
+            urlConnection.setRequestProperty("Content-Type", "application/json"); // the body is JSON; declare it explicitly
+            urlConnection.setDoOutput(true);
+            try (OutputStream outputStream = urlConnection.getOutputStream();
+                 JsonGenerator g = new JsonFactory(om).createGenerator(outputStream)) { // closed even if serialization fails
+                g.writeStartObject();
+                g.writeObjectField(ImportIERelations.NAME_ID_PROPERTY, idProperty);
+                if (globalSource != null) { // the source is optional; omit the field rather than write a JSON null
+                    g.writeObjectField(ImportIERelations.NAME_ID_SOURCE, globalSource);
+                }
+                g.writeFieldName(ImportIERelations.NAME_DOCUMENTS);
+                g.writeStartArray();
+                for (ImportIERelationDocument document : importIERelations.getDocuments()) {
+                    g.writeObject(document);
+                }
+                g.writeEndArray();
+                g.writeEndObject();
+            }
+            try (InputStream inputStream = urlConnection.getInputStream()) {
+                log.debug("Response from Neo4j: {}", IOUtils.toString(inputStream, "UTF-8"));
+            }
+            importIERelations.clear();
+        } catch (IOException e) {
+            log.error("Could not send relations to Neo4j", e);
+            throw new AnalysisEngineProcessException(e);
+        }
+    }
+
+    /**
+     * <p>Groups the FlattenedRelations of the JCas by relation type and counts equivalent relations, i.e. those with the same type and the same arguments, so that duplicates can be sent to the server as one relation with a count.</p>
+     *
+     * @param aJCas The JCas to get relations from.
+     * @return The counted relations, keyed by relation type. Relations with fewer than two distinct arguments that carry a resource entry are left out.
+     */
+    private Map<String, Multiset<UnificationRelation>> getEquivalentRelationGroups(JCas aJCas) {
+        Map<String, Multiset<UnificationRelation>> relationCounts = new HashMap<>();
+        for (FlattenedRelation fr : aJCas.<FlattenedRelation>getAnnotationIndex(FlattenedRelation.type)) {
+            Set<UnificationArgument> unificationArgs = new HashSet<>();
+            for (Object argument : fr.getArguments()) {
+                FSArray resourceEntryList = ((ConceptMention) argument).getResourceEntryList();
+                // An argument without resource entries has no concept ID; an empty list would make get(0) throw.
+                if (resourceEntryList == null || resourceEntryList.size() == 0) {
+                    continue;
+                }
+                // Only the first resource entry supplies the argument's concept ID.
+                ResourceEntry resourceEntry = (ResourceEntry) resourceEntryList.get(0);
+                // With a global source configured, the per-entry source is deliberately left out.
+                unificationArgs.add(globalSource == null
+                        ? new UnificationArgument(resourceEntry.getEntryId(), resourceEntry.getSource())
+                        : new UnificationArgument(resourceEntry.getEntryId()));
+            }
+            // Relations with fewer than two distinct arguments are not imported.
+            if (unificationArgs.size() > 1) {
+                UnificationRelation rel = new UnificationRelation(fr.getRootRelation().getSpecificType(), unificationArgs);
+                relationCounts.computeIfAbsent(rel.getRelationType(), k -> HashMultiset.create()).add(rel);
+            }
+        }
+        return relationCounts;
+    }
+
+    private static class UnificationRelation { // value object: relations with equal type and arguments are equal, so a Multiset can count them
+        private final String relationType;
+        private final Set<UnificationArgument> args;
+
+        public UnificationRelation(String relationType, Set<UnificationArgument> args) {
+            this.relationType = relationType;
+            this.args = args;
+        }
+
+        public ImportIERelation toImportRelation(int count) {
+            // The lambda is a real Iterable; casting the stream's Iterator to Iterable throws a ClassCastException at runtime.
+            return ImportIERelation.of(count, (Iterable<ImportIERelationArgument>) () -> args.stream().map(UnificationArgument::toImportArgument).iterator());
+        }
+
+        public String getRelationType() {
+            return relationType;
+        }
+
+        public Set<UnificationArgument> getArgs() {
+            return args;
+        }
+
+        @Override
+        public boolean equals(Object o) {
+            if (this == o) return true;
+            if (o == null || getClass() != o.getClass()) return false;
+            UnificationRelation that = (UnificationRelation) o;
+            return Objects.equals(relationType, that.relationType) && args.equals(that.args); // null-safe type, like hashCode()
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hash(relationType, args);
+        }
+    }
+
+    private static class UnificationArgument { // value object for one relation argument: a concept ID and, optionally, its source
+        private final String id;
+        private final String source;
+
+        public UnificationArgument(String id) {
+            this(id, null);
+        }
+
+        public UnificationArgument(String id, String source) {
+            this.id = id;
+            this.source = source;
+        }
+
+        public ImportIERelationArgument toImportArgument() {
+            return source != null ? ImportIERelationArgument.of(id, source) : ImportIERelationArgument.of(id);
+        }
+
+        @Override
+        public boolean equals(Object o) {
+            if (this == o) return true;
+            if (o == null || getClass() != o.getClass()) return false;
+            UnificationArgument that = (UnificationArgument) o;
+            // Objects.equals on both fields keeps equals() null-safe, consistent with hashCode().
+            return Objects.equals(id, that.id) && Objects.equals(source, that.source);
+        }
+
+        @Override
+        public int hashCode() {
+            return Objects.hash(id, source);
+        }
+
+        public String getId() {
+            return id;
+        }
+
+        public String getSource() {
+            return source;
+        }
+    }
+
+
+}
diff --git a/jcore-neo4j-relations-consumer/src/main/resources/de/julielab/jcore/consumer/neo4jrelations/desc/PLACEHOLDER b/jcore-neo4j-relations-consumer/src/main/resources/de/julielab/jcore/consumer/neo4jrelations/desc/PLACEHOLDER
new file mode 100644
index 000000000..9f6c6ddb5
--- /dev/null
+++ b/jcore-neo4j-relations-consumer/src/main/resources/de/julielab/jcore/consumer/neo4jrelations/desc/PLACEHOLDER
@@ -0,0 +1 @@
+The actual descriptor must be created by UIMA fit.
diff --git a/jcore-neo4j-relations-consumer/src/main/resources/de/julielab/jcore/consumer/neo4jrelations/desc/jcore-neo4j-relations-consumer.xml b/jcore-neo4j-relations-consumer/src/main/resources/de/julielab/jcore/consumer/neo4jrelations/desc/jcore-neo4j-relations-consumer.xml
new file mode 100644
index 000000000..a0eadea2f
--- /dev/null
+++ b/jcore-neo4j-relations-consumer/src/main/resources/de/julielab/jcore/consumer/neo4jrelations/desc/jcore-neo4j-relations-consumer.xml
@@ -0,0 +1,21 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>true</primitive>
+  <annotatorImplementationName>de.julielab.jcore.consumer.neo4jrelations.Neo4jRelationsConsumer</annotatorImplementationName>
+  <analysisEngineMetaData>
+    <name>JCoRe Neo4j Relations Consumer</name>
+    <description/>
+    <version>2.3.0-SNAPSHOT</version>
+    <vendor>JULIE Lab Jena, Germany</vendor>
+    <configurationParameters/>
+    <configurationParameterSettings/>
+    <typeSystemDescription/>
+    <capabilities/>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </analysisEngineMetaData>
+</analysisEngineDescription>
diff --git a/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerIntegrationTest.java b/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerIntegrationTest.java
new file mode 100644
index 000000000..6c853ecdd
--- /dev/null
+++ b/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerIntegrationTest.java
@@ -0,0 +1,31 @@
+
+package de.julielab.jcore.consumer.neo4jrelations;
+
+import de.julielab.neo4j.plugins.Indexes;
+import de.julielab.neo4j.plugins.concepts.ConceptManager;
+import org.apache.uima.UIMAException;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.jcas.JCas;
+import org.junit.Rule;
+import org.junit.Test;
+import org.neo4j.harness.junit.rule.Neo4jRule;
+
+
+/**
+ * Integration test for jcore-neo4j-relations-consumer. The rule below provides a Neo4j test instance with ConceptManager mounted at "/concepts".
+ * The test body currently only creates a JCas; nothing is sent to Neo4j or asserted yet.
+ */
+public class Neo4jRelationsConsumerIntegrationTest {
+    @Rule
+    public Neo4jRule neo4j = new Neo4jRule()
+            .withUnmanagedExtension("/concepts", ConceptManager.class).withFixture(graphDatabaseService -> {
+                new Indexes(null).createIndexes(graphDatabaseService);
+                return null;
+            });
+
+    @Test
+    public void insertEventMentions() throws UIMAException {
+        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-meta-pubmed-types");
+
+    }
+}
diff --git a/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerTest.java b/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerTest.java
new file mode 100644
index 000000000..41d24b178
--- /dev/null
+++ b/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerTest.java
@@ -0,0 +1,28 @@
+
+package de.julielab.jcore.consumer.neo4jrelations;
+
+import org.apache.uima.UIMAException;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.jcas.JCas;
+import org.junit.Test;
+
+import java.io.IOException;
+
+
+/**
+ * Unit tests for jcore-neo4j-relations-consumer.
+ * So far the only test creates a JCas and the engine from its descriptor; nothing is processed or asserted yet.
+ */
+public class Neo4jRelationsConsumerTest {
+
+
+    @Test
+    public void insertEventMentions() throws UIMAException, IOException {
+        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-meta-pubmed-types");
+        AnalysisEngine engine = AnalysisEngineFactory.createEngine("de.julielab.jcore.consumer.neo4jrelations.desc.jcore-neo4j-relations-consumer", Neo4jRelationsConsumer.PARAM_URL, "");
+
+
+    }
+}
diff --git a/pom.xml b/pom.xml
index 5687e86e0..274a990dd 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,112 +1,221 @@
 <?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
-    <modelVersion>4.0.0</modelVersion>
-    <parent>
-        <groupId>de.julielab</groupId>
-        <artifactId>jcore-parent</artifactId>
-        <version>2.5.1</version>
-    </parent>
-    <artifactId>jcore-base</artifactId>
-    <packaging>pom</packaging>
-    <name>JCoRe Base</name>
-    <description>The POM for the JCoRe Base projects.</description>
-    <version>2.5.1-SNAPSHOT</version>
-    <organization>
-        <name>JULIE Lab, Germany</name>
-        <url>http://www.julielab.de</url>
-    </organization>
-    <licenses>
-        <license>
-            <name>BSD-2-Clause</name>
-            <url>https://opensource.org/licenses/BSD-2-Clause</url>
-        </license>
-    </licenses>
-    <url>https://github.com/JULIELab/jcore-base</url>
-    <dependencies>
-        <dependency>
-            <groupId>org.apache.uima</groupId>
-            <artifactId>uimaj-core</artifactId>
-            <version>${uima-version}</version>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.uima</groupId>
-            <artifactId>uimafit-core</artifactId>
-            <version>${uimafit-version}</version>
-        </dependency>
-    </dependencies>
-    <modules>
-        <module>jcore-ace-reader</module>
-        <module>jcore-acronym-ae</module>
-        <module>jcore-banner-ae</module>
-        <module>jcore-biolemmatizer-ae</module>
-        <module>jcore-bionlpformat-consumer</module>
-        <module>jcore-bionlpformat-reader</module>
-        <module>jcore-biosem-ae</module>
-        <module>jcore-conll-consumer</module>
-        <module>jcore-coordination-baseline-ae</module>
-        <module>jcore-ct-reader</module>
-        <module>jcore-descriptor-creator</module>
-        <module>jcore-dta-reader</module>
-        <module>jcore-ec-code-ae</module>
-        <module>jcore-elasticsearch-consumer</module>
-        <module>jcore-embedding-writer</module>
-        <module>jcore-event-flattener-ae</module>
-        <module>jcore-feature-value-replacement-ae</module>
-        <module>jcore-file-reader</module>
-        <module>jcore-flair-ner-ae</module>
-        <module>jcore-iexml-consumer</module>
-        <module>jcore-iexml-reader</module>
-        <module>jcore-ign-reader</module>
-        <module>jcore-iob-consumer</module>
-        <module>jcore-jnet-ae</module>
-        <module>jcore-jpos-ae</module>
-        <module>jcore-jsbd-ae</module>
-        <module>jcore-jtbd-ae</module>
-        <module>jcore-julielab-entity-evaluator-consumer</module>
-        <module>jcore-likelihood-assignment-ae</module>
-        <module>jcore-likelihood-detection-ae</module>
-        <module>jcore-lingpipegazetteer-ae</module>
-        <module>jcore-lingpipe-porterstemmer-ae</module>
-        <module>jcore-lingscope-ae</module>
-        <module>jcore-linnaeus-species-ae</module>
-        <module>jcore-mantra-xml-types</module>
-        <module>jcore-medxn-ae</module>
-        <module>jcore-msdoc-reader</module>
-        <module>jcore-mstparser-ae</module>
-        <module>jcore-muc7-reader</module>
-        <module>jcore-mutationfinder-ae</module>
-        <module>jcore-opennlp-chunk-ae</module>
-        <module>jcore-opennlp-parser-ae</module>
-        <module>jcore-opennlp-postag-ae</module>
-        <module>jcore-opennlp-sentence-ae</module>
-        <module>jcore-opennlp-token-ae</module>
-        <module>jcore-pmc-reader</module>
-        <module>jcore-pubtator-reader</module>
-        <module>jcore-stanford-lemmatizer-ae</module>
-        <module>jcore-topic-indexing-ae</module>
-        <module>jcore-topics-writer</module>
-        <module>jcore-txt-consumer</module>
-        <module>jcore-types</module>
-        <module>jcore-utilities</module>
-        <module>jcore-xml-mapper</module>
-        <module>jcore-xml-reader</module>
-        <module>jcore-xmi-reader</module>
-        <module>jcore-xmi-writer</module>
-        <module>jedis-parent</module>
-        <module>jcore-db-checkpoint-ae</module>
-        <module>jcore-ppd-writer</module>
-        <module>jcore-bc2gmformat-writer</module>
-        <module>jcore-bc2gm-reader</module>
-        <module>jcore-annotation-adder-ae</module>
-        <module>jcore-flair-token-embedding-ae</module>
-        <module>jcore-line-multiplier</module>
-        <module>jcore-cord19-reader</module>
-    </modules>
-    <scm>
-        <connection>scm:git:https://github.com/JULIELab/jcore-base
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+      
+  <modelVersion>4.0.0</modelVersion>
+      
+  <parent>
+            
+    <groupId>de.julielab</groupId>
+            
+    <artifactId>jcore-parent</artifactId>
+            
+    <version>2.5.1</version>
+        
+  </parent>
+      
+  <artifactId>jcore-base</artifactId>
+      
+  <packaging>pom</packaging>
+      
+  <name>JCoRe Base</name>
+      
+  <description>The POM for the JCoRe Base projects.</description>
+      
+  <version>2.5.1-SNAPSHOT</version>
+      
+  <organization>
+            
+    <name>JULIE Lab, Germany</name>
+            
+    <url>http://www.julielab.de</url>
+        
+  </organization>
+      
+  <licenses>
+            
+    <license>
+                  
+      <name>BSD-2-Clause</name>
+                  
+      <url>https://opensource.org/licenses/BSD-2-Clause</url>
+              
+    </license>
+        
+  </licenses>
+      
+  <url>https://github.com/JULIELab/jcore-base</url>
+      
+  <dependencies>
+            
+    <dependency>
+                  
+      <groupId>org.apache.uima</groupId>
+                  
+      <artifactId>uimaj-core</artifactId>
+                  
+      <version>${uima-version}</version>
+              
+    </dependency>
+            
+    <dependency>
+                  
+      <groupId>org.apache.uima</groupId>
+                  
+      <artifactId>uimafit-core</artifactId>
+                  
+      <version>${uimafit-version}</version>
+              
+    </dependency>
+        
+  </dependencies>
+      
+  <modules>
+            
+    <module>jcore-ace-reader</module>
+            
+    <module>jcore-acronym-ae</module>
+            
+    <module>jcore-banner-ae</module>
+            
+    <module>jcore-biolemmatizer-ae</module>
+            
+    <module>jcore-bionlpformat-consumer</module>
+            
+    <module>jcore-bionlpformat-reader</module>
+            
+    <module>jcore-biosem-ae</module>
+            
+    <module>jcore-conll-consumer</module>
+            
+    <module>jcore-coordination-baseline-ae</module>
+            
+    <module>jcore-ct-reader</module>
+            
+    <module>jcore-descriptor-creator</module>
+            
+    <module>jcore-dta-reader</module>
+            
+    <module>jcore-ec-code-ae</module>
+            
+    <module>jcore-elasticsearch-consumer</module>
+            
+    <module>jcore-embedding-writer</module>
+            
+    <module>jcore-event-flattener-ae</module>
+            
+    <module>jcore-feature-value-replacement-ae</module>
+            
+    <module>jcore-file-reader</module>
+            
+    <module>jcore-flair-ner-ae</module>
+            
+    <module>jcore-iexml-consumer</module>
+            
+    <module>jcore-iexml-reader</module>
+            
+    <module>jcore-ign-reader</module>
+            
+    <module>jcore-iob-consumer</module>
+            
+    <module>jcore-jnet-ae</module>
+            
+    <module>jcore-jpos-ae</module>
+            
+    <module>jcore-jsbd-ae</module>
+            
+    <module>jcore-jtbd-ae</module>
+            
+    <module>jcore-julielab-entity-evaluator-consumer</module>
+            
+    <module>jcore-likelihood-assignment-ae</module>
+            
+    <module>jcore-likelihood-detection-ae</module>
+            
+    <module>jcore-lingpipegazetteer-ae</module>
+            
+    <module>jcore-lingpipe-porterstemmer-ae</module>
+            
+    <module>jcore-lingscope-ae</module>
+            
+    <module>jcore-linnaeus-species-ae</module>
+            
+    <module>jcore-mantra-xml-types</module>
+            
+    <module>jcore-medxn-ae</module>
+            
+    <module>jcore-msdoc-reader</module>
+            
+    <module>jcore-mstparser-ae</module>
+            
+    <module>jcore-muc7-reader</module>
+            
+    <module>jcore-mutationfinder-ae</module>
+            
+    <module>jcore-opennlp-chunk-ae</module>
+            
+    <module>jcore-opennlp-parser-ae</module>
+            
+    <module>jcore-opennlp-postag-ae</module>
+            
+    <module>jcore-opennlp-sentence-ae</module>
+            
+    <module>jcore-opennlp-token-ae</module>
+            
+    <module>jcore-pmc-reader</module>
+            
+    <module>jcore-pubtator-reader</module>
+            
+    <module>jcore-stanford-lemmatizer-ae</module>
+            
+    <module>jcore-topic-indexing-ae</module>
+            
+    <module>jcore-topics-writer</module>
+            
+    <module>jcore-txt-consumer</module>
+            
+    <module>jcore-types</module>
+            
+    <module>jcore-utilities</module>
+            
+    <module>jcore-xml-mapper</module>
+            
+    <module>jcore-xml-reader</module>
+            
+    <module>jcore-xmi-reader</module>
+            
+    <module>jcore-xmi-writer</module>
+            
+    <module>jedis-parent</module>
+            
+    <module>jcore-db-checkpoint-ae</module>
+            
+    <module>jcore-ppd-writer</module>
+            
+    <module>jcore-bc2gmformat-writer</module>
+            
+    <module>jcore-bc2gm-reader</module>
+            
+    <module>jcore-annotation-adder-ae</module>
+            
+    <module>jcore-flair-token-embedding-ae</module>
+            
+    <module>jcore-line-multiplier</module>
+            
+    <module>jcore-cord19-reader</module>
+          
+    <module>jcore-neo4j-relations-consumer</module>
+      
+  </modules>
+      
+  <scm>
+            
+    <connection>scm:git:https://github.com/JULIELab/jcore-base
         </connection>
-        <developerConnection>scm:git:https://github.com/JULIELab/jcore-base</developerConnection>
-        <url>scm:git:https://github.com/JULIELab/jcore-base</url>
-    </scm>
+            
+    <developerConnection>scm:git:https://github.com/JULIELab/jcore-base</developerConnection>
+            
+    <url>scm:git:https://github.com/JULIELab/jcore-base</url>
+        
+  </scm>
+  
 </project>

From 35789670af0f0fc8dd1ee6ead7831826cd780523 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 8 Jun 2020 17:09:26 +0200
Subject: [PATCH 003/269] Letting travis run for the 2.6 branch.

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 208b0219a..57daeceac 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -51,4 +51,4 @@ deploy:
   skip_cleanup: true
   on:
     all_branches: true
-    condition: $TRAVIS_BRANCH =~ ^v2.5|master$
\ No newline at end of file
+    condition: $TRAVIS_BRANCH =~ ^v2.6|master$
\ No newline at end of file

From 9f047ab0fa5489f64d2e36e77d2d1fe9d4e83418 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 9 Jun 2020 09:34:17 +0200
Subject: [PATCH 004/269] Version 2.6.0-SNAPSHOT. Neo4jRelationsConsumer unit
 tests working.

---
 jcore-ace-reader/component.meta               |   2 +-
 jcore-ace-reader/pom.xml                      |   2 +-
 .../reader/ace/desc/jcore-ace-reader.xml      |   2 +-
 jcore-acronym-ae/component.meta               |   2 +-
 jcore-acronym-ae/pom.xml                      |   2 +-
 .../acronymtagger/desc/jcore-acronym-ae.xml   |   2 +-
 .../desc/JulesToolsAEDescriptor.xml           |   2 +-
 .../desc/jcore-acronymtagger-test.xml         |   2 +-
 .../types/StemNetSemanticsTypeSystem.xml      |   2 +-
 .../acronyms/desc/jcore-acronym-writer.xml    |   2 +-
 jcore-annotation-adder-ae/component.meta      |   2 +-
 jcore-annotation-adder-ae/pom.xml             |   2 +-
 .../desc/jcore-annotation-adder-ae.xml        |   2 +-
 jcore-banner-ae/component.meta                |   2 +-
 jcore-banner-ae/pom.xml                       |   2 +-
 .../jcore/ae/banner/desc/jcore-banner-ae.xml  |   2 +-
 .../src/main/resources/desc/BANNERAE.xml      |   2 +-
 .../src/main/resources/desc/bannerTS.xml      |   2 +-
 jcore-bc2gm-reader/component.meta             |   2 +-
 jcore-bc2gm-reader/pom.xml                    |   2 +-
 .../reader/bc2gm/desc/jcore-bc2gm-reader.xml  |   2 +-
 jcore-bc2gmformat-writer/component.meta       |   2 +-
 jcore-bc2gmformat-writer/pom.xml              |   2 +-
 .../desc/jcore-bc2gmformat-writer.xml         |   2 +-
 jcore-biolemmatizer-ae/component.meta         |   2 +-
 jcore-biolemmatizer-ae/pom.xml                |   2 +-
 .../desc/jcore-biolemmatizer-ae.xml           |   2 +-
 jcore-bionlpformat-consumer/component.meta    |   2 +-
 jcore-bionlpformat-consumer/pom.xml           |   2 +-
 ...pformat-consumer-biomedical-sharedtask.xml |   2 +-
 .../jcore-bionlpformat-consumer-medical.xml   |   2 +-
 .../jcore-bionlpformat-consumer-segment.xml   |   2 +-
 .../test/resources/types/jcore-all-types.xml  |   2 +-
 .../types/jcore-semantics-biology-types.xml   |   2 +-
 jcore-bionlpformat-reader/component.meta      |   2 +-
 jcore-bionlpformat-reader/pom.xml             |   2 +-
 ...nlpformat-reader-biomedical-sharedtask.xml |   2 +-
 .../jcore-bionlpformat-reader-medical.xml     |   2 +-
 .../jcore-bionlpformat-reader-segment.xml     |   2 +-
 .../bionlpformat/desc/EventReaderTest.xml     |   2 +-
 jcore-biosem-ae/component.meta                |   2 +-
 jcore-biosem-ae/pom.xml                       |   6 +-
 jcore-conll-consumer/component.meta           |   2 +-
 jcore-conll-consumer/pom.xml                  |   2 +-
 .../conll/desc/jcore-conll-consumer.xml       |   2 +-
 jcore-coordination-baseline-ae/component.meta |   2 +-
 jcore-coordination-baseline-ae/pom.xml        |   2 +-
 ...core-coordination-baseline-ae-conjunct.xml |   2 +-
 ...-coordination-baseline-ae-coordination.xml |   2 +-
 .../jcore-coordination-baseline-ae-eee.xml    |   2 +-
 ...core-coordination-baseline-ae-ellipsis.xml |   2 +-
 .../resources/desc/ConjunctAnnotatorTest.xml  |   2 +-
 .../desc/CoordinationAnnotatorTest.xml        |   2 +-
 .../test/resources/desc/EEEAnnotatorTest.xml  |   2 +-
 .../resources/desc/EllipsisAnnotatorTest.xml  |   2 +-
 jcore-cord19-reader/component.meta            |   2 +-
 jcore-cord19-reader/pom.xml                   |   2 +-
 .../desc/jcore-cord19-multiplier-reader.xml   |   2 +-
 .../cord19/desc/jcore-cord19-multiplier.xml   |   2 +-
 jcore-ct-reader/component.meta                |   2 +-
 jcore-ct-reader/pom.xml                       |   2 +-
 .../ct/desc/jcore-clinicaltrials-reader.xml   |   2 +-
 jcore-db-checkpoint-ae/component.meta         |   2 +-
 jcore-db-checkpoint-ae/pom.xml                |   2 +-
 .../desc/jcore-db-checkpoint-ae.xml           |   2 +-
 .../desc/jcore-db-checkpoint-consumer.xml     |   2 +-
 jcore-db-reader/component.meta                |   2 +-
 jcore-db-reader/pom.xml                       |   4 +-
 .../db/desc/jcore-db-multiplier-reader.xml    |   2 +-
 jcore-descriptor-creator/pom.xml              |   2 +-
 .../de.julielab.jcore.ae.testae.TestAE.xml    |   2 +-
 ...ore.consumer.testconsumer.Testconsumer.xml |   2 +-
 ...ltiplier.testmultiplier.TestMultiplier.xml |   2 +-
 ...lab.jcore.reader.testreader.TestReader.xml |   2 +-
 jcore-dta-reader/component.meta               |   2 +-
 jcore-dta-reader/pom.xml                      |   2 +-
 .../reader/dta/desc/jcore-dta-reader.xml      |   2 +-
 jcore-ec-code-ae/component.meta               |   2 +-
 jcore-ec-code-ae/pom.xml                      |   2 +-
 jcore-elasticsearch-consumer/component.meta   |   2 +-
 jcore-elasticsearch-consumer/pom.xml          |   2 +-
 .../es/desc/jcore-elasticsearch-consumer.xml  |   2 +-
 .../consumer/es/desc/jcore-json-writer.xml    |   2 +-
 .../julielab/jcore/consumer/es/testTypes.xml  |   2 +-
 jcore-embedding-writer/component.meta         |   2 +-
 jcore-embedding-writer/pom.xml                |   2 +-
 .../ew/desc/jcore-embedding-writer.xml        |   2 +-
 jcore-event-flattener-ae/component.meta       |   2 +-
 jcore-event-flattener-ae/pom.xml              |   2 +-
 .../desc/jcore-event-flattener-ae.xml         |   2 +-
 .../component.meta                            |   2 +-
 jcore-feature-value-replacement-ae/pom.xml    |   2 +-
 .../jcore-feature-value-replacement-ae.xml    |   2 +-
 jcore-file-reader/component.meta              |   2 +-
 jcore-file-reader/pom.xml                     |   2 +-
 .../reader/file/desc/jcore-file-reader.xml    |   2 +-
 jcore-flair-ner-ae/component.meta             |   2 +-
 jcore-flair-ner-ae/pom.xml                    |   4 +-
 .../ae/flairner/desc/jcore-flair-ner-ae.xml   |   2 +-
 jcore-flair-token-embedding-ae/component.meta |   2 +-
 jcore-flair-token-embedding-ae/pom.xml        |   2 +-
 .../desc/jcore-flair-token-embedding-ae.xml   |   2 +-
 jcore-iexml-consumer/component.meta           |   2 +-
 jcore-iexml-consumer/pom.xml                  |   4 +-
 .../iexml/desc/jcore-iexml-consumer.xml       |   2 +-
 jcore-iexml-reader/component.meta             |   2 +-
 jcore-iexml-reader/pom.xml                    |   4 +-
 .../reader/iexml/desc/jcore-iexml-reader.xml  |   2 +-
 jcore-ign-reader/component.meta               |   2 +-
 jcore-ign-reader/pom.xml                      |   2 +-
 .../reader/ign/desc/jcore-ign-reader.xml      |   2 +-
 jcore-iob-consumer/component.meta             |   2 +-
 jcore-iob-consumer/pom.xml                    |   2 +-
 .../cas2iob/desc/jcore-iob-consumer.xml       |   2 +-
 .../cas2iob/desc/ToIOBConsumerTest.xml        |   2 +-
 .../consumer/cas2iob/types/TestTypeSystem.xml |   2 +-
 .../jcore/ae/jemas/desc/jcore-jemas-ae.xml    |   2 +-
 jcore-jnet-ae/component.meta                  |   2 +-
 jcore-jnet-ae/pom.xml                         |   2 +-
 .../jcore/ae/jnet/desc/jcore-jnet-ae.xml      |   2 +-
 .../ae/jnet/uima/EntityAnnotatorTest.xml      |   2 +-
 .../jcore/ae/jnet/uima/tsDescriptor.xml       |   2 +-
 jcore-jpos-ae/component.meta                  |   2 +-
 jcore-jpos-ae/pom.xml                         |   2 +-
 .../jcore/ae/jpos/desc/jcore-jpos.xml         |   2 +-
 .../test/resources/POSTagAnnotatorTest.xml    |   2 +-
 jcore-jsbd-ae/component.meta                  |   2 +-
 jcore-jsbd-ae/pom.xml                         |   2 +-
 .../jcore/ae/jsbd/desc/jcore-jsbd-ae.xml      |   2 +-
 .../ae/jsbd/desc/SentenceAnnotatorTest.xml    |   2 +-
 .../SentenceAnnotator_with-scope_Test.xml     |   2 +-
 .../ae/jsbd/desc/paragraph-scope-type.xml     |   2 +-
 jcore-jtbd-ae/component.meta                  |   2 +-
 jcore-jtbd-ae/pom.xml                         |   2 +-
 .../jcore/ae/jtbd/desc/jcore-jtbd.xml         |   2 +-
 .../jcore/ae/jtbd/desc/TokenAnnotatorTest.xml |   2 +-
 .../component.meta                            |   2 +-
 .../pom.xml                                   |   2 +-
 ...ore-julielab-entity-evaluator-consumer.xml |   2 +-
 jcore-likelihood-assignment-ae/component.meta |   2 +-
 jcore-likelihood-assignment-ae/pom.xml        |   2 +-
 .../desc/jcore-likelihood-assignment-ae.xml   |   2 +-
 jcore-likelihood-detection-ae/component.meta  |   2 +-
 jcore-likelihood-detection-ae/pom.xml         |   2 +-
 .../desc/jcore-likelihood-detection-ae.xml    |   2 +-
 jcore-line-multiplier/component.meta          |   2 +-
 jcore-line-multiplier/pom.xml                 |   2 +-
 .../line/desc/jcore-line-multiplier-ae.xml    |   2 +-
 .../line/desc/jcore-line-multiplier-ae.xml    |   2 +-
 .../component.meta                            |   2 +-
 jcore-lingpipe-porterstemmer-ae/pom.xml       |   2 +-
 .../desc/jcore-lingpipe-porterstemmer-ae.xml  |   2 +-
 jcore-lingpipegazetteer-ae/component.meta     |   2 +-
 jcore-lingpipegazetteer-ae/pom.xml            |   2 +-
 ...ipe-gazetteer-ae-configurable-resource.xml |   2 +-
 .../desc/jcore-lingpipe-gazetteer-ae.xml      |   2 +-
 .../ApproxGazetteerAnnotatorTest.xml          |   2 +-
 .../resources/ExactGazetteerAnnotatorTest.xml |   2 +-
 jcore-lingscope-ae/component.meta             |   2 +-
 jcore-lingscope-ae/pom.xml                    |   2 +-
 .../ae/lingscope/desc/jcore-lingscope-ae.xml  |   2 +-
 jcore-linnaeus-species-ae/component.meta      |   2 +-
 jcore-linnaeus-species-ae/pom.xml             |   2 +-
 .../ae/linnaeus/desc/jcore-linnaeus-ae.xml    |   2 +-
 jcore-mantra-xml-types/pom.xml                |   2 +-
 jcore-medxn-ae/component.meta                 |   2 +-
 jcore-medxn-ae/pom.xml                        |   2 +-
 .../jcore/ae/medxn/desc/MedNormAE.xml         |   2 +-
 .../desc/jcore-medxn-ae-attributes-german.xml |   2 +-
 .../desc/jcore-medxn-ae-extractor-german.xml  |   2 +-
 jcore-msdoc-reader/component.meta             |   2 +-
 jcore-msdoc-reader/pom.xml                    |   2 +-
 .../reader/msdoc/desc/jcore-msdoc-reader.xml  |   2 +-
 jcore-mstparser-ae/component.meta             |   2 +-
 jcore-mstparser-ae/pom.xml                    |   2 +-
 .../ae/mstparser/desc/jcore-mstparser.xml     |   2 +-
 .../desc/MSTParserDescriptorTest.xml          |   2 +-
 jcore-muc7-reader/component.meta              |   2 +-
 jcore-muc7-reader/pom.xml                     |   2 +-
 .../reader/muc7/desc/jcore-muc7-reader.xml    |   2 +-
 .../reader/muc7/desc/jcore-muc7-reader.xml    |   2 +-
 jcore-mutationfinder-ae/component.meta        |   2 +-
 jcore-mutationfinder-ae/pom.xml               |   2 +-
 .../desc/jcore-mutationfinder-ae.xml          |   2 +-
 jcore-neo4j-relations-consumer/pom.xml        |  18 ++-
 .../Neo4jRelationsConsumer.java               |  15 ++-
 .../consumer/neo4jrelations/desc/PLACEHOLDER  |   1 -
 .../desc/jcore-neo4j-relations-consumer.xml   |  98 +++++++++++---
 .../Neo4jRelationsConsumerTest.java           | 126 +++++++++++++++++-
 jcore-opennlp-chunk-ae/component.meta         |   2 +-
 jcore-opennlp-chunk-ae/pom.xml                |   2 +-
 .../src/test/resources/ChunkAnnotatorTest.xml |   2 +-
 .../ChunkAnnotatorTestDefaultMappings.xml     |   2 +-
 jcore-opennlp-parser-ae/component.meta        |   2 +-
 jcore-opennlp-parser-ae/pom.xml               |   2 +-
 .../desc/jcore-opennlpparser.xml              |   2 +-
 .../desc/jcore-opennlpparser-test.xml         |   2 +-
 jcore-opennlp-postag-ae/component.meta        |   2 +-
 jcore-opennlp-postag-ae/pom.xml               |   2 +-
 .../desc/jcore-opennlppostag.xml              |   2 +-
 .../test/resources/PosTagAnnotatorTest.xml    |   2 +-
 jcore-opennlp-sentence-ae/component.meta      |   2 +-
 jcore-opennlp-sentence-ae/pom.xml             |   2 +-
 .../test/resources/SentenceAnnotatorTest.xml  |   2 +-
 jcore-opennlp-token-ae/component.meta         |   2 +-
 .../desc/TokenAnnotator.xml                   |   2 +-
 jcore-opennlp-token-ae/pom.xml                |   2 +-
 .../src/test/resources/TokenAnnotatorTest.xml |   2 +-
 jcore-pmc-reader/component.meta               |   2 +-
 jcore-pmc-reader/pom.xml                      |   2 +-
 .../pmc/desc/jcore-pmc-multiplier.xml         |   2 +-
 .../pmc/desc/jcore-pmc-multiplier-reader.xml  |   2 +-
 .../reader/pmc/desc/jcore-pmc-reader.xml      |   2 +-
 jcore-ppd-writer/component.meta               |   2 +-
 jcore-ppd-writer/pom.xml                      |   2 +-
 .../consumer/ppd/desc/jcore-ppd-writer.xml    |   2 +-
 jcore-pubtator-reader/component.meta          |   2 +-
 jcore-pubtator-reader/pom.xml                 |   2 +-
 .../pubtator/desc/jcore-pubtator-reader.xml   |   2 +-
 jcore-stanford-lemmatizer-ae/component.meta   |   2 +-
 jcore-stanford-lemmatizer-ae/pom.xml          |   2 +-
 .../lemma/desc/jcore-stanford-lemmatizer.xml  |   2 +-
 .../desc/jcore-stanford-lemmatizer-ae.xml     |   2 +-
 jcore-topic-indexing-ae/component.meta        |   2 +-
 jcore-topic-indexing-ae/pom.xml               |   4 +-
 .../desc/jcore-topic-indexing-ae.xml          |   2 +-
 jcore-topics-writer/component.meta            |   2 +-
 jcore-topics-writer/pom.xml                   |   2 +-
 .../topics/desc/jcore-topics-writer.xml       |   2 +-
 jcore-txt-consumer/component.meta             |   2 +-
 jcore-txt-consumer/pom.xml                    |   2 +-
 .../consumer/txt/desc/jcore-txt-consumer.xml  |   2 +-
 jcore-types/pom.xml                           |   2 +-
 .../jcore-dbtable-multiplier-types.xml        |   2 +-
 .../jcore-uri-multiplier-types.xml            |   2 +-
 .../types/extensions/jcore-ace-types.xml      |   2 +-
 .../jcore-document-meta-extension-types.xml   |   2 +-
 .../types/extensions/jcore-dta-types.xml      |   2 +-
 .../extensions/jcore-evaluation-types.xml     |   2 +-
 .../types/extensions/jcore-mantra-types.xml   |   2 +-
 .../types/extensions/jcore-medical-types.xml  |   2 +-
 .../types/extensions/jcore-mmax-types.xml     |   2 +-
 .../types/extensions/jcore-muc7-types.xml     |   2 +-
 .../extensions/jcore-semantics-ace-types.xml  |   2 +-
 .../jcore-semantics-bootstrep-types.xml       |   2 +-
 ...core-semantics-mention-extension-types.xml |   2 +-
 .../jcore-semantics-stemnet-types.xml         |   2 +-
 .../extensions/jcore-wikipedia-types.xml      |   2 +-
 .../jcore/types/jcore-affect-types.xml        |   2 +-
 .../julielab/jcore/types/jcore-all-types.xml  |   2 +-
 .../jcore/types/jcore-basic-types.xml         |   2 +-
 .../jcore/types/jcore-discourse-types.xml     |   2 +-
 ...core-document-meta-clinicaltrial-types.xml |   2 +-
 .../jcore-document-meta-pubmed-types.xml      |   2 +-
 .../jcore/types/jcore-document-meta-types.xml |   2 +-
 ...document-structure-clinicaltrial-types.xml |   2 +-
 .../jcore-document-structure-pubmed-types.xml |   2 +-
 .../types/jcore-document-structure-types.xml  |   2 +-
 .../jcore/types/jcore-morpho-syntax-types.xml |   2 +-
 .../types/jcore-semantics-biology-types.xml   |   2 +-
 .../types/jcore-semantics-concept-types.xml   |   2 +-
 .../types/jcore-semantics-mention-types.xml   |   2 +-
 .../priorities/jcore-type-priorities.xml      |   2 +-
 jcore-utilities/pom.xml                       |   2 +-
 .../src/test/resources/AETestDescriptor.xml   |   2 +-
 jcore-xmi-db-reader/component.meta            |   2 +-
 jcore-xmi-db-reader/pom.xml                   |   8 +-
 .../desc/jcore-xmi-db-multiplier-reader.xml   |   2 +-
 .../xmi/desc/jcore-xmi-db-multiplier.xml      |   2 +-
 .../reader/xmi/desc/jcore-xmi-db-reader.xml   |   2 +-
 jcore-xmi-db-writer/component.meta            |   2 +-
 jcore-xmi-db-writer/pom.xml                   |   4 +-
 .../consumer/xmi/desc/jcore-xmi-db-writer.xml |   2 +-
 jcore-xmi-reader/component.meta               |   2 +-
 jcore-xmi-reader/pom.xml                      |   2 +-
 .../reader/xmi/desc/jcore-xmi-reader.xml      |   2 +-
 jcore-xmi-writer/component.meta               |   2 +-
 jcore-xmi-writer/pom.xml                      |   2 +-
 .../consumer/xmi/desc/jcore-xmi-writer.xml    |   2 +-
 .../jcore/consumer/xmi/CasToXmiConsumer.xml   |   2 +-
 jcore-xml-db-reader/component.meta            |   2 +-
 jcore-xml-db-reader/pom.xml                   |   6 +-
 .../reader/xml/desc/jcore-xml-db-reader.xml   |   2 +-
 jcore-xml-mapper/pom.xml                      |   2 +-
 .../test/resources/XMLReaderDescriptor.xml    |   2 +-
 ...Descriptor_medline_Unicode_outside_BMP.xml |   2 +-
 ...aderDescriptor_medline_missingInputDir.xml |   2 +-
 ...XMLReaderDescriptor_medline_singleFile.xml |   2 +-
 ...MLReaderDescriptor_medline_singleFile2.xml |   2 +-
 jcore-xml-reader/component.meta               |   2 +-
 jcore-xml-reader/pom.xml                      |   4 +-
 .../reader/xml/desc/XMLMultiplierReader.xml   |   2 +-
 ...edlineReaderDescriptor_missingInputDir.xml |   2 +-
 .../test/resources/PubmedXMLMultiplier.xml    |   2 +-
 .../test/resources/XMLMultiplierReader.xml    |   2 +-
 jedis-parent/pom.xml                          |   2 +-
 pom.xml                                       |   2 +-
 297 files changed, 531 insertions(+), 339 deletions(-)
 delete mode 100644 jcore-neo4j-relations-consumer/src/main/resources/de/julielab/jcore/consumer/neo4jrelations/desc/PLACEHOLDER

diff --git a/jcore-ace-reader/component.meta b/jcore-ace-reader/component.meta
index 65d83f33b..0ed4db39b 100644
--- a/jcore-ace-reader/component.meta
+++ b/jcore-ace-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-ace-reader",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe ACE Reader"
 }
diff --git a/jcore-ace-reader/pom.xml b/jcore-ace-reader/pom.xml
index fad4ca485..fdf961ad1 100644
--- a/jcore-ace-reader/pom.xml
+++ b/jcore-ace-reader/pom.xml
@@ -13,7 +13,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-ace-reader/src/main/resources/de/julielab/jcore/reader/ace/desc/jcore-ace-reader.xml b/jcore-ace-reader/src/main/resources/de/julielab/jcore/reader/ace/desc/jcore-ace-reader.xml
index 6d7d29ff9..a1eae5b5b 100644
--- a/jcore-ace-reader/src/main/resources/de/julielab/jcore/reader/ace/desc/jcore-ace-reader.xml
+++ b/jcore-ace-reader/src/main/resources/de/julielab/jcore/reader/ace/desc/jcore-ace-reader.xml
@@ -5,7 +5,7 @@
     <processingResourceMetaData>
         <name>AceReader</name>
         <description>Descriptor automatically generated by uimaFIT</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
         <configurationParameters>
             <configurationParameter>
diff --git a/jcore-acronym-ae/component.meta b/jcore-acronym-ae/component.meta
index 4ccd014c0..5e9a4da4c 100644
--- a/jcore-acronym-ae/component.meta
+++ b/jcore-acronym-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-acronym-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe Acronym Tagger"
 }
diff --git a/jcore-acronym-ae/pom.xml b/jcore-acronym-ae/pom.xml
index df40261b4..dfd4fce45 100644
--- a/jcore-acronym-ae/pom.xml
+++ b/jcore-acronym-ae/pom.xml
@@ -14,7 +14,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-acronym-ae/src/main/resources/de/julielab/jcore/ae/acronymtagger/desc/jcore-acronym-ae.xml b/jcore-acronym-ae/src/main/resources/de/julielab/jcore/ae/acronymtagger/desc/jcore-acronym-ae.xml
index f31cada2f..2ca072f45 100755
--- a/jcore-acronym-ae/src/main/resources/de/julielab/jcore/ae/acronymtagger/desc/jcore-acronym-ae.xml
+++ b/jcore-acronym-ae/src/main/resources/de/julielab/jcore/ae/acronymtagger/desc/jcore-acronym-ae.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe AcronymAnnotator</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-acronym-ae/src/test/resources/de/julielab/jcore/ae/acronymtagger/desc/JulesToolsAEDescriptor.xml b/jcore-acronym-ae/src/test/resources/de/julielab/jcore/ae/acronymtagger/desc/JulesToolsAEDescriptor.xml
index 9aa0a7e09..1e2c24294 100644
--- a/jcore-acronym-ae/src/test/resources/de/julielab/jcore/ae/acronymtagger/desc/JulesToolsAEDescriptor.xml
+++ b/jcore-acronym-ae/src/test/resources/de/julielab/jcore/ae/acronymtagger/desc/JulesToolsAEDescriptor.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JulesToolsDescriptor</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor />
     <configurationParameters />
     <configurationParameterSettings />
diff --git a/jcore-acronym-ae/src/test/resources/de/julielab/jcore/ae/acronymtagger/desc/jcore-acronymtagger-test.xml b/jcore-acronym-ae/src/test/resources/de/julielab/jcore/ae/acronymtagger/desc/jcore-acronymtagger-test.xml
index 8e179d4c3..60c613aaf 100755
--- a/jcore-acronym-ae/src/test/resources/de/julielab/jcore/ae/acronymtagger/desc/jcore-acronymtagger-test.xml
+++ b/jcore-acronym-ae/src/test/resources/de/julielab/jcore/ae/acronymtagger/desc/jcore-acronymtagger-test.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe AcronymAnnotator</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-acronym-ae/src/test/resources/de/julielab/jcore/ae/acronymtagger/types/StemNetSemanticsTypeSystem.xml b/jcore-acronym-ae/src/test/resources/de/julielab/jcore/ae/acronymtagger/types/StemNetSemanticsTypeSystem.xml
index fd197d12f..5b37032f1 100644
--- a/jcore-acronym-ae/src/test/resources/de/julielab/jcore/ae/acronymtagger/types/StemNetSemanticsTypeSystem.xml
+++ b/jcore-acronym-ae/src/test/resources/de/julielab/jcore/ae/acronymtagger/types/StemNetSemanticsTypeSystem.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
 <name>StemNetSemanticsTypeSystem</name>
 <description />
-<version>2.5.1-SNAPSHOT</version>
+<version>2.6.0-SNAPSHOT</version>
         <vendor>http://www.julielab.de</vendor>
 <imports>
 <import location="JulieTypeSystem.xml" />
diff --git a/jcore-acronym-writer/src/main/resources/de/julielab/jcore/consumer/acronyms/desc/jcore-acronym-writer.xml b/jcore-acronym-writer/src/main/resources/de/julielab/jcore/consumer/acronyms/desc/jcore-acronym-writer.xml
index 5f3073b02..6659cbf31 100644
--- a/jcore-acronym-writer/src/main/resources/de/julielab/jcore/consumer/acronyms/desc/jcore-acronym-writer.xml
+++ b/jcore-acronym-writer/src/main/resources/de/julielab/jcore/consumer/acronyms/desc/jcore-acronym-writer.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe Acronym Writer</name>
         <description>Writes acronym annotation to a text file.</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <configurationParameters>
             <configurationParameter>
                 <name>OutputFile</name>
diff --git a/jcore-annotation-adder-ae/component.meta b/jcore-annotation-adder-ae/component.meta
index 500127938..3978e1017 100644
--- a/jcore-annotation-adder-ae/component.meta
+++ b/jcore-annotation-adder-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-annotation-adder-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe Annotation Adder"
 }
diff --git a/jcore-annotation-adder-ae/pom.xml b/jcore-annotation-adder-ae/pom.xml
index 1473a562b..a8f6ce3bd 100644
--- a/jcore-annotation-adder-ae/pom.xml
+++ b/jcore-annotation-adder-ae/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-annotation-adder-ae/src/main/resources/de/julielab/jcore/ae/annotationadder/desc/jcore-annotation-adder-ae.xml b/jcore-annotation-adder-ae/src/main/resources/de/julielab/jcore/ae/annotationadder/desc/jcore-annotation-adder-ae.xml
index fcd2c1d27..2a72b89f9 100644
--- a/jcore-annotation-adder-ae/src/main/resources/de/julielab/jcore/ae/annotationadder/desc/jcore-annotation-adder-ae.xml
+++ b/jcore-annotation-adder-ae/src/main/resources/de/julielab/jcore/ae/annotationadder/desc/jcore-annotation-adder-ae.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe Annotation Adder</name>
         <description>This component helps to import annotations made on the exact CAS document text by an external process back into the CAS. To this end, the component is prepared to read several data formats. Currently, simple offset-based annotations are supported with configurable UIMA types. The component supports character and token based offsets.</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <configurationParameters>
             <configurationParameter>
                 <name>OffsetMode</name>
diff --git a/jcore-banner-ae/component.meta b/jcore-banner-ae/component.meta
index 8785baa0c..2a01d6ff1 100644
--- a/jcore-banner-ae/component.meta
+++ b/jcore-banner-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-banner-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe Banner"
 }
diff --git a/jcore-banner-ae/pom.xml b/jcore-banner-ae/pom.xml
index 139a33c03..9e47d8857 100644
--- a/jcore-banner-ae/pom.xml
+++ b/jcore-banner-ae/pom.xml
@@ -66,7 +66,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <relativePath>..</relativePath>
     </parent>
     <licenses>
diff --git a/jcore-banner-ae/src/main/resources/de/julielab/jcore/ae/banner/desc/jcore-banner-ae.xml b/jcore-banner-ae/src/main/resources/de/julielab/jcore/ae/banner/desc/jcore-banner-ae.xml
index 844073c9e..b98b5f42f 100644
--- a/jcore-banner-ae/src/main/resources/de/julielab/jcore/ae/banner/desc/jcore-banner-ae.xml
+++ b/jcore-banner-ae/src/main/resources/de/julielab/jcore/ae/banner/desc/jcore-banner-ae.xml
@@ -5,7 +5,7 @@
   <analysisEngineMetaData>
     <name>jcore-banner-ae</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-banner-ae/src/main/resources/desc/BANNERAE.xml b/jcore-banner-ae/src/main/resources/desc/BANNERAE.xml
index 28c2a1499..05b35368f 100644
--- a/jcore-banner-ae/src/main/resources/desc/BANNERAE.xml
+++ b/jcore-banner-ae/src/main/resources/desc/BANNERAE.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>BANNERAE</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-banner-ae/src/main/resources/desc/bannerTS.xml b/jcore-banner-ae/src/main/resources/desc/bannerTS.xml
index d25adc102..70aaf0715 100644
--- a/jcore-banner-ae/src/main/resources/desc/bannerTS.xml
+++ b/jcore-banner-ae/src/main/resources/desc/bannerTS.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>bannerTS</name>
   <description>basic typesystem started by sid</description>
-  <version>2.5.1-SNAPSHOT</version>
+  <version>2.6.0-SNAPSHOT</version>
         <vendor />
   <types>
     <typeDescription>
diff --git a/jcore-bc2gm-reader/component.meta b/jcore-bc2gm-reader/component.meta
index 748123c36..3b60c95ed 100644
--- a/jcore-bc2gm-reader/component.meta
+++ b/jcore-bc2gm-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-bc2gm-reader",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe BioCreative II Gene Mention Reader"
 }
diff --git a/jcore-bc2gm-reader/pom.xml b/jcore-bc2gm-reader/pom.xml
index 1ec0602a9..f8579d215 100644
--- a/jcore-bc2gm-reader/pom.xml
+++ b/jcore-bc2gm-reader/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-bc2gm-reader/src/main/resources/de/julielab/jcore/reader/bc2gm/desc/jcore-bc2gm-reader.xml b/jcore-bc2gm-reader/src/main/resources/de/julielab/jcore/reader/bc2gm/desc/jcore-bc2gm-reader.xml
index 04e62abd2..b3b40d26c 100644
--- a/jcore-bc2gm-reader/src/main/resources/de/julielab/jcore/reader/bc2gm/desc/jcore-bc2gm-reader.xml
+++ b/jcore-bc2gm-reader/src/main/resources/de/julielab/jcore/reader/bc2gm/desc/jcore-bc2gm-reader.xml
@@ -5,7 +5,7 @@
     <processingResourceMetaData>
         <name>JCoRe BioCreative II Gene Mention reader</name>
         <description>This component reads gene annotated sentences in the BioCreative II Gene Mention challenge format. Each CAS will contain one annotated sentence.</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <configurationParameters>
             <configurationParameter>
                 <name>SentencesFile</name>
diff --git a/jcore-bc2gmformat-writer/component.meta b/jcore-bc2gmformat-writer/component.meta
index 384a54b21..2b7c90e41 100644
--- a/jcore-bc2gmformat-writer/component.meta
+++ b/jcore-bc2gmformat-writer/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-bc2gmformat-writer",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe BioCreative II Gene Mention Format Writer"
 }
diff --git a/jcore-bc2gmformat-writer/pom.xml b/jcore-bc2gmformat-writer/pom.xml
index c68e9f170..8092a37ee 100644
--- a/jcore-bc2gmformat-writer/pom.xml
+++ b/jcore-bc2gmformat-writer/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-bc2gmformat-writer/src/main/resources/de/julielab/jcore/consumer/bc2gmformat/desc/jcore-bc2gmformat-writer.xml b/jcore-bc2gmformat-writer/src/main/resources/de/julielab/jcore/consumer/bc2gmformat/desc/jcore-bc2gmformat-writer.xml
index 2e122f8b6..811375d76 100644
--- a/jcore-bc2gmformat-writer/src/main/resources/de/julielab/jcore/consumer/bc2gmformat/desc/jcore-bc2gmformat-writer.xml
+++ b/jcore-bc2gmformat-writer/src/main/resources/de/julielab/jcore/consumer/bc2gmformat/desc/jcore-bc2gmformat-writer.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe BioCreative II Gene Mention Format writer</name>
         <description>This component writes gene annotations in the CAS to the format employed by the BioCreative II Gene Mention challenge.</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <configurationParameters>
             <configurationParameter>
                 <name>OutputDirectory</name>
diff --git a/jcore-biolemmatizer-ae/component.meta b/jcore-biolemmatizer-ae/component.meta
index 66fd947c5..2b698fcb5 100644
--- a/jcore-biolemmatizer-ae/component.meta
+++ b/jcore-biolemmatizer-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-biolemmatizer-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe BioLemmatizer"
 }
diff --git a/jcore-biolemmatizer-ae/pom.xml b/jcore-biolemmatizer-ae/pom.xml
index bf56276d0..241617304 100644
--- a/jcore-biolemmatizer-ae/pom.xml
+++ b/jcore-biolemmatizer-ae/pom.xml
@@ -8,7 +8,7 @@
 	<parent>
 		<groupId>de.julielab</groupId>
 		<artifactId>jcore-base</artifactId>
-		<version>2.5.1-SNAPSHOT</version>
+		<version>2.6.0-SNAPSHOT</version>
 	</parent>
 
 	<dependencies>
diff --git a/jcore-biolemmatizer-ae/src/main/resources/de/julielab/jcore/ae/biolemmatizer/desc/jcore-biolemmatizer-ae.xml b/jcore-biolemmatizer-ae/src/main/resources/de/julielab/jcore/ae/biolemmatizer/desc/jcore-biolemmatizer-ae.xml
index 27b446003..137eb219c 100644
--- a/jcore-biolemmatizer-ae/src/main/resources/de/julielab/jcore/ae/biolemmatizer/desc/jcore-biolemmatizer-ae.xml
+++ b/jcore-biolemmatizer-ae/src/main/resources/de/julielab/jcore/ae/biolemmatizer/desc/jcore-biolemmatizer-ae.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>BioLemmatizer</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters />
     <configurationParameterSettings />
diff --git a/jcore-bionlpformat-consumer/component.meta b/jcore-bionlpformat-consumer/component.meta
index e4c0dedc0..4071c4a18 100644
--- a/jcore-bionlpformat-consumer/component.meta
+++ b/jcore-bionlpformat-consumer/component.meta
@@ -22,7 +22,7 @@
     "maven-artifact": {
         "artifactId": "jcore-bionlpformat-consumer",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe BioNLP Format Consumer"
 }
diff --git a/jcore-bionlpformat-consumer/pom.xml b/jcore-bionlpformat-consumer/pom.xml
index bf58e21a4..d868129aa 100644
--- a/jcore-bionlpformat-consumer/pom.xml
+++ b/jcore-bionlpformat-consumer/pom.xml
@@ -6,7 +6,7 @@
     <parent>
         <artifactId>jcore-base</artifactId>
         <groupId>de.julielab</groupId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
     <dependencies>
         <dependency>
diff --git a/jcore-bionlpformat-consumer/src/main/resources/de/julielab/jcore/consumer/bionlpformat/desc/jcore-bionlpformat-consumer-biomedical-sharedtask.xml b/jcore-bionlpformat-consumer/src/main/resources/de/julielab/jcore/consumer/bionlpformat/desc/jcore-bionlpformat-consumer-biomedical-sharedtask.xml
index 45463be92..3d358227d 100644
--- a/jcore-bionlpformat-consumer/src/main/resources/de/julielab/jcore/consumer/bionlpformat/desc/jcore-bionlpformat-consumer-biomedical-sharedtask.xml
+++ b/jcore-bionlpformat-consumer/src/main/resources/de/julielab/jcore/consumer/bionlpformat/desc/jcore-bionlpformat-consumer-biomedical-sharedtask.xml
@@ -5,7 +5,7 @@
   <analysisEngineMetaData>
     <name>JCoRe BioNLP Event Consumer</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-bionlpformat-consumer/src/main/resources/de/julielab/jcore/consumer/bionlpformat/desc/jcore-bionlpformat-consumer-medical.xml b/jcore-bionlpformat-consumer/src/main/resources/de/julielab/jcore/consumer/bionlpformat/desc/jcore-bionlpformat-consumer-medical.xml
index 5ebfec59f..547769316 100644
--- a/jcore-bionlpformat-consumer/src/main/resources/de/julielab/jcore/consumer/bionlpformat/desc/jcore-bionlpformat-consumer-medical.xml
+++ b/jcore-bionlpformat-consumer/src/main/resources/de/julielab/jcore/consumer/bionlpformat/desc/jcore-bionlpformat-consumer-medical.xml
@@ -5,7 +5,7 @@
   <analysisEngineMetaData>
     <name>JCoRe BioNLP Format Event Consumer (Medical)</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-bionlpformat-consumer/src/main/resources/de/julielab/jcore/consumer/bionlpformat/desc/jcore-bionlpformat-consumer-segment.xml b/jcore-bionlpformat-consumer/src/main/resources/de/julielab/jcore/consumer/bionlpformat/desc/jcore-bionlpformat-consumer-segment.xml
index dc654b37b..be36250a4 100644
--- a/jcore-bionlpformat-consumer/src/main/resources/de/julielab/jcore/consumer/bionlpformat/desc/jcore-bionlpformat-consumer-segment.xml
+++ b/jcore-bionlpformat-consumer/src/main/resources/de/julielab/jcore/consumer/bionlpformat/desc/jcore-bionlpformat-consumer-segment.xml
@@ -7,7 +7,7 @@
     <analysisEngineMetaData>
         <name>JCoRe BioNLP Format Segment Consumer</name>
         <description />
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <vendor />
         <configurationParameters>
             <configurationParameter>
diff --git a/jcore-bionlpformat-consumer/src/test/resources/types/jcore-all-types.xml b/jcore-bionlpformat-consumer/src/test/resources/types/jcore-all-types.xml
index 670239d8d..7c320da41 100644
--- a/jcore-bionlpformat-consumer/src/test/resources/types/jcore-all-types.xml
+++ b/jcore-bionlpformat-consumer/src/test/resources/types/jcore-all-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>JCoRe All Types</name>
   <description>This is just a convenience file, assembling all JCoRe types</description>
-  <version>2.5.1-SNAPSHOT</version>
+  <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <imports>
     <import name="de.julielab.jcore.types.jcore-basic-types" />
diff --git a/jcore-bionlpformat-consumer/src/test/resources/types/jcore-semantics-biology-types.xml b/jcore-bionlpformat-consumer/src/test/resources/types/jcore-semantics-biology-types.xml
index 0f6fca3ac..c01c57fe9 100644
--- a/jcore-bionlpformat-consumer/src/test/resources/types/jcore-semantics-biology-types.xml
+++ b/jcore-bionlpformat-consumer/src/test/resources/types/jcore-semantics-biology-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>JCoRe Semantics Biology Types</name>
   <description>The type system contains types of the biomedical domain.</description>
-  <version>2.5.1-SNAPSHOT</version>
+  <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <imports>
     <import name="de.julielab.jcore.types.jcore-semantics-mention-types" />
diff --git a/jcore-bionlpformat-reader/component.meta b/jcore-bionlpformat-reader/component.meta
index 6f10e9e95..229346ad7 100644
--- a/jcore-bionlpformat-reader/component.meta
+++ b/jcore-bionlpformat-reader/component.meta
@@ -22,7 +22,7 @@
     "maven-artifact": {
         "artifactId": "jcore-bionlpformat-reader",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe BioNLP Format Reader"
 }
diff --git a/jcore-bionlpformat-reader/pom.xml b/jcore-bionlpformat-reader/pom.xml
index 862c09d97..65fcefb66 100644
--- a/jcore-bionlpformat-reader/pom.xml
+++ b/jcore-bionlpformat-reader/pom.xml
@@ -6,7 +6,7 @@
     <parent>
         <artifactId>jcore-base</artifactId>
         <groupId>de.julielab</groupId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-bionlpformat-reader/src/main/resources/de/julielab/jcore/reader/bionlpformat/desc/jcore-bionlpformat-reader-biomedical-sharedtask.xml b/jcore-bionlpformat-reader/src/main/resources/de/julielab/jcore/reader/bionlpformat/desc/jcore-bionlpformat-reader-biomedical-sharedtask.xml
index ccd6c46f6..0ba9c91cf 100644
--- a/jcore-bionlpformat-reader/src/main/resources/de/julielab/jcore/reader/bionlpformat/desc/jcore-bionlpformat-reader-biomedical-sharedtask.xml
+++ b/jcore-bionlpformat-reader/src/main/resources/de/julielab/jcore/reader/bionlpformat/desc/jcore-bionlpformat-reader-biomedical-sharedtask.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>JCoRe BioNLP Event Reader</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-bionlpformat-reader/src/main/resources/de/julielab/jcore/reader/bionlpformat/desc/jcore-bionlpformat-reader-medical.xml b/jcore-bionlpformat-reader/src/main/resources/de/julielab/jcore/reader/bionlpformat/desc/jcore-bionlpformat-reader-medical.xml
index 74cdb9e62..810dfac8c 100644
--- a/jcore-bionlpformat-reader/src/main/resources/de/julielab/jcore/reader/bionlpformat/desc/jcore-bionlpformat-reader-medical.xml
+++ b/jcore-bionlpformat-reader/src/main/resources/de/julielab/jcore/reader/bionlpformat/desc/jcore-bionlpformat-reader-medical.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>BioNLP Format Reader Medical</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-bionlpformat-reader/src/main/resources/de/julielab/jcore/reader/bionlpformat/desc/jcore-bionlpformat-reader-segment.xml b/jcore-bionlpformat-reader/src/main/resources/de/julielab/jcore/reader/bionlpformat/desc/jcore-bionlpformat-reader-segment.xml
index aea0bc469..1f4944403 100644
--- a/jcore-bionlpformat-reader/src/main/resources/de/julielab/jcore/reader/bionlpformat/desc/jcore-bionlpformat-reader-segment.xml
+++ b/jcore-bionlpformat-reader/src/main/resources/de/julielab/jcore/reader/bionlpformat/desc/jcore-bionlpformat-reader-segment.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>BioNLP Format Reader Segment</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-bionlpformat-reader/src/test/resources/de/julielab/jcore/reader/bionlpformat/desc/EventReaderTest.xml b/jcore-bionlpformat-reader/src/test/resources/de/julielab/jcore/reader/bionlpformat/desc/EventReaderTest.xml
index 38ed5aed3..3813fdc7d 100644
--- a/jcore-bionlpformat-reader/src/test/resources/de/julielab/jcore/reader/bionlpformat/desc/EventReaderTest.xml
+++ b/jcore-bionlpformat-reader/src/test/resources/de/julielab/jcore/reader/bionlpformat/desc/EventReaderTest.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>EventReader</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-biosem-ae/component.meta b/jcore-biosem-ae/component.meta
index dd5fcf39d..efff383f6 100644
--- a/jcore-biosem-ae/component.meta
+++ b/jcore-biosem-ae/component.meta
@@ -9,7 +9,7 @@
     "maven-artifact": {
         "artifactId": "jcore-biosem-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe BioSem Event Annotator"
 }
diff --git a/jcore-biosem-ae/pom.xml b/jcore-biosem-ae/pom.xml
index ece3b845a..eec6bc55f 100644
--- a/jcore-biosem-ae/pom.xml
+++ b/jcore-biosem-ae/pom.xml
@@ -5,7 +5,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
     <artifactId>jcore-biosem-ae</artifactId>
     <name>JCoRe BioSem Event Annotator</name>
@@ -32,7 +32,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-bionlpformat-reader</artifactId>
-            <version>2.5.1-SNAPSHOT</version>
+            <version>2.6.0-SNAPSHOT</version>
             <scope>test</scope>
         </dependency>
         <dependency>
@@ -48,7 +48,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-bionlpformat-consumer</artifactId>
-            <version>2.5.1-SNAPSHOT</version>
+            <version>2.6.0-SNAPSHOT</version>
             <scope>test</scope>
         </dependency>
         <dependency>
diff --git a/jcore-conll-consumer/component.meta b/jcore-conll-consumer/component.meta
index e754ff444..87ff59f38 100644
--- a/jcore-conll-consumer/component.meta
+++ b/jcore-conll-consumer/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-conll-consumer",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe CONLL Consumer"
 }
diff --git a/jcore-conll-consumer/pom.xml b/jcore-conll-consumer/pom.xml
index fef60e5bf..4ba6ef20c 100644
--- a/jcore-conll-consumer/pom.xml
+++ b/jcore-conll-consumer/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
     <artifactId>jcore-conll-consumer</artifactId>
 
diff --git a/jcore-conll-consumer/src/main/resources/de/julielab/jcore/consumer/conll/desc/jcore-conll-consumer.xml b/jcore-conll-consumer/src/main/resources/de/julielab/jcore/consumer/conll/desc/jcore-conll-consumer.xml
index 30f0366eb..854c345d4 100644
--- a/jcore-conll-consumer/src/main/resources/de/julielab/jcore/consumer/conll/desc/jcore-conll-consumer.xml
+++ b/jcore-conll-consumer/src/main/resources/de/julielab/jcore/consumer/conll/desc/jcore-conll-consumer.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe Conll Consumer</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-coordination-baseline-ae/component.meta b/jcore-coordination-baseline-ae/component.meta
index c79a816e4..361310479 100644
--- a/jcore-coordination-baseline-ae/component.meta
+++ b/jcore-coordination-baseline-ae/component.meta
@@ -26,7 +26,7 @@
     "maven-artifact": {
         "artifactId": "jcore-coordination-baseline-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe Coordination Tagger Baseline"
 }
diff --git a/jcore-coordination-baseline-ae/pom.xml b/jcore-coordination-baseline-ae/pom.xml
index eaff316fa..ea88c0b43 100644
--- a/jcore-coordination-baseline-ae/pom.xml
+++ b/jcore-coordination-baseline-ae/pom.xml
@@ -13,7 +13,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
     <dependencies>
         <dependency>
diff --git a/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-conjunct.xml b/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-conjunct.xml
index 1e5a6c860..40bb374a8 100644
--- a/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-conjunct.xml
+++ b/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-conjunct.xml
@@ -6,7 +6,7 @@
 <analysisEngineMetaData>
 <name>JCoRe ConjunctAnnotator</name>
 <description />
-<version>2.5.1-SNAPSHOT</version>
+<version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
 <configurationParameters />
 <configurationParameterSettings />
diff --git a/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-coordination.xml b/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-coordination.xml
index b5db7b69b..55b4377d0 100644
--- a/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-coordination.xml
+++ b/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-coordination.xml
@@ -6,7 +6,7 @@
 <analysisEngineMetaData>
 <name>JCoRe CoordinationAnnotator</name>
 <description />
-<version>2.5.1-SNAPSHOT</version>
+<version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
 <configurationParameters />
 <configurationParameterSettings />
diff --git a/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-eee.xml b/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-eee.xml
index 50c01690b..434bfd967 100644
--- a/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-eee.xml
+++ b/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-eee.xml
@@ -6,7 +6,7 @@
 <analysisEngineMetaData>
 <name>JCoRe EEEAnnotator</name>
 <description />
-<version>2.5.1-SNAPSHOT</version>
+<version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
 <configurationParameters />
 <configurationParameterSettings />
diff --git a/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-ellipsis.xml b/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-ellipsis.xml
index 8e73905d3..a508d4ab7 100644
--- a/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-ellipsis.xml
+++ b/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-ellipsis.xml
@@ -6,7 +6,7 @@
 <analysisEngineMetaData>
 <name>JCoRe EllipsisAnnotator</name>
 <description />
-<version>2.5.1-SNAPSHOT</version>
+<version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
 <configurationParameters />
 <configurationParameterSettings />
diff --git a/jcore-coordination-baseline-ae/src/test/resources/desc/ConjunctAnnotatorTest.xml b/jcore-coordination-baseline-ae/src/test/resources/desc/ConjunctAnnotatorTest.xml
index 50c97ebbc..be03ff4bb 100644
--- a/jcore-coordination-baseline-ae/src/test/resources/desc/ConjunctAnnotatorTest.xml
+++ b/jcore-coordination-baseline-ae/src/test/resources/desc/ConjunctAnnotatorTest.xml
@@ -6,7 +6,7 @@
 <analysisEngineMetaData>
 <name>ConjunctAnnotator</name>
 <description />
-<version>2.5.1-SNAPSHOT</version>
+<version>2.6.0-SNAPSHOT</version>
         <vendor />
 <configurationParameters />
 <configurationParameterSettings />
diff --git a/jcore-coordination-baseline-ae/src/test/resources/desc/CoordinationAnnotatorTest.xml b/jcore-coordination-baseline-ae/src/test/resources/desc/CoordinationAnnotatorTest.xml
index ca9a48170..a256a83b6 100644
--- a/jcore-coordination-baseline-ae/src/test/resources/desc/CoordinationAnnotatorTest.xml
+++ b/jcore-coordination-baseline-ae/src/test/resources/desc/CoordinationAnnotatorTest.xml
@@ -6,7 +6,7 @@
 <analysisEngineMetaData>
 <name>CoordinationAnnotator</name>
 <description />
-<version>2.5.1-SNAPSHOT</version>
+<version>2.6.0-SNAPSHOT</version>
         <vendor />
 <configurationParameters />
 <configurationParameterSettings />
diff --git a/jcore-coordination-baseline-ae/src/test/resources/desc/EEEAnnotatorTest.xml b/jcore-coordination-baseline-ae/src/test/resources/desc/EEEAnnotatorTest.xml
index 3683f5210..4b470443d 100644
--- a/jcore-coordination-baseline-ae/src/test/resources/desc/EEEAnnotatorTest.xml
+++ b/jcore-coordination-baseline-ae/src/test/resources/desc/EEEAnnotatorTest.xml
@@ -6,7 +6,7 @@
 <analysisEngineMetaData>
 <name>EEEAnnotator</name>
 <description />
-<version>2.5.1-SNAPSHOT</version>
+<version>2.6.0-SNAPSHOT</version>
         <vendor />
 <configurationParameters />
 <configurationParameterSettings />
diff --git a/jcore-coordination-baseline-ae/src/test/resources/desc/EllipsisAnnotatorTest.xml b/jcore-coordination-baseline-ae/src/test/resources/desc/EllipsisAnnotatorTest.xml
index beea12e3e..422a96e06 100644
--- a/jcore-coordination-baseline-ae/src/test/resources/desc/EllipsisAnnotatorTest.xml
+++ b/jcore-coordination-baseline-ae/src/test/resources/desc/EllipsisAnnotatorTest.xml
@@ -6,7 +6,7 @@
 <analysisEngineMetaData>
 <name>EllipsisAnnotator</name>
 <description />
-<version>2.5.1-SNAPSHOT</version>
+<version>2.6.0-SNAPSHOT</version>
         <vendor />
 <configurationParameters />
 <configurationParameterSettings />
diff --git a/jcore-cord19-reader/component.meta b/jcore-cord19-reader/component.meta
index 3fd15f733..66bd41580 100644
--- a/jcore-cord19-reader/component.meta
+++ b/jcore-cord19-reader/component.meta
@@ -19,7 +19,7 @@
     "maven-artifact": {
         "artifactId": "jcore-cord19-reader",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe CORD-19 Reader"
 }
diff --git a/jcore-cord19-reader/pom.xml b/jcore-cord19-reader/pom.xml
index a1cdf1d9a..b77f93e91 100644
--- a/jcore-cord19-reader/pom.xml
+++ b/jcore-cord19-reader/pom.xml
@@ -10,7 +10,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-cord19-reader/src/main/resources/de/julielab/jcore/reader/cord19/desc/jcore-cord19-multiplier-reader.xml b/jcore-cord19-reader/src/main/resources/de/julielab/jcore/reader/cord19/desc/jcore-cord19-multiplier-reader.xml
index 90f5da426..fc54b7b2e 100644
--- a/jcore-cord19-reader/src/main/resources/de/julielab/jcore/reader/cord19/desc/jcore-cord19-multiplier-reader.xml
+++ b/jcore-cord19-reader/src/main/resources/de/julielab/jcore/reader/cord19/desc/jcore-cord19-multiplier-reader.xml
@@ -5,7 +5,7 @@
     <processingResourceMetaData>
         <name>JCoRe CORD-19 Multiplier Reader</name>
         <description>This component reads file paths to JSON files and the CORD-19 (https://pages.semanticscholar.org/coronavirus-research) meta data file to send them to CAS multipliers.</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
         <configurationParameters>
             <configurationParameter>
diff --git a/jcore-cord19-reader/src/main/resources/de/julielab/jcore/reader/cord19/desc/jcore-cord19-multiplier.xml b/jcore-cord19-reader/src/main/resources/de/julielab/jcore/reader/cord19/desc/jcore-cord19-multiplier.xml
index b539b1511..812eeb5c6 100644
--- a/jcore-cord19-reader/src/main/resources/de/julielab/jcore/reader/cord19/desc/jcore-cord19-multiplier.xml
+++ b/jcore-cord19-reader/src/main/resources/de/julielab/jcore/reader/cord19/desc/jcore-cord19-multiplier.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe CORD-19 CAS Multiplier</name>
         <description>This component reads the CORD-19 (https://pages.semanticscholar.org/coronavirus-research) JSON format into UIMA CAS instances.</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
         <configurationParameters />
         <configurationParameterSettings />
diff --git a/jcore-ct-reader/component.meta b/jcore-ct-reader/component.meta
index a131ea835..309b82f92 100644
--- a/jcore-ct-reader/component.meta
+++ b/jcore-ct-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-ct-reader",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe Clinical Trials Reader"
 }
diff --git a/jcore-ct-reader/pom.xml b/jcore-ct-reader/pom.xml
index bfc239518..ac50c8cdb 100644
--- a/jcore-ct-reader/pom.xml
+++ b/jcore-ct-reader/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-ct-reader/src/main/resources/de/julielab/jcore/reader/ct/desc/jcore-clinicaltrials-reader.xml b/jcore-ct-reader/src/main/resources/de/julielab/jcore/reader/ct/desc/jcore-clinicaltrials-reader.xml
index 100df0acd..33e4a0f03 100644
--- a/jcore-ct-reader/src/main/resources/de/julielab/jcore/reader/ct/desc/jcore-clinicaltrials-reader.xml
+++ b/jcore-ct-reader/src/main/resources/de/julielab/jcore/reader/ct/desc/jcore-clinicaltrials-reader.xml
@@ -5,7 +5,7 @@
     <processingResourceMetaData>
         <name>JCoRe Clinical Trials Reader</name>
         <description>This component reads the XML format provided by ClinicalTrials.gov. To this end, the JCoRe type system contains a number of types specifically created for this kind of document. Note that the CAS text created by this reader might be confusing without checking the corresponding annotations. This is due to the fact that the CT XML contains multiple enumerations which are not very well reflected in plain text. Also, enumerations with subitems, such as the outcomes, are not displayed in the expected groups of items. Instead, each item type is displayed separately. This could be changed, if necessary. Since all items are correctly annotated by their category, this might not even be an issue, depending on the downstream tasks.</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <configurationParameters>
             <configurationParameter>
                 <name>InputDirectory</name>
diff --git a/jcore-db-checkpoint-ae/component.meta b/jcore-db-checkpoint-ae/component.meta
index b703ae5c4..958bc8f17 100644
--- a/jcore-db-checkpoint-ae/component.meta
+++ b/jcore-db-checkpoint-ae/component.meta
@@ -19,7 +19,7 @@
     "maven-artifact": {
         "artifactId": "jcore-db-checkpoint-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe Database Checkpoint AE"
 }
diff --git a/jcore-db-checkpoint-ae/pom.xml b/jcore-db-checkpoint-ae/pom.xml
index 3cac45687..f7ed71533 100644
--- a/jcore-db-checkpoint-ae/pom.xml
+++ b/jcore-db-checkpoint-ae/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jedis-parent</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <relativePath>../jedis-parent</relativePath>
     </parent>
     
diff --git a/jcore-db-checkpoint-ae/src/main/resources/de/julielab/jcore/ae/checkpoint/desc/jcore-db-checkpoint-ae.xml b/jcore-db-checkpoint-ae/src/main/resources/de/julielab/jcore/ae/checkpoint/desc/jcore-db-checkpoint-ae.xml
index 31e3605e8..8264367e1 100644
--- a/jcore-db-checkpoint-ae/src/main/resources/de/julielab/jcore/ae/checkpoint/desc/jcore-db-checkpoint-ae.xml
+++ b/jcore-db-checkpoint-ae/src/main/resources/de/julielab/jcore/ae/checkpoint/desc/jcore-db-checkpoint-ae.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe Database Checkpoint AE</name>
         <description>This component can be used when using a JCoRe database reader that reads from a CoStoSys/JeDIS subset. Enters the configured component name in the 'last component' column. Can also mark documents as being completely processed.</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <configurationParameters>
             <configurationParameter>
                 <name>CheckpointName</name>
diff --git a/jcore-db-checkpoint-ae/src/main/resources/de/julielab/jcore/ae/checkpoint/desc/jcore-db-checkpoint-consumer.xml b/jcore-db-checkpoint-ae/src/main/resources/de/julielab/jcore/ae/checkpoint/desc/jcore-db-checkpoint-consumer.xml
index 5ac25514c..59b0bf054 100644
--- a/jcore-db-checkpoint-ae/src/main/resources/de/julielab/jcore/ae/checkpoint/desc/jcore-db-checkpoint-consumer.xml
+++ b/jcore-db-checkpoint-ae/src/main/resources/de/julielab/jcore/ae/checkpoint/desc/jcore-db-checkpoint-consumer.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe Database Checkpoint Writer</name>
         <description>This component can be used when using a JCoRe database reader that reads from a CoStoSys/JeDIS subset. Enters the configured component name in the 'last component' column. Can also mark documents as being completely processed.</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <configurationParameters>
             <configurationParameter>
                 <name>CheckpointName</name>
diff --git a/jcore-db-reader/component.meta b/jcore-db-reader/component.meta
index a6793b944..78b3ba1ad 100644
--- a/jcore-db-reader/component.meta
+++ b/jcore-db-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-db-reader",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe Database Reader"
 }
diff --git a/jcore-db-reader/pom.xml b/jcore-db-reader/pom.xml
index 2129cc7e0..bf3b215b9 100644
--- a/jcore-db-reader/pom.xml
+++ b/jcore-db-reader/pom.xml
@@ -3,7 +3,7 @@
     <parent>
         <artifactId>jedis-parent</artifactId>
         <groupId>de.julielab</groupId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <relativePath>../jedis-parent</relativePath>
     </parent>
     <modelVersion>4.0.0</modelVersion>
@@ -44,7 +44,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-xml-mapper</artifactId>
-            <version>2.5.1-SNAPSHOT</version>
+            <version>2.6.0-SNAPSHOT</version>
             <scope>test</scope>
         </dependency>
         <dependency>
diff --git a/jcore-db-reader/src/main/resources/de/julielab/jcore/reader/db/desc/jcore-db-multiplier-reader.xml b/jcore-db-reader/src/main/resources/de/julielab/jcore/reader/db/desc/jcore-db-multiplier-reader.xml
index 489b2b92a..9637ab27d 100644
--- a/jcore-db-reader/src/main/resources/de/julielab/jcore/reader/db/desc/jcore-db-multiplier-reader.xml
+++ b/jcore-db-reader/src/main/resources/de/julielab/jcore/reader/db/desc/jcore-db-multiplier-reader.xml
@@ -10,7 +10,7 @@
             sent by this reader. The component leverages the corpus storage system (CoStoSys) for this purpose and is
             part of the Jena Document Information System, JeDIS.
         </description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <configurationParameters>
             <configurationParameter>
                 <name>ResetTable</name>
diff --git a/jcore-descriptor-creator/pom.xml b/jcore-descriptor-creator/pom.xml
index 0336524bf..aae843561 100644
--- a/jcore-descriptor-creator/pom.xml
+++ b/jcore-descriptor-creator/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
     
     <artifactId>jcore-descriptor-creator</artifactId>
diff --git a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/ae/testae/desc/de.julielab.jcore.ae.testae.TestAE.xml b/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/ae/testae/desc/de.julielab.jcore.ae.testae.TestAE.xml
index 34208ad32..558a62b57 100644
--- a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/ae/testae/desc/de.julielab.jcore.ae.testae.TestAE.xml
+++ b/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/ae/testae/desc/de.julielab.jcore.ae.testae.TestAE.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>de.julielab.jcore.ae.testae.TestAE</name>
         <description>Descriptor automatically generated by uimaFIT</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <vendor>de.julielab.jcore.ae.testae</vendor>
         <configurationParameters />
         <configurationParameterSettings />
diff --git a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/consumer/testconsumer/desc/de.julielab.jcore.consumer.testconsumer.Testconsumer.xml b/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/consumer/testconsumer/desc/de.julielab.jcore.consumer.testconsumer.Testconsumer.xml
index 7d1d5a224..3bf9a16c1 100644
--- a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/consumer/testconsumer/desc/de.julielab.jcore.consumer.testconsumer.Testconsumer.xml
+++ b/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/consumer/testconsumer/desc/de.julielab.jcore.consumer.testconsumer.Testconsumer.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>de.julielab.jcore.consumer.testconsumer.Testconsumer</name>
         <description>Descriptor automatically generated by uimaFIT</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <vendor>de.julielab.jcore.consumer.testconsumer</vendor>
         <configurationParameters />
         <configurationParameterSettings />
diff --git a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/multiplier/testmultiplier/desc/de.julielab.jcore.multiplier.testmultiplier.TestMultiplier.xml b/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/multiplier/testmultiplier/desc/de.julielab.jcore.multiplier.testmultiplier.TestMultiplier.xml
index 8167fbb68..8ef78db33 100644
--- a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/multiplier/testmultiplier/desc/de.julielab.jcore.multiplier.testmultiplier.TestMultiplier.xml
+++ b/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/multiplier/testmultiplier/desc/de.julielab.jcore.multiplier.testmultiplier.TestMultiplier.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>de.julielab.jcore.multiplier.testmultiplier.TestMultiplier</name>
         <description>Descriptor automatically generated by uimaFIT</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <vendor>de.julielab.jcore.multiplier.testmultiplier</vendor>
         <configurationParameters />
         <configurationParameterSettings />
diff --git a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/reader/testreader/desc/de.julielab.jcore.reader.testreader.TestReader.xml b/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/reader/testreader/desc/de.julielab.jcore.reader.testreader.TestReader.xml
index 016fc36bf..bd482d6ee 100644
--- a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/reader/testreader/desc/de.julielab.jcore.reader.testreader.TestReader.xml
+++ b/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/reader/testreader/desc/de.julielab.jcore.reader.testreader.TestReader.xml
@@ -5,7 +5,7 @@
     <processingResourceMetaData>
         <name>de.julielab.jcore.reader.testreader.TestReader</name>
         <description>Descriptor automatically generated by uimaFIT</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <vendor>de.julielab.jcore.reader.testreader</vendor>
         <configurationParameters />
         <configurationParameterSettings />
diff --git a/jcore-dta-reader/component.meta b/jcore-dta-reader/component.meta
index 44239af00..ee9b729df 100644
--- a/jcore-dta-reader/component.meta
+++ b/jcore-dta-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-dta-reader",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe DTA Reader"
 }
diff --git a/jcore-dta-reader/pom.xml b/jcore-dta-reader/pom.xml
index f05d13a93..b47f53e66 100644
--- a/jcore-dta-reader/pom.xml
+++ b/jcore-dta-reader/pom.xml
@@ -9,7 +9,7 @@
 	<parent>
 		<groupId>de.julielab</groupId>
 		<artifactId>jcore-base</artifactId>
-		<version>2.5.1-SNAPSHOT</version>
+		<version>2.6.0-SNAPSHOT</version>
 	</parent>
 	<build>
 		<resources>
diff --git a/jcore-dta-reader/src/main/resources/de/julielab/jcore/reader/dta/desc/jcore-dta-reader.xml b/jcore-dta-reader/src/main/resources/de/julielab/jcore/reader/dta/desc/jcore-dta-reader.xml
index 1e17bdb36..8bc431330 100644
--- a/jcore-dta-reader/src/main/resources/de/julielab/jcore/reader/dta/desc/jcore-dta-reader.xml
+++ b/jcore-dta-reader/src/main/resources/de/julielab/jcore/reader/dta/desc/jcore-dta-reader.xml
@@ -5,7 +5,7 @@
 	<processingResourceMetaData>
 		<name>JCoRe DTA Reader</name>
 		<description />
-		<version>2.5.1-SNAPSHOT</version>
+		<version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
 		<configurationParameters>
 			<configurationParameter>
diff --git a/jcore-ec-code-ae/component.meta b/jcore-ec-code-ae/component.meta
index 995049c32..22af189d5 100644
--- a/jcore-ec-code-ae/component.meta
+++ b/jcore-ec-code-ae/component.meta
@@ -9,7 +9,7 @@
     "maven-artifact": {
         "artifactId": "jcore-ecn-code-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe Enzyme Commission Number AE"
 }
diff --git a/jcore-ec-code-ae/pom.xml b/jcore-ec-code-ae/pom.xml
index 14428b6cf..05cc496a5 100644
--- a/jcore-ec-code-ae/pom.xml
+++ b/jcore-ec-code-ae/pom.xml
@@ -10,7 +10,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
     <dependencies>
         <dependency>
diff --git a/jcore-elasticsearch-consumer/component.meta b/jcore-elasticsearch-consumer/component.meta
index 584bbdc82..b2f0e7a71 100644
--- a/jcore-elasticsearch-consumer/component.meta
+++ b/jcore-elasticsearch-consumer/component.meta
@@ -18,7 +18,7 @@
     "maven-artifact": {
         "artifactId": "jcore-elasticsearch-consumer",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe ElasticSearch Consumer"
 }
diff --git a/jcore-elasticsearch-consumer/pom.xml b/jcore-elasticsearch-consumer/pom.xml
index 8014c9cad..540e2f7d1 100644
--- a/jcore-elasticsearch-consumer/pom.xml
+++ b/jcore-elasticsearch-consumer/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
     <artifactId>jcore-elasticsearch-consumer</artifactId>
     <name>JCoRe ElasticSearch Consumer</name>
diff --git a/jcore-elasticsearch-consumer/src/main/resources/de/julielab/jcore/consumer/es/desc/jcore-elasticsearch-consumer.xml b/jcore-elasticsearch-consumer/src/main/resources/de/julielab/jcore/consumer/es/desc/jcore-elasticsearch-consumer.xml
index cafc85e71..c2334321e 100644
--- a/jcore-elasticsearch-consumer/src/main/resources/de/julielab/jcore/consumer/es/desc/jcore-elasticsearch-consumer.xml
+++ b/jcore-elasticsearch-consumer/src/main/resources/de/julielab/jcore/consumer/es/desc/jcore-elasticsearch-consumer.xml
@@ -5,7 +5,7 @@
     <annotatorImplementationName>de.julielab.jcore.consumer.es.ElasticSearchConsumer</annotatorImplementationName>
     <analysisEngineMetaData>
         <name>JCore ElasticSearch Consumer</name>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <configurationParameters>
             <configurationParameter>
                 <name>urls</name>
diff --git a/jcore-elasticsearch-consumer/src/main/resources/de/julielab/jcore/consumer/es/desc/jcore-json-writer.xml b/jcore-elasticsearch-consumer/src/main/resources/de/julielab/jcore/consumer/es/desc/jcore-json-writer.xml
index efd472393..485ebb2ce 100644
--- a/jcore-elasticsearch-consumer/src/main/resources/de/julielab/jcore/consumer/es/desc/jcore-json-writer.xml
+++ b/jcore-elasticsearch-consumer/src/main/resources/de/julielab/jcore/consumer/es/desc/jcore-json-writer.xml
@@ -5,7 +5,7 @@
     <annotatorImplementationName>de.julielab.jcore.consumer.es.JsonWriter</annotatorImplementationName>
     <analysisEngineMetaData>
         <name>JCoRe JSON Writer</name>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <configurationParameters>
             <configurationParameter>
                 <name>OutputDestination</name>
diff --git a/jcore-elasticsearch-consumer/src/test/resources/de/julielab/jcore/consumer/es/testTypes.xml b/jcore-elasticsearch-consumer/src/test/resources/de/julielab/jcore/consumer/es/testTypes.xml
index 0b1bd8c30..dfdd4d093 100644
--- a/jcore-elasticsearch-consumer/src/test/resources/de/julielab/jcore/consumer/es/testTypes.xml
+++ b/jcore-elasticsearch-consumer/src/test/resources/de/julielab/jcore/consumer/es/testTypes.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>testTypes</name>
   <description>Some types suited for unit tests.</description>
-  <version>2.5.1-SNAPSHOT</version>
+  <version>2.6.0-SNAPSHOT</version>
         <vendor />
   <imports>
     <import name="de.julielab.jcore.types.jcore-document-meta-types" />
diff --git a/jcore-embedding-writer/component.meta b/jcore-embedding-writer/component.meta
index c95336587..0c6301641 100644
--- a/jcore-embedding-writer/component.meta
+++ b/jcore-embedding-writer/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-embedding-writer",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe Embedding Writer"
 }
diff --git a/jcore-embedding-writer/pom.xml b/jcore-embedding-writer/pom.xml
index 820510aa5..d5d5304a6 100644
--- a/jcore-embedding-writer/pom.xml
+++ b/jcore-embedding-writer/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-embedding-writer/src/main/resources/de/julielab/jcore/consumer/ew/desc/jcore-embedding-writer.xml b/jcore-embedding-writer/src/main/resources/de/julielab/jcore/consumer/ew/desc/jcore-embedding-writer.xml
index 14b684f02..46f458d8b 100644
--- a/jcore-embedding-writer/src/main/resources/de/julielab/jcore/consumer/ew/desc/jcore-embedding-writer.xml
+++ b/jcore-embedding-writer/src/main/resources/de/julielab/jcore/consumer/ew/desc/jcore-embedding-writer.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe Flair Embedding Writer</name>
     <description>Given a Flair compatible embedding and a UIMA annotation type, this component prints the embeddings of tokens annotated with the annotation to a file.</description>
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <configurationParameters>
       <configurationParameter>
         <name>UseGzip</name>
diff --git a/jcore-event-flattener-ae/component.meta b/jcore-event-flattener-ae/component.meta
index 94b772718..afc1e729e 100644
--- a/jcore-event-flattener-ae/component.meta
+++ b/jcore-event-flattener-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-event-flattener-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe Event Flattener AE"
 }
diff --git a/jcore-event-flattener-ae/pom.xml b/jcore-event-flattener-ae/pom.xml
index 83ff43f48..423a141b9 100644
--- a/jcore-event-flattener-ae/pom.xml
+++ b/jcore-event-flattener-ae/pom.xml
@@ -3,7 +3,7 @@
   <parent>
     <groupId>de.julielab</groupId>
     <artifactId>jcore-base</artifactId>
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
   </parent>
   <artifactId>jcore-event-flattener-ae</artifactId>
   <name>JCoRe Event Flattener AE</name>
diff --git a/jcore-event-flattener-ae/src/main/resources/de/julielab/jcore/ae/eventflattener/desc/jcore-event-flattener-ae.xml b/jcore-event-flattener-ae/src/main/resources/de/julielab/jcore/ae/eventflattener/desc/jcore-event-flattener-ae.xml
index bbd7bde4f..ff351724b 100644
--- a/jcore-event-flattener-ae/src/main/resources/de/julielab/jcore/ae/eventflattener/desc/jcore-event-flattener-ae.xml
+++ b/jcore-event-flattener-ae/src/main/resources/de/julielab/jcore/ae/eventflattener/desc/jcore-event-flattener-ae.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>de.julielab.jcore.ae.eventflattener.EventFlattener</name>
         <description>Descriptor automatically generated by uimaFIT</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <vendor>de.julielab.jcore.ae.eventflattener</vendor>
         <configurationParameters />
         <configurationParameterSettings />
diff --git a/jcore-feature-value-replacement-ae/component.meta b/jcore-feature-value-replacement-ae/component.meta
index d81fdcdaa..dfb623568 100644
--- a/jcore-feature-value-replacement-ae/component.meta
+++ b/jcore-feature-value-replacement-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-feature-value-replacement-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe Feature Value Replacement AE"
 }
diff --git a/jcore-feature-value-replacement-ae/pom.xml b/jcore-feature-value-replacement-ae/pom.xml
index 721035710..f3e120d76 100644
--- a/jcore-feature-value-replacement-ae/pom.xml
+++ b/jcore-feature-value-replacement-ae/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
     <artifactId>jcore-feature-value-replacement-ae</artifactId>
     <name>JCoRe Feature Value Replacement AE</name>
diff --git a/jcore-feature-value-replacement-ae/src/main/resources/de/julielab/jcore/ae/fvr/desc/jcore-feature-value-replacement-ae.xml b/jcore-feature-value-replacement-ae/src/main/resources/de/julielab/jcore/ae/fvr/desc/jcore-feature-value-replacement-ae.xml
index 9be834fd5..42c3e36a8 100644
--- a/jcore-feature-value-replacement-ae/src/main/resources/de/julielab/jcore/ae/fvr/desc/jcore-feature-value-replacement-ae.xml
+++ b/jcore-feature-value-replacement-ae/src/main/resources/de/julielab/jcore/ae/fvr/desc/jcore-feature-value-replacement-ae.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>de.julielab.jcore.ae.fvr.FeatureValueReplacementAnnotator</name>
         <description>Descriptor automatically generated by uimaFIT</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <vendor>de.julielab.jcore.ae.fvr</vendor>
         <configurationParameters>
             <configurationParameter>
diff --git a/jcore-file-reader/component.meta b/jcore-file-reader/component.meta
index 2e3e09849..9aabd9c66 100644
--- a/jcore-file-reader/component.meta
+++ b/jcore-file-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-file-reader",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe File Reader"
 }
diff --git a/jcore-file-reader/pom.xml b/jcore-file-reader/pom.xml
index 74d1574a6..0de264d3b 100644
--- a/jcore-file-reader/pom.xml
+++ b/jcore-file-reader/pom.xml
@@ -5,7 +5,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
     <artifactId>jcore-file-reader</artifactId>
     <name>JCoRe File Reader</name>
diff --git a/jcore-file-reader/src/main/resources/de/julielab/jcore/reader/file/desc/jcore-file-reader.xml b/jcore-file-reader/src/main/resources/de/julielab/jcore/reader/file/desc/jcore-file-reader.xml
index 39a2be27e..f5b30ff00 100644
--- a/jcore-file-reader/src/main/resources/de/julielab/jcore/reader/file/desc/jcore-file-reader.xml
+++ b/jcore-file-reader/src/main/resources/de/julielab/jcore/reader/file/desc/jcore-file-reader.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>JCoRe File Reader</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIELab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-flair-ner-ae/component.meta b/jcore-flair-ner-ae/component.meta
index 5340cb3ce..09250babf 100644
--- a/jcore-flair-ner-ae/component.meta
+++ b/jcore-flair-ner-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-flair-ner-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe Flair NER AE"
 }
diff --git a/jcore-flair-ner-ae/pom.xml b/jcore-flair-ner-ae/pom.xml
index 5e9b35b49..9ad39de20 100644
--- a/jcore-flair-ner-ae/pom.xml
+++ b/jcore-flair-ner-ae/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
     <dependencies>
@@ -43,7 +43,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-annotation-adder-ae</artifactId>
-            <version>2.5.1-SNAPSHOT</version>
+            <version>2.6.0-SNAPSHOT</version>
         </dependency>
         <dependency>
             <groupId>ch.qos.logback</groupId>
diff --git a/jcore-flair-ner-ae/src/main/resources/de/julielab/jcore/ae/flairner/desc/jcore-flair-ner-ae.xml b/jcore-flair-ner-ae/src/main/resources/de/julielab/jcore/ae/flairner/desc/jcore-flair-ner-ae.xml
index 3d158471f..bccfd8ddc 100644
--- a/jcore-flair-ner-ae/src/main/resources/de/julielab/jcore/ae/flairner/desc/jcore-flair-ner-ae.xml
+++ b/jcore-flair-ner-ae/src/main/resources/de/julielab/jcore/ae/flairner/desc/jcore-flair-ner-ae.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe Flair Named Entity Recognizer</name>
         <description>This component starts a child process to a python interpreter and loads a Flair sequence tagging model. Sentences are taken from the CAS, sent to Flair for tagging and the results are written into the CAS. The annotation type to use can be configured. It must be a subtype of de.julielab.jcore.types.EntityMention. The tag of each entity is written to the specificType feature.</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <configurationParameters>
             <configurationParameter>
                 <name>AnnotationType</name>
diff --git a/jcore-flair-token-embedding-ae/component.meta b/jcore-flair-token-embedding-ae/component.meta
index 82dc90b84..cc7ef4681 100644
--- a/jcore-flair-token-embedding-ae/component.meta
+++ b/jcore-flair-token-embedding-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-flair-token-embedding-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe Flair Token Embedding Annotator"
 }
diff --git a/jcore-flair-token-embedding-ae/pom.xml b/jcore-flair-token-embedding-ae/pom.xml
index 789d1956f..483998eda 100644
--- a/jcore-flair-token-embedding-ae/pom.xml
+++ b/jcore-flair-token-embedding-ae/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-flair-token-embedding-ae/src/main/resources/de/julielab/jcore/ae/fte/desc/jcore-flair-token-embedding-ae.xml b/jcore-flair-token-embedding-ae/src/main/resources/de/julielab/jcore/ae/fte/desc/jcore-flair-token-embedding-ae.xml
index 81db110e0..3b342d593 100644
--- a/jcore-flair-token-embedding-ae/src/main/resources/de/julielab/jcore/ae/fte/desc/jcore-flair-token-embedding-ae.xml
+++ b/jcore-flair-token-embedding-ae/src/main/resources/de/julielab/jcore/ae/fte/desc/jcore-flair-token-embedding-ae.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe Flair Token Embedding Annotator</name>
     <description>Adds the Flair compatible embedding vectors to the token annotations.</description>
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <configurationParameters>
       <configurationParameter>
         <name>EmbeddingPath</name>
diff --git a/jcore-iexml-consumer/component.meta b/jcore-iexml-consumer/component.meta
index 621a4d340..0ec142ad7 100644
--- a/jcore-iexml-consumer/component.meta
+++ b/jcore-iexml-consumer/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-iexml-consumer",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe IEXML Consumer"
 }
diff --git a/jcore-iexml-consumer/pom.xml b/jcore-iexml-consumer/pom.xml
index 0cca60dfb..8924c020c 100644
--- a/jcore-iexml-consumer/pom.xml
+++ b/jcore-iexml-consumer/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
     <description>Generates stand-off IEXML files as used in the Mantra challenge.</description>
 
@@ -74,7 +74,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-mantra-xml-types</artifactId>
-            <version>2.5.1-SNAPSHOT</version>
+            <version>2.6.0-SNAPSHOT</version>
         </dependency>
         <dependency>
             <groupId>junit</groupId>
diff --git a/jcore-iexml-consumer/src/main/resources/de/julielab/jcore/consumer/iexml/desc/jcore-iexml-consumer.xml b/jcore-iexml-consumer/src/main/resources/de/julielab/jcore/consumer/iexml/desc/jcore-iexml-consumer.xml
index 3d3cfbee2..98c581be2 100644
--- a/jcore-iexml-consumer/src/main/resources/de/julielab/jcore/consumer/iexml/desc/jcore-iexml-consumer.xml
+++ b/jcore-iexml-consumer/src/main/resources/de/julielab/jcore/consumer/iexml/desc/jcore-iexml-consumer.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>JCoRe IEXML Consumer</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-iexml-reader/component.meta b/jcore-iexml-reader/component.meta
index eac29d502..15d5600c1 100644
--- a/jcore-iexml-reader/component.meta
+++ b/jcore-iexml-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-iexml-reader",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe IEXML Reader"
 }
diff --git a/jcore-iexml-reader/pom.xml b/jcore-iexml-reader/pom.xml
index 94b02b301..2ce284fda 100644
--- a/jcore-iexml-reader/pom.xml
+++ b/jcore-iexml-reader/pom.xml
@@ -10,7 +10,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
     <build>
@@ -75,7 +75,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-mantra-xml-types</artifactId>
-            <version>2.5.1-SNAPSHOT</version>
+            <version>2.6.0-SNAPSHOT</version>
         </dependency>
         <dependency>
             <groupId>junit</groupId>
diff --git a/jcore-iexml-reader/src/main/resources/de/julielab/jcore/reader/iexml/desc/jcore-iexml-reader.xml b/jcore-iexml-reader/src/main/resources/de/julielab/jcore/reader/iexml/desc/jcore-iexml-reader.xml
index 89f48191c..933482a5a 100644
--- a/jcore-iexml-reader/src/main/resources/de/julielab/jcore/reader/iexml/desc/jcore-iexml-reader.xml
+++ b/jcore-iexml-reader/src/main/resources/de/julielab/jcore/reader/iexml/desc/jcore-iexml-reader.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>JCoRe IEXML Reader</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-ign-reader/component.meta b/jcore-ign-reader/component.meta
index 9ea912d40..798abe608 100644
--- a/jcore-ign-reader/component.meta
+++ b/jcore-ign-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-ign-reader",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe IGN Reader"
 }
diff --git a/jcore-ign-reader/pom.xml b/jcore-ign-reader/pom.xml
index f1f2ebfd5..df7d561d4 100644
--- a/jcore-ign-reader/pom.xml
+++ b/jcore-ign-reader/pom.xml
@@ -5,7 +5,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
     <artifactId>jcore-ign-reader</artifactId>
 
diff --git a/jcore-ign-reader/src/main/resources/de/julielab/jcore/reader/ign/desc/jcore-ign-reader.xml b/jcore-ign-reader/src/main/resources/de/julielab/jcore/reader/ign/desc/jcore-ign-reader.xml
index 91d8abac5..3205766bd 100644
--- a/jcore-ign-reader/src/main/resources/de/julielab/jcore/reader/ign/desc/jcore-ign-reader.xml
+++ b/jcore-ign-reader/src/main/resources/de/julielab/jcore/reader/ign/desc/jcore-ign-reader.xml
@@ -7,7 +7,7 @@
     <description>The IGNReader reads IGN corpus files in BioC-format.
 
 There are XML files comprising the actual text (as well as passage and sentence annotations) and there are separate XML files comprising the annotations.</description>
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-iob-consumer/component.meta b/jcore-iob-consumer/component.meta
index faa7e6b5e..9e0e62410 100644
--- a/jcore-iob-consumer/component.meta
+++ b/jcore-iob-consumer/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-iob-consumer",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe CAS to IOB Consumer"
 }
diff --git a/jcore-iob-consumer/pom.xml b/jcore-iob-consumer/pom.xml
index 7625d1c8c..e09d8591a 100644
--- a/jcore-iob-consumer/pom.xml
+++ b/jcore-iob-consumer/pom.xml
@@ -10,7 +10,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-iob-consumer/src/main/resources/de/julielab/jcore/consumer/cas2iob/desc/jcore-iob-consumer.xml b/jcore-iob-consumer/src/main/resources/de/julielab/jcore/consumer/cas2iob/desc/jcore-iob-consumer.xml
index a333e4aaf..72b818213 100644
--- a/jcore-iob-consumer/src/main/resources/de/julielab/jcore/consumer/cas2iob/desc/jcore-iob-consumer.xml
+++ b/jcore-iob-consumer/src/main/resources/de/julielab/jcore/consumer/cas2iob/desc/jcore-iob-consumer.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe IOB Writer</name>
         <description>This component help to write CAS entity or chunk annotations into a text file in IOB format.</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <configurationParameters>
             <configurationParameter>
                 <name>outFolder</name>
diff --git a/jcore-iob-consumer/src/test/resources/de/julielab/jcore/consumer/cas2iob/desc/ToIOBConsumerTest.xml b/jcore-iob-consumer/src/test/resources/de/julielab/jcore/consumer/cas2iob/desc/ToIOBConsumerTest.xml
index 36199e77d..deb5a9318 100644
--- a/jcore-iob-consumer/src/test/resources/de/julielab/jcore/consumer/cas2iob/desc/ToIOBConsumerTest.xml
+++ b/jcore-iob-consumer/src/test/resources/de/julielab/jcore/consumer/cas2iob/desc/ToIOBConsumerTest.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>ToIOBConsumerTest</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-iob-consumer/src/test/resources/de/julielab/jcore/consumer/cas2iob/types/TestTypeSystem.xml b/jcore-iob-consumer/src/test/resources/de/julielab/jcore/consumer/cas2iob/types/TestTypeSystem.xml
index 7b3f82a25..0a7a01cf3 100644
--- a/jcore-iob-consumer/src/test/resources/de/julielab/jcore/consumer/cas2iob/types/TestTypeSystem.xml
+++ b/jcore-iob-consumer/src/test/resources/de/julielab/jcore/consumer/cas2iob/types/TestTypeSystem.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
 <name>TestTypeSystem</name>
 <description>including julie morpho-syntax and semantics</description>
-<version>2.5.1-SNAPSHOT</version>
+<version>2.6.0-SNAPSHOT</version>
         <vendor />
 <imports>
 <import name="de.julielab.jcore.types.jcore-morpho-syntax-types" />
diff --git a/jcore-jemas-ae/src/main/resources/de/julielab/jcore/ae/jemas/desc/jcore-jemas-ae.xml b/jcore-jemas-ae/src/main/resources/de/julielab/jcore/ae/jemas/desc/jcore-jemas-ae.xml
index 436c249b2..c44952183 100644
--- a/jcore-jemas-ae/src/main/resources/de/julielab/jcore/ae/jemas/desc/jcore-jemas-ae.xml
+++ b/jcore-jemas-ae/src/main/resources/de/julielab/jcore/ae/jemas/desc/jcore-jemas-ae.xml
@@ -5,7 +5,7 @@
   <analysisEngineMetaData>
     <name>JCoRe JEmAS</name>
     <description>A UIMA-based implementation of the core functionality of JEmAS, the Jena Emotion Analysis System.</description>
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-jnet-ae/component.meta b/jcore-jnet-ae/component.meta
index dbdfe4186..74ba27806 100644
--- a/jcore-jnet-ae/component.meta
+++ b/jcore-jnet-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-jnet-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe JNET AE"
 }
diff --git a/jcore-jnet-ae/pom.xml b/jcore-jnet-ae/pom.xml
index 6eb5eb572..ea8a89340 100644
--- a/jcore-jnet-ae/pom.xml
+++ b/jcore-jnet-ae/pom.xml
@@ -11,7 +11,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
     <build>
diff --git a/jcore-jnet-ae/src/main/resources/de/julielab/jcore/ae/jnet/desc/jcore-jnet-ae.xml b/jcore-jnet-ae/src/main/resources/de/julielab/jcore/ae/jnet/desc/jcore-jnet-ae.xml
index db23c98b2..8f602da33 100644
--- a/jcore-jnet-ae/src/main/resources/de/julielab/jcore/ae/jnet/desc/jcore-jnet-ae.xml
+++ b/jcore-jnet-ae/src/main/resources/de/julielab/jcore/ae/jnet/desc/jcore-jnet-ae.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe JNET AE</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-jnet-ae/src/test/resources/de/julielab/jcore/ae/jnet/uima/EntityAnnotatorTest.xml b/jcore-jnet-ae/src/test/resources/de/julielab/jcore/ae/jnet/uima/EntityAnnotatorTest.xml
index 12859863d..34cfdc1e9 100644
--- a/jcore-jnet-ae/src/test/resources/de/julielab/jcore/ae/jnet/uima/EntityAnnotatorTest.xml
+++ b/jcore-jnet-ae/src/test/resources/de/julielab/jcore/ae/jnet/uima/EntityAnnotatorTest.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>EntityTaggerAnnotator</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>julielab</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-jnet-ae/src/test/resources/de/julielab/jcore/ae/jnet/uima/tsDescriptor.xml b/jcore-jnet-ae/src/test/resources/de/julielab/jcore/ae/jnet/uima/tsDescriptor.xml
index b26a4688d..a71ebef34 100644
--- a/jcore-jnet-ae/src/test/resources/de/julielab/jcore/ae/jnet/uima/tsDescriptor.xml
+++ b/jcore-jnet-ae/src/test/resources/de/julielab/jcore/ae/jnet/uima/tsDescriptor.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
 <name>aceComplete</name>
 <description />
-<version>2.5.1-SNAPSHOT</version>
+<version>2.6.0-SNAPSHOT</version>
         <vendor />
 <imports>
         <import name="de.julielab.jcore.types.jcore-basic-types" />
diff --git a/jcore-jpos-ae/component.meta b/jcore-jpos-ae/component.meta
index 86f05e5d5..eb0b7ae53 100644
--- a/jcore-jpos-ae/component.meta
+++ b/jcore-jpos-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-jpos-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe JPOS AE"
 }
diff --git a/jcore-jpos-ae/pom.xml b/jcore-jpos-ae/pom.xml
index 480afdf16..87cbc7fc5 100644
--- a/jcore-jpos-ae/pom.xml
+++ b/jcore-jpos-ae/pom.xml
@@ -11,7 +11,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
     <build>
diff --git a/jcore-jpos-ae/src/main/resources/de/julielab/jcore/ae/jpos/desc/jcore-jpos.xml b/jcore-jpos-ae/src/main/resources/de/julielab/jcore/ae/jpos/desc/jcore-jpos.xml
index be5593812..37870472c 100644
--- a/jcore-jpos-ae/src/main/resources/de/julielab/jcore/ae/jpos/desc/jcore-jpos.xml
+++ b/jcore-jpos-ae/src/main/resources/de/julielab/jcore/ae/jpos/desc/jcore-jpos.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe JPOS AE</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
    <configurationParameters>
       <configurationParameter>
diff --git a/jcore-jpos-ae/src/test/resources/POSTagAnnotatorTest.xml b/jcore-jpos-ae/src/test/resources/POSTagAnnotatorTest.xml
index 384265369..5a179961d 100644
--- a/jcore-jpos-ae/src/test/resources/POSTagAnnotatorTest.xml
+++ b/jcore-jpos-ae/src/test/resources/POSTagAnnotatorTest.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JPOSAnnotator</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab</vendor>
    <configurationParameters>
       <configurationParameter>
diff --git a/jcore-jsbd-ae/component.meta b/jcore-jsbd-ae/component.meta
index 025d9b87f..5da0bb330 100644
--- a/jcore-jsbd-ae/component.meta
+++ b/jcore-jsbd-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-jsbd-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe Sentence Annotator"
 }
diff --git a/jcore-jsbd-ae/pom.xml b/jcore-jsbd-ae/pom.xml
index d5622f97b..964b14ef9 100644
--- a/jcore-jsbd-ae/pom.xml
+++ b/jcore-jsbd-ae/pom.xml
@@ -11,7 +11,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
     <build>
diff --git a/jcore-jsbd-ae/src/main/resources/de/julielab/jcore/ae/jsbd/desc/jcore-jsbd-ae.xml b/jcore-jsbd-ae/src/main/resources/de/julielab/jcore/ae/jsbd/desc/jcore-jsbd-ae.xml
index 8bb60791a..409bda28e 100644
--- a/jcore-jsbd-ae/src/main/resources/de/julielab/jcore/ae/jsbd/desc/jcore-jsbd-ae.xml
+++ b/jcore-jsbd-ae/src/main/resources/de/julielab/jcore/ae/jsbd/desc/jcore-jsbd-ae.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>de.julielab.jcore.ae.jsbd.main.SentenceAnnotator</name>
         <description>Descriptor automatically generated by uimaFIT</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <vendor>de.julielab.jcore.ae.jsbd.main</vendor>
         <configurationParameters>
             <configurationParameter>
diff --git a/jcore-jsbd-ae/src/test/resources/de/julielab/jcore/ae/jsbd/desc/SentenceAnnotatorTest.xml b/jcore-jsbd-ae/src/test/resources/de/julielab/jcore/ae/jsbd/desc/SentenceAnnotatorTest.xml
index 66314d4bf..1e1aaa26e 100644
--- a/jcore-jsbd-ae/src/test/resources/de/julielab/jcore/ae/jsbd/desc/SentenceAnnotatorTest.xml
+++ b/jcore-jsbd-ae/src/test/resources/de/julielab/jcore/ae/jsbd/desc/SentenceAnnotatorTest.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe Sentence Annotator</name>
     <description>This is the UIMA Wrapper for the JULIE Sentence Boundary Detector.</description>
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-jsbd-ae/src/test/resources/de/julielab/jcore/ae/jsbd/desc/SentenceAnnotator_with-scope_Test.xml b/jcore-jsbd-ae/src/test/resources/de/julielab/jcore/ae/jsbd/desc/SentenceAnnotator_with-scope_Test.xml
index 63b003324..28c03ebe8 100644
--- a/jcore-jsbd-ae/src/test/resources/de/julielab/jcore/ae/jsbd/desc/SentenceAnnotator_with-scope_Test.xml
+++ b/jcore-jsbd-ae/src/test/resources/de/julielab/jcore/ae/jsbd/desc/SentenceAnnotator_with-scope_Test.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe Sentence Annotator</name>
     <description>This is the UIMA Wrapper for the JULIE Sentence Boundary Detector.</description>
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-jsbd-ae/src/test/resources/de/julielab/jcore/ae/jsbd/desc/paragraph-scope-type.xml b/jcore-jsbd-ae/src/test/resources/de/julielab/jcore/ae/jsbd/desc/paragraph-scope-type.xml
index 282896d88..cd826ac73 100644
--- a/jcore-jsbd-ae/src/test/resources/de/julielab/jcore/ae/jsbd/desc/paragraph-scope-type.xml
+++ b/jcore-jsbd-ae/src/test/resources/de/julielab/jcore/ae/jsbd/desc/paragraph-scope-type.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>test-entity-type.xml</name>
   <description>A mini type system with one type only, used for testing consistency preservation</description>
-  <version>2.5.1-SNAPSHOT</version>
+  <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <imports>
     <import name="de.julielab.jcore.types.jcore-basic-types" />
diff --git a/jcore-jtbd-ae/component.meta b/jcore-jtbd-ae/component.meta
index 377c042d7..aa682f5da 100644
--- a/jcore-jtbd-ae/component.meta
+++ b/jcore-jtbd-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-jtbd-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe Token Annotator"
 }
diff --git a/jcore-jtbd-ae/pom.xml b/jcore-jtbd-ae/pom.xml
index 03523ba12..0c7e7d127 100644
--- a/jcore-jtbd-ae/pom.xml
+++ b/jcore-jtbd-ae/pom.xml
@@ -10,7 +10,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
     <build>
diff --git a/jcore-jtbd-ae/src/main/resources/de/julielab/jcore/ae/jtbd/desc/jcore-jtbd.xml b/jcore-jtbd-ae/src/main/resources/de/julielab/jcore/ae/jtbd/desc/jcore-jtbd.xml
index 337463371..a207b07d1 100644
--- a/jcore-jtbd-ae/src/main/resources/de/julielab/jcore/ae/jtbd/desc/jcore-jtbd.xml
+++ b/jcore-jtbd-ae/src/main/resources/de/julielab/jcore/ae/jtbd/desc/jcore-jtbd.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe Token Annotator</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-jtbd-ae/src/test/resources/de/julielab/jcore/ae/jtbd/desc/TokenAnnotatorTest.xml b/jcore-jtbd-ae/src/test/resources/de/julielab/jcore/ae/jtbd/desc/TokenAnnotatorTest.xml
index 6a670af49..083790957 100644
--- a/jcore-jtbd-ae/src/test/resources/de/julielab/jcore/ae/jtbd/desc/TokenAnnotatorTest.xml
+++ b/jcore-jtbd-ae/src/test/resources/de/julielab/jcore/ae/jtbd/desc/TokenAnnotatorTest.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe Token Annotator</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-julielab-entity-evaluator-consumer/component.meta b/jcore-julielab-entity-evaluator-consumer/component.meta
index 9ffe2edc3..dc65ea34a 100644
--- a/jcore-julielab-entity-evaluator-consumer/component.meta
+++ b/jcore-julielab-entity-evaluator-consumer/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-julielab-entity-evaluator-consumer",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe JULIE Lab Entity Evaluator Converter"
 }
diff --git a/jcore-julielab-entity-evaluator-consumer/pom.xml b/jcore-julielab-entity-evaluator-consumer/pom.xml
index 7ad4d9597..35ae8b960 100644
--- a/jcore-julielab-entity-evaluator-consumer/pom.xml
+++ b/jcore-julielab-entity-evaluator-consumer/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
     <artifactId>jcore-julielab-entity-evaluator-consumer</artifactId>
     <name>JCoRe JULIE Lab Entity Evaluator Converter</name>
diff --git a/jcore-julielab-entity-evaluator-consumer/src/main/resources/de/julielab/jcore/consumer/entityevaluator/desc/jcore-julielab-entity-evaluator-consumer.xml b/jcore-julielab-entity-evaluator-consumer/src/main/resources/de/julielab/jcore/consumer/entityevaluator/desc/jcore-julielab-entity-evaluator-consumer.xml
index 4ffda6700..51c7fc6af 100644
--- a/jcore-julielab-entity-evaluator-consumer/src/main/resources/de/julielab/jcore/consumer/entityevaluator/desc/jcore-julielab-entity-evaluator-consumer.xml
+++ b/jcore-julielab-entity-evaluator-consumer/src/main/resources/de/julielab/jcore/consumer/entityevaluator/desc/jcore-julielab-entity-evaluator-consumer.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe Entity Evaluator and TSV Consumer</name>
         <description>This component was originally created to output the tab separated format used the JULIE Entity Evaluator. However, this component can be used to create a TSV file from any annotation or annotation set. The component allows to define columns by specifying the annotation type to draw feature values from and a feature path that specifies the location of the desired feature. All feature paths will be applied to each configured annotation, returning null values if an annotation does not exhibit a value for a column's feature path.</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
         <configurationParameters>
             <configurationParameter>
diff --git a/jcore-likelihood-assignment-ae/component.meta b/jcore-likelihood-assignment-ae/component.meta
index 671dbf79e..1055a51ab 100644
--- a/jcore-likelihood-assignment-ae/component.meta
+++ b/jcore-likelihood-assignment-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-likelihood-assignment-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe Likelihood Assignment AE"
 }
diff --git a/jcore-likelihood-assignment-ae/pom.xml b/jcore-likelihood-assignment-ae/pom.xml
index e49c1a243..d053fef46 100644
--- a/jcore-likelihood-assignment-ae/pom.xml
+++ b/jcore-likelihood-assignment-ae/pom.xml
@@ -10,7 +10,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-likelihood-assignment-ae/src/main/resources/de/julielab/jcore/ae/likelihoodassignment/desc/jcore-likelihood-assignment-ae.xml b/jcore-likelihood-assignment-ae/src/main/resources/de/julielab/jcore/ae/likelihoodassignment/desc/jcore-likelihood-assignment-ae.xml
index 14bc6f60a..1a6b9b081 100644
--- a/jcore-likelihood-assignment-ae/src/main/resources/de/julielab/jcore/ae/likelihoodassignment/desc/jcore-likelihood-assignment-ae.xml
+++ b/jcore-likelihood-assignment-ae/src/main/resources/de/julielab/jcore/ae/likelihoodassignment/desc/jcore-likelihood-assignment-ae.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe Likelihood Assignment AE</name>
         <description>Analysis Engine to assign likelihood indicators to their corresponding entities and events.</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <configurationParameters />
         <configurationParameterSettings />
         <typeSystemDescription>
diff --git a/jcore-likelihood-detection-ae/component.meta b/jcore-likelihood-detection-ae/component.meta
index e58826719..3f80906be 100644
--- a/jcore-likelihood-detection-ae/component.meta
+++ b/jcore-likelihood-detection-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-likelihood-detection-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe Likelihood Detection AE"
 }
diff --git a/jcore-likelihood-detection-ae/pom.xml b/jcore-likelihood-detection-ae/pom.xml
index c68a79a73..eb4aaa51e 100644
--- a/jcore-likelihood-detection-ae/pom.xml
+++ b/jcore-likelihood-detection-ae/pom.xml
@@ -10,7 +10,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-likelihood-detection-ae/src/main/resources/de/julielab/jcore/ae/likelihooddetection/desc/jcore-likelihood-detection-ae.xml b/jcore-likelihood-detection-ae/src/main/resources/de/julielab/jcore/ae/likelihooddetection/desc/jcore-likelihood-detection-ae.xml
index 81e9c76f1..bbd5b55bf 100644
--- a/jcore-likelihood-detection-ae/src/main/resources/de/julielab/jcore/ae/likelihooddetection/desc/jcore-likelihood-detection-ae.xml
+++ b/jcore-likelihood-detection-ae/src/main/resources/de/julielab/jcore/ae/likelihooddetection/desc/jcore-likelihood-detection-ae.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe Likelihood Detection AE</name>
         <description>Analysis Engine to detect epistemic modal expressions and assign the appropriate likelihood category.</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <configurationParameters>
             <configurationParameter>
                 <name>LikelihoodDict</name>
diff --git a/jcore-line-multiplier/component.meta b/jcore-line-multiplier/component.meta
index 432aa6b6a..864a9954c 100644
--- a/jcore-line-multiplier/component.meta
+++ b/jcore-line-multiplier/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-line-multiplier",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe Line Multiplier"
 }
diff --git a/jcore-line-multiplier/pom.xml b/jcore-line-multiplier/pom.xml
index 12aa067d8..f81a228ca 100644
--- a/jcore-line-multiplier/pom.xml
+++ b/jcore-line-multiplier/pom.xml
@@ -10,7 +10,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-line-multiplier/src/main/resources/de/julielab/jcore/multiplier/line/desc/jcore-line-multiplier-ae.xml b/jcore-line-multiplier/src/main/resources/de/julielab/jcore/multiplier/line/desc/jcore-line-multiplier-ae.xml
index 69ff063cd..524ca369e 100644
--- a/jcore-line-multiplier/src/main/resources/de/julielab/jcore/multiplier/line/desc/jcore-line-multiplier-ae.xml
+++ b/jcore-line-multiplier/src/main/resources/de/julielab/jcore/multiplier/line/desc/jcore-line-multiplier-ae.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe Line Multiplier</name>
     <description>Splits incoming CAS document texts on line breaks and returns one CAS for each non-blank line.</description>
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <configurationParameters>
       <configurationParameter>
         <name>NumberLinesPerCAS</name>
diff --git a/jcore-line-multiplier/target/classes/de/julielab/jcore/multiplier/line/desc/jcore-line-multiplier-ae.xml b/jcore-line-multiplier/target/classes/de/julielab/jcore/multiplier/line/desc/jcore-line-multiplier-ae.xml
index 69ff063cd..524ca369e 100644
--- a/jcore-line-multiplier/target/classes/de/julielab/jcore/multiplier/line/desc/jcore-line-multiplier-ae.xml
+++ b/jcore-line-multiplier/target/classes/de/julielab/jcore/multiplier/line/desc/jcore-line-multiplier-ae.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe Line Multiplier</name>
     <description>Splits incoming CAS document texts on line breaks and returns one CAS for each non-blank line.</description>
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <configurationParameters>
       <configurationParameter>
         <name>NumberLinesPerCAS</name>
diff --git a/jcore-lingpipe-porterstemmer-ae/component.meta b/jcore-lingpipe-porterstemmer-ae/component.meta
index f0adaa9a1..af7dce999 100644
--- a/jcore-lingpipe-porterstemmer-ae/component.meta
+++ b/jcore-lingpipe-porterstemmer-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-lingpipe-porterstemmer-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe Lingpipe Porter Stemmer AE"
 }
diff --git a/jcore-lingpipe-porterstemmer-ae/pom.xml b/jcore-lingpipe-porterstemmer-ae/pom.xml
index 6a10f10c5..6df6ba486 100644
--- a/jcore-lingpipe-porterstemmer-ae/pom.xml
+++ b/jcore-lingpipe-porterstemmer-ae/pom.xml
@@ -5,7 +5,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
     <artifactId>jcore-lingpipe-porterstemmer-ae</artifactId>
     <name>JCoRe Lingpipe Porter Stemmer AE</name>
diff --git a/jcore-lingpipe-porterstemmer-ae/src/main/resources/de/julielab/jcore/ae/lingpipe/porterstemmer/desc/jcore-lingpipe-porterstemmer-ae.xml b/jcore-lingpipe-porterstemmer-ae/src/main/resources/de/julielab/jcore/ae/lingpipe/porterstemmer/desc/jcore-lingpipe-porterstemmer-ae.xml
index b959cf460..d24a10c0d 100644
--- a/jcore-lingpipe-porterstemmer-ae/src/main/resources/de/julielab/jcore/ae/lingpipe/porterstemmer/desc/jcore-lingpipe-porterstemmer-ae.xml
+++ b/jcore-lingpipe-porterstemmer-ae/src/main/resources/de/julielab/jcore/ae/lingpipe/porterstemmer/desc/jcore-lingpipe-porterstemmer-ae.xml
@@ -5,7 +5,7 @@
   <analysisEngineMetaData>
     <name>JCoRe Lingpipe Porterstemmer AE</name>
     <description>Adds a StemmedForm to each token in the CAS. The offsets and the value feature of each StemmedForm are set to the stem as returned by the Porter stemmer algorithm as implemented by Lingpipe.</description>
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab, Germany</vendor>
     <configurationParameters />
     <configurationParameterSettings />
diff --git a/jcore-lingpipegazetteer-ae/component.meta b/jcore-lingpipegazetteer-ae/component.meta
index 0a77648a3..4ba7d7658 100644
--- a/jcore-lingpipegazetteer-ae/component.meta
+++ b/jcore-lingpipegazetteer-ae/component.meta
@@ -18,7 +18,7 @@
     "maven-artifact": {
         "artifactId": "jcore-lingpipe-gazetteer-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe Lingpipe Gazetteer AE"
 }
diff --git a/jcore-lingpipegazetteer-ae/pom.xml b/jcore-lingpipegazetteer-ae/pom.xml
index 1d39efcf8..080a61539 100644
--- a/jcore-lingpipegazetteer-ae/pom.xml
+++ b/jcore-lingpipegazetteer-ae/pom.xml
@@ -10,7 +10,7 @@
 	<parent>
 		<groupId>de.julielab</groupId>
 		<artifactId>jcore-base</artifactId>
-		<version>2.5.1-SNAPSHOT</version>
+		<version>2.6.0-SNAPSHOT</version>
 	</parent>
 
 
diff --git a/jcore-lingpipegazetteer-ae/src/main/resources/de/julielab/jcore/ae/lingpipegazetteer/desc/jcore-lingpipe-gazetteer-ae-configurable-resource.xml b/jcore-lingpipegazetteer-ae/src/main/resources/de/julielab/jcore/ae/lingpipegazetteer/desc/jcore-lingpipe-gazetteer-ae-configurable-resource.xml
index 1f4e5a34e..e8895177a 100644
--- a/jcore-lingpipegazetteer-ae/src/main/resources/de/julielab/jcore/ae/lingpipegazetteer/desc/jcore-lingpipe-gazetteer-ae-configurable-resource.xml
+++ b/jcore-lingpipegazetteer-ae/src/main/resources/de/julielab/jcore/ae/lingpipegazetteer/desc/jcore-lingpipe-gazetteer-ae-configurable-resource.xml
@@ -16,7 +16,7 @@
             embedded into the descriptor. The current parameter settings will work but may be changed. Refer to
             https://github.com/JULIELab/jcore-base/tree/master/jcore-lingpipegazetteer-ae for more information.
         </description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <vendor>julielab</vendor>
         <configurationParameters>
             <configurationParameter>
diff --git a/jcore-lingpipegazetteer-ae/src/main/resources/de/julielab/jcore/ae/lingpipegazetteer/desc/jcore-lingpipe-gazetteer-ae.xml b/jcore-lingpipegazetteer-ae/src/main/resources/de/julielab/jcore/ae/lingpipegazetteer/desc/jcore-lingpipe-gazetteer-ae.xml
index b168cefa2..e448c764c 100644
--- a/jcore-lingpipegazetteer-ae/src/main/resources/de/julielab/jcore/ae/lingpipegazetteer/desc/jcore-lingpipe-gazetteer-ae.xml
+++ b/jcore-lingpipegazetteer-ae/src/main/resources/de/julielab/jcore/ae/lingpipegazetteer/desc/jcore-lingpipe-gazetteer-ae.xml
@@ -14,7 +14,7 @@
             and some parameter settings for dictionary processing and tagging. Refer to
             https://github.com/JULIELab/jcore-base/tree/master/jcore-lingpipegazetteer-ae for more information.
         </description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <vendor>julielab</vendor>
         <configurationParameters>
             <configurationParameter>
diff --git a/jcore-lingpipegazetteer-ae/src/test/resources/ApproxGazetteerAnnotatorTest.xml b/jcore-lingpipegazetteer-ae/src/test/resources/ApproxGazetteerAnnotatorTest.xml
index bfd3827d0..9e4cc5a3d 100644
--- a/jcore-lingpipegazetteer-ae/src/test/resources/ApproxGazetteerAnnotatorTest.xml
+++ b/jcore-lingpipegazetteer-ae/src/test/resources/ApproxGazetteerAnnotatorTest.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>GazetteerAnnotator</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>julielab</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-lingpipegazetteer-ae/src/test/resources/ExactGazetteerAnnotatorTest.xml b/jcore-lingpipegazetteer-ae/src/test/resources/ExactGazetteerAnnotatorTest.xml
index eeebe281b..2c6e0779a 100644
--- a/jcore-lingpipegazetteer-ae/src/test/resources/ExactGazetteerAnnotatorTest.xml
+++ b/jcore-lingpipegazetteer-ae/src/test/resources/ExactGazetteerAnnotatorTest.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>GazetteerAnnotator</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>julielab</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-lingscope-ae/component.meta b/jcore-lingscope-ae/component.meta
index 3a5fc4991..3a73c19a3 100644
--- a/jcore-lingscope-ae/component.meta
+++ b/jcore-lingscope-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-lingscope-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe Lingscope AE"
 }
diff --git a/jcore-lingscope-ae/pom.xml b/jcore-lingscope-ae/pom.xml
index 28836bd2b..4c5a15b41 100644
--- a/jcore-lingscope-ae/pom.xml
+++ b/jcore-lingscope-ae/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-lingscope-ae/src/main/resources/de/julielab/jcore/ae/lingscope/desc/jcore-lingscope-ae.xml b/jcore-lingscope-ae/src/main/resources/de/julielab/jcore/ae/lingscope/desc/jcore-lingscope-ae.xml
index 164a2ed7e..dea73edd6 100644
--- a/jcore-lingscope-ae/src/main/resources/de/julielab/jcore/ae/lingscope/desc/jcore-lingscope-ae.xml
+++ b/jcore-lingscope-ae/src/main/resources/de/julielab/jcore/ae/lingscope/desc/jcore-lingscope-ae.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe Lingscope AE</name>
         <description>This component uses the Lingscope negation/hedge detection algorithm and models to annotate negation/hedge cues and the scope to which the cues apply.</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <configurationParameters>
             <configurationParameter>
                 <name>CueModel</name>
diff --git a/jcore-linnaeus-species-ae/component.meta b/jcore-linnaeus-species-ae/component.meta
index a4789114c..8bc1674bb 100644
--- a/jcore-linnaeus-species-ae/component.meta
+++ b/jcore-linnaeus-species-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-linnaeus-species-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe Linnaeus Species Tagger"
 }
diff --git a/jcore-linnaeus-species-ae/pom.xml b/jcore-linnaeus-species-ae/pom.xml
index 9e5c99785..68c29ba14 100644
--- a/jcore-linnaeus-species-ae/pom.xml
+++ b/jcore-linnaeus-species-ae/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
     <artifactId>jcore-linnaeus-species-ae</artifactId>
     <name>JCoRe Linnaeus Species Tagger</name>
diff --git a/jcore-linnaeus-species-ae/src/main/resources/de/julielab/jcore/ae/linnaeus/desc/jcore-linnaeus-ae.xml b/jcore-linnaeus-species-ae/src/main/resources/de/julielab/jcore/ae/linnaeus/desc/jcore-linnaeus-ae.xml
index e89d8d5f3..d3ab9d56b 100644
--- a/jcore-linnaeus-species-ae/src/main/resources/de/julielab/jcore/ae/linnaeus/desc/jcore-linnaeus-ae.xml
+++ b/jcore-linnaeus-species-ae/src/main/resources/de/julielab/jcore/ae/linnaeus/desc/jcore-linnaeus-ae.xml
@@ -5,7 +5,7 @@
     <annotatorImplementationName>de.julielab.jcore.ae.linnaeus.LinnaeusSpeciesAnnotator</annotatorImplementationName>
     <analysisEngineMetaData>
         <name>JCore LINNAEUS Species AE</name>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <configurationParameters />
         <configurationParameterSettings />
         <typeSystemDescription>
diff --git a/jcore-mantra-xml-types/pom.xml b/jcore-mantra-xml-types/pom.xml
index 4108f1f6a..ea6b45d42 100644
--- a/jcore-mantra-xml-types/pom.xml
+++ b/jcore-mantra-xml-types/pom.xml
@@ -6,7 +6,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
     <name>JCoRe Mantra XML Types</name>
     <url>https://github.com/JULIELab/jcore-base/tree/master/jcore-mantra-xml-types</url>
diff --git a/jcore-medxn-ae/component.meta b/jcore-medxn-ae/component.meta
index d10bc8ded..c1c026762 100644
--- a/jcore-medxn-ae/component.meta
+++ b/jcore-medxn-ae/component.meta
@@ -22,7 +22,7 @@
     "maven-artifact": {
         "artifactId": "jcore-medxn-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe MedXN"
 }
diff --git a/jcore-medxn-ae/pom.xml b/jcore-medxn-ae/pom.xml
index 94a1d35ee..aac277c21 100644
--- a/jcore-medxn-ae/pom.xml
+++ b/jcore-medxn-ae/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
     <artifactId>jcore-medxn-ae</artifactId>
     <name>JCoRe MedXN</name>
diff --git a/jcore-medxn-ae/src/main/resources/de/julielab/jcore/ae/medxn/desc/MedNormAE.xml b/jcore-medxn-ae/src/main/resources/de/julielab/jcore/ae/medxn/desc/MedNormAE.xml
index e92306340..be6bb7375 100644
--- a/jcore-medxn-ae/src/main/resources/de/julielab/jcore/ae/medxn/desc/MedNormAE.xml
+++ b/jcore-medxn-ae/src/main/resources/de/julielab/jcore/ae/medxn/desc/MedNormAE.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>de.julielab.jcore.medxn.ae.desc.MedNormAE</name>
     <description>make a normalized medication description based on  RxNorm standard </description>
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor />
     <configurationParameters />
     <configurationParameterSettings />
diff --git a/jcore-medxn-ae/src/main/resources/de/julielab/jcore/ae/medxn/desc/jcore-medxn-ae-attributes-german.xml b/jcore-medxn-ae/src/main/resources/de/julielab/jcore/ae/medxn/desc/jcore-medxn-ae-attributes-german.xml
index 94393ddbf..199f1607d 100644
--- a/jcore-medxn-ae/src/main/resources/de/julielab/jcore/ae/medxn/desc/jcore-medxn-ae-attributes-german.xml
+++ b/jcore-medxn-ae/src/main/resources/de/julielab/jcore/ae/medxn/desc/jcore-medxn-ae-attributes-german.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>de.julielab.jcore.medxn.ae.desc.MedAttrAE</name>
     <description>medication attribute tagger using regEx</description>
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor />
     <configurationParameters searchStrategy="language_fallback" />
     <configurationParameterSettings />
diff --git a/jcore-medxn-ae/src/main/resources/de/julielab/jcore/ae/medxn/desc/jcore-medxn-ae-extractor-german.xml b/jcore-medxn-ae/src/main/resources/de/julielab/jcore/ae/medxn/desc/jcore-medxn-ae-extractor-german.xml
index afdec1ce4..25468e126 100644
--- a/jcore-medxn-ae/src/main/resources/de/julielab/jcore/ae/medxn/desc/jcore-medxn-ae-extractor-german.xml
+++ b/jcore-medxn-ae/src/main/resources/de/julielab/jcore/ae/medxn/desc/jcore-medxn-ae-extractor-german.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>de.julielab.jcore.medxn.ae.desc.jcore-medxn-ae-extractor-german</name>
     <description>Associate medication and the corresponding attributes</description>
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor />
     <configurationParameters />
     <configurationParameterSettings />
diff --git a/jcore-msdoc-reader/component.meta b/jcore-msdoc-reader/component.meta
index 28d3243a0..eac523555 100644
--- a/jcore-msdoc-reader/component.meta
+++ b/jcore-msdoc-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-msdoc-reader",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe MSdoc Reader"
 }
diff --git a/jcore-msdoc-reader/pom.xml b/jcore-msdoc-reader/pom.xml
index ed305d952..74d9d3daa 100644
--- a/jcore-msdoc-reader/pom.xml
+++ b/jcore-msdoc-reader/pom.xml
@@ -5,7 +5,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
     <artifactId>jcore-msdoc-reader</artifactId>
     <name>JCoRe MSdoc Reader</name>
diff --git a/jcore-msdoc-reader/src/main/resources/de/julielab/jcore/reader/msdoc/desc/jcore-msdoc-reader.xml b/jcore-msdoc-reader/src/main/resources/de/julielab/jcore/reader/msdoc/desc/jcore-msdoc-reader.xml
index 18a03952b..146d1f488 100644
--- a/jcore-msdoc-reader/src/main/resources/de/julielab/jcore/reader/msdoc/desc/jcore-msdoc-reader.xml
+++ b/jcore-msdoc-reader/src/main/resources/de/julielab/jcore/reader/msdoc/desc/jcore-msdoc-reader.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>JCoRe MSdoc Reader</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIELab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-mstparser-ae/component.meta b/jcore-mstparser-ae/component.meta
index d58972c4e..ba2e43335 100644
--- a/jcore-mstparser-ae/component.meta
+++ b/jcore-mstparser-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-mstparser-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe MST Parser AE"
 }
diff --git a/jcore-mstparser-ae/pom.xml b/jcore-mstparser-ae/pom.xml
index ddbf1449e..83f9017af 100644
--- a/jcore-mstparser-ae/pom.xml
+++ b/jcore-mstparser-ae/pom.xml
@@ -54,7 +54,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <relativePath>..</relativePath>
     </parent>
     <dependencies>
diff --git a/jcore-mstparser-ae/src/main/resources/de/julielab/jcore/ae/mstparser/desc/jcore-mstparser.xml b/jcore-mstparser-ae/src/main/resources/de/julielab/jcore/ae/mstparser/desc/jcore-mstparser.xml
index 36985423b..36ef089e1 100644
--- a/jcore-mstparser-ae/src/main/resources/de/julielab/jcore/ae/mstparser/desc/jcore-mstparser.xml
+++ b/jcore-mstparser-ae/src/main/resources/de/julielab/jcore/ae/mstparser/desc/jcore-mstparser.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe MST Parser Annotator</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-mstparser-ae/src/test/resources/de/julielab/jcore/ae/mstparser/desc/MSTParserDescriptorTest.xml b/jcore-mstparser-ae/src/test/resources/de/julielab/jcore/ae/mstparser/desc/MSTParserDescriptorTest.xml
index a9b0d6b0e..9442a4955 100644
--- a/jcore-mstparser-ae/src/test/resources/de/julielab/jcore/ae/mstparser/desc/MSTParserDescriptorTest.xml
+++ b/jcore-mstparser-ae/src/test/resources/de/julielab/jcore/ae/mstparser/desc/MSTParserDescriptorTest.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe MST Parser Annotator</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-muc7-reader/component.meta b/jcore-muc7-reader/component.meta
index 882b76c87..7e16b6b2c 100644
--- a/jcore-muc7-reader/component.meta
+++ b/jcore-muc7-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-muc7-reader",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe MUC7 Reader"
 }
diff --git a/jcore-muc7-reader/pom.xml b/jcore-muc7-reader/pom.xml
index aeb5a81b5..a1461b459 100644
--- a/jcore-muc7-reader/pom.xml
+++ b/jcore-muc7-reader/pom.xml
@@ -13,7 +13,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
     <dependencies>
         <dependency>
diff --git a/jcore-muc7-reader/src/main/resources/de/julielab/jcore/reader/muc7/desc/jcore-muc7-reader.xml b/jcore-muc7-reader/src/main/resources/de/julielab/jcore/reader/muc7/desc/jcore-muc7-reader.xml
index 2f6b99cc3..be43fa1c1 100644
--- a/jcore-muc7-reader/src/main/resources/de/julielab/jcore/reader/muc7/desc/jcore-muc7-reader.xml
+++ b/jcore-muc7-reader/src/main/resources/de/julielab/jcore/reader/muc7/desc/jcore-muc7-reader.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>JCoRe MUC7 Reader</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-muc7-reader/src/test/resources/de/julielab/jcore/reader/muc7/desc/jcore-muc7-reader.xml b/jcore-muc7-reader/src/test/resources/de/julielab/jcore/reader/muc7/desc/jcore-muc7-reader.xml
index 87e9f1679..e089a5ab2 100644
--- a/jcore-muc7-reader/src/test/resources/de/julielab/jcore/reader/muc7/desc/jcore-muc7-reader.xml
+++ b/jcore-muc7-reader/src/test/resources/de/julielab/jcore/reader/muc7/desc/jcore-muc7-reader.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>JCoRe MUC7 Reader</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-mutationfinder-ae/component.meta b/jcore-mutationfinder-ae/component.meta
index c0df6eb43..6a13f809a 100644
--- a/jcore-mutationfinder-ae/component.meta
+++ b/jcore-mutationfinder-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-mutationfinder-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe Mutation Finder AE"
 }
diff --git a/jcore-mutationfinder-ae/pom.xml b/jcore-mutationfinder-ae/pom.xml
index bc0ff3ecb..62b3a5d5b 100644
--- a/jcore-mutationfinder-ae/pom.xml
+++ b/jcore-mutationfinder-ae/pom.xml
@@ -5,7 +5,7 @@
     <parent>
         <artifactId>jcore-base</artifactId>
         <groupId>de.julielab</groupId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
     <modelVersion>4.0.0</modelVersion>
     <name>JCoRe Mutation Finder AE</name>
diff --git a/jcore-mutationfinder-ae/src/main/resources/de/julielab/jcore/ae/mutationfinder/desc/jcore-mutationfinder-ae.xml b/jcore-mutationfinder-ae/src/main/resources/de/julielab/jcore/ae/mutationfinder/desc/jcore-mutationfinder-ae.xml
index d43c2caba..4bde7de35 100644
--- a/jcore-mutationfinder-ae/src/main/resources/de/julielab/jcore/ae/mutationfinder/desc/jcore-mutationfinder-ae.xml
+++ b/jcore-mutationfinder-ae/src/main/resources/de/julielab/jcore/ae/mutationfinder/desc/jcore-mutationfinder-ae.xml
@@ -7,7 +7,7 @@
         <name>JCoRe Mutation Annotator</name>
         <description>An analysis engine to recognize mentions of gene point mutations in document text. This is a wrapper around the original MutationFinder (http://mutationfinder.sourceforge.net/), published in the following paper: MutationFinder: A high-performance system for extracting point mutation mentions from text
 J. Gregory Caporaso, William A. Baumgartner Jr., David A. Randolph, K. Bretonnel Cohen, and Lawrence Hunter; Bioinformatics, 2007 23(14):1862-1865; doi:10.1093/bioinformatics/btm235;</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab, Germany</vendor>
         <configurationParameters />
         <configurationParameterSettings />
diff --git a/jcore-neo4j-relations-consumer/pom.xml b/jcore-neo4j-relations-consumer/pom.xml
index e83c89ced..b7a2bf83c 100644
--- a/jcore-neo4j-relations-consumer/pom.xml
+++ b/jcore-neo4j-relations-consumer/pom.xml
@@ -10,10 +10,9 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.3.0-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
-    <version>2.3.0-SNAPSHOT</version>
 
     <dependencies>
         <dependency>
@@ -28,13 +27,18 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-types</artifactId>
-            <version>${jcore-version}</version>
+            <version>${jcore-types-version}</version>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>julielab-neo4j-plugins-concepts-representation</artifactId>
             <version>3.0.0-SNAPSHOT</version>
         </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-utilities</artifactId>
+            <version>${jcore-utilities-version}</version>
+        </dependency>
         <dependency>
             <groupId>org.neo4j.test</groupId>
             <artifactId>neo4j-harness</artifactId>
@@ -47,6 +51,14 @@
             <version>3.0.0-SNAPSHOT</version>
             <scope>test</scope>
         </dependency>
+        <dependency>
+            <groupId>org.assertj</groupId>
+            <artifactId>assertj-core</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-descriptor-creator</artifactId>
+        </dependency>
     </dependencies>
     <name>JCoRe Neo4j Relations Consumer</name>
     <organization>
diff --git a/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java b/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java
index 4c4670d97..9b9a6dddc 100644
--- a/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java
+++ b/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java
@@ -6,9 +6,11 @@
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.common.collect.HashMultiset;
 import com.google.common.collect.Multiset;
+import de.julielab.jcore.types.ArgumentMention;
 import de.julielab.jcore.types.ConceptMention;
 import de.julielab.jcore.types.ResourceEntry;
 import de.julielab.jcore.types.ext.FlattenedRelation;
+import de.julielab.jcore.utility.JCoReTools;
 import de.julielab.neo4j.plugins.datarepresentation.ImportIERelation;
 import de.julielab.neo4j.plugins.datarepresentation.ImportIERelationArgument;
 import de.julielab.neo4j.plugins.datarepresentation.ImportIERelationDocument;
@@ -85,13 +87,16 @@ public void process(final JCas aJCas) {
     private ImportIERelationDocument convertRelations(JCas aJCas) {
         Map<String, Multiset<UnificationRelation>> relationCounts = getEquivalentRelationGroups(aJCas);
         ImportIERelationDocument relDoc = new ImportIERelationDocument();
+        relDoc.setDb(false);
+        relDoc.setName(JCoReTools.getDocId(aJCas));
         ImportIETypedRelations typedRelations = new ImportIETypedRelations();
         for (String relationType : relationCounts.keySet()) {
             Multiset<UnificationRelation> unificationRelations = relationCounts.get(relationType);
             List<ImportIERelation> ieRelations4relationType = new ArrayList<>();
-            for (UnificationRelation rel : unificationRelations) {
+            for (UnificationRelation rel : unificationRelations.elementSet()) {
                 ieRelations4relationType.add(rel.toImportRelation(unificationRelations.count(rel)));
             }
+            typedRelations.put(relationType, ieRelations4relationType);
         }
         relDoc.setRelations(typedRelations);
         return relDoc;
@@ -151,7 +156,11 @@ private void sendRelationsToNeo4j() throws AnalysisEngineProcessException {
     private Map<String, Multiset<UnificationRelation>> getEquivalentRelationGroups(JCas aJCas) {
         Map<String, Multiset<UnificationRelation>> relationCounts = new HashMap<>();
         for (FlattenedRelation fr : aJCas.<FlattenedRelation>getAnnotationIndex(FlattenedRelation.type)) {
-            Iterator<ConceptMention> cmIt = StreamSupport.stream(fr.getArguments().spliterator(), false).map(ConceptMention.class::cast).iterator();
+            Iterator<ConceptMention> cmIt = StreamSupport.stream(fr.getArguments().spliterator(), false)
+                    .map(ArgumentMention.class::cast)
+                    .map(ArgumentMention::getRef)
+                    .map(ConceptMention.class::cast)
+                    .iterator();
             Set<UnificationArgument> unificationArgs = new HashSet<>();
             while (cmIt.hasNext()) {
                 ConceptMention cm = cmIt.next();
@@ -184,7 +193,7 @@ public UnificationRelation(String relationType, Set<UnificationArgument> args) {
         }
 
         public ImportIERelation toImportRelation(int count) {
-            return ImportIERelation.of(count, (Iterable<ImportIERelationArgument>) args.stream().map(UnificationArgument::toImportArgument).iterator());
+            return ImportIERelation.of(count, () -> args.stream().map(UnificationArgument::toImportArgument).iterator());
         }
 
         public String getRelationType() {
diff --git a/jcore-neo4j-relations-consumer/src/main/resources/de/julielab/jcore/consumer/neo4jrelations/desc/PLACEHOLDER b/jcore-neo4j-relations-consumer/src/main/resources/de/julielab/jcore/consumer/neo4jrelations/desc/PLACEHOLDER
deleted file mode 100644
index 9f6c6ddb5..000000000
--- a/jcore-neo4j-relations-consumer/src/main/resources/de/julielab/jcore/consumer/neo4jrelations/desc/PLACEHOLDER
+++ /dev/null
@@ -1 +0,0 @@
-The actual descriptor must be created by UIMA fit.
diff --git a/jcore-neo4j-relations-consumer/src/main/resources/de/julielab/jcore/consumer/neo4jrelations/desc/jcore-neo4j-relations-consumer.xml b/jcore-neo4j-relations-consumer/src/main/resources/de/julielab/jcore/consumer/neo4jrelations/desc/jcore-neo4j-relations-consumer.xml
index a0eadea2f..1119cc5ef 100644
--- a/jcore-neo4j-relations-consumer/src/main/resources/de/julielab/jcore/consumer/neo4jrelations/desc/jcore-neo4j-relations-consumer.xml
+++ b/jcore-neo4j-relations-consumer/src/main/resources/de/julielab/jcore/consumer/neo4jrelations/desc/jcore-neo4j-relations-consumer.xml
@@ -1,21 +1,81 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
-  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
-  <primitive>true</primitive>
-  <annotatorImplementationName>Neo4jRelationsConsumer</annotatorImplementationName>
-  <analysisEngineMetaData>
-    <name>JCoRe Neo4j Relations Consumer</name>
-    <description/>
-    <version>2.3.0-SNAPSHOT</version>
-    <vendor>JULIE Lab Jena, Germany</vendor>
-    <configurationParameters/>
-    <configurationParameterSettings/>
-    <typeSystemDescription/>
-    <capabilities/>
-    <operationalProperties>
-      <modifiesCas>true</modifiesCas>
-      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
-      <outputsNewCASes>false</outputsNewCASes>
-    </operationalProperties>
-  </analysisEngineMetaData>
-</analysisEngineDescription>
+    <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+    <primitive>true</primitive>
+    <annotatorImplementationName>de.julielab.jcore.consumer.neo4jrelations.Neo4jRelationsConsumer</annotatorImplementationName>
+    <analysisEngineMetaData>
+        <name>JCoRe Neo4j Relations Consumer</name>
+        <description>This component assumes that a Neo4j server with the julielab-neo4j-plugins-concepts plugin installed is available. It then sends FlattenedRelation instances with more than one argument to Neo4j. Note that this requires the event arguments to have a ResourceEntry list to obtain database concept IDs from.</description>
+        <version>2.6.0-SNAPSHOT</version>
+        <vendor>JULIE Lab, Germany</vendor>
+        <copyright>JULIE Lab</copyright>
+        <configurationParameters>
+            <configurationParameter>
+                <name>URL</name>
+                <description>The complete URL to the endpoint of the Neo4j server for relation insertion.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>true</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>IdProperty</name>
+                <description>The ID property to look up concept nodes in the Neo4j graph. Common options are 'id', 'sourceIds' and 'originalId'. You must know which ID type the ResourceEntry objects of the relation arguments refer to.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>true</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>ConceptSource</name>
+                <description>Optional. Sets the global source for the concept IDs taken from the ResourceEntry instances of the relation arguments. This causes the 'source' feature of the ResourceEntry objects to be omitted and to globally use the specified source instead. This causes the Neo4j database plugin to resolve the provided argument IDs against the source specified here.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+        </configurationParameters>
+        <configurationParameterSettings/>
+        <typeSystemDescription>
+            <imports>
+                <import name="de.julielab.jcore.types.jcore-semantics-mention-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-meta-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-dta-types"/>
+                <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types"/>
+                <import name="de.julielab.jcore.types.jcore-morpho-syntax-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-ace-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-wikipedia-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-ace-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types"/>
+                <import name="de.julielab.jcore.types.jcore-basic-types"/>
+                <import name="de.julielab.jcore.types.jcore-discourse-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-structure-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-mantra-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-stemnet-types"/>
+                <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types"/>
+                <import name="de.julielab.jcore.types.jcore-semantics-concept-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-evaluation-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-mmax-types"/>
+                <import name="de.julielab.jcore.types.jcore-semantics-biology-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-medical-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-muc7-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-bootstrep-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
+            </imports>
+        </typeSystemDescription>
+        <fsIndexCollection/>
+        <capabilities>
+            <capability>
+                <inputs>
+                    <type>de.julielab.jcore.types.EventMention</type>
+                </inputs>
+                <outputs/>
+                <languagesSupported/>
+            </capability>
+        </capabilities>
+        <operationalProperties>
+            <modifiesCas>true</modifiesCas>
+            <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+            <outputsNewCASes>false</outputsNewCASes>
+        </operationalProperties>
+    </analysisEngineMetaData>
+</analysisEngineDescription>
\ No newline at end of file
diff --git a/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerTest.java b/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerTest.java
index 41d24b178..f40f929b2 100644
--- a/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerTest.java
+++ b/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerTest.java
@@ -1,15 +1,24 @@
 
 package de.julielab.jcore.consumer.neo4jrelations;
 
-import org.apache.uima.UIMAException;
-import org.apache.uima.analysis_engine.AnalysisEngine;
-import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import de.julielab.jcore.types.ArgumentMention;
+import de.julielab.jcore.types.ConceptMention;
+import de.julielab.jcore.types.EventMention;
+import de.julielab.jcore.types.ResourceEntry;
+import de.julielab.jcore.types.ext.FlattenedRelation;
+import de.julielab.jcore.utility.JCoReTools;
+import de.julielab.neo4j.plugins.datarepresentation.ImportIERelation;
+import de.julielab.neo4j.plugins.datarepresentation.ImportIERelationArgument;
+import de.julielab.neo4j.plugins.datarepresentation.ImportIERelationDocument;
 import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.fit.factory.UimaContextFactory;
 import org.apache.uima.jcas.JCas;
 import org.junit.Test;
 
-import java.io.IOException;
+import java.lang.reflect.Method;
+import java.util.List;
 
+import static org.assertj.core.api.Assertions.assertThat;
 
 /**
  * Unit tests for jcore-neo4j-relations-consumer.
@@ -19,10 +28,113 @@ public class Neo4jRelationsConsumerTest {
 
 
     @Test
-    public void insertEventMentions() throws UIMAException, IOException {
-        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-meta-pubmed-types");
-        AnalysisEngine engine = AnalysisEngineFactory.createEngine("de.julielab.jcore.consumer.neo4jrelations.desc.jcore-neo4j-relations-consumer", Neo4jRelationsConsumer.PARAM_URL, "");
+    public void insertEventMentions() throws Exception {
+        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types", "de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-meta-pubmed-types");
+        Neo4jRelationsConsumer engine = new Neo4jRelationsConsumer();
+        engine.initialize(UimaContextFactory.createUimaContext(Neo4jRelationsConsumer.PARAM_URL, "", Neo4jRelationsConsumer.PARAM_ID_PROPERTY, "sourceIds"));
+        addFlattenedRelation1ToCas(jCas);
+        // Relation 2 is added twice on purpose: the duplicate is expected to be merged into a single relation with count 2 (asserted below)
+        addFlattenedRelation2ToCas(jCas);
+        addFlattenedRelation2ToCas(jCas);
 
+        Method m = Neo4jRelationsConsumer.class.getDeclaredMethod("convertRelations", JCas.class);
+        m.setAccessible(true);
+        ImportIERelationDocument relations = (ImportIERelationDocument) m.invoke(engine, jCas);
+        assertThat(relations).extracting(ImportIERelationDocument::getRelations).isNotNull();
+        assertThat(relations.getRelations()).hasSize(1);
+        List<ImportIERelation> regulations = relations.getRelations().get("regulation");
+        assertThat(regulations).hasSize(2);
+        assertThat(regulations.get(0)).extracting(ImportIERelation::getCount).isEqualTo(1);
+        assertThat(regulations.get(1)).extracting(ImportIERelation::getCount).isEqualTo(2);
+        assertThat(regulations).flatExtracting(ImportIERelation::getArgs).flatExtracting(ImportIERelationArgument::getId).containsExactlyInAnyOrder("id11", "id12", "id13", "id21", "id22");
+        assertThat(regulations).flatExtracting(ImportIERelation::getArgs).flatExtracting(ImportIERelationArgument::getSource).containsExactlyInAnyOrder("source11", "source12", "source13", "source21", "source22");
+    }
+
+    @Test
+    public void insertEventMentionsGlobalSource() throws Exception {
+        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types", "de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-meta-pubmed-types");
+        Neo4jRelationsConsumer engine = new Neo4jRelationsConsumer();
+        engine.initialize(UimaContextFactory.createUimaContext(Neo4jRelationsConsumer.PARAM_URL, "", Neo4jRelationsConsumer.PARAM_ID_PROPERTY, "sourceIds", Neo4jRelationsConsumer.PARAM_SOURCE, "globalSource"));
+        addFlattenedRelation1ToCas(jCas);
+        addFlattenedRelation2ToCas(jCas);
 
+        Method m = Neo4jRelationsConsumer.class.getDeclaredMethod("convertRelations", JCas.class);
+        m.setAccessible(true);
+        ImportIERelationDocument relations = (ImportIERelationDocument) m.invoke(engine, jCas);
+        assertThat(relations).extracting(ImportIERelationDocument::getRelations).isNotNull();
+        assertThat(relations.getRelations()).hasSize(1);
+        List<ImportIERelation> regulations = relations.getRelations().get("regulation");
+        assertThat(regulations).hasSize(2);
+        // With PARAM_SOURCE configured, the per-argument sources are expected to be left out: all five arguments (3 + 2) must report a null source
+        assertThat(regulations).flatExtracting(ImportIERelation::getArgs).flatExtracting(ImportIERelationArgument::getSource).containsExactlyInAnyOrder(null, null, null, null, null);
     }
+
+    /**
+     * Adds a FlattenedRelation to the CAS indexes whose root EventMention has the specific type "regulation" and whose three arguments reference concepts with the resource entries id11/source11, id12/source12 and id13/source13.
+     * @param jCas The CAS in which the relation and all of its feature structures are created.
+     */
+    private void addFlattenedRelation1ToCas(JCas jCas) {
+        FlattenedRelation fr = new FlattenedRelation(jCas);
+        EventMention rootEm = new EventMention(jCas);
+        rootEm.setSpecificType("regulation");
+        fr.setRootRelation(rootEm);
+
+        ArgumentMention am1 = new ArgumentMention(jCas);
+        ConceptMention cm1 = new ConceptMention(jCas);
+        ResourceEntry re1 = new ResourceEntry(jCas);
+        re1.setEntryId("id11");
+        re1.setSource("source11");
+        cm1.setResourceEntryList(JCoReTools.addToFSArray(null, re1));
+        am1.setRef(cm1);
+
+        ArgumentMention am2 = new ArgumentMention(jCas);
+        ConceptMention cm2 = new ConceptMention(jCas);
+        ResourceEntry re2 = new ResourceEntry(jCas);
+        re2.setEntryId("id12");
+        re2.setSource("source12");
+        cm2.setResourceEntryList(JCoReTools.addToFSArray(null, re2));
+        am2.setRef(cm2);
+
+        ArgumentMention am3 = new ArgumentMention(jCas);
+        ConceptMention cm3 = new ConceptMention(jCas);
+        ResourceEntry re3 = new ResourceEntry(jCas);
+        re3.setEntryId("id13");
+        re3.setSource("source13");
+        cm3.setResourceEntryList(JCoReTools.addToFSArray(null, re3));
+        am3.setRef(cm3);
+
+        fr.setArguments(JCoReTools.addToFSArray(null, List.of(am1, am2, am3)));
+        fr.addToIndexes();
+    }
+
+    /**
+     * Adds a FlattenedRelation to the CAS indexes whose root EventMention has the specific type "regulation" and whose two arguments reference concepts with the resource entries id21/source21 and id22/source22.
+     * @param jCas The CAS in which the relation and all of its feature structures are created.
+     */
+    private void addFlattenedRelation2ToCas(JCas jCas) {
+        FlattenedRelation fr = new FlattenedRelation(jCas);
+        EventMention rootEm = new EventMention(jCas);
+        rootEm.setSpecificType("regulation");
+        fr.setRootRelation(rootEm);
+
+        ArgumentMention am1 = new ArgumentMention(jCas);
+        ConceptMention cm1 = new ConceptMention(jCas);
+        ResourceEntry re1 = new ResourceEntry(jCas);
+        re1.setEntryId("id21");
+        re1.setSource("source21");
+        cm1.setResourceEntryList(JCoReTools.addToFSArray(null, re1));
+        am1.setRef(cm1);
+
+        ArgumentMention am2 = new ArgumentMention(jCas);
+        ConceptMention cm2 = new ConceptMention(jCas);
+        ResourceEntry re2 = new ResourceEntry(jCas);
+        re2.setEntryId("id22");
+        re2.setSource("source22");
+        cm2.setResourceEntryList(JCoReTools.addToFSArray(null, re2));
+        am2.setRef(cm2);
+
+        fr.setArguments(JCoReTools.addToFSArray(null, List.of(am1, am2)));
+        fr.addToIndexes();
+    }
+
 }
diff --git a/jcore-opennlp-chunk-ae/component.meta b/jcore-opennlp-chunk-ae/component.meta
index 202885b41..5254bb51d 100644
--- a/jcore-opennlp-chunk-ae/component.meta
+++ b/jcore-opennlp-chunk-ae/component.meta
@@ -9,7 +9,7 @@
     "maven-artifact": {
         "artifactId": "jcore-opennlp-chunk-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe Open NLP Chunker"
 }
diff --git a/jcore-opennlp-chunk-ae/pom.xml b/jcore-opennlp-chunk-ae/pom.xml
index d691531cd..1ec6f1917 100644
--- a/jcore-opennlp-chunk-ae/pom.xml
+++ b/jcore-opennlp-chunk-ae/pom.xml
@@ -14,7 +14,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
     <dependencies>
         <dependency>
diff --git a/jcore-opennlp-chunk-ae/src/test/resources/ChunkAnnotatorTest.xml b/jcore-opennlp-chunk-ae/src/test/resources/ChunkAnnotatorTest.xml
index a9d5953cf..dc8612e2d 100644
--- a/jcore-opennlp-chunk-ae/src/test/resources/ChunkAnnotatorTest.xml
+++ b/jcore-opennlp-chunk-ae/src/test/resources/ChunkAnnotatorTest.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>jcore-openlp-chunk-ae</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>julielab</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-opennlp-chunk-ae/src/test/resources/ChunkAnnotatorTestDefaultMappings.xml b/jcore-opennlp-chunk-ae/src/test/resources/ChunkAnnotatorTestDefaultMappings.xml
index 8d522d208..9f2a29ee1 100644
--- a/jcore-opennlp-chunk-ae/src/test/resources/ChunkAnnotatorTestDefaultMappings.xml
+++ b/jcore-opennlp-chunk-ae/src/test/resources/ChunkAnnotatorTestDefaultMappings.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>jcore-openlp-chunk-ae</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>julielab</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-opennlp-parser-ae/component.meta b/jcore-opennlp-parser-ae/component.meta
index 8233a2b6f..c73a0bec7 100644
--- a/jcore-opennlp-parser-ae/component.meta
+++ b/jcore-opennlp-parser-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-opennlp-parser-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe OpenNLP Constituency Parser"
 }
diff --git a/jcore-opennlp-parser-ae/pom.xml b/jcore-opennlp-parser-ae/pom.xml
index 87af33491..5a2ef3229 100644
--- a/jcore-opennlp-parser-ae/pom.xml
+++ b/jcore-opennlp-parser-ae/pom.xml
@@ -12,7 +12,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
     <build>
         <plugins>
diff --git a/jcore-opennlp-parser-ae/src/main/resources/de/julielab/jcore/ae/opennlpparser/desc/jcore-opennlpparser.xml b/jcore-opennlp-parser-ae/src/main/resources/de/julielab/jcore/ae/opennlpparser/desc/jcore-opennlpparser.xml
index ca499d279..afa247920 100644
--- a/jcore-opennlp-parser-ae/src/main/resources/de/julielab/jcore/ae/opennlpparser/desc/jcore-opennlpparser.xml
+++ b/jcore-opennlp-parser-ae/src/main/resources/de/julielab/jcore/ae/opennlpparser/desc/jcore-opennlpparser.xml
@@ -6,7 +6,7 @@
 <analysisEngineMetaData>
 <name>JCoRe OpenNLP Constituency Parser AE</name>
 <description />
-<version>2.5.1-SNAPSHOT</version>
+<version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
 <configurationParameters>
 <configurationParameter>
diff --git a/jcore-opennlp-parser-ae/src/test/resources/de/julielab/jcore/ae/opennlpparser/desc/jcore-opennlpparser-test.xml b/jcore-opennlp-parser-ae/src/test/resources/de/julielab/jcore/ae/opennlpparser/desc/jcore-opennlpparser-test.xml
index 5943431f4..60e9d9e45 100644
--- a/jcore-opennlp-parser-ae/src/test/resources/de/julielab/jcore/ae/opennlpparser/desc/jcore-opennlpparser-test.xml
+++ b/jcore-opennlp-parser-ae/src/test/resources/de/julielab/jcore/ae/opennlpparser/desc/jcore-opennlpparser-test.xml
@@ -6,7 +6,7 @@
 <analysisEngineMetaData>
 <name>JCoRe OpenNLP Parser Test</name>
 <description />
-<version>2.5.1-SNAPSHOT</version>
+<version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
 <configurationParameters>
 <configurationParameter>
diff --git a/jcore-opennlp-postag-ae/component.meta b/jcore-opennlp-postag-ae/component.meta
index 4f3b87ffb..4996af5fb 100644
--- a/jcore-opennlp-postag-ae/component.meta
+++ b/jcore-opennlp-postag-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-opennlp-postag-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe OpenNLP POS Tagger"
 }
diff --git a/jcore-opennlp-postag-ae/pom.xml b/jcore-opennlp-postag-ae/pom.xml
index 77abc3243..cadd08079 100644
--- a/jcore-opennlp-postag-ae/pom.xml
+++ b/jcore-opennlp-postag-ae/pom.xml
@@ -12,7 +12,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
     <build>
         <pluginManagement>
diff --git a/jcore-opennlp-postag-ae/src/main/resources/de/julielab/jcore/ae/opennlppostag/desc/jcore-opennlppostag.xml b/jcore-opennlp-postag-ae/src/main/resources/de/julielab/jcore/ae/opennlppostag/desc/jcore-opennlppostag.xml
index 9a7640c32..ce2228cca 100644
--- a/jcore-opennlp-postag-ae/src/main/resources/de/julielab/jcore/ae/opennlppostag/desc/jcore-opennlppostag.xml
+++ b/jcore-opennlp-postag-ae/src/main/resources/de/julielab/jcore/ae/opennlppostag/desc/jcore-opennlppostag.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe OpenNLP POS Tagger</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-opennlp-postag-ae/src/test/resources/PosTagAnnotatorTest.xml b/jcore-opennlp-postag-ae/src/test/resources/PosTagAnnotatorTest.xml
index 1c6b115ca..ff5e2768a 100644
--- a/jcore-opennlp-postag-ae/src/test/resources/PosTagAnnotatorTest.xml
+++ b/jcore-opennlp-postag-ae/src/test/resources/PosTagAnnotatorTest.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>jcore-opennlp-postag-ae</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-opennlp-sentence-ae/component.meta b/jcore-opennlp-sentence-ae/component.meta
index 33e67cb8d..7980c80b1 100644
--- a/jcore-opennlp-sentence-ae/component.meta
+++ b/jcore-opennlp-sentence-ae/component.meta
@@ -9,7 +9,7 @@
     "maven-artifact": {
         "artifactId": "jcore-opennlp-sentence-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe OpenNLP Sentence Splitter"
 }
diff --git a/jcore-opennlp-sentence-ae/pom.xml b/jcore-opennlp-sentence-ae/pom.xml
index d2e778487..c1c0c2b03 100644
--- a/jcore-opennlp-sentence-ae/pom.xml
+++ b/jcore-opennlp-sentence-ae/pom.xml
@@ -12,7 +12,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-opennlp-sentence-ae/src/test/resources/SentenceAnnotatorTest.xml b/jcore-opennlp-sentence-ae/src/test/resources/SentenceAnnotatorTest.xml
index 127ce56d8..249d1030e 100644
--- a/jcore-opennlp-sentence-ae/src/test/resources/SentenceAnnotatorTest.xml
+++ b/jcore-opennlp-sentence-ae/src/test/resources/SentenceAnnotatorTest.xml
@@ -6,7 +6,7 @@
 <analysisEngineMetaData>
 <name>jcore-opennlp-sentence-ae</name>
 <description>sentence splitter based on opennlp</description>
-<version>2.5.1-SNAPSHOT</version>
+<version>2.6.0-SNAPSHOT</version>
         <vendor />
 <configurationParameters>
 <configurationParameter>
diff --git a/jcore-opennlp-token-ae/component.meta b/jcore-opennlp-token-ae/component.meta
index 373b7c246..f394a600a 100644
--- a/jcore-opennlp-token-ae/component.meta
+++ b/jcore-opennlp-token-ae/component.meta
@@ -9,7 +9,7 @@
     "maven-artifact": {
         "artifactId": "jcore-opennlp-token-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe OpenNLP Tokenizer"
 }
diff --git a/jcore-opennlp-token-ae/desc/TokenAnnotator.xml b/jcore-opennlp-token-ae/desc/TokenAnnotator.xml
index a8eecd2b1..ea840ac0c 100644
--- a/jcore-opennlp-token-ae/desc/TokenAnnotator.xml
+++ b/jcore-opennlp-token-ae/desc/TokenAnnotator.xml
@@ -6,7 +6,7 @@
 <analysisEngineMetaData>
 <name>jcore-opennlp-token-ae</name>
 <description />
-<version>2.5.1-SNAPSHOT</version>
+<version>2.6.0-SNAPSHOT</version>
         <vendor />
 <configurationParameters>
 <configurationParameter>
diff --git a/jcore-opennlp-token-ae/pom.xml b/jcore-opennlp-token-ae/pom.xml
index 3145d63a6..306972531 100644
--- a/jcore-opennlp-token-ae/pom.xml
+++ b/jcore-opennlp-token-ae/pom.xml
@@ -12,7 +12,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
     <dependencies>
         <dependency>
diff --git a/jcore-opennlp-token-ae/src/test/resources/TokenAnnotatorTest.xml b/jcore-opennlp-token-ae/src/test/resources/TokenAnnotatorTest.xml
index 2ab75743c..749f145df 100644
--- a/jcore-opennlp-token-ae/src/test/resources/TokenAnnotatorTest.xml
+++ b/jcore-opennlp-token-ae/src/test/resources/TokenAnnotatorTest.xml
@@ -6,7 +6,7 @@
 <analysisEngineMetaData>
 <name>jcore-opennlp-token-ae</name>
 <description />
-<version>2.5.1-SNAPSHOT</version>
+<version>2.6.0-SNAPSHOT</version>
         <vendor />
 <configurationParameters>
 <configurationParameter>
diff --git a/jcore-pmc-reader/component.meta b/jcore-pmc-reader/component.meta
index b71a1930d..6cfbb0efc 100644
--- a/jcore-pmc-reader/component.meta
+++ b/jcore-pmc-reader/component.meta
@@ -23,7 +23,7 @@
     "maven-artifact": {
         "artifactId": "jcore-pmc-reader",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe PubMed Central Reader"
 }
diff --git a/jcore-pmc-reader/pom.xml b/jcore-pmc-reader/pom.xml
index 3f545c1a4..976a1b456 100644
--- a/jcore-pmc-reader/pom.xml
+++ b/jcore-pmc-reader/pom.xml
@@ -10,7 +10,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-pmc-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-multiplier.xml b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-multiplier.xml
index dba9b5af0..aafb02d0a 100644
--- a/jcore-pmc-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-multiplier.xml
+++ b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-multiplier.xml
@@ -8,7 +8,7 @@
         <description>This multiplier expect to receive URIs to NXML documents in the form of JCoReURI feature
             structures. All JCoReURI FS in the annotation indexes are read and output as new CASes.
         </description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <configurationParameters />
         <configurationParameterSettings />
         <typeSystemDescription>
diff --git a/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/desc/jcore-pmc-multiplier-reader.xml b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/desc/jcore-pmc-multiplier-reader.xml
index 88d0d6c73..224b668eb 100644
--- a/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/desc/jcore-pmc-multiplier-reader.xml
+++ b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/desc/jcore-pmc-multiplier-reader.xml
@@ -5,7 +5,7 @@
     <processingResourceMetaData>
         <name>JCoRe Pubmed Central Multiplier Reader</name>
         <description>Reads a directory of NXML files, possibly assembled into ZIP archives. Requires the Pubmed Central Multiplier to follow in the pipeline. This reader only sends URIs referencing the NXML files to the multiplier that then does the parsing.</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <configurationParameters>
             <configurationParameter>
                 <name>SendCasToLast</name>
diff --git a/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/desc/jcore-pmc-reader.xml b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/desc/jcore-pmc-reader.xml
index 74eee3a1a..284f41cdd 100644
--- a/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/desc/jcore-pmc-reader.xml
+++ b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/desc/jcore-pmc-reader.xml
@@ -5,7 +5,7 @@
     <processingResourceMetaData>
         <name>JCoRe Pubmed Central Reader</name>
         <description>Reads Pubmed Central documents from the NXML format</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <configurationParameters>
             <configurationParameter>
                 <name>Input</name>
diff --git a/jcore-ppd-writer/component.meta b/jcore-ppd-writer/component.meta
index d10916db5..9264bc68e 100644
--- a/jcore-ppd-writer/component.meta
+++ b/jcore-ppd-writer/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-ppd-writer",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe Piped Format Writer"
 }
diff --git a/jcore-ppd-writer/pom.xml b/jcore-ppd-writer/pom.xml
index 6009a4286..8e409735b 100644
--- a/jcore-ppd-writer/pom.xml
+++ b/jcore-ppd-writer/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-ppd-writer/src/main/resources/de/julielab/jcore/consumer/ppd/desc/jcore-ppd-writer.xml b/jcore-ppd-writer/src/main/resources/de/julielab/jcore/consumer/ppd/desc/jcore-ppd-writer.xml
index de2470406..fdf9505bf 100644
--- a/jcore-ppd-writer/src/main/resources/de/julielab/jcore/consumer/ppd/desc/jcore-ppd-writer.xml
+++ b/jcore-ppd-writer/src/main/resources/de/julielab/jcore/consumer/ppd/desc/jcore-ppd-writer.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe PPD Writer</name>
         <description>This component writes CAS annotation data to the pipe-separated format. For example, writing tokens with their PoS would result in text like 'The|DET tree|NN is|VBZ green|ADJ'. The component can be configured for an arbitrary number of annotations to be added to each token.</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <configurationParameters>
             <configurationParameter>
                 <name>TypeToLabelMappings</name>
diff --git a/jcore-pubtator-reader/component.meta b/jcore-pubtator-reader/component.meta
index 591a4acb5..ea6504c28 100644
--- a/jcore-pubtator-reader/component.meta
+++ b/jcore-pubtator-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-pubtator-reader",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe PubTator Reader"
 }
diff --git a/jcore-pubtator-reader/pom.xml b/jcore-pubtator-reader/pom.xml
index bd57f680f..84661f424 100644
--- a/jcore-pubtator-reader/pom.xml
+++ b/jcore-pubtator-reader/pom.xml
@@ -12,7 +12,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <relativePath>..</relativePath>
     </parent>
     <dependencies>
diff --git a/jcore-pubtator-reader/src/main/resources/de/julielab/jcore/reader/pubtator/desc/jcore-pubtator-reader.xml b/jcore-pubtator-reader/src/main/resources/de/julielab/jcore/reader/pubtator/desc/jcore-pubtator-reader.xml
index 82cd90174..76985dfd7 100644
--- a/jcore-pubtator-reader/src/main/resources/de/julielab/jcore/reader/pubtator/desc/jcore-pubtator-reader.xml
+++ b/jcore-pubtator-reader/src/main/resources/de/julielab/jcore/reader/pubtator/desc/jcore-pubtator-reader.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>jcore-pubtator-reader</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-stanford-lemmatizer-ae/component.meta b/jcore-stanford-lemmatizer-ae/component.meta
index 872ac0bdd..2da48cfa7 100644
--- a/jcore-stanford-lemmatizer-ae/component.meta
+++ b/jcore-stanford-lemmatizer-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-stanford-lemmatizer-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe Stanford Lemmatizer"
 }
diff --git a/jcore-stanford-lemmatizer-ae/pom.xml b/jcore-stanford-lemmatizer-ae/pom.xml
index 99e888260..39eda0c8b 100644
--- a/jcore-stanford-lemmatizer-ae/pom.xml
+++ b/jcore-stanford-lemmatizer-ae/pom.xml
@@ -5,7 +5,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
     <artifactId>jcore-stanford-lemmatizer-ae</artifactId>
     <name>JCoRe Stanford Lemmatizer</name>
diff --git a/jcore-stanford-lemmatizer-ae/src/main/resources/de/julielab/jcore/ae/stanford/lemma/desc/jcore-stanford-lemmatizer.xml b/jcore-stanford-lemmatizer-ae/src/main/resources/de/julielab/jcore/ae/stanford/lemma/desc/jcore-stanford-lemmatizer.xml
index 47dec0beb..2e4b3d99a 100644
--- a/jcore-stanford-lemmatizer-ae/src/main/resources/de/julielab/jcore/ae/stanford/lemma/desc/jcore-stanford-lemmatizer.xml
+++ b/jcore-stanford-lemmatizer-ae/src/main/resources/de/julielab/jcore/ae/stanford/lemma/desc/jcore-stanford-lemmatizer.xml
@@ -5,7 +5,7 @@
   <analysisEngineMetaData>
     <name>JCoRe Stanford Lemmatizer</name>
     <description>This is the UIMA Wrapper for the Stanford CoreNLP Lemmatizer component.</description>
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor />
     <configurationParameters />
     <configurationParameterSettings />
diff --git a/jcore-stanford-lemmatizer-ae/src/test/resources/de/julielab/jcore/ae/stanford/lemma/desc/jcore-stanford-lemmatizer-ae.xml b/jcore-stanford-lemmatizer-ae/src/test/resources/de/julielab/jcore/ae/stanford/lemma/desc/jcore-stanford-lemmatizer-ae.xml
index ca8ce0703..653ecbe8e 100644
--- a/jcore-stanford-lemmatizer-ae/src/test/resources/de/julielab/jcore/ae/stanford/lemma/desc/jcore-stanford-lemmatizer-ae.xml
+++ b/jcore-stanford-lemmatizer-ae/src/test/resources/de/julielab/jcore/ae/stanford/lemma/desc/jcore-stanford-lemmatizer-ae.xml
@@ -7,7 +7,7 @@
 		<name>jcore-stanford-lemmatizer-ae</name>
 		<description>This is the UIMA Wrapper for the Stanford CoreNLP
 			Lemmatizer component.</description>
-		<version>2.5.1-SNAPSHOT</version>
+		<version>2.6.0-SNAPSHOT</version>
         <vendor />
 		<configurationParameters />
 		<configurationParameterSettings />
diff --git a/jcore-topic-indexing-ae/component.meta b/jcore-topic-indexing-ae/component.meta
index 03a0d63b9..c6cdef338 100644
--- a/jcore-topic-indexing-ae/component.meta
+++ b/jcore-topic-indexing-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-topic-indexing-ae",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe-Topic-Labeling-AE"
 }
diff --git a/jcore-topic-indexing-ae/pom.xml b/jcore-topic-indexing-ae/pom.xml
index 99e1c0173..b378f818d 100644
--- a/jcore-topic-indexing-ae/pom.xml
+++ b/jcore-topic-indexing-ae/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
     <dependencies>
@@ -53,7 +53,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-xmi-reader</artifactId>
-            <version>2.5.1-SNAPSHOT</version>
+            <version>2.6.0-SNAPSHOT</version>
             <scope>test</scope>
         </dependency>
         <dependency>
diff --git a/jcore-topic-indexing-ae/src/main/resources/de/julielab/jcore/ae/topicindexing/desc/jcore-topic-indexing-ae.xml b/jcore-topic-indexing-ae/src/main/resources/de/julielab/jcore/ae/topicindexing/desc/jcore-topic-indexing-ae.xml
index 95d04054e..6db4c1c87 100644
--- a/jcore-topic-indexing-ae/src/main/resources/de/julielab/jcore/ae/topicindexing/desc/jcore-topic-indexing-ae.xml
+++ b/jcore-topic-indexing-ae/src/main/resources/de/julielab/jcore/ae/topicindexing/desc/jcore-topic-indexing-ae.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe Topic Indexer</name>
         <description>This component assigns topics relative to a given topic model to the encoutered documents. The topic model is one trained by the julielab-topic-modeling project.</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <configurationParameters>
             <configurationParameter>
                 <name>TopicModelConfig</name>
diff --git a/jcore-topics-writer/component.meta b/jcore-topics-writer/component.meta
index c98a40a2e..32ac48b74 100644
--- a/jcore-topics-writer/component.meta
+++ b/jcore-topics-writer/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-topics-writer",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe Topics Writer"
 }
diff --git a/jcore-topics-writer/pom.xml b/jcore-topics-writer/pom.xml
index ad9569a47..19752ec2e 100644
--- a/jcore-topics-writer/pom.xml
+++ b/jcore-topics-writer/pom.xml
@@ -10,7 +10,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-topics-writer/src/main/resources/de/julielab/jcore/consumer/topics/desc/jcore-topics-writer.xml b/jcore-topics-writer/src/main/resources/de/julielab/jcore/consumer/topics/desc/jcore-topics-writer.xml
index 47a3c1bb6..23aab97c9 100644
--- a/jcore-topics-writer/src/main/resources/de/julielab/jcore/consumer/topics/desc/jcore-topics-writer.xml
+++ b/jcore-topics-writer/src/main/resources/de/julielab/jcore/consumer/topics/desc/jcore-topics-writer.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe Topics Writer</name>
         <description>Writes the topic weights, given the jcore-topic-indexing-ae running before, into a simple text file. Thus, the output consists of a sequency of double numbers encodes as strings, separated by tab characters. The topic ID is just the 0-based index of each number, from left to right in the written file. The first entry of each file is the document ID.</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <configurationParameters>
             <configurationParameter>
                 <name>OutputDirectory</name>
diff --git a/jcore-txt-consumer/component.meta b/jcore-txt-consumer/component.meta
index 6cf58e0d6..c10e83c2c 100644
--- a/jcore-txt-consumer/component.meta
+++ b/jcore-txt-consumer/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-txt-consumer",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe TXT Consumer"
 }
diff --git a/jcore-txt-consumer/pom.xml b/jcore-txt-consumer/pom.xml
index bf6de2d14..07b878cab 100644
--- a/jcore-txt-consumer/pom.xml
+++ b/jcore-txt-consumer/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
     <artifactId>jcore-txt-consumer</artifactId>
     <name>JCoRe TXT Consumer</name>
diff --git a/jcore-txt-consumer/src/main/resources/de/julielab/jcore/consumer/txt/desc/jcore-txt-consumer.xml b/jcore-txt-consumer/src/main/resources/de/julielab/jcore/consumer/txt/desc/jcore-txt-consumer.xml
index 556fbbc5e..55deaaea9 100644
--- a/jcore-txt-consumer/src/main/resources/de/julielab/jcore/consumer/txt/desc/jcore-txt-consumer.xml
+++ b/jcore-txt-consumer/src/main/resources/de/julielab/jcore/consumer/txt/desc/jcore-txt-consumer.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe Text Consumer</name>
         <description>Stores the CAS document text in files. Either in tokenized sentences plus optional PoS tags or just the original document text. The text files can also be stored in GZIP format or batch-wise in ZIP archives.</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <vendor>de.julielab.jcore.consumer.txt</vendor>
         <configurationParameters>
             <configurationParameter>
diff --git a/jcore-types/pom.xml b/jcore-types/pom.xml
index 6abd932d0..e9571839f 100644
--- a/jcore-types/pom.xml
+++ b/jcore-types/pom.xml
@@ -8,7 +8,7 @@
 	<parent>
 		<groupId>de.julielab</groupId>
 		<artifactId>jcore-base</artifactId>
-		<version>2.5.1-SNAPSHOT</version>
+		<version>2.6.0-SNAPSHOT</version>
 	</parent>
 	
 	<build>
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/casmultiplier/jcore-dbtable-multiplier-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/casmultiplier/jcore-dbtable-multiplier-types.xml
index 57770ed2a..06aa1902b 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/casmultiplier/jcore-dbtable-multiplier-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/casmultiplier/jcore-dbtable-multiplier-types.xml
@@ -10,7 +10,7 @@
         base document and annotations that have been previously created and stored in separate tables. This is
         part of the Jena Document Information System, JeDIS.
     </description>
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <types>
         <typeDescription>
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/casmultiplier/jcore-uri-multiplier-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/casmultiplier/jcore-uri-multiplier-types.xml
index 7b49b49c9..5f6a3459b 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/casmultiplier/jcore-uri-multiplier-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/casmultiplier/jcore-uri-multiplier-types.xml
@@ -4,7 +4,7 @@
   <description>This is a type system for usage with a CAS multiplier. It should not be included into the
   jcore-all-types type system. This particular type system holds a single URI that points to the resource that
   should be split into CASes by the multiplier.</description>
-  <version>2.5.1-SNAPSHOT</version>
+  <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <types>
     <typeDescription>
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-ace-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-ace-types.xml
index e3ba78bce..d5e851681 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-ace-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-ace-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
 <name>JCoRe ACE Types</name>
 <description>The jcore-ace-types TS represents the complete ACE Annotation in CAS format.</description>
-<version>2.5.1-SNAPSHOT</version>
+<version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
 <imports>
 <import location="../jcore-basic-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-document-meta-extension-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-document-meta-extension-types.xml
index a95f22bfa..115927024 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-document-meta-extension-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-document-meta-extension-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>JCoRe Document Meta Types</name>
   <description>The types of this type system reflect meta data about documents for rather specific use cases.</description>
-  <version>2.5.1-SNAPSHOT</version>
+  <version>2.6.0-SNAPSHOT</version>
         <vendor />
   <types>
     <typeDescription>
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-dta-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-dta-types.xml
index 387aafda0..89d99ec9a 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-dta-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-dta-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
 	<name>jcore-dta-types</name>
 	<description />
-	<version>2.5.1-SNAPSHOT</version>
+	<version>2.6.0-SNAPSHOT</version>
         <vendor />
 	<types>
 			<typeDescription>
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-evaluation-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-evaluation-types.xml
index bb860b3ec..e02013a5f 100755
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-evaluation-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-evaluation-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>JCoRe Evaluation Types</name>
   <description>This type system is an extension of the JCoRe type system to cover evaluation Annotations like missing or additional annotations for evaluation purposes.</description>
-  <version>2.5.1-SNAPSHOT</version>
+  <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <types>
     <typeDescription>
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-mantra-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-mantra-types.xml
index 9d5ffa276..32ce435a4 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-mantra-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-mantra-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>JCoRe Mantra Types</name>
   <description>The type system contains types for working with documents in the context of the MANTRA project.</description>
-  <version>2.5.1-SNAPSHOT</version>
+  <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <imports>
     <import location="../jcore-document-structure-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-medical-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-medical-types.xml
index 3fca73bdb..af5a69392 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-medical-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-medical-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <name>jcore-medical-types</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor />
   <imports>
     <import location="../jcore-semantics-mention-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-mmax-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-mmax-types.xml
index 192bf4dc6..fbec38980 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-mmax-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-mmax-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>JCoRe MMAX Types</name>
   <description>The type system contains types for the import of MMAX2 annotations.</description>
-  <version>2.5.1-SNAPSHOT</version>
+  <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <types>
     <typeDescription>
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-muc7-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-muc7-types.xml
index 39575dacb..8aa7a5303 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-muc7-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-muc7-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
 <name>JCoRe MUC7 Type System</name>
 <description>This type system contains types covering annotations for the MUC7 data.</description>
-<version>2.5.1-SNAPSHOT</version>
+<version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
 <imports>
 <import location="../jcore-document-meta-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-ace-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-ace-types.xml
index f2db24b6f..2a2059bf3 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-ace-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-ace-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
 <name>JCoRe Semantics ACE Types</name>
 <description>The type system contains ACE types of the ACE taxonomy.</description>
-<version>2.5.1-SNAPSHOT</version>
+<version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
 <imports>
 <import location="../jcore-semantics-mention-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-bootstrep-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-bootstrep-types.xml
index 2ae6f1df3..a2f40fc70 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-bootstrep-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-bootstrep-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>JCoRe Semantics BOOTStrep Types</name>
   <description>The type system is an extension of the JCoRe core type system for types required in the context of the BOOTStrep project.</description>
-  <version>2.5.1-SNAPSHOT</version>
+  <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <imports>
     <import location="../jcore-semantics-biology-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-mention-extension-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-mention-extension-types.xml
index 1b67565c2..62cabb2d4 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-mention-extension-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-mention-extension-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <name>JCoRe Semantics Mention Types Extension</name>
     <description>JCoRe type extensions to the JCoRe Semantics Mention types. Required for some processing or representation, these types do not extend the actual semantics of the core type system.</description>
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <imports>
     <import location="../jcore-semantics-mention-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-stemnet-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-stemnet-types.xml
index c213f3f08..68f6711bd 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-stemnet-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-stemnet-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>JCoRe Semantics StemNet Typs</name>
   <description>The type system contains types of the StemNet project.</description>
-  <version>2.5.1-SNAPSHOT</version>
+  <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <imports>
     <import location="../jcore-semantics-biology-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-wikipedia-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-wikipedia-types.xml
index 48c8e3b9e..cb9265d5b 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-wikipedia-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-wikipedia-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>JCoRe Wikipedia Types</name>
   <description>The type system contains types for the annotation of meta information of Wikipedia pages.</description>
-  <version>2.5.1-SNAPSHOT</version>
+  <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <imports>
     <import location="../jcore-document-structure-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-affect-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-affect-types.xml
index 10d3a8bb7..69183e809 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-affect-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-affect-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <name>jcore-affect-types</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor />
   <imports>
     <import location="jcore-document-meta-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-all-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-all-types.xml
index fedf2eec7..d2d038014 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-all-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-all-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <name>JCoRe All Types</name>
     <description>This is just a convenience file, assembling all JCoRe types</description>
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <imports>
         <import location="jcore-basic-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-basic-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-basic-types.xml
index 00003147c..c1105adcc 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-basic-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-basic-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>JCoRe Basic Types</name>
   <description>The type system contains the basic annotation types.</description>
-  <version>2.5.1-SNAPSHOT</version>
+  <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <types>
     <typeDescription>
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-discourse-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-discourse-types.xml
index d3190b9e5..ab4888c8c 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-discourse-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-discourse-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>JCoRe Discourse Types</name>
   <description>Discourse types such as coreference relations.</description>
-  <version>2.5.1-SNAPSHOT</version>
+  <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <imports>
     <import location="jcore-basic-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-meta-clinicaltrial-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-meta-clinicaltrial-types.xml
index ce908039f..c39e6dd15 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-meta-clinicaltrial-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-meta-clinicaltrial-types.xml
@@ -5,7 +5,7 @@
         document meta information (bibliographical and content information),
         especially for PubMed abstracts.
     </description>
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <imports>
         <import location="jcore-document-meta-pubmed-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-meta-pubmed-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-meta-pubmed-types.xml
index 6363ece45..2deb2853d 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-meta-pubmed-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-meta-pubmed-types.xml
@@ -5,7 +5,7 @@
         document meta information (bibliographical and content information),
         especially for PubMed abstracts.
     </description>
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <imports>
         <import location="jcore-document-meta-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-meta-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-meta-types.xml
index 5a23252bd..2865894e7 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-meta-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-meta-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>JCoRe Document Meta Types</name>
   <description>The type system contains types for the annotation of document meta information (bibliographical and content information).</description>
-  <version>2.5.1-SNAPSHOT</version>
+  <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <imports>
     <import location="jcore-basic-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-structure-clinicaltrial-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-structure-clinicaltrial-types.xml
index 082c8e775..77b328da5 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-structure-clinicaltrial-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-structure-clinicaltrial-types.xml
@@ -4,7 +4,7 @@
     <description>This type system contains document structure types specific to the clinical trails XML format as
         retrieved from https://clinicaltrials.gov/.
     </description>
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <imports>
         <import location="jcore-document-structure-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-structure-pubmed-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-structure-pubmed-types.xml
index 57ea9b281..b575084d5 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-structure-pubmed-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-structure-pubmed-types.xml
@@ -4,7 +4,7 @@
     <description>This type system contains document structure types specific to PubMed or MEDLINE, e.g. detailed
         descriptions of structured abstracts.
     </description>
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <imports>
         <import location="jcore-document-structure-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-structure-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-structure-types.xml
index 633edd187..5159c11aa 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-structure-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-structure-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>JCoRe Document Structure Types</name>
   <description>The type system contains the types for the annotation of document sutructure, e.g. titles, abstract text, captions etc.</description>
-  <version>2.5.1-SNAPSHOT</version>
+  <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <imports>
     <import location="jcore-basic-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-morpho-syntax-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-morpho-syntax-types.xml
index 1c65aef6f..0ff447c77 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-morpho-syntax-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-morpho-syntax-types.xml
@@ -4,7 +4,7 @@
     <description>The type system contains types for the annotation of morpho-syntactic and syntactic analysis
         (constituncy-based and dependecy-based parsing) results.
     </description>
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <imports>
         <import location="jcore-basic-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-semantics-biology-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-semantics-biology-types.xml
index 92f89b23b..e421aa1c6 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-semantics-biology-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-semantics-biology-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <name>JCoRe Semantics Biology Types</name>
     <description>The type system contains types of the biomedical domain.</description>
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <imports>
         <import location="jcore-semantics-mention-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-semantics-concept-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-semantics-concept-types.xml
index edea7e8ee..7b4b3d008 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-semantics-concept-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-semantics-concept-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
 <name>JCoRe Semantics Concept Types</name>
 <description>The type system contains core semantic types definitions such as entity, relation and event.</description>
-<version>2.5.1-SNAPSHOT</version>
+<version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
 <imports>
 <import location="jcore-semantics-mention-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-semantics-mention-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-semantics-mention-types.xml
index e2ecd9dd4..87718af50 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-semantics-mention-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-semantics-mention-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>JCoRe Semantics Mention Types</name>
   <description>The type system contains core semantic types definitions such as entity, relation and event. The types in this type system refer to actual text occurrences.</description>
-  <version>2.5.1-SNAPSHOT</version>
+  <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <imports>
     <import location="jcore-basic-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/priorities/jcore-type-priorities.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/priorities/jcore-type-priorities.xml
index 1373c4eac..53c5d882f 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/priorities/jcore-type-priorities.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/priorities/jcore-type-priorities.xml
@@ -2,7 +2,7 @@
 <typePriorities xmlns="http://uima.apache.org/resourceSpecifier">
     <name>jcore-type-priorities</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor />
 	<priorityList>
 	    <type>de.julielab.jcore.types.Title</type>
diff --git a/jcore-utilities/pom.xml b/jcore-utilities/pom.xml
index 6395a3b73..aafbe74fe 100644
--- a/jcore-utilities/pom.xml
+++ b/jcore-utilities/pom.xml
@@ -10,7 +10,7 @@
 	<parent>
 		<groupId>de.julielab</groupId>
 		<artifactId>jcore-base</artifactId>
-		<version>2.5.1-SNAPSHOT</version>
+		<version>2.6.0-SNAPSHOT</version>
 	</parent>
 	
 	<dependencies>
diff --git a/jcore-utilities/src/test/resources/AETestDescriptor.xml b/jcore-utilities/src/test/resources/AETestDescriptor.xml
index ea2658e15..ab602e7c9 100644
--- a/jcore-utilities/src/test/resources/AETestDescriptor.xml
+++ b/jcore-utilities/src/test/resources/AETestDescriptor.xml
@@ -6,7 +6,7 @@
 <analysisEngineMetaData>
 <name>JulesToolsDescriptor</name>
 <description />
-<version>2.5.1-SNAPSHOT</version>
+<version>2.6.0-SNAPSHOT</version>
         <vendor />
 <configurationParameters />
 <configurationParameterSettings />
diff --git a/jcore-xmi-db-reader/component.meta b/jcore-xmi-db-reader/component.meta
index d8abdab0f..c7c922807 100644
--- a/jcore-xmi-db-reader/component.meta
+++ b/jcore-xmi-db-reader/component.meta
@@ -23,7 +23,7 @@
     "maven-artifact": {
         "artifactId": "jcore-xmi-db-reader",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe XMI Database Reader"
 }
diff --git a/jcore-xmi-db-reader/pom.xml b/jcore-xmi-db-reader/pom.xml
index 4e3f07f1a..6cd48ce47 100644
--- a/jcore-xmi-db-reader/pom.xml
+++ b/jcore-xmi-db-reader/pom.xml
@@ -5,7 +5,7 @@
     <parent>
         <artifactId>jedis-parent</artifactId>
         <groupId>de.julielab</groupId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <relativePath>../jedis-parent</relativePath>
     </parent>
     <artifactId>jcore-xmi-db-reader</artifactId>
@@ -18,7 +18,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-db-reader</artifactId>
-            <version>2.5.1-SNAPSHOT</version>
+            <version>2.6.0-SNAPSHOT</version>
         </dependency>
         <dependency>
             <groupId>org.testng</groupId>
@@ -59,13 +59,13 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-xml-db-reader</artifactId>
-            <version>2.5.1-SNAPSHOT</version>
+            <version>2.6.0-SNAPSHOT</version>
             <scope>test</scope>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-xmi-db-writer</artifactId>
-            <version>2.5.1-SNAPSHOT</version>
+            <version>2.6.0-SNAPSHOT</version>
             <scope>test</scope>
         </dependency>
     </dependencies>
diff --git a/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier-reader.xml b/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier-reader.xml
index 312cb5e0a..9ef28be72 100644
--- a/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier-reader.xml
+++ b/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier-reader.xml
@@ -5,7 +5,7 @@
     <processingResourceMetaData>
         <name>JCoRe XMI Database Multiplier Reader</name>
         <description>This is an extension of the DBMultiplierReader to handle JeDIS XMI annotation module data.</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <configurationParameters>
             <configurationParameter>
                 <name>ReadsBaseDocument</name>
diff --git a/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml b/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml
index 0e15747d0..081c3d6a8 100644
--- a/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml
+++ b/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe Abstract Database Multiplier</name>
         <description>A multiplier that receives document IDs to read from a database table from the DBMultiplierReader. The reader also delivers the path to the corpus storage system (CoStoSys) configuration and additional tables for joining with the main data table. This multiplier class is abstract and cannot be used directly.Extending classes must implement the next() method to actually read documents from the database and populate CASes with them. This component is a part of the Jena Document Information System, JeDIS.</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
         <copyright>JULIE Lab Jena, Germany</copyright>
         <configurationParameters>
diff --git a/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-reader.xml b/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-reader.xml
index c09220a89..dd703d3d1 100644
--- a/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-reader.xml
+++ b/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-reader.xml
@@ -5,7 +5,7 @@
     <processingResourceMetaData>
         <name>JCoRe XMI Database Reader</name>
         <description>A database readerthat expects serialized UIMA CAS objects in XMI format as input. The reader has the capability to read segmented annotation graphs that have been stored by the jcore-xmi-db-writer. This component is part of the Jena Document Information System, JeDIS.</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
         <configurationParameters>
             <configurationParameter>
diff --git a/jcore-xmi-db-writer/component.meta b/jcore-xmi-db-writer/component.meta
index 708695365..3c65e61ac 100644
--- a/jcore-xmi-db-writer/component.meta
+++ b/jcore-xmi-db-writer/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-xmi-db-writer",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe XMI Database Writer"
 }
diff --git a/jcore-xmi-db-writer/pom.xml b/jcore-xmi-db-writer/pom.xml
index 5a7320d2f..657e06c16 100644
--- a/jcore-xmi-db-writer/pom.xml
+++ b/jcore-xmi-db-writer/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <artifactId>jedis-parent</artifactId>
         <groupId>de.julielab</groupId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <relativePath>../jedis-parent</relativePath>
     </parent>
     <artifactId>jcore-xmi-db-writer</artifactId>
@@ -144,7 +144,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-db-checkpoint-ae</artifactId>
-            <version>2.5.1-SNAPSHOT</version>
+            <version>2.6.0-SNAPSHOT</version>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
diff --git a/jcore-xmi-db-writer/src/main/resources/de/julielab/jcore/consumer/xmi/desc/jcore-xmi-db-writer.xml b/jcore-xmi-db-writer/src/main/resources/de/julielab/jcore/consumer/xmi/desc/jcore-xmi-db-writer.xml
index 9eab689a6..b3b5afac1 100644
--- a/jcore-xmi-db-writer/src/main/resources/de/julielab/jcore/consumer/xmi/desc/jcore-xmi-db-writer.xml
+++ b/jcore-xmi-db-writer/src/main/resources/de/julielab/jcore/consumer/xmi/desc/jcore-xmi-db-writer.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe XMI Database Writer</name>
         <description>This component is capable of storing the standard UIMA serialization of documents in one or even multiple database tables. The UIMA serialization format is XMI, an XML format that expressed an annotation graph. This component either stores the whole annotation graph in XMI format in a database row, together with the document ID. Alternatively, it makes use of the jcore-xmi-splitter to segment the annotation graph with respect to a user specified list of annotation types. Then, the XMI data of each annotation type is extracted from the document XMI data and stored in a separate table. The tables are created automatically according to the primary key of the active table schema in the Corpus Storage System (CoStoSys) configuration file that is also given as a parameter. The jcore-xmi-db-reader is capable of reading this kind of distributed annotation graph and reassemble a valid XMI document which then cas be deserialized into a CAS. This consumer is UIMA DUCC compatible. It requires the collection reader to forward the work item CAS to the consumer. This is required so the consumer knows that a work item has been finished and that all cached data - in this case the XMI data - should be flushed. This is important! Without the forwarding of the work item CAS, the last batch of cached XMI data will not be written into the database. This component is part of the Jena Document Information System, JeDIS.</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
         <configurationParameters>
             <configurationParameter>
diff --git a/jcore-xmi-reader/component.meta b/jcore-xmi-reader/component.meta
index 701192b4c..347606dc4 100644
--- a/jcore-xmi-reader/component.meta
+++ b/jcore-xmi-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-xmi-reader",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe XMI Collection Reader"
 }
diff --git a/jcore-xmi-reader/pom.xml b/jcore-xmi-reader/pom.xml
index ea0dcd482..9e3df5b4c 100644
--- a/jcore-xmi-reader/pom.xml
+++ b/jcore-xmi-reader/pom.xml
@@ -13,7 +13,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
 
diff --git a/jcore-xmi-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-reader.xml b/jcore-xmi-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-reader.xml
index a7701f7e3..c6c747371 100644
--- a/jcore-xmi-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-reader.xml
+++ b/jcore-xmi-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-reader.xml
@@ -6,7 +6,7 @@
     <name>XmiCollectionReader</name>
     <description>A CollectionReader which reads CAS data stored as XMI files from the file system. The reader grounds on IBM's XmiCollectionReader delivered with older versions of UIMA and has been extended by the Julie Lab team at the University of Jena.
 This XMI reader is capable of reading (g)zipped XMI files and is able to recursively search subdirectories of a delivered root directory for XMI files.</description>
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-xmi-writer/component.meta b/jcore-xmi-writer/component.meta
index 48695ccb1..ef645b6dd 100644
--- a/jcore-xmi-writer/component.meta
+++ b/jcore-xmi-writer/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-xmi-writer",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe XMI Writer"
 }
diff --git a/jcore-xmi-writer/pom.xml b/jcore-xmi-writer/pom.xml
index 950de517b..65dd58b07 100644
--- a/jcore-xmi-writer/pom.xml
+++ b/jcore-xmi-writer/pom.xml
@@ -11,7 +11,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
     <dependencies>
         <dependency>
diff --git a/jcore-xmi-writer/src/main/resources/de/julielab/jcore/consumer/xmi/desc/jcore-xmi-writer.xml b/jcore-xmi-writer/src/main/resources/de/julielab/jcore/consumer/xmi/desc/jcore-xmi-writer.xml
index cfd5692d9..aaeb7196c 100644
--- a/jcore-xmi-writer/src/main/resources/de/julielab/jcore/consumer/xmi/desc/jcore-xmi-writer.xml
+++ b/jcore-xmi-writer/src/main/resources/de/julielab/jcore/consumer/xmi/desc/jcore-xmi-writer.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>XMIWriter</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-xmi-writer/src/test/resources/de/julielab/jcore/consumer/xmi/CasToXmiConsumer.xml b/jcore-xmi-writer/src/test/resources/de/julielab/jcore/consumer/xmi/CasToXmiConsumer.xml
index 1453038df..c11ac0001 100644
--- a/jcore-xmi-writer/src/test/resources/de/julielab/jcore/consumer/xmi/CasToXmiConsumer.xml
+++ b/jcore-xmi-writer/src/test/resources/de/julielab/jcore/consumer/xmi/CasToXmiConsumer.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>XMIWriter</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-xml-db-reader/component.meta b/jcore-xml-db-reader/component.meta
index 6fde40ce7..0d7fce2f6 100644
--- a/jcore-xml-db-reader/component.meta
+++ b/jcore-xml-db-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-xml-db-reader",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe XML Database Reader"
 }
diff --git a/jcore-xml-db-reader/pom.xml b/jcore-xml-db-reader/pom.xml
index 72a3652f7..145fcc69a 100644
--- a/jcore-xml-db-reader/pom.xml
+++ b/jcore-xml-db-reader/pom.xml
@@ -15,7 +15,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jedis-parent</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <relativePath>../jedis-parent</relativePath>
     </parent>
 
@@ -23,7 +23,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-db-reader</artifactId>
-            <version>2.5.1-SNAPSHOT</version>
+            <version>2.6.0-SNAPSHOT</version>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
@@ -51,7 +51,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-xml-mapper</artifactId>
-            <version>2.5.1-SNAPSHOT</version>
+            <version>2.6.0-SNAPSHOT</version>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
diff --git a/jcore-xml-db-reader/src/main/resources/de/julielab/jcore/reader/xml/desc/jcore-xml-db-reader.xml b/jcore-xml-db-reader/src/main/resources/de/julielab/jcore/reader/xml/desc/jcore-xml-db-reader.xml
index 6b562101f..a7d8fe03c 100644
--- a/jcore-xml-db-reader/src/main/resources/de/julielab/jcore/reader/xml/desc/jcore-xml-db-reader.xml
+++ b/jcore-xml-db-reader/src/main/resources/de/julielab/jcore/reader/xml/desc/jcore-xml-db-reader.xml
@@ -7,7 +7,7 @@
         <description>A collection reader that receives XML document data from a PostgreSQL database. It employs the
         jcore-xml-mapper to populate UIMA CAS instances with the XML data according to a mapping file. For the same
         functionality without using a database, refer to the jcore-xml-reader.</description>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab, Germany</vendor>
         <configurationParameters>
             <configurationParameter>
diff --git a/jcore-xml-mapper/pom.xml b/jcore-xml-mapper/pom.xml
index dab5025f2..ded5b9a9f 100644
--- a/jcore-xml-mapper/pom.xml
+++ b/jcore-xml-mapper/pom.xml
@@ -13,7 +13,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor.xml b/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor.xml
index b1878a690..a1bebd5a0 100644
--- a/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor.xml
+++ b/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>XMLReader</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_Unicode_outside_BMP.xml b/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_Unicode_outside_BMP.xml
index 31c6e8683..0ce228185 100755
--- a/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_Unicode_outside_BMP.xml
+++ b/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_Unicode_outside_BMP.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>XMLReader</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_missingInputDir.xml b/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_missingInputDir.xml
index f103e0d5a..b501db9fa 100644
--- a/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_missingInputDir.xml
+++ b/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_missingInputDir.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>XMLReader</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_singleFile.xml b/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_singleFile.xml
index bf791c1c9..af51a64c1 100644
--- a/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_singleFile.xml
+++ b/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_singleFile.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>XMLReader</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_singleFile2.xml b/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_singleFile2.xml
index b0350909a..fdc051f37 100644
--- a/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_singleFile2.xml
+++ b/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_singleFile2.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>XMLReader</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-xml-reader/component.meta b/jcore-xml-reader/component.meta
index dec59c048..97de60fef 100644
--- a/jcore-xml-reader/component.meta
+++ b/jcore-xml-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-xml-reader",
         "groupId": "de.julielab",
-        "version": "2.5.1-SNAPSHOT"
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe XML Reader"
 }
diff --git a/jcore-xml-reader/pom.xml b/jcore-xml-reader/pom.xml
index 1deddb382..1719c5c73 100644
--- a/jcore-xml-reader/pom.xml
+++ b/jcore-xml-reader/pom.xml
@@ -5,7 +5,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
     <artifactId>jcore-xml-reader</artifactId>
     <name>JCoRe XML Reader</name>
@@ -14,7 +14,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-xml-mapper</artifactId>
-            <version>2.5.1-SNAPSHOT</version>
+            <version>2.6.0-SNAPSHOT</version>
         </dependency>
         <dependency>
             <groupId>org.slf4j</groupId>
diff --git a/jcore-xml-reader/src/main/resources/de/julielab/jcore/reader/xml/desc/XMLMultiplierReader.xml b/jcore-xml-reader/src/main/resources/de/julielab/jcore/reader/xml/desc/XMLMultiplierReader.xml
index 34d04d1c7..f13e7b82c 100644
--- a/jcore-xml-reader/src/main/resources/de/julielab/jcore/reader/xml/desc/XMLMultiplierReader.xml
+++ b/jcore-xml-reader/src/main/resources/de/julielab/jcore/reader/xml/desc/XMLMultiplierReader.xml
@@ -8,7 +8,7 @@
       This reader is to be used with the JCoRe XML CAS Multiplier. The reader merely distributes the files to
       be read. The actual parsing is done by the multiplier.
     </description>
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-xml-reader/src/test/resources/MedlineReaderDescriptor_missingInputDir.xml b/jcore-xml-reader/src/test/resources/MedlineReaderDescriptor_missingInputDir.xml
index cd9a3ac70..f1aaab0c6 100644
--- a/jcore-xml-reader/src/test/resources/MedlineReaderDescriptor_missingInputDir.xml
+++ b/jcore-xml-reader/src/test/resources/MedlineReaderDescriptor_missingInputDir.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>MedlineReaderDescriptor_missingInputDir</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-xml-reader/src/test/resources/PubmedXMLMultiplier.xml b/jcore-xml-reader/src/test/resources/PubmedXMLMultiplier.xml
index d8ad0005b..964ccdf74 100644
--- a/jcore-xml-reader/src/test/resources/PubmedXMLMultiplier.xml
+++ b/jcore-xml-reader/src/test/resources/PubmedXMLMultiplier.xml
@@ -5,7 +5,7 @@
   <analysisEngineMetaData>
     <name>PubmedXMLMultiplierDescriptor</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor />
     <configurationParameters searchStrategy="language_fallback">
       <configurationParameter>
diff --git a/jcore-xml-reader/src/test/resources/XMLMultiplierReader.xml b/jcore-xml-reader/src/test/resources/XMLMultiplierReader.xml
index 90a50848b..5d7c405fb 100644
--- a/jcore-xml-reader/src/test/resources/XMLMultiplierReader.xml
+++ b/jcore-xml-reader/src/test/resources/XMLMultiplierReader.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>MedlineReaderDescriptor_missingInputDir</name>
     <description />
-    <version>2.5.1-SNAPSHOT</version>
+    <version>2.6.0-SNAPSHOT</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jedis-parent/pom.xml b/jedis-parent/pom.xml
index 4d1302786..f56a81be0 100644
--- a/jedis-parent/pom.xml
+++ b/jedis-parent/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <artifactId>jcore-base</artifactId>
         <groupId>de.julielab</groupId>
-        <version>2.5.1-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
     <packaging>pom</packaging>
     <modelVersion>4.0.0</modelVersion>
diff --git a/pom.xml b/pom.xml
index 274a990dd..7f4011b1e 100644
--- a/pom.xml
+++ b/pom.xml
@@ -21,7 +21,7 @@
       
   <description>The POM for the JCoRe Base projects.</description>
       
-  <version>2.5.1-SNAPSHOT</version>
+  <version>2.6.0-SNAPSHOT</version>
       
   <organization>
             

From 5eec4cbe26f9d67116980b8bcc6eb0b64e841546 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 9 Jun 2020 10:29:23 +0200
Subject: [PATCH 005/269] All Neo4jRelationsConsumer tests running.

---
 .../Neo4jRelationsConsumer.java               | 13 ++-
 ...Neo4jRelationsConsumerIntegrationTest.java | 88 +++++++++++++++++--
 .../Neo4jRelationsConsumerTest.java           |  4 +-
 3 files changed, 96 insertions(+), 9 deletions(-)

diff --git a/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java b/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java
index 9b9a6dddc..a3653b18a 100644
--- a/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java
+++ b/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java
@@ -39,6 +39,8 @@
 import java.util.*;
 import java.util.stream.StreamSupport;
 
+import static java.nio.charset.StandardCharsets.UTF_8;
+
 @ResourceMetaData(name = "JCoRe Neo4j Relations Consumer", description = "This component assumes that a Neo4j server with an installed julieliab-neo4j-plugins-concepts plugin installed. It then sends FlattenedRelation instances with more then one arguments to Neo4j. Note that this requires the event arguments to have a ResourceEntry list to obtain database concept IDs from.", vendor = "JULIE Lab, Germany", copyright = "JULIE Lab", version = "2.6.0-SNAPSHOT")
 @TypeCapability(inputs = {"de.julielab.jcore.types.EventMention"})
 public class Neo4jRelationsConsumer extends JCasAnnotator_ImplBase {
@@ -69,6 +71,7 @@ public void initialize(final UimaContext aContext) throws ResourceInitialization
         om = new ObjectMapper();
         om.setSerializationInclusion(JsonInclude.Include.NON_NULL);
         om.setSerializationInclusion(JsonInclude.Include.NON_EMPTY);
+        initImportRelations();
     }
 
     private void initImportRelations() {
@@ -118,6 +121,7 @@ private void sendRelationsToNeo4j() throws AnalysisEngineProcessException {
         try {
             URL url = URI.create(this.url).toURL();
             HttpURLConnection urlConnection = (HttpURLConnection) url.openConnection();
+            urlConnection.addRequestProperty("Content-Type", "application/json");
             urlConnection.setRequestMethod(HttpMethod.POST);
             urlConnection.setDoOutput(true);
             try (OutputStream outputStream = urlConnection.getOutputStream()) {
@@ -138,7 +142,14 @@ private void sendRelationsToNeo4j() throws AnalysisEngineProcessException {
                 g.close();
             }
             try (InputStream inputStream = urlConnection.getInputStream()) {
-                log.debug("Response from Neo4j: {}", IOUtils.toString(inputStream));
+                log.debug("Response from Neo4j: {}", IOUtils.toString(inputStream, UTF_8));
+            } catch (IOException e) {
+                log.error("Exception occurred while sending relation data to Neo4j server.");
+                try (InputStream inputStream = urlConnection.getErrorStream()) {
+                    if (inputStream != null)
+                        log.error("Error from Neo4j: {}", IOUtils.toString(inputStream, UTF_8));
+                }
+                throw e;
             }
             importIERelations.clear();
         } catch (IOException e) {
diff --git a/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerIntegrationTest.java b/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerIntegrationTest.java
index 6c853ecdd..e176fba41 100644
--- a/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerIntegrationTest.java
+++ b/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerIntegrationTest.java
@@ -1,31 +1,107 @@
 
 package de.julielab.jcore.consumer.neo4jrelations;
 
+import de.julielab.jcore.types.pubmed.Header;
 import de.julielab.neo4j.plugins.Indexes;
+import de.julielab.neo4j.plugins.concepts.ConceptLookup;
 import de.julielab.neo4j.plugins.concepts.ConceptManager;
-import org.apache.uima.UIMAException;
+import de.julielab.neo4j.plugins.datarepresentation.*;
+import de.julielab.neo4j.plugins.datarepresentation.constants.FacetConstants;
+import de.julielab.neo4j.plugins.datarepresentation.util.ConceptsJsonSerializer;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
-import org.junit.Rule;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
 import org.junit.Test;
+import org.neo4j.graphdb.GraphDatabaseService;
+import org.neo4j.graphdb.Node;
+import org.neo4j.graphdb.RelationshipType;
+import org.neo4j.graphdb.Transaction;
 import org.neo4j.harness.junit.rule.Neo4jRule;
+import org.neo4j.test.server.HTTP;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
+import java.util.List;
+import java.util.stream.Stream;
+
+import static de.julielab.jcore.consumer.neo4jrelations.Neo4jRelationsConsumerTest.addFlattenedRelation1ToCas;
+import static de.julielab.jcore.consumer.neo4jrelations.Neo4jRelationsConsumerTest.addFlattenedRelation2ToCas;
+import static de.julielab.neo4j.plugins.constants.semedico.SemanticRelationConstants.PROP_DOC_IDS;
+import static de.julielab.neo4j.plugins.datarepresentation.constants.ConceptConstants.PROP_SRC_IDS;
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.junit.Assert.assertEquals;
+import static org.neo4j.configuration.GraphDatabaseSettings.DEFAULT_DATABASE_NAME;
 
 /**
  * Unit tests for jcore-neo4j-relations-consumer.
  *
  */
 public class Neo4jRelationsConsumerIntegrationTest {
-    @Rule
-    public Neo4jRule neo4j = new Neo4jRule()
+    private final static Logger log = LoggerFactory.getLogger(Neo4jRelationsConsumerIntegrationTest.class);
+    @ClassRule
+    public static Neo4jRule neo4j = new Neo4jRule()
             .withUnmanagedExtension("/concepts", ConceptManager.class).withFixture(graphDatabaseService -> {
                 new Indexes(null).createIndexes(graphDatabaseService);
                 return null;
             });
 
+    @BeforeClass
+    public static void beforeClass() throws Exception {
+        ImportFacet facet = new ImportFacet(new ImportFacetGroup("FG"), "myfacet", "myfacet", "myfacet", FacetConstants.SRC_TYPE_HIERARCHICAL);
+        ImportConcept c11 = new ImportConcept("concept11", new ConceptCoordinates("id11", "source11", CoordinateType.SRC));
+        ImportConcept c12 = new ImportConcept("concept12", new ConceptCoordinates("id12", "source12", CoordinateType.SRC));
+        ImportConcept c13 = new ImportConcept("concept13", new ConceptCoordinates("id13", "source13", CoordinateType.SRC));
+        ImportConcept c21 = new ImportConcept("concept21", new ConceptCoordinates("id21", "source21", CoordinateType.SRC));
+        ImportConcept c22 = new ImportConcept("concept22", new ConceptCoordinates("id22", "source22", CoordinateType.SRC));
+        ImportConcepts importConcepts = new ImportConcepts(Stream.of(c11, c12, c13, c21, c22), facet);
+        String uri = neo4j.httpURI().resolve("concepts/" + ConceptManager.CM_REST_ENDPOINT+"/"+ConceptManager.INSERT_CONCEPTS).toString();
+        log.debug("Sending concepts to {}", uri);
+        HTTP.Response response = HTTP.POST(uri, ConceptsJsonSerializer.toJsonTree(importConcepts));
+        log.debug("Response to test concepts import: {}", response);
+        assertEquals(200, response.status());
+    }
+
     @Test
-    public void insertEventMentions() throws UIMAException {
-        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-meta-pubmed-types");
+    public void insertEventMentions() throws Exception {
+        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types", "de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-meta-pubmed-types");
+        Header h = new Header(jCas);
+        h.setDocId("testdoc");
+        h.addToIndexes();
+        addFlattenedRelation1ToCas(jCas);
+        // Relation 2 is added twice on purpose. The duplicate should be recognized and just be counted up
+        addFlattenedRelation2ToCas(jCas);
+        addFlattenedRelation2ToCas(jCas);
+
+        AnalysisEngine engine = AnalysisEngineFactory.createEngine(
+                "de.julielab.jcore.consumer.neo4jrelations.desc.jcore-neo4j-relations-consumer",
+                Neo4jRelationsConsumer.PARAM_URL, neo4j.httpURI().resolve("concepts/" + ConceptManager.CM_REST_ENDPOINT+"/"+ConceptManager.INSERT_IE_RELATIONS).toString(),
+                Neo4jRelationsConsumer.PARAM_ID_PROPERTY, "sourceIds");
+
+        engine.process(jCas);
+        engine.collectionProcessComplete();
+
+        GraphDatabaseService graphDb = neo4j.databaseManagementService().database(DEFAULT_DATABASE_NAME);
+        try (Transaction tx = graphDb.beginTx()) {
+            Node id11 = ConceptLookup.lookupSingleConceptBySourceId(tx, "id11");
+            // There should be connections to 12 and 13.
+            assertThat(id11.getRelationships(RelationshipType.withName("regulation"))).hasSize(2);
+            assertThat(id11.getRelationships(RelationshipType.withName("regulation"))).flatExtracting(r -> List.of((String[]) r.getProperty(PROP_DOC_IDS))).containsExactly("testdoc", "testdoc");
+            assertThat(id11.getRelationships(RelationshipType.withName("regulation"))).extracting(r -> r.getOtherNode(id11).getProperty(PROP_SRC_IDS+0)).containsExactlyInAnyOrder("id12", "id13");
+
+            Node id13 = ConceptLookup.lookupSingleConceptBySourceId(tx, "id13");
+            // There should be connections to 11 and 12.
+            assertThat(id13.getRelationships(RelationshipType.withName("regulation"))).hasSize(2);
+            assertThat(id13.getRelationships(RelationshipType.withName("regulation"))).flatExtracting(r -> List.of((String[]) r.getProperty(PROP_DOC_IDS))).containsExactly("testdoc", "testdoc");
+            assertThat(id13.getRelationships(RelationshipType.withName("regulation"))).extracting(r -> r.getOtherNode(id13).getProperty(PROP_SRC_IDS+0)).containsExactlyInAnyOrder("id11", "id12");
 
+            Node id22 = ConceptLookup.lookupSingleConceptBySourceId(tx, "id22");
+            // There should be exactly one connection, to 21.
+            assertThat(id22.getRelationships(RelationshipType.withName("regulation"))).hasSize(1);
+            assertThat(id22.getRelationships(RelationshipType.withName("regulation"))).flatExtracting(r -> List.of((String[]) r.getProperty(PROP_DOC_IDS))).containsExactly("testdoc");
+            assertThat(id22.getRelationships(RelationshipType.withName("regulation"))).extracting(r -> r.getOtherNode(id22).getProperty(PROP_SRC_IDS+0)).containsExactlyInAnyOrder("id21");
+        }
     }
 }
diff --git a/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerTest.java b/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerTest.java
index f40f929b2..8cf7bc2e5 100644
--- a/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerTest.java
+++ b/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerTest.java
@@ -73,7 +73,7 @@ public void insertEventMentionsGlobalSource() throws Exception {
      * Adds a FlattenedRelation with three arguments.
      * @param jCas The CAS.
      */
-    private void addFlattenedRelation1ToCas(JCas jCas) {
+    public static void addFlattenedRelation1ToCas(JCas jCas) {
         FlattenedRelation fr = new FlattenedRelation(jCas);
         EventMention rootEm = new EventMention(jCas);
         rootEm.setSpecificType("regulation");
@@ -111,7 +111,7 @@ private void addFlattenedRelation1ToCas(JCas jCas) {
      * Adds a FlattenedRelation with two arguments.
      * @param jCas The CAS.
      */
-    private void addFlattenedRelation2ToCas(JCas jCas) {
+    public static void addFlattenedRelation2ToCas(JCas jCas) {
         FlattenedRelation fr = new FlattenedRelation(jCas);
         EventMention rootEm = new EventMention(jCas);
         rootEm.setSpecificType("regulation");

From e233713340150a84f1f5a3dfbdaa40a1b3208844 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 9 Jun 2020 10:43:49 +0200
Subject: [PATCH 006/269] Adding the component.meta file for the Neo4j
 Relations Consumer.

---
 jcore-neo4j-relations-consumer/component.meta | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 jcore-neo4j-relations-consumer/component.meta

diff --git a/jcore-neo4j-relations-consumer/component.meta b/jcore-neo4j-relations-consumer/component.meta
new file mode 100644
index 000000000..e988fe410
--- /dev/null
+++ b/jcore-neo4j-relations-consumer/component.meta
@@ -0,0 +1,20 @@
+{
+    "categories": [
+        "consumer"
+    ],
+    "description": "Writes EventMentions to Neo4j.",
+    "descriptors": [
+        {
+            "category": "consumer",
+            "location": "de.julielab.jcore.consumer.neo4jrelations.desc.jcore-neo4j-relations-consumer"
+        }
+    ],
+    "exposable": true,
+    "group": "general",
+    "maven-artifact": {
+        "artifactId": "jcore-neo4j-relations-consumer",
+        "groupId": "de.julielab",
+        "version": "2.6.0-SNAPSHOT"
+    },
+    "name": "JCoRe Neo4j Relations Consumer"
+}

From febb256a2090fcf0f17ed2f19b568e6299e8959e Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 9 Jun 2020 10:58:59 +0200
Subject: [PATCH 007/269] Fixed test bugs where the document ID was not set to
 the CAS.

---
 .../neo4jrelations/Neo4jRelationsConsumerTest.java         | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerTest.java b/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerTest.java
index 8cf7bc2e5..28ba51f74 100644
--- a/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerTest.java
+++ b/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerTest.java
@@ -6,6 +6,7 @@
 import de.julielab.jcore.types.EventMention;
 import de.julielab.jcore.types.ResourceEntry;
 import de.julielab.jcore.types.ext.FlattenedRelation;
+import de.julielab.jcore.types.pubmed.Header;
 import de.julielab.jcore.utility.JCoReTools;
 import de.julielab.neo4j.plugins.datarepresentation.ImportIERelation;
 import de.julielab.neo4j.plugins.datarepresentation.ImportIERelationArgument;
@@ -30,6 +31,9 @@ public class Neo4jRelationsConsumerTest {
     @Test
     public void insertEventMentions() throws Exception {
         JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types", "de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-meta-pubmed-types");
+        Header h = new Header(jCas);
+        h.setDocId("testdoc");
+        h.addToIndexes();
         Neo4jRelationsConsumer engine = new Neo4jRelationsConsumer();
         engine.initialize(UimaContextFactory.createUimaContext(Neo4jRelationsConsumer.PARAM_URL, "", Neo4jRelationsConsumer.PARAM_ID_PROPERTY, "sourceIds"));
         addFlattenedRelation1ToCas(jCas);
@@ -53,6 +57,9 @@ public void insertEventMentions() throws Exception {
     @Test
     public void insertEventMentionsGlobalSource() throws Exception {
         JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types", "de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-meta-pubmed-types");
+        Header h = new Header(jCas);
+        h.setDocId("testdoc");
+        h.addToIndexes();
         Neo4jRelationsConsumer engine = new Neo4jRelationsConsumer();
         engine.initialize(UimaContextFactory.createUimaContext(Neo4jRelationsConsumer.PARAM_URL, "", Neo4jRelationsConsumer.PARAM_ID_PROPERTY, "sourceIds", Neo4jRelationsConsumer.PARAM_SOURCE, "globalSource"));
         addFlattenedRelation1ToCas(jCas);

From 5c4d0a021f81b55a4e935a356ac17330be7f3e6d Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 10 Jun 2020 13:57:17 +0200
Subject: [PATCH 008/269] Bug fixes

---
 .../checkpoint/DocumentReleaseCheckpoint.java | 13 +++++--
 jcore-neo4j-relations-consumer/pom.xml        |  5 +++
 .../Neo4jRelationsConsumer.java               | 38 ++++++++++++++++--
 .../desc/jcore-neo4j-relations-consumer.xml   | 39 ++++++++-----------
 ...Neo4jRelationsConsumerIntegrationTest.java |  2 +-
 .../jcore/consumer/xmi/XMIDBWriter.java       |  1 -
 6 files changed, 66 insertions(+), 32 deletions(-)

diff --git a/jcore-db-checkpoint-ae/src/main/java/de/julielab/jcore/ae/checkpoint/DocumentReleaseCheckpoint.java b/jcore-db-checkpoint-ae/src/main/java/de/julielab/jcore/ae/checkpoint/DocumentReleaseCheckpoint.java
index e67750ed5..cb94a8aa3 100644
--- a/jcore-db-checkpoint-ae/src/main/java/de/julielab/jcore/ae/checkpoint/DocumentReleaseCheckpoint.java
+++ b/jcore-db-checkpoint-ae/src/main/java/de/julielab/jcore/ae/checkpoint/DocumentReleaseCheckpoint.java
@@ -14,7 +14,7 @@
  * <p>This is class is a synchronization point for JeDIS components to report documents as being completely finished
  * with processing.</p>
  * <p>Problem explanation: This synchronization is necessary because most database operating components work in batch mode for
- * performance reasons. However, if multiple components use batching with might be out of sync due to different
+ * performance reasons. However, if multiple components use batching which might be out of sync due to different
  * batch sizes and possibly other factors, one component may have sent a batch of document data to the database
  * while other components have not at a particular point in time. If at such a time point the pipeline crashes
  * or is manually interrupted, the actually written data is incoherent in the sense that some components have sent
@@ -41,6 +41,7 @@ public class DocumentReleaseCheckpoint {
     private static DocumentReleaseCheckpoint checkpoint;
     private Multiset<DocumentId> releasedDocuments;
     private Set<String> registeredComponents;
+    private long lastwarning = 1000;
 
     private DocumentReleaseCheckpoint() {
         releasedDocuments = HashMultiset.create();
@@ -99,13 +100,17 @@ public Set<DocumentId> getReleasedDocumentIds() {
         // Get all documents released by all components
         Set<DocumentId> returnedIds;
         synchronized (releasedDocuments) {
-            returnedIds = this.releasedDocuments.entrySet().stream().filter(e -> e.getCount() == getNumberOfRegisteredComponents()).map(Multiset.Entry::getElement).collect(Collectors.toSet());
+            returnedIds = this.releasedDocuments.elementSet().stream().filter(e -> this.releasedDocuments.count(e) == getNumberOfRegisteredComponents()).collect(Collectors.toSet());
             // Remove the completely released documents from the pool of potentially not yet completely released documents.
             returnedIds.forEach(id -> this.releasedDocuments.remove(id, Integer.MAX_VALUE));
         }
         log.debug("Returning {} documents released by all registered components. {} document IDs remain that have not yet been released by all registered components.", returnedIds.size(), this.releasedDocuments.size());
-        if (this.releasedDocuments.size() > 1000)
-            log.warn("The number of document IDs that have not been released by all registered components has grown to {}. If it does not increase again, there is likely an errorneous component which does not release its documents.", releasedDocuments.size());
+        if (this.releasedDocuments.size() > lastwarning) {
+            log.warn("The number of document IDs that have not been released by all registered components has grown to {}. If it does not decrease again, there is likely an errorneous component which does not release its documents. Currently registered components: {}", releasedDocuments.size(), registeredComponents);
+            lastwarning *= 2;
+        } else if (this.releasedDocuments.size() < 50) {
+            lastwarning = 1000;
+        }
         return returnedIds;
     }
 
diff --git a/jcore-neo4j-relations-consumer/pom.xml b/jcore-neo4j-relations-consumer/pom.xml
index b7a2bf83c..6b0d0060c 100644
--- a/jcore-neo4j-relations-consumer/pom.xml
+++ b/jcore-neo4j-relations-consumer/pom.xml
@@ -39,6 +39,11 @@
             <artifactId>jcore-utilities</artifactId>
             <version>${jcore-utilities-version}</version>
         </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-db-checkpoint-ae</artifactId>
+            <version>2.6.0-SNAPSHOT</version>
+        </dependency>
         <dependency>
             <groupId>org.neo4j.test</groupId>
             <artifactId>neo4j-harness</artifactId>
diff --git a/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java b/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java
index a3653b18a..2ad273897 100644
--- a/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java
+++ b/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java
@@ -6,9 +6,12 @@
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.common.collect.HashMultiset;
 import com.google.common.collect.Multiset;
+import de.julielab.jcore.ae.checkpoint.DocumentId;
+import de.julielab.jcore.ae.checkpoint.DocumentReleaseCheckpoint;
 import de.julielab.jcore.types.ArgumentMention;
 import de.julielab.jcore.types.ConceptMention;
 import de.julielab.jcore.types.ResourceEntry;
+import de.julielab.jcore.types.ext.DBProcessingMetaData;
 import de.julielab.jcore.types.ext.FlattenedRelation;
 import de.julielab.jcore.utility.JCoReTools;
 import de.julielab.neo4j.plugins.datarepresentation.ImportIERelation;
@@ -16,6 +19,7 @@
 import de.julielab.neo4j.plugins.datarepresentation.ImportIERelationDocument;
 import de.julielab.neo4j.plugins.datarepresentation.ImportIETypedRelations;
 import de.julielab.neo4j.plugins.datarepresentation.constants.ImportIERelations;
+import org.apache.commons.codec.binary.Base64;
 import org.apache.commons.io.IOUtils;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
@@ -23,6 +27,7 @@
 import org.apache.uima.fit.descriptor.ConfigurationParameter;
 import org.apache.uima.fit.descriptor.ResourceMetaData;
 import org.apache.uima.fit.descriptor.TypeCapability;
+import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.FSArray;
 import org.apache.uima.resource.ResourceInitializationException;
@@ -48,6 +53,8 @@ public class Neo4jRelationsConsumer extends JCasAnnotator_ImplBase {
     public static final String PARAM_URL = "URL";
     public static final String PARAM_ID_PROPERTY = "IdProperty";
     public static final String PARAM_SOURCE = "ConceptSource";
+    public static final String PARAM_NEO4J_USER = "Neo4jUser";
+    public static final String PARAM_NEO4J_PASSWORD = "Neo4jPassword";
     private final static Logger log = LoggerFactory.getLogger(Neo4jRelationsConsumer.class);
     @ConfigurationParameter(name = PARAM_URL, description = "The complete URL to the endpoint of the Neo4j server for relation insertion.")
     private String url;
@@ -55,10 +62,16 @@ public class Neo4jRelationsConsumer extends JCasAnnotator_ImplBase {
     private String idProperty;
     @ConfigurationParameter(name = PARAM_SOURCE, mandatory = false, description = "Optional. Sets the global source for the concept IDs taken from the ResourceEntry instances of the relation arguments. This causes the 'source' feature of the ResourceEntry objects to be omitted and to globally use the specified source instead. This causes the Neo4j database plugin to resolve the provided argument IDs against the source specified here.")
     private String globalSource;
+    @ConfigurationParameter(name = PARAM_NEO4J_USER, mandatory = false, description = "Optional. The Neo4j server user name.")
+    private String neo4jUser;
+    @ConfigurationParameter(name = PARAM_NEO4J_PASSWORD, mandatory = false, description = "Optional. The Neo4j server password.")
+    private String neo4jPassword;
 
     private ImportIERelations importIERelations;
     private ObjectMapper om;
 
+    private Set<DocumentId> documentIds;
+
     /**
      * This method is called a single time by the framework at component
      * creation. Here, descriptor parameters are read and initial setup is done.
@@ -68,10 +81,14 @@ public void initialize(final UimaContext aContext) throws ResourceInitialization
         url = (String) aContext.getConfigParameterValue(PARAM_URL);
         idProperty = (String) aContext.getConfigParameterValue(PARAM_ID_PROPERTY);
         globalSource = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_SOURCE)).orElse(null);
+        neo4jUser = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_NEO4J_USER)).orElse(null);
+        neo4jPassword = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_NEO4J_PASSWORD)).orElse(null);
         om = new ObjectMapper();
         om.setSerializationInclusion(JsonInclude.Include.NON_NULL);
         om.setSerializationInclusion(JsonInclude.Include.NON_EMPTY);
         initImportRelations();
+        DocumentReleaseCheckpoint.get().register(Neo4jRelationsConsumer.class.getCanonicalName());
+        documentIds = new HashSet<>();
     }
 
     private void initImportRelations() {
@@ -84,14 +101,20 @@ private void initImportRelations() {
      */
     @Override
     public void process(final JCas aJCas) {
-        importIERelations.addRelationDocument(convertRelations(aJCas));
+        ImportIERelationDocument document = convertRelations(aJCas);
+        if (!document.getRelations().isEmpty())
+            importIERelations.addRelationDocument(document);
+
+        Optional<DBProcessingMetaData> metaOpt = JCasUtil.select(aJCas, DBProcessingMetaData.class).stream().findAny();
+        documentIds.add(metaOpt.isPresent() ? new DocumentId(metaOpt.get()) : new DocumentId(JCoReTools.getDocId(aJCas)));
     }
 
     private ImportIERelationDocument convertRelations(JCas aJCas) {
         Map<String, Multiset<UnificationRelation>> relationCounts = getEquivalentRelationGroups(aJCas);
         ImportIERelationDocument relDoc = new ImportIERelationDocument();
         relDoc.setDb(false);
-        relDoc.setName(JCoReTools.getDocId(aJCas));
+        String docId = JCoReTools.getDocId(aJCas);
+        relDoc.setName(docId);
         ImportIETypedRelations typedRelations = new ImportIETypedRelations();
         for (String relationType : relationCounts.keySet()) {
             Multiset<UnificationRelation> unificationRelations = relationCounts.get(relationType);
@@ -115,6 +138,7 @@ public void batchProcessComplete() throws AnalysisEngineProcessException {
     public void collectionProcessComplete() throws AnalysisEngineProcessException {
         super.collectionProcessComplete();
         sendRelationsToNeo4j();
+        DocumentReleaseCheckpoint.get().unregister(Neo4jRelationsConsumer.class.getCanonicalName());
     }
 
     private void sendRelationsToNeo4j() throws AnalysisEngineProcessException {
@@ -122,6 +146,11 @@ private void sendRelationsToNeo4j() throws AnalysisEngineProcessException {
             URL url = URI.create(this.url).toURL();
             HttpURLConnection urlConnection = (HttpURLConnection) url.openConnection();
             urlConnection.addRequestProperty("Content-Type", "application/json");
+            String authorizationToken = neo4jUser != null && neo4jPassword != null
+                    ? "Basic " + Base64.encodeBase64URLSafeString((neo4jUser + ":" + neo4jPassword).getBytes())
+                    : null;
+            if (authorizationToken != null)
+                urlConnection.setRequestProperty("Authorization", authorizationToken);
             urlConnection.setRequestMethod(HttpMethod.POST);
             urlConnection.setDoOutput(true);
             try (OutputStream outputStream = urlConnection.getOutputStream()) {
@@ -152,8 +181,11 @@ private void sendRelationsToNeo4j() throws AnalysisEngineProcessException {
                 throw e;
             }
             importIERelations.clear();
+            log.debug("Releasing {} document IDs that have successfully been sent to Neo4j", documentIds.size());
+            DocumentReleaseCheckpoint.get().release(Neo4jRelationsConsumer.class.getCanonicalName(), documentIds.stream());
+            documentIds.clear();
         } catch (IOException e) {
-            log.error("Could not send relations to Neo4j", e);
+            log.error("Could not send relations to Neo4j endpoint {}", url, e);
             throw new AnalysisEngineProcessException(e);
         }
     }
diff --git a/jcore-neo4j-relations-consumer/src/main/resources/de/julielab/jcore/consumer/neo4jrelations/desc/jcore-neo4j-relations-consumer.xml b/jcore-neo4j-relations-consumer/src/main/resources/de/julielab/jcore/consumer/neo4jrelations/desc/jcore-neo4j-relations-consumer.xml
index 1119cc5ef..1ce50f4d9 100644
--- a/jcore-neo4j-relations-consumer/src/main/resources/de/julielab/jcore/consumer/neo4jrelations/desc/jcore-neo4j-relations-consumer.xml
+++ b/jcore-neo4j-relations-consumer/src/main/resources/de/julielab/jcore/consumer/neo4jrelations/desc/jcore-neo4j-relations-consumer.xml
@@ -31,34 +31,27 @@
                 <multiValued>false</multiValued>
                 <mandatory>false</mandatory>
             </configurationParameter>
+            <configurationParameter>
+                <name>Neo4jUser</name>
+                <description>Optional. The Neo4j server user name.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>Neo4jPassword</name>
+                <description>Optional. The Neo4j server password.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
         </configurationParameters>
         <configurationParameterSettings/>
         <typeSystemDescription>
             <imports>
-                <import name="de.julielab.jcore.types.jcore-semantics-mention-types"/>
-                <import name="de.julielab.jcore.types.jcore-document-meta-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-dta-types"/>
-                <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types"/>
-                <import name="de.julielab.jcore.types.jcore-morpho-syntax-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-ace-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-wikipedia-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-ace-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types"/>
-                <import name="de.julielab.jcore.types.jcore-basic-types"/>
-                <import name="de.julielab.jcore.types.jcore-discourse-types"/>
-                <import name="de.julielab.jcore.types.jcore-document-structure-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-mantra-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-stemnet-types"/>
-                <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types"/>
-                <import name="de.julielab.jcore.types.jcore-semantics-concept-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-evaluation-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-mmax-types"/>
                 <import name="de.julielab.jcore.types.jcore-semantics-biology-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-medical-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-muc7-types"/>
-                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-bootstrep-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types" />
                 <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
             </imports>
         </typeSystemDescription>
diff --git a/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerIntegrationTest.java b/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerIntegrationTest.java
index e176fba41..9a1fc1905 100644
--- a/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerIntegrationTest.java
+++ b/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerIntegrationTest.java
@@ -66,7 +66,7 @@ public static void beforeClass() throws Exception {
 
     @Test
     public void insertEventMentions() throws Exception {
-        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types", "de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-meta-pubmed-types");
+        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.extensions.jcore-document-meta-extension-types", "de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types", "de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-meta-pubmed-types");
         Header h = new Header(jCas);
         h.setDocId("testdoc");
         h.addToIndexes();
diff --git a/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XMIDBWriter.java b/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XMIDBWriter.java
index f639e58ae..380c0b232 100644
--- a/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XMIDBWriter.java
+++ b/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XMIDBWriter.java
@@ -122,7 +122,6 @@ public class XMIDBWriter extends JCasAnnotator_ImplBase {
     private static Map<String, Map<String, Boolean>> binaryMappedFeatures = Collections.emptyMap();
     private static Map<String, Map<DocumentId, XmiBufferItem>> splitterResultMap;
     private static Map<String, Map<String, Pair<List<XmiBufferItem>, CountDownLatch>>> xmiBufferItemsToProcess;
-    private static ReentrantLock missingMappingsGatheringLock;
     private static CountDownLatch missingMappingsGatheringLatch = new CountDownLatch(0);
     private static ReentrantLock mappingUpdateLock;
     private DataBaseConnector dbc;

From c73befcef48271d83b3a4b23a67f33367607c094 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 12 Jun 2020 12:11:01 +0200
Subject: [PATCH 009/269] Letting the Neo4j consumer manually send documents to
 the database according to a new batch size parameter.

---
 .../db/desc/jcore-db-multiplier-reader.xml       |  2 +-
 .../neo4jrelations/Neo4jRelationsConsumer.java   | 16 +++++++++++++++-
 .../desc/jcore-neo4j-relations-consumer.xml      | 16 +++++++++++++++-
 3 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/jcore-db-reader/src/main/resources/de/julielab/jcore/reader/db/desc/jcore-db-multiplier-reader.xml b/jcore-db-reader/src/main/resources/de/julielab/jcore/reader/db/desc/jcore-db-multiplier-reader.xml
index 9637ab27d..593b1ef99 100644
--- a/jcore-db-reader/src/main/resources/de/julielab/jcore/reader/db/desc/jcore-db-multiplier-reader.xml
+++ b/jcore-db-reader/src/main/resources/de/julielab/jcore/reader/db/desc/jcore-db-multiplier-reader.xml
@@ -162,7 +162,7 @@
             <nameValuePair>
                 <name>BatchSize</name>
                 <value>
-                    <integer>50</integer>
+                    <integer>100</integer>
                 </value>
             </nameValuePair>
             <nameValuePair>
diff --git a/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java b/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java
index 2ad273897..7fbd7b8ac 100644
--- a/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java
+++ b/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java
@@ -55,6 +55,7 @@ public class Neo4jRelationsConsumer extends JCasAnnotator_ImplBase {
     public static final String PARAM_SOURCE = "ConceptSource";
     public static final String PARAM_NEO4J_USER = "Neo4jUser";
     public static final String PARAM_NEO4J_PASSWORD = "Neo4jPassword";
+    public static final String PARAM_WRITE_BATCH_SIZE = "WriteBatchSize";
     private final static Logger log = LoggerFactory.getLogger(Neo4jRelationsConsumer.class);
     @ConfigurationParameter(name = PARAM_URL, description = "The complete URL to the endpoint of the Neo4j server for relation insertion.")
     private String url;
@@ -66,12 +67,17 @@ public class Neo4jRelationsConsumer extends JCasAnnotator_ImplBase {
     private String neo4jUser;
     @ConfigurationParameter(name = PARAM_NEO4J_PASSWORD, mandatory = false, description = "Optional. The Neo4j server password.")
     private String neo4jPassword;
+    @ConfigurationParameter(name = PARAM_WRITE_BATCH_SIZE, mandatory = false, defaultValue = "50", description =
+            "The number of processed CASes after which the relation data should be flushed into the database. Defaults to 50.")
+    private int writeBatchSize;
 
     private ImportIERelations importIERelations;
     private ObjectMapper om;
 
     private Set<DocumentId> documentIds;
 
+    private long docNum;
+
     /**
      * This method is called a single time by the framework at component
      * creation. Here, descriptor parameters are read and initial setup is done.
@@ -83,12 +89,14 @@ public void initialize(final UimaContext aContext) throws ResourceInitialization
         globalSource = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_SOURCE)).orElse(null);
         neo4jUser = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_NEO4J_USER)).orElse(null);
         neo4jPassword = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_NEO4J_PASSWORD)).orElse(null);
+        writeBatchSize = Optional.ofNullable((Integer) aContext.getConfigParameterValue(PARAM_WRITE_BATCH_SIZE)).orElse(50);
         om = new ObjectMapper();
         om.setSerializationInclusion(JsonInclude.Include.NON_NULL);
         om.setSerializationInclusion(JsonInclude.Include.NON_EMPTY);
         initImportRelations();
         DocumentReleaseCheckpoint.get().register(Neo4jRelationsConsumer.class.getCanonicalName());
         documentIds = new HashSet<>();
+        docNum = 0;
     }
 
     private void initImportRelations() {
@@ -100,13 +108,18 @@ private void initImportRelations() {
      * is where the actual work happens.
      */
     @Override
-    public void process(final JCas aJCas) {
+    public void process(final JCas aJCas) throws AnalysisEngineProcessException {
         ImportIERelationDocument document = convertRelations(aJCas);
         if (!document.getRelations().isEmpty())
             importIERelations.addRelationDocument(document);
 
         Optional<DBProcessingMetaData> metaOpt = JCasUtil.select(aJCas, DBProcessingMetaData.class).stream().findAny();
         documentIds.add(metaOpt.isPresent() ? new DocumentId(metaOpt.get()) : new DocumentId(JCoReTools.getDocId(aJCas)));
+
+        if (documentIds.size() % writeBatchSize == 0) {
+            log.trace("Document nr {} processed, sending batch nr {} of size {} to database.", docNum, docNum / writeBatchSize, writeBatchSize);
+            batchProcessComplete();
+        }
     }
 
     private ImportIERelationDocument convertRelations(JCas aJCas) {
@@ -137,6 +150,7 @@ public void batchProcessComplete() throws AnalysisEngineProcessException {
     @Override
     public void collectionProcessComplete() throws AnalysisEngineProcessException {
         super.collectionProcessComplete();
+        log.info("Collection processing finished.");
         sendRelationsToNeo4j();
         DocumentReleaseCheckpoint.get().unregister(Neo4jRelationsConsumer.class.getCanonicalName());
     }
diff --git a/jcore-neo4j-relations-consumer/src/main/resources/de/julielab/jcore/consumer/neo4jrelations/desc/jcore-neo4j-relations-consumer.xml b/jcore-neo4j-relations-consumer/src/main/resources/de/julielab/jcore/consumer/neo4jrelations/desc/jcore-neo4j-relations-consumer.xml
index 1ce50f4d9..bebaa2135 100644
--- a/jcore-neo4j-relations-consumer/src/main/resources/de/julielab/jcore/consumer/neo4jrelations/desc/jcore-neo4j-relations-consumer.xml
+++ b/jcore-neo4j-relations-consumer/src/main/resources/de/julielab/jcore/consumer/neo4jrelations/desc/jcore-neo4j-relations-consumer.xml
@@ -45,8 +45,22 @@
                 <multiValued>false</multiValued>
                 <mandatory>false</mandatory>
             </configurationParameter>
+            <configurationParameter>
+                <name>WriteBatchSize</name>
+                <description>The number of processed CASes after which the relation data should be flushed into the database. Defaults to 50.</description>
+                <type>Integer</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
         </configurationParameters>
-        <configurationParameterSettings/>
+        <configurationParameterSettings>
+            <nameValuePair>
+                <name>WriteBatchSize</name>
+                <value>
+                    <integer>50</integer>
+                </value>
+            </nameValuePair>
+        </configurationParameterSettings>
         <typeSystemDescription>
             <imports>
                 <import name="de.julielab.jcore.types.jcore-semantics-biology-types"/>

From f33bc2ceb565920c155d9fa298608e9faaf7c179 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 12 Jun 2020 12:13:19 +0200
Subject: [PATCH 010/269] Adding a log message.

---
 .../jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java    | 1 +
 1 file changed, 1 insertion(+)

diff --git a/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java b/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java
index 7fbd7b8ac..6734d7ea2 100644
--- a/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java
+++ b/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java
@@ -177,6 +177,7 @@ private void sendRelationsToNeo4j() throws AnalysisEngineProcessException {
                 List<ImportIERelationDocument> documents = importIERelations.getDocuments();
                 g.writeFieldName(ImportIERelations.NAME_DOCUMENTS);
                 g.writeStartArray();
+                log.debug("Sending {} relation documents to Neo4j.", documents.size());
                 for (ImportIERelationDocument document : (Iterable<ImportIERelationDocument>) documents::iterator) {
                     g.writeObject(document);
                 }

From 12154b8acd95d991787f791017bdf6d88785d58d Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 22 Jun 2020 10:46:17 +0200
Subject: [PATCH 011/269] The file reader can now search in arbitrarily deep
 subdirectory trees.

---
 .../jcore/reader/file/main/FileReader.java    | 29 ++++++-------------
 .../Neo4jRelationsConsumer.java               |  4 ++-
 2 files changed, 12 insertions(+), 21 deletions(-)

diff --git a/jcore-file-reader/src/main/java/de/julielab/jcore/reader/file/main/FileReader.java b/jcore-file-reader/src/main/java/de/julielab/jcore/reader/file/main/FileReader.java
index 925c91e5e..dee16f1d7 100644
--- a/jcore-file-reader/src/main/java/de/julielab/jcore/reader/file/main/FileReader.java
+++ b/jcore-file-reader/src/main/java/de/julielab/jcore/reader/file/main/FileReader.java
@@ -35,6 +35,9 @@
 import org.apache.uima.util.ProgressImpl;
 
 import java.io.*;
+import java.nio.file.FileVisitOption;
+import java.nio.file.Files;
+import java.nio.file.Path;
 import java.util.*;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -404,26 +407,12 @@ public Progress[] getProgress() {
         return new Progress[]{new ProgressImpl(fileIndex, files.size(), Progress.ENTITIES)};
     }
 
-    private String[] createFileListByType(File inputDirectory, final Set<String> allowedExtensions) throws IOException {
-        String[] path = new File(inputDirectory.getPath()).list();
-
-        for (int i = 0; i < path.length; i++) {
-            File file = new File(inputDirectory.getAbsolutePath() + "/" + path[i]);
-
-            if (!useSubDirs && file.isDirectory())
-                continue;
-
-            String CurrentExtension = path[i].substring(path[i].lastIndexOf('.') + 1);
-            if (allowedExtensions.isEmpty() || allowedExtensions.contains(CurrentExtension)) {
-                files.add(file);
-            }
-
-            if (useSubDirs && file.isDirectory()) {
-                createFileListByType(file, allowedExtensions);
-            }
-        }
-
-        return path;
+    private void createFileListByType(File inputDirectory, final Set<String> allowedExtensions) throws IOException {
+        Files.walk(inputDirectory.toPath(), useSubDirs ? Integer.MAX_VALUE : 1, FileVisitOption.FOLLOW_LINKS)
+                .filter(p -> { if (allowedExtensions.isEmpty()) return true; for (String ext : allowedExtensions) if (p.toString().endsWith(ext)) return true; return false;})
+                .map(Path::toFile)
+                .filter(File::isFile)
+                .forEach(files::add);
     }
 
     private String getFileName(File fi) {
diff --git a/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java b/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java
index 6734d7ea2..7ff69f9f8 100644
--- a/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java
+++ b/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java
@@ -177,7 +177,7 @@ private void sendRelationsToNeo4j() throws AnalysisEngineProcessException {
                 List<ImportIERelationDocument> documents = importIERelations.getDocuments();
                 g.writeFieldName(ImportIERelations.NAME_DOCUMENTS);
                 g.writeStartArray();
-                log.debug("Sending {} relation documents to Neo4j.", documents.size());
+                log.debug("Converting {} relation documents to JSON.", documents.size());
                 for (ImportIERelationDocument document : (Iterable<ImportIERelationDocument>) documents::iterator) {
                     g.writeObject(document);
                 }
@@ -212,6 +212,7 @@ private void sendRelationsToNeo4j() throws AnalysisEngineProcessException {
      * @return The grouped relations.
      */
     private Map<String, Multiset<UnificationRelation>> getEquivalentRelationGroups(JCas aJCas) {
+        // Maps relation types to the complete relations.
         Map<String, Multiset<UnificationRelation>> relationCounts = new HashMap<>();
         for (FlattenedRelation fr : aJCas.<FlattenedRelation>getAnnotationIndex(FlattenedRelation.type)) {
             Iterator<ConceptMention> cmIt = StreamSupport.stream(fr.getArguments().spliterator(), false)
@@ -220,6 +221,7 @@ private Map<String, Multiset<UnificationRelation>> getEquivalentRelationGroups(J
                     .map(ConceptMention.class::cast)
                     .iterator();
             Set<UnificationArgument> unificationArgs = new HashSet<>();
+            // Add all arguments to the relation object. So there could be 1, 2, 3 or even more arguments.
             while (cmIt.hasNext()) {
                 ConceptMention cm = cmIt.next();
                 FSArray resourceEntryList = cm.getResourceEntryList();

From ba7f5cdc55072a3c32f4615610434f23f7d28080 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 23 Jun 2020 13:29:44 +0200
Subject: [PATCH 012/269] FileWriter: Not appending .txt suffix if already
 present.

---
 .../de/julielab/jcore/consumer/txt/SentenceTokenConsumer.java   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jcore-txt-consumer/src/main/java/de/julielab/jcore/consumer/txt/SentenceTokenConsumer.java b/jcore-txt-consumer/src/main/java/de/julielab/jcore/consumer/txt/SentenceTokenConsumer.java
index ca3d51d29..a86921ec4 100644
--- a/jcore-txt-consumer/src/main/java/de/julielab/jcore/consumer/txt/SentenceTokenConsumer.java
+++ b/jcore-txt-consumer/src/main/java/de/julielab/jcore/consumer/txt/SentenceTokenConsumer.java
@@ -237,7 +237,7 @@ private void writeSentences2File(String fileId, List<String> sentences) throws I
         OutputStream os = null;
         boolean zipContentWritten = false;
         try {
-            File outputFile = new File(directory.getCanonicalPath() + File.separator + fileId + ".txt" + (gzip ? ".gz" : ""));
+            File outputFile = new File(directory.getCanonicalPath() + File.separator + fileId + (fileId.endsWith(".txt") ? "" : ".txt") + (gzip ? ".gz" : ""));
             os = zip ? currentArchive : FileUtilities.getOutputStreamToFile(outputFile);
             if (zip) {
                 // Initialize the ZIP output stream if necessary

From b8a880f89d5fc3f866712493652e3540d176ffba Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 23 Jun 2020 14:15:18 +0200
Subject: [PATCH 013/269] Reverted the last change because it can influence
 document ID lookup.

---
 .../de/julielab/jcore/consumer/txt/SentenceTokenConsumer.java   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jcore-txt-consumer/src/main/java/de/julielab/jcore/consumer/txt/SentenceTokenConsumer.java b/jcore-txt-consumer/src/main/java/de/julielab/jcore/consumer/txt/SentenceTokenConsumer.java
index a86921ec4..ca3d51d29 100644
--- a/jcore-txt-consumer/src/main/java/de/julielab/jcore/consumer/txt/SentenceTokenConsumer.java
+++ b/jcore-txt-consumer/src/main/java/de/julielab/jcore/consumer/txt/SentenceTokenConsumer.java
@@ -237,7 +237,7 @@ private void writeSentences2File(String fileId, List<String> sentences) throws I
         OutputStream os = null;
         boolean zipContentWritten = false;
         try {
-            File outputFile = new File(directory.getCanonicalPath() + File.separator + fileId + (fileId.endsWith(".txt") ? "" : ".txt") + (gzip ? ".gz" : ""));
+            File outputFile = new File(directory.getCanonicalPath() + File.separator + fileId + ".txt" + (gzip ? ".gz" : ""));
             os = zip ? currentArchive : FileUtilities.getOutputStreamToFile(outputFile);
             if (zip) {
                 // Initialize the ZIP output stream if necessary

From 18de1e7dd0c6478b84f8536f060600762fa39515 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 30 Jul 2020 13:49:04 +0200
Subject: [PATCH 014/269] The Cord19 reader now prefers the abstract from the
 metadata.

---
 .../jcore/reader/cord19/Cord19Reader.java     | 74 ++++++++++++-------
 .../cord19/Cord19MultiplierReaderTest.java    |  5 +-
 2 files changed, 48 insertions(+), 31 deletions(-)

diff --git a/jcore-cord19-reader/src/main/java/de/julielab/jcore/reader/cord19/Cord19Reader.java b/jcore-cord19-reader/src/main/java/de/julielab/jcore/reader/cord19/Cord19Reader.java
index 54a9f1d5c..176d0f6dd 100644
--- a/jcore-cord19-reader/src/main/java/de/julielab/jcore/reader/cord19/Cord19Reader.java
+++ b/jcore-cord19-reader/src/main/java/de/julielab/jcore/reader/cord19/Cord19Reader.java
@@ -121,31 +121,42 @@ private void addBody(JCas jCas, StringBuilder doctext, Cord19Document document)
     }
 
     private void addAbstract(JCas jCas, StringBuilder doctext, Cord19Document document) {
-        List<AbstractSection> sections = new ArrayList<>(document.getAbstr().size());
-        int abstractBegin = doctext.length();
-        // Stores the end of the last paragraph before the newline
-        int lastEnd = 0;
-        for (Paragraph p : document.getAbstr()) {
-            int paragraphBegin = doctext.length();
-            AbstractSection as = new AbstractSection(jCas, paragraphBegin, doctext.length() + p.getText().length());
-            doctext.append(p.getText());
-            lastEnd = doctext.length();
-            doctext.append(linesep);
-            AbstractSectionHeading asHeading = new AbstractSectionHeading(jCas);
-            asHeading.setTitleType("abstract");
-            asHeading.setLabel(p.getSection());
-            as.setAbstractSectionHeading(asHeading);
-            sections.add(as);
-            addReferences(p, Paragraph::getRefSpans, paragraphBegin, jCas);
-            addReferences(p, Paragraph::getEqSpans, paragraphBegin, jCas);
-            addReferences(p, Paragraph::getCiteSpans, paragraphBegin, jCas);
-        }
-        if (lastEnd - abstractBegin > 0) {
-            AbstractText abstractText = new AbstractText(jCas, abstractBegin, lastEnd);
-            abstractText.setAbstractType("main");
-            abstractText.setStructuredAbstractParts(JCoReTools.addToFSArray(null, sections));
-            abstractText.addToIndexes();
-            doctext.append(linesep);
+        MetadataRecord metadataRecord = metadataIdMap.get(document.getPaperId());
+        if (metadataRecord != null) {
+            String abstractText = metadataRecord.getAbstractText();
+            if (abstractText != null && !abstractText.isBlank()) {
+                AbstractText abstractAnnotation = new AbstractText(jCas, doctext.length(),doctext.length() + abstractText.length());
+                abstractAnnotation.setAbstractType("main");
+                abstractAnnotation.addToIndexes();
+                doctext.append(abstractText);
+            }
+        } else {
+            List<AbstractSection> sections = new ArrayList<>(document.getAbstr().size());
+            int abstractBegin = doctext.length();
+            // Stores the end of the last paragraph before the newline
+            int lastEnd = 0;
+            for (Paragraph p : document.getAbstr()) {
+                int paragraphBegin = doctext.length();
+                AbstractSection as = new AbstractSection(jCas, paragraphBegin, doctext.length() + p.getText().length());
+                doctext.append(p.getText());
+                lastEnd = doctext.length();
+                doctext.append(linesep);
+                AbstractSectionHeading asHeading = new AbstractSectionHeading(jCas);
+                asHeading.setTitleType("abstract");
+                asHeading.setLabel(p.getSection());
+                as.setAbstractSectionHeading(asHeading);
+                sections.add(as);
+                addReferences(p, Paragraph::getRefSpans, paragraphBegin, jCas);
+                addReferences(p, Paragraph::getEqSpans, paragraphBegin, jCas);
+                addReferences(p, Paragraph::getCiteSpans, paragraphBegin, jCas);
+            }
+            if (lastEnd - abstractBegin > 0) {
+                AbstractText abstractText = new AbstractText(jCas, abstractBegin, lastEnd);
+                abstractText.setAbstractType("main");
+                abstractText.setStructuredAbstractParts(JCoReTools.addToFSArray(null, sections));
+                abstractText.addToIndexes();
+                doctext.append(linesep);
+            }
         }
     }
 
@@ -164,7 +175,7 @@ private void addReferences(Paragraph p, Function<Paragraph, Iterable<CiteSpan>>
     private void addTitle(JCas jCas, Cord19Document document, MetadataRecord metadataRecord, StringBuilder doctext) {
         if (metadataRecord != null) {
             String title = metadataRecord.getTitle();
-            if (title != null) {
+            if (title != null && !title.isBlank()) {
                 addTitle(jCas, title, doctext);
             }
         } else {
@@ -221,9 +232,10 @@ private void readMetaData(String metadataFile) {
                         String cordUid = record.get("cord_uid");
                         String sha = record.get("sha");
                         String title = record.get("title");
+                        String abstractText = record.get("abstract");
                         String pmcid = record.get("pmcid");
                         String pmid = record.get("pubmed_id");
-                        MetadataRecord metadataRecord = new MetadataRecord(cordUid, sha, pmcid, pmid, title);
+                        MetadataRecord metadataRecord = new MetadataRecord(cordUid, sha, pmcid, pmid, title, abstractText);
                         for (String hash : metadataRecord.hashes)
                             metadataIdMap.put(hash, metadataRecord);
                         if (pmcid != null)
@@ -244,13 +256,19 @@ private static class MetadataRecord {
         private final String pmid;
         private final String[] hashes;
         private final String title;
+        private String abstractText;
 
-        public MetadataRecord(String cordUid, String sha, String pmcid, String pmid, String title) {
+        public MetadataRecord(String cordUid, String sha, String pmcid, String pmid, String title, String abstractText) {
             this.cordUid = cordUid;
             this.pmcid = pmcid;
             this.pmid = pmid;
             this.title = title;
             this.hashes = Arrays.stream(sha.split(";")).map(String::trim).toArray(String[]::new);
+            this.abstractText = abstractText;
+        }
+
+        public String getAbstractText() {
+            return abstractText;
         }
 
         public String getCordUid() {
diff --git a/jcore-cord19-reader/src/test/java/de/julielab/jcore/reader/cord19/Cord19MultiplierReaderTest.java b/jcore-cord19-reader/src/test/java/de/julielab/jcore/reader/cord19/Cord19MultiplierReaderTest.java
index b5922a816..f7a8e8fcf 100644
--- a/jcore-cord19-reader/src/test/java/de/julielab/jcore/reader/cord19/Cord19MultiplierReaderTest.java
+++ b/jcore-cord19-reader/src/test/java/de/julielab/jcore/reader/cord19/Cord19MultiplierReaderTest.java
@@ -63,9 +63,8 @@ private void checkSecondDocument(JCas cas) {
         assertThat(documentTitles.get(0)).extracting(Annotation::getCoveredText).isEqualTo("Recombinant M protein-based ELISA test for detection of antibodies to canine coronavirus");
 
         AbstractText abstractText = JCasUtil.selectSingle(cas, AbstractText.class);
-        assertThat(abstractText.getCoveredText()).startsWith("The membrane (M) protein of canine");
-        assertThat(abstractText.getCoveredText()).endsWith("antibodies to CCoV in dog sera. #");
-        assertThat(abstractText.getStructuredAbstractParts()).hasSize(1);
+        assertThat(abstractText.getCoveredText()).startsWith("Abstract The membrane (M) protein of canine");
+        assertThat(abstractText.getCoveredText()).endsWith("antibodies to CCoV in dog sera.");
 
         Collection<Paragraph> paragraphs = JCasUtil.select(cas, Paragraph.class);
         assertThat(paragraphs).hasSize(19);

From cda27d66c54ac20918867af9fae3baa5ef12dabb Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 31 Jul 2020 07:43:33 +0200
Subject: [PATCH 015/269] Merging an if-condition for correct branching.

---
 .../julielab/jcore/reader/cord19/Cord19Reader.java   | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/jcore-cord19-reader/src/main/java/de/julielab/jcore/reader/cord19/Cord19Reader.java b/jcore-cord19-reader/src/main/java/de/julielab/jcore/reader/cord19/Cord19Reader.java
index 176d0f6dd..60939db2b 100644
--- a/jcore-cord19-reader/src/main/java/de/julielab/jcore/reader/cord19/Cord19Reader.java
+++ b/jcore-cord19-reader/src/main/java/de/julielab/jcore/reader/cord19/Cord19Reader.java
@@ -122,14 +122,12 @@ private void addBody(JCas jCas, StringBuilder doctext, Cord19Document document)
 
     private void addAbstract(JCas jCas, StringBuilder doctext, Cord19Document document) {
         MetadataRecord metadataRecord = metadataIdMap.get(document.getPaperId());
-        if (metadataRecord != null) {
+        if (metadataRecord != null && metadataRecord.getAbstractText() != null && !metadataRecord.getAbstractText().isBlank()) {
             String abstractText = metadataRecord.getAbstractText();
-            if (abstractText != null && !abstractText.isBlank()) {
-                AbstractText abstractAnnotation = new AbstractText(jCas, doctext.length(),doctext.length() + abstractText.length());
-                abstractAnnotation.setAbstractType("main");
-                abstractAnnotation.addToIndexes();
-                doctext.append(abstractText);
-            }
+            AbstractText abstractAnnotation = new AbstractText(jCas, doctext.length(), doctext.length() + abstractText.length());
+            abstractAnnotation.setAbstractType("main");
+            abstractAnnotation.addToIndexes();
+            doctext.append(abstractText);
         } else {
             List<AbstractSection> sections = new ArrayList<>(document.getAbstr().size());
             int abstractBegin = doctext.length();

From 02ba4c09236d27e5192a5fe5254959895ee9b356 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Sun, 2 Aug 2020 16:20:22 +0200
Subject: [PATCH 016/269] NPE issue with the Cord19MultiplierReader, adding an
 error message for debugging.

---
 .../jcore/reader/cord19/Cord19MultiplierReader.java         | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/jcore-cord19-reader/src/main/java/de/julielab/jcore/reader/cord19/Cord19MultiplierReader.java b/jcore-cord19-reader/src/main/java/de/julielab/jcore/reader/cord19/Cord19MultiplierReader.java
index 5789d935b..3b8b9ff35 100644
--- a/jcore-cord19-reader/src/main/java/de/julielab/jcore/reader/cord19/Cord19MultiplierReader.java
+++ b/jcore-cord19-reader/src/main/java/de/julielab/jcore/reader/cord19/Cord19MultiplierReader.java
@@ -73,7 +73,11 @@ public void getNext(JCas jCas) throws CollectionException {
                     Path p = currentFileBatch.get(currentBatchIndex);
                     if (p != Cord19FileVisitor.END) {
                         JCoReURI uri = new JCoReURI(jCas);
-                        uri.setUri(p.toUri().toString());
+                        try {
+                            uri.setUri(p.toUri().toString());
+                        } catch (NullPointerException e) {
+                            log.error("Could not retrieve URI string for path {}, resolved URI {}", p, p!= null ? p.toUri() : "<path is null>");
+                        }
                         uri.addToIndexes();
                         ++completed;
                     }

From ccd03baf3cd61b9805ce081d580204adaafe6f7c Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 5 Aug 2020 10:49:12 +0200
Subject: [PATCH 017/269] Lingpipe Gaz: Fixed an issue where lowercasing could
 break the offset mapping.

Special characters would get torn apart, leading to hard-to-track behaviour in the string normalization process. The solution is to use the transliterator for lowercasing.
---
 jcore-lingpipegazetteer-ae/pom.xml            |  115 +-
 .../uima/GazetteerAnnotator.java              | 1424 ++---
 .../utils/StringNormalizerForChunking.java    |  405 +-
 .../StringNormalizerForChunkingTest.java      |   67 +-
 .../uima/GazetteerAnnotatorTest.java          |   36 +
 .../src/test/resources/pehc.dict              |    2 +
 .../src/test/resources/unused/bio_text.xmi    |    3 -
 .../src/test/resources/unused/tmp.txt         | 4878 -----------------
 8 files changed, 1055 insertions(+), 5875 deletions(-)
 create mode 100644 jcore-lingpipegazetteer-ae/src/test/resources/pehc.dict
 delete mode 100644 jcore-lingpipegazetteer-ae/src/test/resources/unused/bio_text.xmi
 delete mode 100644 jcore-lingpipegazetteer-ae/src/test/resources/unused/tmp.txt

diff --git a/jcore-lingpipegazetteer-ae/pom.xml b/jcore-lingpipegazetteer-ae/pom.xml
index 080a61539..686f9ae80 100644
--- a/jcore-lingpipegazetteer-ae/pom.xml
+++ b/jcore-lingpipegazetteer-ae/pom.xml
@@ -1,68 +1,77 @@
 <?xml version='1.0' encoding='UTF-8'?>
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
 
-	<modelVersion>4.0.0</modelVersion>
-	<artifactId>jcore-lingpipe-gazetteer-ae</artifactId>
-	<packaging>jar</packaging>
-	<name>JCoRe Lingpipe Gazetteer AE</name>
-	<description>Basically used as NE tagger based on Lingpipe's dictionary-lookup tagger.</description>
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>jcore-lingpipe-gazetteer-ae</artifactId>
+    <packaging>jar</packaging>
+    <name>JCoRe Lingpipe Gazetteer AE</name>
+    <description>Basically used as NE tagger based on Lingpipe's dictionary-lookup tagger.</description>
 
-	<parent>
-		<groupId>de.julielab</groupId>
-		<artifactId>jcore-base</artifactId>
-		<version>2.6.0-SNAPSHOT</version>
-	</parent>
+    <parent>
+        <groupId>de.julielab</groupId>
+        <artifactId>jcore-base</artifactId>
+        <version>2.6.0-SNAPSHOT</version>
+    </parent>
 
 
-	<dependencies>
-		<dependency>
-			<groupId>de.julielab</groupId>
-			<artifactId>jcore-descriptor-creator</artifactId>
-		</dependency>
+    <dependencies>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-descriptor-creator</artifactId>
+        </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-types</artifactId>
             <version>${jcore-types-version}</version>
         </dependency>
-		<dependency>
-			<groupId>org.slf4j</groupId>
-			<artifactId>slf4j-api</artifactId>
-		</dependency>
-		<dependency>
-			<groupId>de.julielab</groupId>
-			<artifactId>jcore-utilities</artifactId>
-			<version>${jcore-utilities-version}</version>
-		</dependency>
-		<dependency>
-			<groupId>ch.qos.logback</groupId>
-			<artifactId>logback-classic</artifactId>
-			<scope>provided</scope>
-		</dependency>
-		<dependency>
-			<groupId>com.ibm.icu</groupId>
-			<artifactId>icu4j</artifactId>
-			<version>4.8.1.1</version>
-		</dependency>
-		<dependency>
-			<groupId>de.julielab</groupId>
-			<artifactId>aliasi-lingpipe</artifactId>
-			<version>4.1.2-JL1.0</version>
-		</dependency>
-		<dependency>
-			<groupId>org.apache.commons</groupId>
-			<artifactId>commons-lang3</artifactId>
-			<version>3.4</version>
-		</dependency>
-	<dependency><groupId>junit</groupId><artifactId>junit</artifactId></dependency></dependencies>
-	<organization>
-		<name>JULIE Lab, Germany</name>
-		<url>http://www.julielab.de</url>
-	</organization>
-	<licenses>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-utilities</artifactId>
+            <version>${jcore-utilities-version}</version>
+        </dependency>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+            <scope>provided</scope>
+        </dependency>
+        <dependency>
+            <groupId>com.ibm.icu</groupId>
+            <artifactId>icu4j</artifactId>
+            <version>4.8.1.1</version>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>aliasi-lingpipe</artifactId>
+            <version>4.1.2-JL1.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-lang3</artifactId>
+            <version>3.4</version>
+        </dependency>
+        <dependency>
+            <groupId>org.assertj</groupId>
+            <artifactId>assertj-core</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+        </dependency>
+    </dependencies>
+    <organization>
+        <name>JULIE Lab, Germany</name>
+        <url>http://www.julielab.de</url>
+    </organization>
+    <licenses>
         <license>
             <name>GNU Affero General Public License, Version 3.0</name>
             <url>http://www.gnu.org/licenses/agpl-3.0.en.html</url>
         </license>
     </licenses>
-	<url>https://github.com/JULIELab/jcore-base/tree/master/jcore-lingpipe-gazetteer-ae</url>
-	</project>
+    <url>https://github.com/JULIELab/jcore-base/tree/master/jcore-lingpipe-gazetteer-ae</url>
+</project>
diff --git a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotator.java b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotator.java
index 6ddd3b58a..dd0c68c20 100644
--- a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotator.java
+++ b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotator.java
@@ -1,21 +1,20 @@
-/** 
- *
+/**
  * Copyright (c) 2015, JULIE Lab.
- *
+ * <p>
  * Author: tomanek, jwermter
- * 
- *
- * Creation date: Jan 14, 2008 
- * 
+ * <p>
+ * <p>
+ * Creation date: Jan 14, 2008
+ * <p>
  * A entity tagger based on a dictionary lookup. Lingpipe's gazetteer is used.
- * 
- * There are two modes: exact matching (only terms which map exactly to 
- * those specified in dictionary are found). Approximate matching (by means of 
- * weighted levenstein distance, approximate matches are found.) 
- * 
- * As approximate matching results in concurring matches on overlapping spans, I 
+ * <p>
+ * There are two modes: exact matching (only terms which map exactly to
+ * those specified in dictionary are found). Approximate matching (by means of
+ * weighted levenstein distance, approximate matches are found.)
+ * <p>
+ * As approximate matching results in concurring matches on overlapping spans, I
  * added a mechanism to resolve this according to this rules: in overlapping matches
- * the one with the best (here: lowest) score is taken, if more than one chunk has the 
+ * the one with the best (here: lowest) score is taken, if more than one chunk has the
  * same score, the one with the longest span is chosen.
  **/
 package de.julielab.jcore.ae.lingpipegazetteer.uima;
@@ -57,702 +56,707 @@
 
 public class GazetteerAnnotator extends JCasAnnotator_ImplBase {
 
-	private static final String COMPONENT_ID = GazetteerAnnotator.class.getCanonicalName();
-	private static final Logger LOGGER = LoggerFactory.getLogger(GazetteerAnnotator.class);
-	public static final String CHUNKER_RESOURCE_NAME = "DictionaryChunkerProvider";
-	// public final static String PARAM_USE_APPROXIMATE_MATCHING =
-	// "UseApproximateMatching";
-	public final static String PARAM_CHECK_ACRONYMS = "CheckAcronyms";
-	public final static String PARAM_OUTPUT_TYPE = "OutputType";
-	/**
-	 * Only required to set to false as an annotator parameter when using
-	 * approximate matching and the ChunkerProvider is set to CaseSensitive false.
-	 * That is because the approximate chunker is always case sensitive.
-	 */
-	// public final static String PARAM_CASE_SENSITIVE = "CaseSensitive";
-	private static final String PARAM_USE_MANTRA_MODE = "MantraMode";
-	/**
-	 * Parameter to indicate whether text - CAS document text for this class -
-	 * should be normalized by completely removing dashes, parenthesis, genitive 's
-	 * and perhaps more. This is meant to replace the generation of term variants
-	 * and cannot be used together with variation generation. If this is switched on
-	 * here, it must also be switched on in the external resource configuration for
-	 * the ChunkerProvider! Can only be used with alternative ChunkerProviderImplAlt
-	 * implementation.
-	 */
-	// public final static String PARAM_NORMALIZE_TEXT = "NormalizeText";
-	/**
-	 * Parameter to indicate whether text - CAS document text for this class -
-	 * should be transliterated, i.e. whether accents and other character variations
-	 * should be stripped. If this is switched on here, it must also be switched on
-	 * in the external resource configuration for the ChunkerProvider! Can only be
-	 * used with alternative ChunkerProviderImplAlt implementation.
-	 */
-	// public final static String PARAM_TRANSLITERATE_TEXT =
-	// "TransliterateText";
-
-	@ConfigurationParameter(name = PARAM_USE_MANTRA_MODE, defaultValue = "false")
-	private boolean mantraMode = false;
-
-	// needs to be true because of chunker injection:
-	@ConfigurationParameter(name = PARAM_CHECK_ACRONYMS, defaultValue = "true")
-	private boolean checkAcronyms = true;
-	@ConfigurationParameter(name = PARAM_OUTPUT_TYPE)
-	private String outputType = null;
-
-	@ExternalResource(key = CHUNKER_RESOURCE_NAME, mandatory = true)
-	private ChunkerProvider provider;
-	/**
-	 * Removes diacritics and does lower casing
-	 */
-	private Transliterator transliterator;
-	private Chunker gazetteer = null;
-	private TokenizerFactory normalizationTokenFactory;
-	private Set<String> stopWords;
-
-	// TODO for debug only
-	private static int initializeCount = 0;
-
-	public void initialize(UimaContext aContext) throws ResourceInitializationException {
-		LOGGER.info("calls to initialize: " + initializeCount);
-
-		super.initialize(aContext);
-		LOGGER.info("initialize() - initializing GazetteerAnnotator...");
-
-		try {
-			provider = (ChunkerProvider) getContext().getResourceObject(CHUNKER_RESOURCE_NAME);
-			gazetteer = provider.getChunker();
+    public static final String CHUNKER_RESOURCE_NAME = "DictionaryChunkerProvider";
+    // public final static String PARAM_USE_APPROXIMATE_MATCHING =
+    // "UseApproximateMatching";
+    public final static String PARAM_CHECK_ACRONYMS = "CheckAcronyms";
+    public final static String PARAM_OUTPUT_TYPE = "OutputType";
+    private static final String COMPONENT_ID = GazetteerAnnotator.class.getCanonicalName();
+    private static final Logger LOGGER = LoggerFactory.getLogger(GazetteerAnnotator.class);
+    /*
+     * For the disabled PARAM_CASE_SENSITIVE below (not the next field): only set to false
+     * when using approximate matching and the ChunkerProvider is set to CaseSensitive
+     * false. That is because the approximate chunker is always case sensitive.
+     */
+    // public final static String PARAM_CASE_SENSITIVE = "CaseSensitive";
+    private static final String PARAM_USE_MANTRA_MODE = "MantraMode";
+    /*
+     * For the disabled PARAM_NORMALIZE_TEXT below (not the next field): indicates whether
+     * text - CAS document text for this class - should be normalized by completely
+     * removing dashes, parentheses, genitive 's and perhaps more. This is meant to replace
+     * the generation of term variants and cannot be used together with variation
+     * generation. If this is switched on here, it must also be switched on in the external
+     * resource configuration for the ChunkerProvider! Can only be used with the
+     * alternative ChunkerProviderImplAlt implementation.
+     */
+    // public final static String PARAM_NORMALIZE_TEXT = "NormalizeText";
+    // TODO for debug only
+    private static int initializeCount = 0;
+    /*
+     * For the disabled PARAM_TRANSLITERATE_TEXT below (not the next field): indicates
+     * whether text - CAS document text for this class - should be transliterated, i.e.
+     * whether accents and other character variations should be stripped. If this is
+     * switched on here, it must also be switched on in the external resource configuration
+     * for the ChunkerProvider! Can only be used with alternative ChunkerProviderImplAlt.
+     */
+    // public final static String PARAM_TRANSLITERATE_TEXT =
+    // "TransliterateText";
+
+    @ConfigurationParameter(name = PARAM_USE_MANTRA_MODE, defaultValue = "false")
+    private boolean mantraMode = false;
+    // needs to be true because of chunker injection:
+    @ConfigurationParameter(name = PARAM_CHECK_ACRONYMS, defaultValue = "true")
+    private boolean checkAcronyms = true;
+    @ConfigurationParameter(name = PARAM_OUTPUT_TYPE)
+    private String outputType = null;
+    @ExternalResource(key = CHUNKER_RESOURCE_NAME, mandatory = true)
+    private ChunkerProvider provider;
+    /**
+     * Removes diacritics and does lower casing
+     */
+    private Transliterator transliterator;
+    private Chunker gazetteer = null;
+    private TokenizerFactory normalizationTokenFactory;
+    private Set<String> stopWords;
+
+    /**
+     * Tells whether a chunk must be discarded because its parentheses are unbalanced.
+     *
+     * @param chunkText the text covered by the chunk
+     * @return true if a closing parenthesis has no matching opener or an opener is never closed
+     */
+    static boolean filterParenthesis(String chunkText) {
+        Stack<Character> openers = new Stack<>();
+        for (char symbol : chunkText.toCharArray()) {
+            if (isOpenedParentheses(symbol)) {
+                openers.add(symbol);
+            } else if (isClosedParentheses(symbol)) {
+                // a closer without an opener, or one of a different kind, disqualifies the chunk
+                if (openers.isEmpty() || !isParenthesisCounterpart(openers.pop(), symbol))
+                    return true;
+            }
+        }
+        // leftover openers were never closed
+        return !openers.isEmpty();
+    }
+
+    /** Tells whether two parenthesis characters are of the same kind; throws IllegalArgumentException if one is no parenthesis. */
+    private static boolean isParenthesisCounterpart(Character char1, Character char2) {
+        ParenthesisType firstType = getParenthesisType(char1);
+        ParenthesisType secondType = getParenthesisType(char2);
+        if (firstType == ParenthesisType.NONE || secondType == ParenthesisType.NONE)
+            throw new IllegalArgumentException("The two characters '" + char1 + "' and '" + char2 + "' were given in order to determine whether they are compatible parenthesis counterparts, but at least one of those characters is no parentheses.");
+        return firstType.equals(secondType);
+    }
+
+    /**
+     * Maps a parenthesis character to its kind, regardless of whether it opens or closes.
+     *
+     * @param current the character to classify
+     * @return ROUND, BRACKET or CURLY for the respective pair, NONE for any other character
+     */
+    static ParenthesisType getParenthesisType(char current) {
+        if (current == '(' || current == ')')
+            return ParenthesisType.ROUND;
+        if (current == '[' || current == ']')
+            return ParenthesisType.BRACKET;
+        if (current == '{' || current == '}')
+            return ParenthesisType.CURLY;
+        return ParenthesisType.NONE;
+    }
+
+    static boolean isParentheses(char current) {
+        return getParenthesisType(current) != ParenthesisType.NONE; // true for ( ) [ ] { } only
+    }
+
+    /**
+     * Tells whether the given character opens a round, square or curly parenthesis.
+     *
+     * @param current the character to test
+     * @return true for '(', '[' and '{', false for everything else
+     */
+    static boolean isOpenedParentheses(char current) {
+        // three plain comparisons cover all supported openers
+        return current == '(' || current == '[' || current == '{';
+    }
+
+    /**
+     * Tells whether the given character closes a round, square or curly parenthesis.
+     *
+     * @param current the character to test
+     * @return true for ')', ']' and '}', false for everything else
+     */
+    static boolean isClosedParentheses(char current) {
+        // three plain comparisons cover all supported closers
+        return current == ')' || current == ']' || current == '}';
+    }
+
+    /**
+     * Groups chunks whose spans overlap. Sorts the passed list in place by ascending start offset.
+     * @param chunkList   the chunks to group; reordered by this method
+     * @param chunkedText the chunked text, handed to every newly opened group
+     * @return the groups in the order in which they were opened
+     */
+    static List<OverlappingChunk> groupOverlappingChunks(List<Chunk> chunkList, String chunkedText) {
+        Collections.sort(chunkList, new Comparator<Chunk>() {
+            @Override
+            public int compare(Chunk o1, Chunk o2) {
+                // Integer.compare cannot overflow, unlike subtracting the offsets
+                return Integer.compare(o1.start(), o2.start());
+            }
+        });
+        List<OverlappingChunk> overlappingChunks = new ArrayList<OverlappingChunk>();
+        for (Chunk chunk : chunkList) {
+            boolean added = false;
+            // the chunk joins every already opened group whose span it overlaps
+            for (OverlappingChunk over : overlappingChunks) {
+                if (over.isOverlappingSpan(chunk.start(), chunk.end())) {
+                    over.addChunk(chunk.start(), chunk.end(), chunk);
+                    added = true;
+                }
+            }
+            if (!added) {
+                // no overlap with any group so far: the chunk opens a new group
+                overlappingChunks.add(new OverlappingChunk(chunk.start(), chunk.end(), chunk, chunkedText));
+            }
+        }
+        return overlappingChunks;
+    }
+
+    // enum ParenthesesType {
+    // ROUND_CLOSED {
+    // @Override
+    // boolean isOpen() {
+    // return false;
+    // }
+    //
+    // },
+    // BRACKET_CLOSED {
+    // @Override
+    // boolean isOpen() {
+    // return false;
+    // }
+    // },
+    // CURLY_CLOSED {
+    // @Override
+    // boolean isOpen() {
+    // return false;
+    // }
+    //
+    // },
+    // ROUND_OPENED {
+    // @Override
+    // boolean isOpen() {
+    // return true;
+    // }
+    // },
+    // BRACKET_OPENED {
+    // @Override
+    // boolean isOpen() {
+    // return true;
+    // }
+    // },
+    // CURLY_OPENED {
+    // @Override
+    // boolean isOpen() {
+    // return true;
+    // }
+    // };
+    // abstract boolean isOpen();
+    //
+    // boolean isClose() {
+    // return !isOpen();
+    // };
+    // }
+
+    public void initialize(UimaContext aContext) throws ResourceInitializationException {
+        LOGGER.info("calls to initialize: " + initializeCount);
+
+        super.initialize(aContext);
+        LOGGER.info("initialize() - initializing GazetteerAnnotator...");
+
+        try {
+            provider = (ChunkerProvider) getContext().getResourceObject(CHUNKER_RESOURCE_NAME);
+            gazetteer = provider.getChunker();
 //			stopWords = provider.getStopWords();
-			String[] stopwordArray = { "a", "about", "above", "across", "after", "afterwards", "again", "against",
-					"all", "almost", "alone", "along", "already", "also", "although", "always", "am", "among",
-					"amongst", "amoungst", "amount", "an", "and", "another", "any", "anyhow", "anyone", "anything",
-					"anyway", "anywhere", "are", "around", "as", "at", "back", "be", "became", "because", "become",
-					"becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside",
-					"besides", "between", "beyond", "bill", "both", "bottom", "but", "by", "call", "can", "cannot",
-					"cant", "co", "computer", "con", "could", "couldnt", "cry", "de", "describe", "detail", "do",
-					"done", "down", "due", "during", "each", "eg", "eight", "either", "eleven", "else", "elsewhere",
-					"empty", "enough", "etc", "even", "ever", "every", "everyone", "everything", "everywhere", "except",
-					"few", "fifteen", "fify", "fill", "find", "fire", "first", "five", "for", "former", "formerly",
-					"forty", "found", "four", "from", "front", "full", "further", "get", "give", "go", "had", "has",
-					"hasnt", "have", "he", "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers",
-					"herself", "high", "him", "himself", "his", "how", "however", "hundred", "i", "ie", "if", "in",
-					"inc", "indeed", "interest", "into", "is", "it", "its", "itself", "keep", "last", "latter",
-					"latterly", "least", "less", "ltd", "made", "many", "may", "me", "meanwhile", "might", "mill",
-					"mine", "more", "moreover", "most", "mostly", "move", "much", "must", "my", "myself", "name",
-					"namely", "neither", "never", "nevertheless", "next", "nine", "no", "nobody", "none", "noone",
-					"nor", "not", "nothing", "now", "nowhere", "of", "off", "often", "on", "once", "one", "only",
-					"onto", "or", "other", "others", "otherwise", "our", "ours", "ourselves", "out", "over", "own",
-					"part", "per", "perhaps", "please", "put", "rather", "re", "same", "see", "seem", "seemed",
-					"seeming", "seems", "serious", "several", "she", "should", "show", "side", "since", "sincere",
-					"six", "sixty", "so", "some", "somehow", "someone", "something", "sometime", "sometimes",
-					"somewhere", "still", "such", "system", "take", "ten", "than", "that", "the", "their", "them",
-					"themselves", "then", "thence", "there", "thereafter", "thereby", "therefore", "therein",
-					"thereupon", "these", "they", "thick", "thin", "third", "this", "those", "though", "three",
-					"through", "throughout", "thru", "thus", "to", "together", "too", "top", "toward", "towards",
-					"twelve", "twenty", "two", "un", "under", "until", "up", "upon", "us", "very", "via", "was", "we",
-					"well", "were", "what", "whatever", "when", "whence", "whenever", "where", "whereafter", "whereas",
-					"whereby", "wherein", "whereupon", "wherever", "whether", "which", "while", "whither", "who",
-					"whoever", "whole", "whom", "whose", "why", "will", "with", "within", "without", "would", "yet",
-					"you", "your", "yours", "yourself", "yourselves", };
-			stopWords = new HashSet<>();
-			for (String sw : stopwordArray)
-				stopWords.add(sw);
-		} catch (ResourceAccessException e) {
-			LOGGER.error("Exception while initializing", e);
-		}
-
-		// check acronyms
-		checkAcronyms = (Boolean) aContext.getConfigParameterValue(PARAM_CHECK_ACRONYMS);
-		LOGGER.info(
-				"Check for acronyms (found dictionary entries that are abbreviations are only accepted if their long form is an abbreviation of the same type, too): {}",
-				checkAcronyms);
-		// filter stop words
-
-		Boolean normalizeBoolean = provider.getNormalize();// (Boolean)
-															// aContext.getConfigParameterValue(PARAM_NORMALIZE_TEXT);
-		if (normalizeBoolean) {
-			normalizationTokenFactory = new IndoEuropeanTokenizerFactory();
-		}
-		LOGGER.info("Normalize CAS document text (i.e. do stemming and remove possessive 's): {}", provider.getNormalize());
-
-		Boolean transliterateBoolean = provider.getTransliterate();// (Boolean)
-																	// aContext.getConfigParameterValue(PARAM_TRANSLITERATE_TEXT);
-		if (transliterateBoolean) {
-			transliterator = Transliterator.getInstance("NFD; [:Nonspacing Mark:] Remove; NFC; Lower");
-		}
-		LOGGER.info("Transliterate CAS document text (i.e. transform accented characters to their base forms): {}",
-				provider.getTransliterate());
-
-		// define output level
-		outputType = (String) aContext.getConfigParameterValue(PARAM_OUTPUT_TYPE);
-		if (outputType == null) {
-			LOGGER.error("initialize() - output type not specified.");
-			throw new ResourceInitializationException();
-		}
-
-		mantraMode = aContext.getConfigParameterValue(PARAM_USE_MANTRA_MODE) != null
-				? (Boolean) aContext.getConfigParameterValue(PARAM_USE_MANTRA_MODE)
-				: false;
-	}
-
-	/**
-	 * process the CAS, there are two subroutines: one for exact and one for
-	 * approximate matching.
-	 */
-	public void process(JCas aJCas) throws AnalysisEngineProcessException {
-		if (gazetteer == null)
-			throw new IllegalStateException("The actual gazetteer object is null. Check previous log messages pointing to the error (most probably the dictionary file could not be found).");
-		String docText = aJCas.getDocumentText();
-		if (docText == null || docText.length() == 0)
-			return;
-		if (provider.getUseApproximateMatching() && !provider.getTransliterate() && !provider.getCaseSensitive())
-			docText = docText.toLowerCase();
-		NormalizedString normalizedDocText = null;
-		if (provider.getNormalize()) {
-			normalizedDocText = StringNormalizerForChunking.normalizeString(docText, normalizationTokenFactory,
-					transliterator);
-		}
-
-		IndexTermGenerator<Long> longOffsetTermGenerator = TermGenerators.longOffsetTermGenerator();
-		JCoReHashMapAnnotationIndex<Long, ConceptMention> conceptMentionIndex = new JCoReHashMapAnnotationIndex<>(
-				longOffsetTermGenerator, longOffsetTermGenerator, aJCas, ConceptMention.type);
-		JCoReHashMapAnnotationIndex<Long, Abbreviation> abbreviationIndex = new JCoReHashMapAnnotationIndex<>(
-				longOffsetTermGenerator, longOffsetTermGenerator, aJCas, Abbreviation.type);
-
-		LOGGER.debug("Performing actual Gazetteer annotation...");
-		Chunking chunking;
-		if (provider.getNormalize())
-			chunking = gazetteer.chunk(normalizedDocText.string);
-		else
-			chunking = gazetteer.chunk(docText);
-		LOGGER.debug("Gazetteer annotation done.");
-		if (provider.getUseApproximateMatching()) {
-			/*
-			 * handle matches found by approx matching: this means especially overlapping
-			 * matches with different scores (doesn't happen with exact matches)
-			 */
-			List<Chunk> chunkList = filterChunking(chunking);
-			List<OverlappingChunk> overlappingChunks = groupOverlappingChunks(chunkList,
-					chunking.charSequence().toString());
-			// now add the best chunk of all overlappingChunks to the CAS
-			LOGGER.debug("all overlapping chunks:\n");
-			// Set<Chunk> bestChunksSet = new HashSet<>();
-			for (OverlappingChunk overlappingChunk : overlappingChunks) {
-				// show chunks
-				LOGGER.debug(overlappingChunk.toStringAll());
-				List<Chunk> bestChunks = overlappingChunk.getBestChunks();
-				LOGGER.debug("Found {} best chunks.", bestChunks.size());
-				for (int i = 0; i < bestChunks.size(); i++) {
-					Chunk bestChunk = bestChunks.get(i);
-					LOGGER.debug("Nr. " + i + " best chunk: " + bestChunk.start() + " - " + bestChunk.end() + ": "
-							+ bestChunk.score() + " ; type: " + bestChunk.type());
-					// TODO this check and the corresponding set may be removed
-					// when this exception hasn't been thrown
-					// in a
-					// while. Its currently just to be sure, this should not
-					// happen any more since the chunks are sorted
-					// by
-					// offset in the grouping method.
-					// if (bestChunksSet.contains(bestChunk)) {
-					// throw new IllegalStateException("Duplicate best chunk: " + bestChunk);
-					// }
-					// bestChunksSet.add(bestChunk);
-					// add 2 cas
-					add2Cas(aJCas, bestChunk, normalizedDocText, conceptMentionIndex, abbreviationIndex);
-				}
-			}
-			// for (Chunk chunk : chunking.chunkSet()) {
-			// add2Cas(aJCas, chunk, normalizedDocText);
-			// }
-		} else {
-			for (Chunk chunk : chunking.chunkSet()) {
-				add2Cas(aJCas, chunk, normalizedDocText, conceptMentionIndex, abbreviationIndex);
-			}
-		}
-		if (checkAcronyms && !mantraMode) {
-			LOGGER.debug("process() - checking acronyms");
-			annotateAcronymsWithFullFormEntity(aJCas, conceptMentionIndex);
-		}
-	}
-
-	private List<Chunk> filterChunking(Chunking chunking) {
-		// ChunkingImpl newChunking = new ChunkingImpl(chunking.charSequence());
-		List<Chunk> newChunking = new ArrayList<>(chunking.chunkSet().size());
-		for (Chunk chunk : chunking.chunkSet()) {
-			String chunkText = chunking.charSequence().subSequence(chunk.start(), chunk.end()).toString();
-			if (filterParenthesis(chunkText))
-				continue;
-			if (filterPunctuationArtifacts(chunkText))
-				continue;
-			if (filterStopwords(chunkText))
-				continue;
-			newChunking.add(chunk);
-		}
-		return newChunking;
-	}
-
-	private boolean filterPunctuationArtifacts(String chunkText) {
-		if (chunkText.startsWith("-"))
-			return true;
-		if (chunkText.endsWith("-"))
-			return true;
-		return false;
-	}
-
-	private boolean filterStopwords(String chunkText) {
-		if (stopWords.contains(chunkText.toLowerCase()))
-			return true;
-		if (chunkText.contains(" ")) {
-			String[] words = chunkText.split(" ");
-			int stopWordCounter = 0;
-			for (String word : words) {
-				if (stopWords.contains(word.toLowerCase()))
-					stopWordCounter++;
-			}
-			if (Math.ceil(words.length / 2.0) <= stopWordCounter) {
-				LOGGER.debug("Filtering due to high stop word occurrences: {}", chunkText);
-				return true;
-			}
-		}
-		return false;
-	}
-
-	static boolean filterParenthesis(String chunkText) {
-		Stack<Character> parenthesisStack = new Stack<>();
-		// Map<ParenthesisType, Integer> pMap = new HashMap<>();
-		for (int i = 0; i < chunkText.length(); i++) {
-			char current = chunkText.charAt(i);
-			if (isParentheses(current)) {
-				if (isOpenedParentheses(current)) {
-					parenthesisStack.add(current);
-				} else {
-					if (parenthesisStack.isEmpty())
-						return true;
-					if (!isParenthesisCounterpart(parenthesisStack.pop(), current))
-						return true;
-				}
-			}
-		}
-		if (!parenthesisStack.isEmpty())
-			return true;
-		return false;
-	}
-
-	private static boolean isParenthesisCounterpart(Character char1, Character char2) {
-		ParenthesisType char1ParenthesisType = getParenthesisType(char2);
-		ParenthesisType char2ParenthesisType = getParenthesisType(char1);
-		if (char1ParenthesisType == ParenthesisType.NONE || char2ParenthesisType == ParenthesisType.NONE)
-			throw new IllegalArgumentException("The two characters '" + char1 + "' and '" + char2
-					+ "' were given in order to determine whether they are compatible parenthesis counterparts, but at least one of those characters is no parentheses.");
-		return char1ParenthesisType.equals(char2ParenthesisType);
-	}
-
-	// enum ParenthesesType {
-	// ROUND_CLOSED {
-	// @Override
-	// boolean isOpen() {
-	// return false;
-	// }
-	//
-	// },
-	// BRACKET_CLOSED {
-	// @Override
-	// boolean isOpen() {
-	// return false;
-	// }
-	// },
-	// CURLY_CLOSED {
-	// @Override
-	// boolean isOpen() {
-	// return false;
-	// }
-	//
-	// },
-	// ROUND_OPENED {
-	// @Override
-	// boolean isOpen() {
-	// return true;
-	// }
-	// },
-	// BRACKET_OPENED {
-	// @Override
-	// boolean isOpen() {
-	// return true;
-	// }
-	// },
-	// CURLY_OPENED {
-	// @Override
-	// boolean isOpen() {
-	// return true;
-	// }
-	// };
-	// abstract boolean isOpen();
-	//
-	// boolean isClose() {
-	// return !isOpen();
-	// };
-	// }
-
-	enum ParenthesisType {
-		ROUND, BRACKET, CURLY, NONE
-	}
-
-	static ParenthesisType getParenthesisType(char current) {
-		switch (current) {
-		case '(':
-		case ')':
-			return ParenthesisType.ROUND;
-		case '[':
-		case ']':
-			return ParenthesisType.BRACKET;
-		case '{':
-		case '}':
-			return ParenthesisType.CURLY;
-		default:
-			return ParenthesisType.NONE;
-		}
-	}
-
-	static boolean isParentheses(char current) {
-		return isOpenedParentheses(current) || isClosedParentheses(current);
-	}
-
-	static boolean isOpenedParentheses(char current) {
-		switch (current) {
-		case '(':
-		case '[':
-		case '{':
-			return true;
-		default:
-			return false;
-		}
-	}
-
-	static boolean isClosedParentheses(char current) {
-		switch (current) {
-		case ')':
-		case ']':
-		case '}':
-			return true;
-		default:
-			return false;
-		}
-	}
-
-	static List<OverlappingChunk> groupOverlappingChunks(List<Chunk> chunkList, String chunkedText) {
-		// sort chunkList so the grouping works as intended
-		Collections.sort(chunkList, new Comparator<Chunk>() {
-
-			@Override
-			public int compare(Chunk o1, Chunk o2) {
-				return o1.start() - o2.start();
-			}
-
-		});
-		// group overlapping chunks
-		List<OverlappingChunk> overlappingChunks = new ArrayList<OverlappingChunk>();
-		for (Chunk chunk : chunkList) {
-			// for debugging
-			// System.out.println("chunking.add(ChunkFactory.createChunk(" +
-			// chunk.start() + ", " + chunk.end() +
-			// ", 0d));");
-			boolean added = false;
-			for (OverlappingChunk over : overlappingChunks) {
-				if (over.isOverlappingSpan(chunk.start(), chunk.end())) {
-					over.addChunk(chunk.start(), chunk.end(), chunk);
-					added = true;
-				}
-			}
-			if (!added) {
-				overlappingChunks.add(new OverlappingChunk(chunk.start(), chunk.end(), chunk, chunkedText));
-				added = true;
-			}
-		}
-		return overlappingChunks;
-	}
-
-	// ------------ INFO ..........
-	// String text = aJCas.getDocumentText();
-	// int start = chunk.start();
-	// int end = chunk.end();
-	// String type = chunk.type();
-	// double score = chunk.score();
-	// String phrase = text.substring(start, end);
-	// System.out.println(" found phrase=|" + phrase + "|"
-	// + " start=" + start + " end=" + end + " type=" + type
-	// + " score=" + score);
-	// ------------ INFO ..........
-	/**
-	 * checks whether a chunk (= dictionary match) is an acronym. If yes, checks
-	 * whether respective full form (obtained via abbr textReference) is
-	 * ConceptMention and has same specificType as chunk If these conditions are not
-	 * fulfilled, no entity annotation will be made.
-	 * 
-	 * @param abbreviationIndex
-	 * @param conceptMentionIndex
-	 */
-	private boolean isAcronymWithSameFullFormSpecificType(JCas aJCas, Chunk chunk, NormalizedString normalizedDocText,
-			JCoReHashMapAnnotationIndex<Long, ConceptMention> conceptMentionIndex,
-			JCoReHashMapAnnotationIndex<Long, Abbreviation> abbreviationIndex) {
-		// Annotation anno;
-		int start;
-		int end;
-		if (provider.getNormalize()) {
-			try {
-				start = normalizedDocText.getOriginalOffset(chunk.start());
-				end = normalizedDocText.getOriginalOffset(chunk.end());
-			} catch (Exception e) {
-				System.out.println("Text: " + normalizedDocText);
-				System.out.println("Chunk: " + chunk);
-				System.out.println("Chunk end: " + chunk.end());
-				System.out
-						.println("Normalized Text: " + normalizedDocText.string.substring(chunk.start(), chunk.end()));
-				throw e;
-			}
-			// anno = new Annotation(aJCas, start, end);
-		} else {
-			start = chunk.start();
-			end = chunk.end();
-		}
-
-		LongOffsetIndexTermGenerator longOffsetTermGenerator = TermGenerators.longOffsetTermGenerator();
-		// Retrieves potential abbr annotation
-		Abbreviation abbr = abbreviationIndex.getFirst(longOffsetTermGenerator.forOffsets(start, end));
-		// check whether it's an abbr
-		String chunktext = null;
-		if (LOGGER.isDebugEnabled())
-			chunktext = aJCas.getDocumentText().substring(start, end);
-		if (abbr == null) {
-			LOGGER.debug("{} chunk \"{}\" is not an abbreviation\n", chunk, chunktext);
-			return true;
-		}
-		// checks whether respective full form is ConceptMention
-		AbbreviationLongform textRef = abbr.getTextReference();
-		ConceptMention em = conceptMentionIndex.getFirst(textRef);
-		if (em == null) {
-			LOGGER.debug(
-					chunk + " chunk \"{}\" is an abbreviation but respective full \"{}\" form is no ConceptMention\n",
-					chunktext, textRef.getCoveredText());
-			return false;
-		}
-
-		// checks whether full form annotation matches the type to be annotated
-		// here
-		String emType = em.getClass().getCanonicalName();
-		if (emType.equals(outputType)) {
-			LOGGER.debug(chunk
-					+ " chunk \"{}\" is an abbreviation and respective full form \"{}\" is ConceptMention with same type as OutputType\n",
-					chunktext, em.getCoveredText());
-			return true;
-		}
-
-		LOGGER.debug(chunk
-				+ " chunk \"{}\" is an abbreviation but respective full form \"{}\" is ConceptMention without the correct OutputType (is: {}; OutputType: {})\n",
-				new Object[] { chunktext, em.getCoveredText(), emType, outputType });
-		return false;
-	}
-
-	/**
-	 * adds a chunk as an annotation to the CAS
-	 * 
-	 * @param normalizedDocText
-	 * @param abbreviationIndex
-	 * @param conceptMentionIndex
-	 */
-	private void add2Cas(JCas aJCas, Chunk chunk, NormalizedString normalizedDocText,
-			JCoReHashMapAnnotationIndex<Long, ConceptMention> conceptMentionIndex,
-			JCoReHashMapAnnotationIndex<Long, Abbreviation> abbreviationIndex) throws AnalysisEngineProcessException {
-		// System.out.println("CHUNK: start=" + chunk.start() + " end=" +
-		// chunk.end());
-		// if checkAcronyms, then check acronyms for compliant full forms (=
-		// with same specificType)
-		if (checkAcronyms && !isAcronymWithSameFullFormSpecificType(aJCas, chunk, normalizedDocText,
-				conceptMentionIndex, abbreviationIndex)) {
-			return;
-		}
-
-		int start = provider.getNormalize() ? normalizedDocText.getOriginalOffset(chunk.start()) : chunk.start();
-		int end = provider.getNormalize() ? normalizedDocText.getOriginalOffset(chunk.end()) : chunk.end();
-
-		try {
-			if (mantraMode) {
-				// the "type" string is used to transport all data needed for
-				// the MAN-XML format
-				for (String term : chunk.type().split("@@TERM@@")) {
-					// @@ is used to separate source, cui, type(s) and group (in
-					// this order!)
-					String[] info = term.split("@@");
-					Entity newEntity = (Entity) JCoReAnnotationTools.getAnnotationByClassName(aJCas,
-							"de.julielab.jcore.types.mantra.Entity");
-					newEntity.setBegin(start);
-					newEntity.setEnd(end);
-					newEntity.setComponentId(COMPONENT_ID);
-					newEntity.setConfidence(chunk.score() + "");
-
-					// mantra specific
-					newEntity.setSource(info[0]);
-					newEntity.setCui(info[1]);
-					newEntity.setSemanticType(info[2]);
-					newEntity.setSemanticGroup(info[3]);
-
-					newEntity.addToIndexes();
-				}
-			} else {
-				ConceptMention newEntity = (ConceptMention) JCoReAnnotationTools.getAnnotationByClassName(aJCas,
-						outputType);
-				newEntity.setBegin(start);
-				newEntity.setEnd(end);
-
-				// String entityText = newEntity.getCoveredText();
-				// if (stopWords.contains(entityText.toLowerCase()))
-				// return;
-				// if (entityText.contains(" ")) {
-				// String[] words = entityText.split(" ");
-				// int stopWordCounter = 0;
-				// for (String word : words) {
-				// if (stopWords.contains(word.toLowerCase()))
-				// stopWordCounter++;
-				// }
-				// if (words.length == stopWordCounter)
-				// return;
-				// }
-
-				newEntity.setSpecificType(chunk.type());
-				newEntity.setComponentId(COMPONENT_ID);
-				newEntity.setConfidence(chunk.score() + "");
-				newEntity.addToIndexes();
-
-				conceptMentionIndex.index(newEntity);
-			}
-		} catch (Exception e) {
-			LOGGER.error("process() - could not generate output type: " + e.getMessage());
-			e.printStackTrace();
-			throw new AnalysisEngineProcessException(e);
-		}
-	}
-
-	private void annotateAcronymsWithFullFormEntity(JCas aJCas,
-			JCoReHashMapAnnotationIndex<Long, ConceptMention> conceptMentionIndex)
-			throws AnalysisEngineProcessException {
-
-		JFSIndexRepository indexes = aJCas.getJFSIndexRepository();
-		FSIterator<Annotation> abbrevIter = indexes.getAnnotationIndex(Abbreviation.type).iterator();
-		IndexTermGenerator<Long> longOffsetTermGenerator = TermGenerators.longOffsetTermGenerator();
-
-		// loop over all abbreviations
-		while (abbrevIter.hasNext()) {
-			Abbreviation abbrev = (Abbreviation) abbrevIter.next();
-			AbbreviationLongform fullFormAnnotation = abbrev.getTextReference();
-			LOGGER.debug("annotateAcronymsWithFullFormEntity() - checking abbreviation: " + abbrev.getCoveredText());
-			ConceptMention emFullform = null;// AnnotationRetrieval.getMatchingAnnotation(aJCas, fullFormAnnotation,
-			// ConceptMention.class);
-			emFullform = conceptMentionIndex.getFirst(fullFormAnnotation);
-
-			// The following code was once introduced for gene tagging. There,
-			// the acronym fullforms sometimes miss minor parts of an annotated
-			// gene, leading to non-annotated acronyms that would have been
-			// correct.
-			// However, for general-purpose concept recognition this approach
-			// can be quite harmful. Example: "Anaphase-promoting complex (APC)"
-			// where only "anaphase" is recognized as concept. Now, "APC" would
-			// be annotated as an acronym for "anaphase". Here, a better
-			// recognition of the abbreviation span is required.
-			// ConceptMention emFullform = null;
-			// List<ConceptMention> conceptsInFullform =
-			// JCoReAnnotationTools.getIncludedAnnotations(aJCas,
-			// fullFormAnnotation,
-			// ConceptMention.class);
-			// if (conceptsInFullform.size() == 1) {
-			// emFullform = conceptsInFullform.get(0);
-			// LOGGER.debug("Found a single ConceptMention included in the full
-			// form: {}", emFullform.getCoveredText());
-			// } else if (conceptsInFullform.size() > 1) {
-			// // If there are multiple ConceptMentions found in the full form,
-			// take that largest right-most candidate.
-			// int maxSize = -1;
-			// for (ConceptMention em : conceptsInFullform) {
-			// int emSize = em.getEnd() - em.getBegin();
-			// if (emSize > maxSize) {
-			// emFullform = em;
-			// maxSize = emSize;
-			// }
-			// }
-			// LOGGER.debug("Found multiple ConceptMentions included in the full
-			// form \"{}\", returning the longest.",
-			// fullFormAnnotation.getCoveredText());
-			// if (LOGGER.isTraceEnabled()) {
-			// LOGGER.trace("All found ConceptMentions:");
-			// for (ConceptMention cm : conceptsInFullform) {
-			// LOGGER.trace("Text: {}; offsets: {}-{}",
-			// new Object[] { cm.getCoveredText(), cm.getBegin(), cm.getEnd()
-			// });
-			// }
-			// }
-			// } else {
-			// LOGGER.debug("No ConceptMention in the span of acronym fullform
-			// \"{}\" found.",
-			// fullFormAnnotation.getCoveredText());
-			// }
-
-			String type = null;
-			if (emFullform != null)
-				type = emFullform.getClass().getCanonicalName();
-
-			ConceptMention emAcronym = null;// AnnotationRetrieval.getMatchingAnnotation(aJCas, abbrev,
-											// ConceptMention.class);
-			emAcronym = conceptMentionIndex.getFirst(abbrev);
-			// This is really slow, really a pain with full texts.
-			// It was originally introduced to push recall for gene recognition.
-			// So now we will lose (a bit) of recognition performance there.
-			// ConceptMention emAcronym =
-			// JCoReAnnotationTools.getPartiallyOverlappingAnnotation(aJCas,
-			// abbrev,
-			// ConceptMention.class);
-
-			// if type of the entity is equal to the output type for this
-			// annotator
-			if (type != null && type.equals(outputType)) {
-				if (emFullform == null) {
-					LOGGER.debug(
-							"annotateAcronymsWithFullFormEntity() - fullform of abbreviation has no ConceptMention\n");
-					continue;
-				}
-				if (emFullform.getComponentId() != null && emFullform.getComponentId().equals(COMPONENT_ID)
-						&& (emAcronym == null
-								|| !emAcronym.getClass().getName().equals(emFullform.getClass().getName()))) {
-
-					try {
-						LOGGER.debug("annotateAcronymsWithFullFormEntity() - fullform of abbreviation ("
-								+ abbrev.getCoveredText() + " [begin=" + abbrev.getBegin() + "; end=" + abbrev.getEnd()
-								+ "]) has ConceptMention: " + emFullform.toString());
-						ConceptMention newEntityOnAcronym = (ConceptMention) JCoReAnnotationTools
-								.getAnnotationByClassName(aJCas, outputType);
-						newEntityOnAcronym.setBegin(abbrev.getBegin());
-						newEntityOnAcronym.setEnd(abbrev.getEnd());
-						newEntityOnAcronym.setTextualRepresentation(newEntityOnAcronym.getCoveredText());
-						newEntityOnAcronym.setSpecificType(emFullform.getSpecificType());
-						newEntityOnAcronym.setComponentId(COMPONENT_ID + "+acronym");
-						newEntityOnAcronym.setConfidence(emFullform.getConfidence() + "");
-						newEntityOnAcronym.addToIndexes();
-
-					} catch (Exception e) {
-						LOGGER.error("process() - could not generate output type: " + e.getMessage());
-						e.printStackTrace();
-						throw new AnalysisEngineProcessException(AnalysisEngineProcessException.ANNOTATOR_EXCEPTION,
-								null);
-					}
-
-				} else {
-					if (emAcronym == null)
-						LOGGER.debug("annotateAcronymsWithFullFormEntity() - emAcronym != null");
-					else if (emAcronym.getClass().getName().equals(emFullform.getClass().getName()))
-						LOGGER.debug("annotateAcronymsWithFullFormEntity() - emAcroType="
-								+ emAcronym.getClass().getCanonicalName() + " == emFullformType="
-								+ emFullform.getClass().getCanonicalName());
-				}
-
-			}
-		}
-	}
+            String[] stopwordArray = {"a", "about", "above", "across", "after", "afterwards", "again", "against",
+                    "all", "almost", "alone", "along", "already", "also", "although", "always", "am", "among",
+                    "amongst", "amoungst", "amount", "an", "and", "another", "any", "anyhow", "anyone", "anything",
+                    "anyway", "anywhere", "are", "around", "as", "at", "back", "be", "became", "because", "become",
+                    "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside",
+                    "besides", "between", "beyond", "bill", "both", "bottom", "but", "by", "call", "can", "cannot",
+                    "cant", "co", "computer", "con", "could", "couldnt", "cry", "de", "describe", "detail", "do",
+                    "done", "down", "due", "during", "each", "eg", "eight", "either", "eleven", "else", "elsewhere",
+                    "empty", "enough", "etc", "even", "ever", "every", "everyone", "everything", "everywhere", "except",
+                    "few", "fifteen", "fify", "fill", "find", "fire", "first", "five", "for", "former", "formerly",
+                    "forty", "found", "four", "from", "front", "full", "further", "get", "give", "go", "had", "has",
+                    "hasnt", "have", "he", "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers",
+                    "herself", "high", "him", "himself", "his", "how", "however", "hundred", "i", "ie", "if", "in",
+                    "inc", "indeed", "interest", "into", "is", "it", "its", "itself", "keep", "last", "latter",
+                    "latterly", "least", "less", "ltd", "made", "many", "may", "me", "meanwhile", "might", "mill",
+                    "mine", "more", "moreover", "most", "mostly", "move", "much", "must", "my", "myself", "name",
+                    "namely", "neither", "never", "nevertheless", "next", "nine", "no", "nobody", "none", "noone",
+                    "nor", "not", "nothing", "now", "nowhere", "of", "off", "often", "on", "once", "one", "only",
+                    "onto", "or", "other", "others", "otherwise", "our", "ours", "ourselves", "out", "over", "own",
+                    "part", "per", "perhaps", "please", "put", "rather", "re", "same", "see", "seem", "seemed",
+                    "seeming", "seems", "serious", "several", "she", "should", "show", "side", "since", "sincere",
+                    "six", "sixty", "so", "some", "somehow", "someone", "something", "sometime", "sometimes",
+                    "somewhere", "still", "such", "system", "take", "ten", "than", "that", "the", "their", "them",
+                    "themselves", "then", "thence", "there", "thereafter", "thereby", "therefore", "therein",
+                    "thereupon", "these", "they", "thick", "thin", "third", "this", "those", "though", "three",
+                    "through", "throughout", "thru", "thus", "to", "together", "too", "top", "toward", "towards",
+                    "twelve", "twenty", "two", "un", "under", "until", "up", "upon", "us", "very", "via", "was", "we",
+                    "well", "were", "what", "whatever", "when", "whence", "whenever", "where", "whereafter", "whereas",
+                    "whereby", "wherein", "whereupon", "wherever", "whether", "which", "while", "whither", "who",
+                    "whoever", "whole", "whom", "whose", "why", "will", "with", "within", "without", "would", "yet",
+                    "you", "your", "yours", "yourself", "yourselves",};
+            stopWords = new HashSet<>();
+            for (String sw : stopwordArray)
+                stopWords.add(sw);
+        } catch (ResourceAccessException e) {
+            LOGGER.error("Exception while initializing", e);
+        }
+
+        // check acronyms
+        checkAcronyms = (Boolean) aContext.getConfigParameterValue(PARAM_CHECK_ACRONYMS);
+        LOGGER.info(
+                "Check for acronyms (found dictionary entries that are abbreviations are only accepted if their long form is an abbreviation of the same type, too): {}",
+                checkAcronyms);
+        // filter stop words
+
+        Boolean normalizeBoolean = provider.getNormalize();// (Boolean)
+        // aContext.getConfigParameterValue(PARAM_NORMALIZE_TEXT);
+        if (normalizeBoolean) {
+            normalizationTokenFactory = new IndoEuropeanTokenizerFactory();
+        }
+        LOGGER.info("Normalize CAS document text (i.e. do stemming and remove possessive 's): {}", provider.getNormalize());
+
+        Boolean transliterateBoolean = provider.getTransliterate();// (Boolean)
+        // aContext.getConfigParameterValue(PARAM_TRANSLITERATE_TEXT);
+        if (transliterateBoolean || !provider.getCaseSensitive()) {
+            transliterator = Transliterator.getInstance("NFD; [:Nonspacing Mark:] Remove; NFC; Lower");
+        }
+        LOGGER.info("Transliterate CAS document text (i.e. transform accented characters to their base forms): {}",
+                provider.getTransliterate());
+
+        // define output level
+        outputType = (String) aContext.getConfigParameterValue(PARAM_OUTPUT_TYPE);
+        if (outputType == null) {
+            LOGGER.error("initialize() - output type not specified.");
+            throw new ResourceInitializationException();
+        }
+
+        mantraMode = aContext.getConfigParameterValue(PARAM_USE_MANTRA_MODE) != null
+                ? (Boolean) aContext.getConfigParameterValue(PARAM_USE_MANTRA_MODE)
+                : false;
+    }
+
+    /**
+     * Runs the gazetteer over the document text and hands the found dictionary
+     * matches to add2Cas(). With approximate matching the chunks are filtered and
+     * grouped by overlap first and only the best chunks of each group are handed
+     * on; otherwise every chunk is. If acronym checking is enabled and mantra
+     * mode is off, acronyms are finally annotated according to their full forms.
+     * Diagnostic output about the normalized text goes to the logger at TRACE
+     * level instead of standard out.
+     *
+     * @param aJCas the CAS to annotate; returns early if it has no document text
+     * @throws AnalysisEngineProcessException if an annotation could not be created
+     * @throws IllegalStateException          if the gazetteer is null
+     */
+    public void process(JCas aJCas) throws AnalysisEngineProcessException {
+        if (gazetteer == null)
+            throw new IllegalStateException("The actual gazetteer object is null. Check previous log messages pointing to the error (most probably the dictionary file could not be found).");
+        String docText = aJCas.getDocumentText();
+        if (docText == null || docText.length() == 0)
+            return;
+        if (provider.getUseApproximateMatching() && !provider.getTransliterate() && !provider.getCaseSensitive())
+            // We use the transliterator because it does lowercasing and also solves issues that could arise due
+            // to the normal docText.toLowerCase() call which would break special characters sometimes
+            docText = transliterator.transform(docText);
+        NormalizedString normalizedDocText = null;
+        if (provider.getNormalize()) {
+            normalizedDocText = StringNormalizerForChunking.normalizeString(docText, normalizationTokenFactory,
+                    transliterator);
+            LOGGER.trace("Normalized offset map: {}", normalizedDocText.getOffsetMap());
+            LOGGER.trace("Normalized document text: {}", normalizedDocText.string);
+        }
+
+        IndexTermGenerator<Long> longOffsetTermGenerator = TermGenerators.longOffsetTermGenerator();
+        JCoReHashMapAnnotationIndex<Long, ConceptMention> conceptMentionIndex = new JCoReHashMapAnnotationIndex<>(
+                longOffsetTermGenerator, longOffsetTermGenerator, aJCas, ConceptMention.type);
+        JCoReHashMapAnnotationIndex<Long, Abbreviation> abbreviationIndex = new JCoReHashMapAnnotationIndex<>(
+                longOffsetTermGenerator, longOffsetTermGenerator, aJCas, Abbreviation.type);
+
+        LOGGER.debug("Performing actual Gazetteer annotation...");
+        // Chunk offsets refer to the normalized text if normalization is enabled;
+        // add2Cas() maps them back to offsets of the original document text.
+        Chunking chunking;
+        if (provider.getNormalize())
+            chunking = gazetteer.chunk(normalizedDocText.string);
+        else
+            chunking = gazetteer.chunk(docText);
+        LOGGER.debug("Gazetteer annotation done.");
+        if (provider.getUseApproximateMatching()) {
+            /*
+             * handle matches found by approx matching: this means especially overlapping
+             * matches with different scores (doesn't happen with exact matches)
+             */
+            List<Chunk> chunkList = filterChunking(chunking);
+            List<OverlappingChunk> overlappingChunks = groupOverlappingChunks(chunkList,
+                    chunking.charSequence().toString());
+            // now add the best chunk of all overlappingChunks to the CAS
+            LOGGER.debug("all overlapping chunks:\n");
+            for (OverlappingChunk overlappingChunk : overlappingChunks) {
+                // show chunks
+                LOGGER.debug(overlappingChunk.toStringAll());
+                List<Chunk> bestChunks = overlappingChunk.getBestChunks();
+                LOGGER.debug("Found {} best chunks.", bestChunks.size());
+                for (int i = 0; i < bestChunks.size(); i++) {
+                    Chunk bestChunk = bestChunks.get(i);
+                    if (LOGGER.isDebugEnabled()) {
+                        String chunkText = provider.getNormalize() ? normalizedDocText.string.substring(bestChunk.start(), bestChunk.end()) : aJCas.getDocumentText().substring(bestChunk.start(), bestChunk.end());
+                        LOGGER.debug("Nr. " + i + " best chunk: " + bestChunk.start() + " - " + bestChunk.end() + ": "
+                                + bestChunk.score() + " ; type: " + bestChunk.type() + " ; text: " + chunkText);
+                    }
+                    // According to a former TODO at this place, a best chunk is not
+                    // expected to occur in more than one overlap group because the
+                    // chunks are sorted by offset in the grouping method; the duplicate
+                    // check that once guarded this has been disabled.
+                    add2Cas(aJCas, bestChunk, normalizedDocText, conceptMentionIndex, abbreviationIndex);
+                }
+            }
+        } else {
+            // Without approximate matching, every chunk is handed to add2Cas().
+            for (Chunk chunk : chunking.chunkSet()) {
+                add2Cas(aJCas, chunk, normalizedDocText, conceptMentionIndex, abbreviationIndex);
+            }
+        }
+        if (checkAcronyms && !mantraMode) {
+            LOGGER.debug("process() - checking acronyms");
+            annotateAcronymsWithFullFormEntity(aJCas, conceptMentionIndex);
+        }
+    }
+
+    private List<Chunk> filterChunking(Chunking chunking) {
+        // Collects, in chunk set iteration order, all chunks whose covered
+        // text is rejected by none of the three filters below.
+        final CharSequence chunkedText = chunking.charSequence();
+        final List<Chunk> accepted = new ArrayList<>(chunking.chunkSet().size());
+        for (Chunk candidate : chunking.chunkSet()) {
+            final String covered = chunkedText.subSequence(candidate.start(), candidate.end()).toString();
+            final boolean rejected = filterParenthesis(covered)
+                    || filterPunctuationArtifacts(covered)
+                    || filterStopwords(covered);
+            if (!rejected)
+                accepted.add(candidate);
+        }
+        return accepted;
+    }
+
+    private boolean filterPunctuationArtifacts(String chunkText) {
+        // Reports true when the chunk text begins or ends with a dash;
+        // filterChunking() drops every chunk for which this returns true.
+        final boolean dashAtStart = chunkText.startsWith("-");
+        final boolean dashAtEnd = chunkText.endsWith("-");
+        return dashAtStart || dashAtEnd;
+    }
+
+    private boolean filterStopwords(String chunkText) {
+        // Rejects a chunk that is itself a stop word, or a multi-word chunk
+        // in which at least half (rounded up) of the space-separated words
+        // are stop words.
+        if (stopWords.contains(chunkText.toLowerCase()))
+            return true;
+        if (!chunkText.contains(" "))
+            return false;
+        final String[] tokens = chunkText.split(" ");
+        int hits = 0;
+        for (int i = 0; i < tokens.length; i++)
+            hits += stopWords.contains(tokens[i].toLowerCase()) ? 1 : 0;
+        if (hits < Math.ceil(tokens.length / 2.0))
+            return false;
+        LOGGER.debug("Filtering due to high stop word occurrences: {}", chunkText);
+        return true;
+    }
+
+    /**
+     * Decides whether a chunk (= dictionary match) may be annotated. A chunk that
+     * is not an abbreviation is always accepted. An abbreviation is only accepted
+     * if its full form (obtained via the abbreviation's textReference) is a
+     * ConceptMention whose class name equals the configured output type.
+     *
+     * @param normalizedDocText   only read if normalization is enabled
+     * @param conceptMentionIndex used to look up the full form's ConceptMention
+     * @param abbreviationIndex   used to look up an abbreviation at the chunk offsets
+     * @return true if the chunk may be annotated, false otherwise
+     */
+    private boolean isAcronymWithSameFullFormSpecificType(JCas aJCas, Chunk chunk, NormalizedString normalizedDocText,
+                                                          JCoReHashMapAnnotationIndex<Long, ConceptMention> conceptMentionIndex,
+                                                          JCoReHashMapAnnotationIndex<Long, Abbreviation> abbreviationIndex) {
+        int start;
+        int end;
+        if (provider.getNormalize()) {
+            try {
+                start = normalizedDocText.getOriginalOffset(chunk.start());
+                end = normalizedDocText.getOriginalOffset(chunk.end());
+            } catch (Exception e) {
+                // Log the offending chunk through the logger before rethrowing.
+                LOGGER.error("Could not map chunk {} (start={}, end={}) to original offsets;"
+                                + " normalized text length: {}",
+                        chunk, chunk.start(), chunk.end(),
+                        normalizedDocText.string.length(), e);
+                throw e;
+            }
+        } else {
+            start = chunk.start();
+            end = chunk.end();
+        }
+
+        LongOffsetIndexTermGenerator longOffsetTermGenerator = TermGenerators.longOffsetTermGenerator();
+        // Retrieves potential abbr annotation
+        Abbreviation abbr = abbreviationIndex.getFirst(longOffsetTermGenerator.forOffsets(start, end));
+        // check whether it's an abbr
+        String chunktext = null;
+        if (LOGGER.isDebugEnabled())
+            chunktext = aJCas.getDocumentText().substring(start, end);
+        if (abbr == null) {
+            LOGGER.debug("{} chunk \"{}\" is not an abbreviation\n", chunk, chunktext);
+            return true;
+        }
+        // checks whether respective full form is ConceptMention
+        AbbreviationLongform textRef = abbr.getTextReference();
+        ConceptMention em = conceptMentionIndex.getFirst(textRef);
+        if (em == null) {
+            LOGGER.debug(
+                    "{} chunk \"{}\" is an abbreviation but respective full \"{}\" form is no ConceptMention\n",
+                    chunk, chunktext, textRef.getCoveredText());
+            return false;
+        }
+
+        // checks whether full form annotation matches the type to be annotated
+        // here
+        String emType = em.getClass().getCanonicalName();
+        if (emType.equals(outputType)) {
+            LOGGER.debug(
+                    "{} chunk \"{}\" is an abbreviation and respective full form \"{}\" is ConceptMention with same type as OutputType\n",
+                    chunk, chunktext, em.getCoveredText());
+            return true;
+        }
+
+        LOGGER.debug(
+                "{} chunk \"{}\" is an abbreviation but respective full form \"{}\" is ConceptMention without the correct OutputType (is: {}; OutputType: {})\n",
+                chunk, chunktext, em.getCoveredText(), emType, outputType);
+        return false;
+    }
+
+    // ------------ INFO ..........
+    // String text = aJCas.getDocumentText();
+    // int start = chunk.start();
+    // int end = chunk.end();
+    // String type = chunk.type();
+    // double score = chunk.score();
+    // String phrase = text.substring(start, end);
+    // System.out.println(" found phrase=|" + phrase + "|"
+    // + " start=" + start + " end=" + end + " type=" + type
+    // + " score=" + score);
+    // ------------ INFO ..........
+
+    /**
+     * Adds a chunk as an annotation to the CAS. If acronym checking is enabled,
+     * chunks rejected by isAcronymWithSameFullFormSpecificType() are skipped. In
+     * mantra mode, one mantra Entity is created for each "@@TERM@@"-separated term
+     * encoded in the chunk type; otherwise a single annotation of the configured
+     * output type is created and also put into the given ConceptMention index.
+     *
+     * @param aJCas               the CAS to add the annotation to
+     * @param chunk               the dictionary match; its offsets refer to the
+     *                            normalized text if normalization is enabled
+     * @param normalizedDocText   used to map chunk offsets to the original text
+     * @param abbreviationIndex   passed on to the acronym check
+     * @param conceptMentionIndex passed on to the acronym check; receives new mentions
+     * @throws AnalysisEngineProcessException if the annotation could not be created
+     */
+    private void add2Cas(JCas aJCas, Chunk chunk, NormalizedString normalizedDocText,
+                         JCoReHashMapAnnotationIndex<Long, ConceptMention> conceptMentionIndex,
+                         JCoReHashMapAnnotationIndex<Long, Abbreviation> abbreviationIndex) throws AnalysisEngineProcessException {
+        // if checkAcronyms, then check acronyms for compliant full forms (=
+        // with same specificType)
+        if (checkAcronyms && !isAcronymWithSameFullFormSpecificType(aJCas, chunk, normalizedDocText,
+                conceptMentionIndex, abbreviationIndex)) {
+            return;
+        }
+
+        int start = provider.getNormalize() ? normalizedDocText.getOriginalOffset(chunk.start()) : chunk.start();
+        int end = provider.getNormalize() ? normalizedDocText.getOriginalOffset(chunk.end()) : chunk.end();
+
+        try {
+            if (mantraMode) {
+                // the "type" string is used to transport all data needed for
+                // the MAN-XML format
+                for (String term : chunk.type().split("@@TERM@@")) {
+                    // @@ is used to separate source, cui, type(s) and group (in
+                    // this order!)
+                    String[] info = term.split("@@");
+                    // Fail with a telling message instead of an index error.
+                    if (info.length < 4)
+                        throw new IllegalArgumentException(
+                                "Expected source, cui, type and group separated by @@ but got: " + term);
+                    Entity newEntity = (Entity) JCoReAnnotationTools.getAnnotationByClassName(aJCas,
+                            "de.julielab.jcore.types.mantra.Entity");
+                    newEntity.setBegin(start);
+                    newEntity.setEnd(end);
+                    newEntity.setComponentId(COMPONENT_ID);
+                    newEntity.setConfidence(chunk.score() + "");
+
+                    // mantra specific
+                    newEntity.setSource(info[0]);
+                    newEntity.setCui(info[1]);
+                    newEntity.setSemanticType(info[2]);
+                    newEntity.setSemanticGroup(info[3]);
+
+                    newEntity.addToIndexes();
+                }
+            } else {
+                ConceptMention newEntity = (ConceptMention) JCoReAnnotationTools.getAnnotationByClassName(aJCas,
+                        outputType);
+                newEntity.setBegin(start);
+                newEntity.setEnd(end);
+
+                // NOTE: stop words are filtered in filterChunking(), which process() only
+                // applies to approximate matching results; chunks arriving here from
+                // exact matching have not been checked against the stop word list.
+
+                newEntity.setSpecificType(chunk.type());
+                newEntity.setComponentId(COMPONENT_ID);
+                newEntity.setConfidence(chunk.score() + "");
+                newEntity.addToIndexes();
+
+                conceptMentionIndex.index(newEntity);
+            }
+        } catch (Exception e) {
+            // Log through the logger (including the stack trace) instead of stderr.
+            LOGGER.error("process() - could not generate output type: " + e.getMessage(), e);
+            throw new AnalysisEngineProcessException(e);
+        }
+    }
+
+    /**
+     * Creates an annotation of the configured output type on abbreviations. This
+     * is done if the index returns a ConceptMention for the abbreviation's full
+     * form that is of the output type and was created by this component, and if
+     * the index returns no ConceptMention of the same class for the abbreviation
+     * itself. Specific type and confidence are copied from the full form.
+     *
+     * @param aJCas               the CAS whose abbreviations are processed
+     * @param conceptMentionIndex used to look up the ConceptMentions of the
+     *                            abbreviations and of their full forms
+     * @throws AnalysisEngineProcessException if the annotation could not be created
+     */
+    private void annotateAcronymsWithFullFormEntity(JCas aJCas,
+                                                    JCoReHashMapAnnotationIndex<Long, ConceptMention> conceptMentionIndex)
+            throws AnalysisEngineProcessException {
+        JFSIndexRepository indexes = aJCas.getJFSIndexRepository();
+        FSIterator<Annotation> abbrevIter = indexes.getAnnotationIndex(Abbreviation.type).iterator();
+
+        // loop over all abbreviations
+        while (abbrevIter.hasNext()) {
+            Abbreviation abbrev = (Abbreviation) abbrevIter.next();
+            AbbreviationLongform fullFormAnnotation = abbrev.getTextReference();
+            LOGGER.debug("annotateAcronymsWithFullFormEntity() - checking abbreviation: " + abbrev.getCoveredText());
+            ConceptMention emFullform = conceptMentionIndex.getFirst(fullFormAnnotation);
+
+            // The following code was once introduced for gene tagging. There,
+            // the acronym fullforms sometimes miss minor parts of an annotated
+            // gene, leading to non-annotated acronyms that would have been
+            // correct.
+            // However, for general-purpose concept recognition this approach
+            // can be quite harmful. Example: "Anaphase-promoting complex (APC)"
+            // where only "anaphase" is recognized as concept. Now, "APC" would
+            // be annotated as an acronym for "anaphase". Here, a better
+            // recognition of the abbreviation span is required.
+            // ConceptMention emFullform = null;
+            // List<ConceptMention> conceptsInFullform =
+            // JCoReAnnotationTools.getIncludedAnnotations(aJCas,
+            // fullFormAnnotation,
+            // ConceptMention.class);
+            // if (conceptsInFullform.size() == 1) {
+            // emFullform = conceptsInFullform.get(0);
+            // LOGGER.debug("Found a single ConceptMention included in the full
+            // form: {}", emFullform.getCoveredText());
+            // } else if (conceptsInFullform.size() > 1) {
+            // // If there are multiple ConceptMentions found in the full form,
+            // take that largest right-most candidate.
+            // int maxSize = -1;
+            // for (ConceptMention em : conceptsInFullform) {
+            // int emSize = em.getEnd() - em.getBegin();
+            // if (emSize > maxSize) {
+            // emFullform = em;
+            // maxSize = emSize;
+            // }
+            // }
+            // LOGGER.debug("Found multiple ConceptMentions included in the full
+            // form \"{}\", returning the longest.",
+            // fullFormAnnotation.getCoveredText());
+            // if (LOGGER.isTraceEnabled()) {
+            // LOGGER.trace("All found ConceptMentions:");
+            // for (ConceptMention cm : conceptsInFullform) {
+            // LOGGER.trace("Text: {}; offsets: {}-{}",
+            // new Object[] { cm.getCoveredText(), cm.getBegin(), cm.getEnd()
+            // });
+            // }
+            // }
+            // } else {
+            // LOGGER.debug("No ConceptMention in the span of acronym fullform
+            // \"{}\" found.",
+            // fullFormAnnotation.getCoveredText());
+            // }
+
+            if (emFullform == null) {
+                LOGGER.debug(
+                        "annotateAcronymsWithFullFormEntity() - fullform of abbreviation has no ConceptMention\n");
+                continue;
+            }
+            String type = emFullform.getClass().getCanonicalName();
+
+            ConceptMention emAcronym = conceptMentionIndex.getFirst(abbrev);
+            // This is really slow, really a pain with full texts.
+            // It was originally introduced to push recall for gene recognition.
+            // So now we will lose (a bit) of recognition performance there.
+            // ConceptMention emAcronym =
+            // JCoReAnnotationTools.getPartiallyOverlappingAnnotation(aJCas,
+            // abbrev,
+            // ConceptMention.class);
+
+            // if type of the entity is equal to the output type for this
+            // annotator
+            if (type != null && type.equals(outputType)) {
+                if (emFullform.getComponentId() != null && emFullform.getComponentId().equals(COMPONENT_ID)
+                        && (emAcronym == null
+                        || !emAcronym.getClass().getName().equals(emFullform.getClass().getName()))) {
+                    try {
+                        LOGGER.debug("annotateAcronymsWithFullFormEntity() - fullform of abbreviation ("
+                                + abbrev.getCoveredText() + " [begin=" + abbrev.getBegin() + "; end=" + abbrev.getEnd()
+                                + "]) has ConceptMention: " + emFullform.toString());
+                        ConceptMention newEntityOnAcronym = (ConceptMention) JCoReAnnotationTools
+                                .getAnnotationByClassName(aJCas, outputType);
+                        newEntityOnAcronym.setBegin(abbrev.getBegin());
+                        newEntityOnAcronym.setEnd(abbrev.getEnd());
+                        newEntityOnAcronym.setTextualRepresentation(newEntityOnAcronym.getCoveredText());
+                        newEntityOnAcronym.setSpecificType(emFullform.getSpecificType());
+                        newEntityOnAcronym.setComponentId(COMPONENT_ID + "+acronym");
+                        newEntityOnAcronym.setConfidence(emFullform.getConfidence() + "");
+                        newEntityOnAcronym.addToIndexes();
+                    } catch (Exception e) {
+                        LOGGER.error("process() - could not generate output type: " + e.getMessage(), e);
+                        throw new AnalysisEngineProcessException(AnalysisEngineProcessException.ANNOTATOR_EXCEPTION,
+                                null, e);
+                    }
+
+                } else {
+                    if (emAcronym == null)
+                        LOGGER.debug("annotateAcronymsWithFullFormEntity() - emAcronym == null but fullform ConceptMention has a different or no component ID");
+                    else if (emAcronym.getClass().getName().equals(emFullform.getClass().getName()))
+                        LOGGER.debug("annotateAcronymsWithFullFormEntity() - emAcroType="
+                                + emAcronym.getClass().getCanonicalName() + " == emFullformType="
+                                + emFullform.getClass().getCanonicalName());
+                }
+            }
+        }
+    }
+
+    enum ParenthesisType { // NOTE(review): presumably consumed by filterParenthesis(), which is outside this view - confirm
+        ROUND, BRACKET, CURLY, NONE
+    }
 
 }
diff --git a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/utils/StringNormalizerForChunking.java b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/utils/StringNormalizerForChunking.java
index 2cffe9bde..e51c41eb9 100644
--- a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/utils/StringNormalizerForChunking.java
+++ b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/utils/StringNormalizerForChunking.java
@@ -1,4 +1,3 @@
-
 package de.julielab.jcore.ae.lingpipegazetteer.utils;
 
 import com.aliasi.tokenizer.Tokenizer;
@@ -9,205 +8,207 @@
 
 public class StringNormalizerForChunking {
 
-	public enum Mode {
-		/**
-		 * Punctuation characters are deleted completely, shrinking the string.
-		 */
-		DELETE,
-		/** Punctuation characters are replaced by white spaces. */
-		REPLACE
-	}
-
-	private static Set<Character> charsToDelete = new HashSet<>();
-	static {
-		charsToDelete.add('-');
-		charsToDelete.add('+');
-		charsToDelete.add(',');
-		charsToDelete.add('.');
-		charsToDelete.add(':');
-		charsToDelete.add(';');
-		charsToDelete.add('?');
-		charsToDelete.add('!');
-		charsToDelete.add('*');
-		charsToDelete.add('§');
-		charsToDelete.add('$');
-		charsToDelete.add('%');
-		charsToDelete.add('&');
-		charsToDelete.add('/');
-		charsToDelete.add('\\');
-		charsToDelete.add('(');
-		charsToDelete.add(')');
-		charsToDelete.add('<');
-		charsToDelete.add('>');
-		charsToDelete.add('[');
-		charsToDelete.add(']');
-		charsToDelete.add('=');
-		charsToDelete.add('\'');
-		charsToDelete.add('`');
-		charsToDelete.add('´');
-		charsToDelete.add('"');
-		charsToDelete.add('#');
-	}
-
-	public static class NormalizedString {
-		public String string;
-		private Map<Integer, Integer> offsetMap = new HashMap<>();
-
-		public Map<Integer, Integer> getOffsetMap() {
-			return offsetMap;
-		}
-
-		private TreeSet<Integer> normalizedOffsetSet;
-
-		public Integer getOriginalOffset(int normalizedOffset) {
-			Integer originalOffset = offsetMap.get(normalizedOffset);
-			if (originalOffset == null) {
-				originalOffset = deriveOriginalOffset(normalizedOffset);
-				offsetMap.put(normalizedOffset, originalOffset);
-			}
-			return originalOffset;
-		}
-
-		private Integer deriveOriginalOffset(int normalizedOffset) {
-			if (normalizedOffsetSet == null)
-				normalizedOffsetSet = new TreeSet<>(offsetMap.keySet());
-			Integer previousNormalizedOffset = normalizedOffsetSet.floor(normalizedOffset);
-			Integer originalPreviousOffset = offsetMap.get(previousNormalizedOffset);
-			int offsetShift = Math.abs(originalPreviousOffset - previousNormalizedOffset);
-			// Typically, the normalized string will be shorter than the
-			// original, thus the original offset would be larger.
-			if (originalPreviousOffset > previousNormalizedOffset)
-				return normalizedOffset + offsetShift;
-			// But if, for some reason, the normalized string is longer than the
-			// original, we would have to subtract the difference from the
-			// normalized offset.
-			return normalizedOffset - offsetShift;
-		}
-	}
-
-	/**
-	 * This method was meant for text normalization by just deleting punctuation
-	 * characters. However, the approach turned out to be suboptimal in cases
-	 * where a dictionary entry would be "SHP-1" and the text form would be "SHP
-	 * 1". That is, when in the text there is just a whitespace where there is a
-	 * punctuation character in the dictionary, we won't recognize the
-	 * dictionary entry. Thus, a different normalization was developed, namely
-	 * in the other normalization method. It is supposed to be used together
-	 * with an approximate chunker.
-	 * 
-	 * @param str
-	 * @return
-	 */
-	public static NormalizedString normalizeString(String str) {
-		NormalizedString ns = new NormalizedString();
-		StringBuilder sb = new StringBuilder();
-		int deletedChars = 0;
-
-		for (int i = 0; i < str.length(); i++) {
-			char c = str.charAt(i);
-			if (charsToDelete.contains(c)) {
-				deletedChars++;
-				// switch (mode) {
-				// case REPLACE: sb.append(" "); break;
-				// case DELETE: deletedChars++; break;
-				// }
-			} else {
-				sb.append(c);
-			}
-			int newOffset = Math.max(0, i - deletedChars);
-			if (null == ns.offsetMap.get(newOffset))
-				ns.offsetMap.put(newOffset, i);
-		}
-		ns.string = sb.toString();
-		return ns;
-	}
-
-	/**
-	 * This normalization method uses a given TokenizerFactory (could also be a
-	 * PorterStemmerTokenizerFactory for stemming) and additionally removes
-	 * possessive 's constructions. Dashes and other punctuation is left
-	 * untouched. By using an approximate chunker, one can also handle
-	 * punctuation.
-	 * 
-	 * @param str
-	 * @param tokenizerFactory
-	 * @return
-	 */
-	public static NormalizedString normalizeString(String str, TokenizerFactory tokenizerFactory,
-			Transliterator transliterator) {
-		// boolean stemming = tokenizerFactory instanceof
-		// PorterStemmerTokenizerFactory;
-
-		NormalizedString ns = new NormalizedString();
-
-		char[] strChars = str.toCharArray();
-		Tokenizer tokenizer = tokenizerFactory.tokenizer(strChars, 0, strChars.length);
-		StringBuilder sb = new StringBuilder();
-		ArrayDeque<String> tokenS = new ArrayDeque<>();
-		Map<Integer, Integer> deleteCandidateOffsetMap = new HashMap<>();
-		// According to the lingpipe API documentation, one starts with the next
-		// whitespace.
-		sb.append(tokenizer.nextWhitespace());
-		ns.offsetMap.put(0, 0);
-		String token;
-		while ((token = tokenizer.nextToken()) != null) {
-			// Handle possessive 's (like Parkinson's). It will be deleted. In
-			// case we have accidentally deleted some
-			// tokens, those are stored in the stack and their offsets are
-			// stored, too. In case it was an error, the
-			// tokens are later added again in the "else" path.
-			if (token.equals("'")) {
-				int newStartOffset = sb.length() + sumOfStack(tokenS);
-				int newEndOffset = sb.length() + sumOfStack(tokenS) + token.length();
-				deleteCandidateOffsetMap.put(newStartOffset, tokenizer.lastTokenStartPosition());
-				deleteCandidateOffsetMap.put(newEndOffset, tokenizer.lastTokenEndPosition());
-				tokenS.push(token + tokenizer.nextWhitespace());
-			} else if (token.equals("s") && tokenS.size() == 1) {
-				int newStartOffset = sb.length() + sumOfStack(tokenS);
-				int newEndOffset = sb.length() + sumOfStack(tokenS) + token.length();
-				deleteCandidateOffsetMap.put(newStartOffset, tokenizer.lastTokenStartPosition());
-				deleteCandidateOffsetMap.put(newEndOffset, tokenizer.lastTokenEndPosition());
-				tokenS.push(token);
-				String ws = tokenizer.nextWhitespace();
-				if (ws.length() > 0) {
-					sb.append(ws);
-					tokenS.clear();
-					deleteCandidateOffsetMap.clear();
-				}
-			} else {
-				if (!tokenS.isEmpty()) {
-					for (String s : tokenS) {
-						sb.append(s);
-					}
-					tokenS.clear();
-					ns.offsetMap.putAll(deleteCandidateOffsetMap);
-					deleteCandidateOffsetMap.clear();
-				}
-				if (transliterator != null)
-					token = transliterator.transform(token);
-				// plural s, only when no stemming is done
-				// if (!stemming && token.endsWith("s"))
-				// token = token.substring(0, token.length() - 1);
-				sb.append(token);
-				int newStartOffset = sb.length() - token.length();
-				int newEndOffset = sb.length();
-				ns.offsetMap.put(newStartOffset, tokenizer.lastTokenStartPosition());
-				ns.offsetMap.put(newEndOffset, tokenizer.lastTokenEndPosition());
-				sb.append(tokenizer.nextWhitespace());
-			}
-		}
-		ns.string = sb.toString();
-		return ns;
-	}
-
-	private static int sumOfStack(Deque<String> stack) {
-		int sum = 0;
-		for (String i : stack)
-			sum += i.length();
-		return sum;
-	}
-
-	public static NormalizedString normalizeString(String str, TokenizerFactory tokenizerFactory) {
-		return normalizeString(str, tokenizerFactory, null);
-	}
+    private static Set<Character> charsToDelete = new HashSet<>();
+
+    static {
+        // Characters that normalizeString(String) removes from its input. They
+        // are grouped loosely by kind; the grouping has no effect on behavior.
+        final char[] deletable = {
+                // dash and plus
+                '-', '+',
+                // sentence punctuation
+                ',', '.', ':', ';', '?', '!',
+                // asterisk and section sign
+                '*', '§',
+                // dollar, percent, ampersand
+                '$', '%', '&',
+                // slash and backslash
+                '/', '\\',
+                // brackets
+                '(', ')', '<', '>', '[', ']',
+                // equals sign
+                '=',
+                // quotes and accents
+                '\'', '`', '´', '"',
+                // hash
+                '#'
+        };
+
+        // The set is unordered, so only membership matters here.
+        for (char deletableChar : deletable) {
+            charsToDelete.add(deletableChar);
+        }
+    }
+
+    /**
+     * Normalizes text by deleting every character contained in
+     * {@code charsToDelete} and recording, for each normalized offset, the first
+     * original offset that maps to it. This approach turned out to be suboptimal:
+     * a dictionary entry "SHP-1" is not recognized when the text reads "SHP 1",
+     * i.e. when the text has a whitespace where the dictionary entry has a
+     * punctuation character. The tokenizer-based overload of this method was
+     * developed for that reason; it is supposed to be used together with an
+     * approximate chunker.
+     *
+     * @param str the text to normalize
+     * @return the normalized string together with its offset map
+     */
+    public static NormalizedString normalizeString(String str) {
+        NormalizedString result = new NormalizedString();
+        StringBuilder kept = new StringBuilder();
+        // Number of characters dropped so far.
+        int dropped = 0;
+        int position = 0;
+        while (position < str.length()) {
+            char current = str.charAt(position);
+            if (!charsToDelete.contains(current)) {
+                kept.append(current);
+            } else {
+                // A dropped character shifts the normalized offsets to the left.
+                dropped++;
+            }
+            // Only the first original position seen for a normalized offset is
+            // recorded; later positions mapping to the same offset are ignored.
+            int normalizedOffset = Math.max(0, position - dropped);
+            result.offsetMap.putIfAbsent(normalizedOffset, position);
+            position++;
+        }
+        result.string = kept.toString();
+        return result;
+    }
+
+    /**
+     * Normalizes {@code str} token by token using the given TokenizerFactory
+     * (e.g. a PorterStemmerTokenizerFactory for stemming), removes possessive
+     * 's constructions and records normalized-to-original offsets. Dashes and
+     * other punctuation are left untouched; an approximate chunker can handle it.
+     *
+     * @param str              the text to normalize
+     * @param tokenizerFactory creates the tokenizer that splits {@code str}
+     * @param transliterator   applied to each ordinary token; may be {@code null}
+     * @return the normalized string together with its offset map
+     */
+    public static NormalizedString normalizeString(String str, TokenizerFactory tokenizerFactory,
+                                                   Transliterator transliterator) {
+        // boolean stemming = tokenizerFactory instanceof
+        // PorterStemmerTokenizerFactory;
+
+        NormalizedString ns = new NormalizedString();
+
+        char[] strChars = str.toCharArray();
+        Tokenizer tokenizer = tokenizerFactory.tokenizer(strChars, 0, strChars.length);
+        StringBuilder sb = new StringBuilder();
+        ArrayDeque<String> tokenS = new ArrayDeque<>();
+        Map<Integer, Integer> deleteCandidateOffsetMap = new HashMap<>();
+        // According to the lingpipe API documentation, one starts with the next
+        // whitespace.
+        sb.append(tokenizer.nextWhitespace());
+        ns.offsetMap.put(0, 0);
+        String token;
+        while ((token = tokenizer.nextToken()) != null) {
+            // Handle possessive 's (like Parkinson's), which is to be deleted.
+            // An apostrophe and an "s" token following it are not appended right
+            // away but parked on the stack tokenS together with their offsets in
+            // deleteCandidateOffsetMap. If the next token shows this was not a
+            // possessive, the "else" path appends the parked tokens again.
+            if (token.equals("'")) {
+                int newStartOffset = sb.length() + sumOfStack(tokenS);
+                int newEndOffset = sb.length() + sumOfStack(tokenS) + token.length();
+                deleteCandidateOffsetMap.put(newStartOffset, tokenizer.lastTokenStartPosition());
+                deleteCandidateOffsetMap.put(newEndOffset, tokenizer.lastTokenEndPosition());
+                tokenS.push(token + tokenizer.nextWhitespace());
+            } else if (token.equals("s") && tokenS.size() == 1) {
+                int newStartOffset = sb.length() + sumOfStack(tokenS);
+                int newEndOffset = sb.length() + sumOfStack(tokenS) + token.length();
+                deleteCandidateOffsetMap.put(newStartOffset, tokenizer.lastTokenStartPosition());
+                deleteCandidateOffsetMap.put(newEndOffset, tokenizer.lastTokenEndPosition());
+                tokenS.push(token);
+                String ws = tokenizer.nextWhitespace();
+                if (ws.length() > 0) {
+                    sb.append(ws);
+                    tokenS.clear();
+                    deleteCandidateOffsetMap.clear();
+                }
+            } else {
+                if (!tokenS.isEmpty()) {
+                    // push() inserts at the head, so walk from the tail to restore the
+                    // parked tokens in text order, matching the offsets recorded for them.
+                    tokenS.descendingIterator().forEachRemaining(sb::append);
+                    tokenS.clear();
+                    ns.offsetMap.putAll(deleteCandidateOffsetMap);
+                    deleteCandidateOffsetMap.clear();
+                }
+                if (transliterator != null)
+                    token = transliterator.transform(token);
+                // plural s, only when no stemming is done
+                // if (!stemming && token.endsWith("s"))
+                // token = token.substring(0, token.length() - 1);
+                sb.append(token);
+                int newStartOffset = sb.length() - token.length();
+                int newEndOffset = sb.length();
+                ns.offsetMap.put(newStartOffset, tokenizer.lastTokenStartPosition());
+                ns.offsetMap.put(newEndOffset, tokenizer.lastTokenEndPosition());
+                sb.append(tokenizer.nextWhitespace());
+            }
+        }
+        ns.string = sb.toString();
+        return ns;
+    }
+
+    /** Returns the summed length of all strings held in {@code stack}. */
+    private static int sumOfStack(Deque<String> stack) {
+        // Equivalent to accumulating entry.length() in a loop; an empty stack gives 0.
+        int totalLength = stack.stream().mapToInt(String::length).sum();
+        return totalLength;
+    }
+
+    public static NormalizedString normalizeString(String str, TokenizerFactory tokenizerFactory) {
+        return normalizeString(str, tokenizerFactory, null); // null transliterator: tokens are not transliterated
+    }
+
+    /**
+     * Ways of treating punctuation characters during normalization.
+     * NOTE(review): only referenced from commented-out code here; confirm it is still used.
+     */
+    public enum Mode {
+        /** Punctuation characters are deleted completely, shrinking the string. */
+        DELETE,
+        /** Punctuation characters are replaced by white spaces. */
+        REPLACE
+    }
+
+    /** Normalized text plus a map from normalized offsets to original text offsets. */
+    public static class NormalizedString {
+        public String string;
+        private Map<Integer, Integer> offsetMap = new HashMap<>();
+        /** Sorted copy of the offsetMap keys, built lazily on the first derivation. */
+        private TreeSet<Integer> normalizedOffsetSet;
+
+        public Map<Integer, Integer> getOffsetMap() {
+            return offsetMap;
+        }
+
+        /** Returns the original offset for {@code normalizedOffset}; derived offsets are cached in the map. */
+        public Integer getOriginalOffset(int normalizedOffset) {
+            Integer originalOffset = offsetMap.get(normalizedOffset);
+            if (originalOffset == null) {
+                originalOffset = deriveOriginalOffset(normalizedOffset);
+                offsetMap.put(normalizedOffset, originalOffset);
+            }
+            return originalOffset;
+        }
+
+        /** Derives an unmapped offset by applying the shift of the closest mapped offset at or below it. */
+        private Integer deriveOriginalOffset(int normalizedOffset) {
+            if (normalizedOffsetSet == null)
+                normalizedOffsetSet = new TreeSet<>(offsetMap.keySet());
+            Integer previousNormalizedOffset = normalizedOffsetSet.floor(normalizedOffset);
+            // Without a mapping at or below the requested offset (empty or negative input) no
+            // shift is known: return the offset unchanged instead of throwing a NullPointerException.
+            if (previousNormalizedOffset == null)
+                return normalizedOffset;
+            // The signed difference covers both a shorter and a longer normalized string.
+            return normalizedOffset + (offsetMap.get(previousNormalizedOffset) - previousNormalizedOffset);
+        }
+    }
 }
diff --git a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/StringNormalizerForChunkingTest.java b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/StringNormalizerForChunkingTest.java
index fe1ac16a0..fef412a2e 100644
--- a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/StringNormalizerForChunkingTest.java
+++ b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/StringNormalizerForChunkingTest.java
@@ -49,22 +49,22 @@ public void testNormalizedOffsets() {
 		text = "-aa :+bb";
 		// Outcome: "aabb";
 		ns = StringNormalizerForChunking.normalizeString(text);
-		assertEquals("The original offset is computed wrong", new Integer(0), ns.getOriginalOffset(0));
-		assertEquals("The original offset is computed wrong", new Integer(2), ns.getOriginalOffset(1));
-		assertEquals("The original offset is computed wrong", new Integer(3), ns.getOriginalOffset(2));
-		assertEquals("The original offset is computed wrong", new Integer(6), ns.getOriginalOffset(3));
-		assertEquals("The original offset is computed wrong", new Integer(7), ns.getOriginalOffset(4));
+		assertEquals("The original offset is computed wrong", Integer.valueOf(0), ns.getOriginalOffset(0));
+		assertEquals("The original offset is computed wrong", Integer.valueOf(2), ns.getOriginalOffset(1));
+		assertEquals("The original offset is computed wrong", Integer.valueOf(3), ns.getOriginalOffset(2));
+		assertEquals("The original offset is computed wrong", Integer.valueOf(6), ns.getOriginalOffset(3));
+		assertEquals("The original offset is computed wrong", Integer.valueOf(7), ns.getOriginalOffset(4));
 		assertNull("There are more offset mappings than should be", ns.getOffsetMap().get(5));
 
 		text = "((2-n-butyl-6,7-dichloro-2-cyclopentyl-2,3-dihydro-1-oxo-1H-inden-5-yl)oxy)acetic acid";
 		// Outcome:
 		// "2nbutyl67dichloro2cyclopentyl23dihydro1oxo1Hinden5yloxyacetic acid";
 		ns = StringNormalizerForChunking.normalizeString(text);
-		assertEquals("The original offset is computed wrong", new Integer(0), ns.getOriginalOffset(0));
-		assertEquals("The original offset is computed wrong", new Integer(4), ns.getOriginalOffset(1));
-		assertEquals("The original offset is computed wrong", new Integer(6), ns.getOriginalOffset(2));
-		assertEquals("The original offset is computed wrong", new Integer(16), ns.getOriginalOffset(9));
-		assertEquals("The original offset is computed wrong", new Integer(82), ns.getOriginalOffset(62));
+		assertEquals("The original offset is computed wrong", Integer.valueOf(0), ns.getOriginalOffset(0));
+		assertEquals("The original offset is computed wrong", Integer.valueOf(4), ns.getOriginalOffset(1));
+		assertEquals("The original offset is computed wrong", Integer.valueOf(6), ns.getOriginalOffset(2));
+		assertEquals("The original offset is computed wrong", Integer.valueOf(16), ns.getOriginalOffset(9));
+		assertEquals("The original offset is computed wrong", Integer.valueOf(82), ns.getOriginalOffset(62));
 		assertNull("There are more offset mappings than should be", ns.getOffsetMap().get(66));
 	}
 
@@ -84,8 +84,8 @@ public void testNormalizedOffsetsTransliterate() {
 		assertEquals("Transliteration wasn't done correctly",
 				"each node either a sensor or a beacon is noted as nodep, p ∈ 𝕊 ∪ 𝔹, and vector vp is used to represent the coordinate of nodep. beacons are placed onto the map with fixed coordinates vj, where j ∈ 𝔹. we assume that each beacon is aware of its own absolute location.",
 				ns.string);
-		assertEquals(new Integer(83), ns.getOriginalOffset(82));
-		assertEquals(new Integer(188), ns.getOriginalOffset(186));
+		assertEquals(Integer.valueOf(83), ns.getOriginalOffset(82));
+		assertEquals(Integer.valueOf(188), ns.getOriginalOffset(186));
 	}
 
 	@Test
@@ -114,29 +114,29 @@ public void testNormalizeWithTokenizer() {
 		ns = StringNormalizerForChunking.normalizeString(str, tokenizerFactory);
 		assertEquals("Normalization was wrong: ",
 				"We saw Parkinson Diseas and S(H)P 1 in a sadli-form circumvent of applic.", ns.string);
-		assertEquals("Offset wrong: ", new Integer(0), ns.getOriginalOffset(new Integer(0)));
-		assertEquals("Offset wrong: ", new Integer(16), ns.getOriginalOffset(new Integer(16)));
-		assertEquals("Offset wrong: ", new Integer(19), ns.getOriginalOffset(new Integer(17)));
-		assertEquals("Offset wrong: ", new Integer(26), ns.getOriginalOffset(new Integer(23)));
-		assertEquals("Offset wrong: ", new Integer(49), ns.getOriginalOffset(new Integer(46)));
-		assertEquals("Offset wrong: ", new Integer(50), ns.getOriginalOffset(new Integer(47)));
-		assertEquals("Offset wrong: ", new Integer(56), ns.getOriginalOffset(new Integer(51)));
+		assertEquals("Offset wrong: ", Integer.valueOf(0), ns.getOriginalOffset(Integer.valueOf(0)));
+		assertEquals("Offset wrong: ", Integer.valueOf(16), ns.getOriginalOffset(Integer.valueOf(16)));
+		assertEquals("Offset wrong: ", Integer.valueOf(19), ns.getOriginalOffset(Integer.valueOf(17)));
+		assertEquals("Offset wrong: ", Integer.valueOf(26), ns.getOriginalOffset(Integer.valueOf(23)));
+		assertEquals("Offset wrong: ", Integer.valueOf(49), ns.getOriginalOffset(Integer.valueOf(46)));
+		assertEquals("Offset wrong: ", Integer.valueOf(50), ns.getOriginalOffset(Integer.valueOf(47)));
+		assertEquals("Offset wrong: ", Integer.valueOf(56), ns.getOriginalOffset(Integer.valueOf(51)));
 		str = "We go to James' to have some coffee'ses.";
 		ns = StringNormalizerForChunking.normalizeString(str, tokenizerFactory);
 		assertEquals("Normalization was wrong: ", "We go to Jame' to have some coffe'se.", ns.string);
-		assertEquals("Offset wrong: ", new Integer(0), ns.getOriginalOffset(new Integer(0)));
-		assertEquals("Offset wrong: ", new Integer(9), ns.getOriginalOffset(new Integer(9)));
-		assertEquals("Offset wrong: ", new Integer(14), ns.getOriginalOffset(new Integer(13)));
-		assertEquals("Offset wrong: ", new Integer(35), ns.getOriginalOffset(new Integer(33)));
+		assertEquals("Offset wrong: ", Integer.valueOf(0), ns.getOriginalOffset(Integer.valueOf(0)));
+		assertEquals("Offset wrong: ", Integer.valueOf(9), ns.getOriginalOffset(Integer.valueOf(9)));
+		assertEquals("Offset wrong: ", Integer.valueOf(14), ns.getOriginalOffset(Integer.valueOf(13)));
+		assertEquals("Offset wrong: ", Integer.valueOf(35), ns.getOriginalOffset(Integer.valueOf(33)));
 		str = "We have some 'serious things' to talk about.";
 		ns = StringNormalizerForChunking.normalizeString(str, tokenizerFactory);
 		assertEquals("Normalization was wrong: ", "We have some 'seriou thing' to talk about.", ns.string);
-		assertEquals("Offset wrong: ", new Integer(0), ns.getOriginalOffset(new Integer(0)));
-		assertEquals("Offset wrong: ", new Integer(12), ns.getOriginalOffset(new Integer(12)));
-		assertEquals("Offset wrong: ", new Integer(13), ns.getOriginalOffset(new Integer(13)));
-		assertEquals("Offset wrong: ", new Integer(28), ns.getOriginalOffset(new Integer(26)));
-		assertEquals("Offset wrong: ", new Integer(29), ns.getOriginalOffset(new Integer(27)));
-		assertEquals("Offset wrong: ", new Integer(30), ns.getOriginalOffset(new Integer(28)));
+		assertEquals("Offset wrong: ", Integer.valueOf(0), ns.getOriginalOffset(Integer.valueOf(0)));
+		assertEquals("Offset wrong: ", Integer.valueOf(12), ns.getOriginalOffset(Integer.valueOf(12)));
+		assertEquals("Offset wrong: ", Integer.valueOf(13), ns.getOriginalOffset(Integer.valueOf(13)));
+		assertEquals("Offset wrong: ", Integer.valueOf(28), ns.getOriginalOffset(Integer.valueOf(26)));
+		assertEquals("Offset wrong: ", Integer.valueOf(29), ns.getOriginalOffset(Integer.valueOf(27)));
+		assertEquals("Offset wrong: ", Integer.valueOf(30), ns.getOriginalOffset(Integer.valueOf(28)));
 
 		str = "test dosing unit KLRg1 killer cell lectin like receptor G2 Parkinson's Disease";
 		ns = StringNormalizerForChunking.normalizeString(str, tokenizerFactory);
@@ -144,6 +144,15 @@ public void testNormalizeWithTokenizer() {
 
 	}
 
+	@Test
+	public void testNewlines() { // NOTE(review): smoke test only, no assertion is made
+		String str = "Clinical Features and Course of Patients with Peripheral Exudative Hemorrhagic Chorioretinopathy.\n" +
+				"To evaluate the clinical characteristics of patients who were followed in our clinic with the diagnosis of peripheral exudative hemorrhagic chorioretinopathy (PEHC).\n" +
+				"Medical records of 12 patients who were diagnosed with PEHC in İstanbul University İstanbul Faculty of Medicine, Department of Ophthalmology between July 2006 and June 2014 were reviewed retrospectively.";
+		NormalizedString normalizedString = StringNormalizerForChunking.normalizeString(str, new IndoEuropeanTokenizerFactory(), Transliterator.getInstance("NFD; [:Nonspacing Mark:] Remove; NFC; Lower"));
+		System.out.println(normalizedString.getOffsetMap()); // prints the normalized -> original offset map
+	}
+
 	@Test
 	@Ignore
 	/**
diff --git a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java
index 2266b3e4b..612e8c094 100644
--- a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java
+++ b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java
@@ -18,6 +18,7 @@
 import com.aliasi.chunk.Chunk;
 import com.aliasi.chunk.ChunkFactory;
 import de.julielab.jcore.ae.lingpipegazetteer.chunking.ChunkerProviderImplAlt;
+import de.julielab.jcore.ae.lingpipegazetteer.chunking.ConfigurableChunkerProviderImplAlt;
 import de.julielab.jcore.ae.lingpipegazetteer.chunking.OverlappingChunk;
 import de.julielab.jcore.types.*;
 import junit.framework.TestCase;
@@ -49,6 +50,8 @@
 import java.util.List;
 import java.util.Set;
 
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static org.assertj.core.api.Assertions.assertThat;
 public class GazetteerAnnotatorTest extends TestCase {
 
 	private static final Logger LOGGER = LoggerFactory.getLogger(GazetteerAnnotatorTest.class);
@@ -654,4 +657,37 @@ public void testReadCompressedDictionary() throws Exception {
 		assertEquals(1, counter);
 	}
 
+	@Test
+	public void testOffsetIssueWhenNoTransliteration() throws Exception { // text normalization on, transliteration off
+		ExternalResourceDescription extDesc = ExternalResourceFactory.createExternalResourceDescription(
+				ConfigurableChunkerProviderImplAlt.class, "file:src/test/resources/pehc.dict", ConfigurableChunkerProviderImplAlt.PARAM_CASE_SENSITIVE, false, ConfigurableChunkerProviderImplAlt.PARAM_NORMALIZE_TEXT, true, ConfigurableChunkerProviderImplAlt.PARAM_TRANSLITERATE_TEXT, false, ConfigurableChunkerProviderImplAlt.PARAM_STOPWORD_FILE, "de/julielab/jcore/ae/lingpipegazetteer/stopwords/general_english_words", ConfigurableChunkerProviderImplAlt.PARAM_USE_APPROXIMATE_MATCHING, true, ConfigurableChunkerProviderImplAlt.PARAM_MAKE_VARIANTS, false);
+		TypeSystemDescription tsDesc = TypeSystemDescriptionFactory
+				.createTypeSystemDescription("de.julielab.jcore.types.jcore-semantics-mention-types");
+
+		AnalysisEngine annotator = AnalysisEngineFactory.createEngine(GazetteerAnnotator.class, tsDesc,
+				GazetteerAnnotator.PARAM_OUTPUT_TYPE, "de.julielab.jcore.types.EntityMention",
+				GazetteerAnnotator.CHUNKER_RESOURCE_NAME, extDesc);
+
+		JCas jCas = annotator.newJCas();
+
+		jCas.setDocumentText("Clinical Features and Course of Patients with Peripheral Exudative Hemorrhagic Chorioretinopathy.\nTo evaluate the clinical characteristics of patients who were followed in our clinic with the diagnosis of peripheral exudative hemorrhagic chorioretinopathy (PEHC).\nMedical records of 12 patients who were diagnosed with PEHC in İstanbul University İstanbul Faculty of Medicine, Department of Ophthalmology between July 2006 and June 2014 were reviewed retrospectively.\nThis study included 21 eyes of 12 patients. Four (33.3%) of the patients were male and 8 (66.7%) were female and ages ranged between 73 and 89 years. Eight (66.7%) of the patients were referred to us with the diagnosis of choroidal mass. Unilateral involvement was found in 3 and bilateral involvement in 9 patients. Temporal quadrants were involved in all eyes. Fifteen eyes (71.4%) had subretinal hemorrhage and hemorrhagic/serous retinal pigment epithelial detachment, 11 (52.4%) had lipid exudation, 5 (23.8%) had chronic retinal pigment epithelium alterations, 2 (9.5%) had subretinal fibrosis and 1 (4.8%) had vitreous hemorrhage. PEHC lesions were accompanied by drusen in 11 eyes (52.4%), geographic atrophy in 2 eyes (9.5%), and choroidal neovascularization scar in 2 eyes (9.5%).");
+		annotator.process(jCas);
+
+		List<String> entityStrings = new ArrayList<>(); // covered text of every EntityMention, in index iteration order
+		for (EntityMention g : jCas.<EntityMention>getAnnotationIndex(EntityMention.type)) {
+			entityStrings.add(g.getCoveredText());
+		}
+		assertThat(entityStrings).containsExactly("PEHC", "PEHC", "PEHC", "lesions"); // a shifted offset would change the covered text
+	}
+
+	@Test
+	public void testEncoding() { // NOTE(review): prints sizes only, nothing is asserted
+		String s1 = "İ"; // presumably U+0130 (capital I with dot above) - TODO confirm
+		String s2 = "i̇"; // presumably "i" followed by a combining dot above - TODO confirm
+		System.out.println(s1.getBytes(UTF_8).length); // UTF-8 byte count of s1
+		System.out.println(s1.length()); // UTF-16 code unit count of s1
+		System.out.println(s2.getBytes(UTF_8).length); // UTF-8 byte count of s2
+		System.out.println(s2.length()); // UTF-16 code unit count of s2
+	}
+
 }
diff --git a/jcore-lingpipegazetteer-ae/src/test/resources/pehc.dict b/jcore-lingpipegazetteer-ae/src/test/resources/pehc.dict
new file mode 100644
index 000000000..79830708e
--- /dev/null
+++ b/jcore-lingpipegazetteer-ae/src/test/resources/pehc.dict
@@ -0,0 +1,2 @@
+PEHC	Gene
+lesions	Gene
\ No newline at end of file
diff --git a/jcore-lingpipegazetteer-ae/src/test/resources/unused/bio_text.xmi b/jcore-lingpipegazetteer-ae/src/test/resources/unused/bio_text.xmi
deleted file mode 100644
index 5e3993e5f..000000000
--- a/jcore-lingpipegazetteer-ae/src/test/resources/unused/bio_text.xmi
+++ /dev/null
@@ -1,3 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<xmi:XMI xmlns:cas="http:///uima/cas.ecore" xmlns:tcas="http:///uima/tcas.ecore" xmlns:xmi="http://www.omg.org/XMI"
-         xmi:version="2.0"><cas:NULL xmi:id="0"/><cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="KLRG1 binds cadherins and preferentially associates with SHIP-1.&#10;&#10;The killer cell lectin-like receptor G1 (KLRG1) is a unique inhibitory&#10;receptor expressed on a phenotypically mature subset of resting NK cells as&#10;well as subsets of T cells in naive mice. In vivo, pathogenic immune system&#10;activation induces dramatic changes in the expression patterns of KLRG1&#10;among the different cell subsets. In order to enhance our understanding of&#10;KLRG1 signaling properties and to clarify the functions of KLRG1 on these&#10;cells, we identified the broadly expressed N-cadherin molecule as a ligand&#10;for KLRG1. We further demonstrate that a second member of this superfamily&#10;of adhesion molecules, E-cadherin, binds to KLRG1. Additionally, we show&#10;that upon phosphorylation of the immunoreceptor tyrosine-based inhibitory&#10;motif (ITIM) tyrosine, KLRG1 recruits both SHIP-1 and SHP-2 but not SHP-1.&#10;We also delineate the key KLRG1 ITIM amino acid residues required for&#10;optimal association with these phosphatases. Finally, we demonstrate that&#10;KLRG1 engagement can inhibit sub-optimal TCR signaling. Taken together, our&#10;results indicate that KLRG1 may differentially regulate NK cell and T cell&#10;functions through the association with different ligands as well as the&#10;recruitment of distinct phosphatases.&#10;"/><tcas:DocumentAnnotation xmi:id="8" sofa="1" begin="0" end="1287" language="x-unspecified"/><cas:View sofa="1" members="8"/></xmi:XMI>
diff --git a/jcore-lingpipegazetteer-ae/src/test/resources/unused/tmp.txt b/jcore-lingpipegazetteer-ae/src/test/resources/unused/tmp.txt
deleted file mode 100644
index 93e1214e3..000000000
--- a/jcore-lingpipegazetteer-ae/src/test/resources/unused/tmp.txt
+++ /dev/null
@@ -1,4878 +0,0 @@
-0    [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testReadDictionary() - tests for errors when loading and initializing dictionary...
-375  [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-419  [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testReadDictionary() - building dictionary took: 0 secs
-2155 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - initialize() - initializing GazetteerAnnotator...
-2155 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-2321 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-2327 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2330 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2331 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2331 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2334 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2344 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testProcess() - testing process for EXACT matching (6 matches expected)...
-14407 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - initialize() - initializing GazetteerAnnotator...
-14416 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-14433 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-14434 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14435 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14436 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14437 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14440 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14466 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - process() - processing next document with GazetteerAnnotator...
-14594 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 41-46:KLRG1@1.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14595 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 289-294:KLRG1@1.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14596 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 428-433:KLRG1@1.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14597 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 526-561:KLRG2@1.0 chunk is not an abbreviation
-
-14605 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 563-568:KLRG2@1.0 chunk is an abbreviation and respective full form is EntityMention with same specificType
-
-14608 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 679-684:KLRG2@1.0 chunk is an abbreviation and respective full form is EntityMention with same specificType
-
-14609 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 741-787:ITIM@1.0 chunk is not an abbreviation
-
-14609 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 843-848:SHP-1@1.0 chunk is not an abbreviation
-
-14610 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14614 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation has no EntityMention
-
-14614 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation has no EntityMention
-
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation has no EntityMention
-
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14616 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - emAcroSpecType=KLRG2 == emFullformSpecType=KLRG2
-14616 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14617 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - emAcroSpecType=KLRG2 == emFullformSpecType=KLRG2
-14617 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: ITIM
-14626 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (ITIM [begin=789; end=793]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14630 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - 
-
-+++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++
-
-14631 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: killer cell lectin-like receptor G2
-EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14632 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 563
-   end: 568
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14640 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 679
-   end: 684
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14642 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: immunoreceptor tyrosine-based inhibitory motif
-EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14642 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: ITIM
-EntityMention
-   sofa: _InitialView
-   begin: 789
-   end: 793
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "ITIM"
-   head: <null>
-   mentionLevel: <null>
-
-14643 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: sHP-1
-EntityMention
-   sofa: _InitialView
-   begin: 843
-   end: 848
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "sHP-1"
-   head: <null>
-   mentionLevel: <null>
-
-14643 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testProcess() - testing process for APPROX matching (13 matches expected)...
-14828 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - initialize() - initializing GazetteerAnnotator...
-14828 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-14848 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-14849 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14849 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14849 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14850 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14851 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14859 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - process() - processing next document with GazetteerAnnotator...
-14958 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - all overlapping chunks:
-
-14958 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 563-569
-563-568:KLRG1@10.0	start=563 end=568 score=10.0
-563-569:KLRG2@100.0	start=563 end=569 score=100.0
-563-568:KLRG2@0.0	start=563 end=568 score=0.0
-562-568:KLRG2@100.0	start=562 end=568 score=100.0
-14958 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 563 - 568: 0.0
-14959 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 563-568:KLRG2@0.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14959 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 679-685
-679-684:KLRG1@10.0	start=679 end=684 score=10.0
-679-684:KLRG2@0.0	start=679 end=684 score=0.0
-679-685:KLRG2@100.0	start=679 end=685 score=100.0
-14959 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 679 - 684: 0.0
-14960 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 679-684:KLRG2@0.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14960 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 40-46
-41-46:KLRG1@0.0	start=41 end=46 score=0.0
-40-46:KLRG1@100.0	start=40 end=46 score=100.0
-41-47:KLRG1@100.0	start=41 end=47 score=100.0
-41-46:KLRG2@10.0	start=41 end=46 score=10.0
-14960 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 41 - 46: 0.0
-14963 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 41-46:KLRG1@0.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14963 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 526-561
-526-561:KLRG2@0.0	start=526 end=561 score=0.0
-14963 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 526 - 561: 0.0
-14964 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 526-561:KLRG2@0.0 chunk is not an abbreviation
-
-14964 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 741-787
-741-787:ITIM@0.0	start=741 end=787 score=0.0
-14964 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 741 - 787: 0.0
-14968 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 741-787:ITIM@0.0 chunk is not an abbreviation
-
-14968 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 4-39
-4-39:KLRG2@10.0	start=4 end=39 score=10.0
-14968 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 4 - 39: 10.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 4-39:KLRG2@10.0 chunk is not an abbreviation
-
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 829-834
-829-832:SHP-1@100.0	start=829 end=832 score=100.0
-829-834:SHP-1@10.0	start=829 end=834 score=10.0
-829-833:SHP-1@50.0	start=829 end=833 score=50.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 829 - 834: 10.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 829-834:SHP-1@10.0 chunk is not an abbreviation
-
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 843-848
-843-847:SHP-1@60.0	start=843 end=847 score=60.0
-843-848:SHP-1@10.0	start=843 end=848 score=10.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 843 - 848: 10.0
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 843-848:SHP-1@10.0 chunk is not an abbreviation
-
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 289-294
-289-294:KLRG2@10.0	start=289 end=294 score=10.0
-289-294:KLRG1@0.0	start=289 end=294 score=0.0
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 289 - 294: 0.0
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 289-294:KLRG1@0.0 chunk is an abbreviation but respective full form is EntityMention without same specificType
-
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 370-374
-370-374:KLRG2@100.0	start=370 end=374 score=100.0
-370-374:KLRG1@100.0	start=370 end=374 score=100.0
-14971 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 370 - 374: 100.0
-14973 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 370-374:KLRG2@100.0 chunk is not an abbreviation
-
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 818-824
-818-822:SHP-1@100.0	start=818 end=822 score=100.0
-818-824:SHP-1@100.0	start=818 end=824 score=100.0
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 818 - 824: 100.0
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 818-824:SHP-1@100.0 chunk is not an abbreviation
-
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 428-433
-428-433:KLRG1@0.0	start=428 end=433 score=0.0
-428-433:KLRG2@10.0	start=428 end=433 score=10.0
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 428 - 433: 0.0
-14983 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 428-433:KLRG1@0.0 chunk is an abbreviation but respective full form is EntityMention without same specificType
-
-14983 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14984 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG1 [begin=41; end=46]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14985 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14985 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG1 [begin=289; end=294]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14985 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14986 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG1 [begin=428; end=433]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14986 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14986 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG2 [begin=563; end=568]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14987 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14987 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG2 [begin=679; end=684]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14988 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: ITIM
-14989 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (ITIM [begin=789; end=793]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14990 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - 
-
-+++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++
-
-14990 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: killer cell lectin-like receptor G1
-EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14990 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG1
-EntityMention
-   sofa: _InitialView
-   begin: 41
-   end: 46
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG1"
-   head: <null>
-   mentionLevel: <null>
-
-14991 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG1
-EntityMention
-   sofa: _InitialView
-   begin: 289
-   end: 294
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG1"
-   head: <null>
-   mentionLevel: <null>
-
-14991 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG
-EntityMention
-   sofa: _InitialView
-   begin: 370
-   end: 374
-   confidence: "100.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG"
-   head: <null>
-   mentionLevel: <null>
-
-14991 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG1
-EntityMention
-   sofa: _InitialView
-   begin: 428
-   end: 433
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG1"
-   head: <null>
-   mentionLevel: <null>
-
-14992 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: killer cell lectin-like receptor G2
-EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14992 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 563
-   end: 568
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14992 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 679
-   end: 684
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14993 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: immunoreceptor tyrosine-based inhibitory motif
-EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14994 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: ITIM
-EntityMention
-   sofa: _InitialView
-   begin: 789
-   end: 793
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "ITIM"
-   head: <null>
-   mentionLevel: <null>
-
-14994 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: SHIP-1
-EntityMention
-   sofa: _InitialView
-   begin: 818
-   end: 824
-   confidence: "100.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "SHIP-1"
-   head: <null>
-   mentionLevel: <null>
-0    [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testReadDictionary() - tests for errors when loading and initializing dictionary...
-375  [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-419  [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testReadDictionary() - building dictionary took: 0 secs
-2155 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - initialize() - initializing GazetteerAnnotator...
-2155 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-2321 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-2327 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2330 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2331 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2331 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2334 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2344 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testProcess() - testing process for EXACT matching (6 matches expected)...
-14407 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - initialize() - initializing GazetteerAnnotator...
-14416 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-14433 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-14434 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14435 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14436 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14437 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14440 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14466 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - process() - processing next document with GazetteerAnnotator...
-14594 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 41-46:KLRG1@1.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14595 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 289-294:KLRG1@1.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14596 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 428-433:KLRG1@1.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14597 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 526-561:KLRG2@1.0 chunk is not an abbreviation
-
-14605 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 563-568:KLRG2@1.0 chunk is an abbreviation and respective full form is EntityMention with same specificType
-
-14608 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 679-684:KLRG2@1.0 chunk is an abbreviation and respective full form is EntityMention with same specificType
-
-14609 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 741-787:ITIM@1.0 chunk is not an abbreviation
-
-14609 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 843-848:SHP-1@1.0 chunk is not an abbreviation
-
-14610 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14614 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation has no EntityMention
-
-14614 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation has no EntityMention
-
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation has no EntityMention
-
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14616 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - emAcroSpecType=KLRG2 == emFullformSpecType=KLRG2
-14616 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14617 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - emAcroSpecType=KLRG2 == emFullformSpecType=KLRG2
-14617 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: ITIM
-14626 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (ITIM [begin=789; end=793]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14630 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - 
-
-+++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++
-
-14631 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: killer cell lectin-like receptor G2
-EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14632 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 563
-   end: 568
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14640 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 679
-   end: 684
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14642 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: immunoreceptor tyrosine-based inhibitory motif
-EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14642 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: ITIM
-EntityMention
-   sofa: _InitialView
-   begin: 789
-   end: 793
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "ITIM"
-   head: <null>
-   mentionLevel: <null>
-
-14643 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: sHP-1
-EntityMention
-   sofa: _InitialView
-   begin: 843
-   end: 848
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "sHP-1"
-   head: <null>
-   mentionLevel: <null>
-
-14643 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testProcess() - testing process for APPROX matching (13 matches expected)...
-14828 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - initialize() - initializing GazetteerAnnotator...
-14828 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-14848 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-14849 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14849 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14849 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14850 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14851 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14859 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - process() - processing next document with GazetteerAnnotator...
-14958 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - all overlapping chunks:
-
-14958 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 563-569
-563-568:KLRG1@10.0	start=563 end=568 score=10.0
-563-569:KLRG2@100.0	start=563 end=569 score=100.0
-563-568:KLRG2@0.0	start=563 end=568 score=0.0
-562-568:KLRG2@100.0	start=562 end=568 score=100.0
-14958 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 563 - 568: 0.0
-14959 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 563-568:KLRG2@0.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14959 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 679-685
-679-684:KLRG1@10.0	start=679 end=684 score=10.0
-679-684:KLRG2@0.0	start=679 end=684 score=0.0
-679-685:KLRG2@100.0	start=679 end=685 score=100.0
-14959 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 679 - 684: 0.0
-14960 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 679-684:KLRG2@0.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14960 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 40-46
-41-46:KLRG1@0.0	start=41 end=46 score=0.0
-40-46:KLRG1@100.0	start=40 end=46 score=100.0
-41-47:KLRG1@100.0	start=41 end=47 score=100.0
-41-46:KLRG2@10.0	start=41 end=46 score=10.0
-14960 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 41 - 46: 0.0
-14963 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 41-46:KLRG1@0.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14963 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 526-561
-526-561:KLRG2@0.0	start=526 end=561 score=0.0
-14963 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 526 - 561: 0.0
-14964 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 526-561:KLRG2@0.0 chunk is not an abbreviation
-
-14964 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 741-787
-741-787:ITIM@0.0	start=741 end=787 score=0.0
-14964 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 741 - 787: 0.0
-14968 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 741-787:ITIM@0.0 chunk is not an abbreviation
-
-14968 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 4-39
-4-39:KLRG2@10.0	start=4 end=39 score=10.0
-14968 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 4 - 39: 10.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 4-39:KLRG2@10.0 chunk is not an abbreviation
-
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 829-834
-829-832:SHP-1@100.0	start=829 end=832 score=100.0
-829-834:SHP-1@10.0	start=829 end=834 score=10.0
-829-833:SHP-1@50.0	start=829 end=833 score=50.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 829 - 834: 10.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 829-834:SHP-1@10.0 chunk is not an abbreviation
-
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 843-848
-843-847:SHP-1@60.0	start=843 end=847 score=60.0
-843-848:SHP-1@10.0	start=843 end=848 score=10.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 843 - 848: 10.0
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 843-848:SHP-1@10.0 chunk is not an abbreviation
-
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 289-294
-289-294:KLRG2@10.0	start=289 end=294 score=10.0
-289-294:KLRG1@0.0	start=289 end=294 score=0.0
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 289 - 294: 0.0
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 289-294:KLRG1@0.0 chunk is an abbreviation but respective full form is EntityMention without same specificType
-
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 370-374
-370-374:KLRG2@100.0	start=370 end=374 score=100.0
-370-374:KLRG1@100.0	start=370 end=374 score=100.0
-14971 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 370 - 374: 100.0
-14973 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 370-374:KLRG2@100.0 chunk is not an abbreviation
-
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 818-824
-818-822:SHP-1@100.0	start=818 end=822 score=100.0
-818-824:SHP-1@100.0	start=818 end=824 score=100.0
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 818 - 824: 100.0
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 818-824:SHP-1@100.0 chunk is not an abbreviation
-
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 428-433
-428-433:KLRG1@0.0	start=428 end=433 score=0.0
-428-433:KLRG2@10.0	start=428 end=433 score=10.0
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 428 - 433: 0.0
-14983 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 428-433:KLRG1@0.0 chunk is an abbreviation but respective full form is EntityMention without same specificType
-
-14983 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14984 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG1 [begin=41; end=46]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14985 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14985 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG1 [begin=289; end=294]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14985 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14986 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG1 [begin=428; end=433]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14986 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14986 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG2 [begin=563; end=568]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14987 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14987 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG2 [begin=679; end=684]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14988 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: ITIM
-14989 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (ITIM [begin=789; end=793]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14990 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - 
-
-+++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++
-
-14990 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: killer cell lectin-like receptor G1
-EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14990 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG1
-EntityMention
-   sofa: _InitialView
-   begin: 41
-   end: 46
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG1"
-   head: <null>
-   mentionLevel: <null>
-
-14991 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG1
-EntityMention
-   sofa: _InitialView
-   begin: 289
-   end: 294
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG1"
-   head: <null>
-   mentionLevel: <null>
-
-14991 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG
-EntityMention
-   sofa: _InitialView
-   begin: 370
-   end: 374
-   confidence: "100.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG"
-   head: <null>
-   mentionLevel: <null>
-
-14991 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG1
-EntityMention
-   sofa: _InitialView
-   begin: 428
-   end: 433
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG1"
-   head: <null>
-   mentionLevel: <null>0    [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testReadDictionary() - tests for errors when loading and initializing dictionary...
-375  [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-419  [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testReadDictionary() - building dictionary took: 0 secs
-2155 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - initialize() - initializing GazetteerAnnotator...
-2155 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-2321 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-2327 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2330 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2331 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2331 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2334 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2344 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testProcess() - testing process for EXACT matching (6 matches expected)...
-14407 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - initialize() - initializing GazetteerAnnotator...
-14416 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-14433 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-14434 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14435 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14436 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14437 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14440 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14466 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - process() - processing next document with GazetteerAnnotator...
-14594 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 41-46:KLRG1@1.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14595 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 289-294:KLRG1@1.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14596 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 428-433:KLRG1@1.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14597 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 526-561:KLRG2@1.0 chunk is not an abbreviation
-
-14605 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 563-568:KLRG2@1.0 chunk is an abbreviation and respective full form is EntityMention with same specificType
-
-14608 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 679-684:KLRG2@1.0 chunk is an abbreviation and respective full form is EntityMention with same specificType
-
-14609 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 741-787:ITIM@1.0 chunk is not an abbreviation
-
-14609 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 843-848:SHP-1@1.0 chunk is not an abbreviation
-
-14610 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14614 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation has no EntityMention
-
-14614 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation has no EntityMention
-
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation has no EntityMention
-
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14616 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - emAcroSpecType=KLRG2 == emFullformSpecType=KLRG2
-14616 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14617 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - emAcroSpecType=KLRG2 == emFullformSpecType=KLRG2
-14617 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: ITIM
-14626 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (ITIM [begin=789; end=793]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14630 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - 
-
-+++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++
-
-14631 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: killer cell lectin-like receptor G2
-EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14632 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 563
-   end: 568
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14640 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 679
-   end: 684
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14642 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: immunoreceptor tyrosine-based inhibitory motif
-EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14642 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: ITIM
-EntityMention
-   sofa: _InitialView
-   begin: 789
-   end: 793
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "ITIM"
-   head: <null>
-   mentionLevel: <null>
-
-14643 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: sHP-1
-EntityMention
-   sofa: _InitialView
-   begin: 843
-   end: 848
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "sHP-1"
-   head: <null>
-   mentionLevel: <null>
-
-14643 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testProcess() - testing process for APPROX matching (13 matches expected)...
-14828 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - initialize() - initializing GazetteerAnnotator...
-14828 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-14848 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-14849 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14849 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14849 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14850 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14851 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14859 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - process() - processing next document with GazetteerAnnotator...
-14958 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - all overlapping chunks:
-
-14958 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 563-569
-563-568:KLRG1@10.0	start=563 end=568 score=10.0
-563-569:KLRG2@100.0	start=563 end=569 score=100.0
-563-568:KLRG2@0.0	start=563 end=568 score=0.0
-562-568:KLRG2@100.0	start=562 end=568 score=100.0
-14958 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 563 - 568: 0.0
-14959 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 563-568:KLRG2@0.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14959 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 679-685
-679-684:KLRG1@10.0	start=679 end=684 score=10.0
-679-684:KLRG2@0.0	start=679 end=684 score=0.0
-679-685:KLRG2@100.0	start=679 end=685 score=100.0
-14959 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 679 - 684: 0.0
-14960 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 679-684:KLRG2@0.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14960 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 40-46
-41-46:KLRG1@0.0	start=41 end=46 score=0.0
-40-46:KLRG1@100.0	start=40 end=46 score=100.0
-41-47:KLRG1@100.0	start=41 end=47 score=100.0
-41-46:KLRG2@10.0	start=41 end=46 score=10.0
-14960 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 41 - 46: 0.0
-14963 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 41-46:KLRG1@0.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14963 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 526-561
-526-561:KLRG2@0.0	start=526 end=561 score=0.0
-14963 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 526 - 561: 0.0
-14964 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 526-561:KLRG2@0.0 chunk is not an abbreviation
-
-14964 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 741-787
-741-787:ITIM@0.0	start=741 end=787 score=0.0
-14964 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 741 - 787: 0.0
-14968 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 741-787:ITIM@0.0 chunk is not an abbreviation
-
-14968 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 4-39
-4-39:KLRG2@10.0	start=4 end=39 score=10.0
-14968 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 4 - 39: 10.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 4-39:KLRG2@10.0 chunk is not an abbreviation
-
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 829-834
-829-832:SHP-1@100.0	start=829 end=832 score=100.0
-829-834:SHP-1@10.0	start=829 end=834 score=10.0
-829-833:SHP-1@50.0	start=829 end=833 score=50.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 829 - 834: 10.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 829-834:SHP-1@10.0 chunk is not an abbreviation
-
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 843-848
-843-847:SHP-1@60.0	start=843 end=847 score=60.0
-843-848:SHP-1@10.0	start=843 end=848 score=10.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 843 - 848: 10.0
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 843-848:SHP-1@10.0 chunk is not an abbreviation
-
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 289-294
-289-294:KLRG2@10.0	start=289 end=294 score=10.0
-289-294:KLRG1@0.0	start=289 end=294 score=0.0
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 289 - 294: 0.0
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 289-294:KLRG1@0.0 chunk is an abbreviation but respective full form is EntityMention without same specificType
-
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 370-374
-370-374:KLRG2@100.0	start=370 end=374 score=100.0
-370-374:KLRG1@100.0	start=370 end=374 score=100.0
-14971 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 370 - 374: 100.0
-14973 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 370-374:KLRG2@100.0 chunk is not an abbreviation
-
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 818-824
-818-822:SHP-1@100.0	start=818 end=822 score=100.0
-818-824:SHP-1@100.0	start=818 end=824 score=100.0
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 818 - 824: 100.0
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 818-824:SHP-1@100.0 chunk is not an abbreviation
-
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 428-433
-428-433:KLRG1@0.0	start=428 end=433 score=0.0
-428-433:KLRG2@10.0	start=428 end=433 score=10.0
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 428 - 433: 0.0
-14983 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 428-433:KLRG1@0.0 chunk is an abbreviation but respective full form is EntityMention without same specificType
-
-14983 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14984 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG1 [begin=41; end=46]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 40    [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testReadDictionary() - tests for errors when loading and initializing dictionary...
-375  [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-419  [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testReadDictionary() - building dictionary took: 0 secs
-2155 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - initialize() - initializing GazetteerAnnotator...
-2155 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-2321 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-2327 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2330 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2331 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2331 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2334 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2344 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testProcess() - testing process for EXACT matching (6 matches expected)...
-14407 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - initialize() - initializing GazetteerAnnotator...
-14416 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-14433 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-14434 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14435 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14436 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14437 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14440 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14466 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - process() - processing next document with GazetteerAnnotator...
-14594 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 41-46:KLRG1@1.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14595 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 289-294:KLRG1@1.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14596 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 428-433:KLRG1@1.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14597 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 526-561:KLRG2@1.0 chunk is not an abbreviation
-
-14605 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 563-568:KLRG2@1.0 chunk is an abbreviation and respective full form is EntityMention with same specificType
-
-14608 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 679-684:KLRG2@1.0 chunk is an abbreviation and respective full form is EntityMention with same specificType
-
-14609 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 741-787:ITIM@1.0 chunk is not an abbreviation
-
-14609 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 843-848:SHP-1@1.0 chunk is not an abbreviation
-
-14610 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14614 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation has no EntityMention
-
-14614 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation has no EntityMention
-
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation has no EntityMention
-
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14616 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - emAcroSpecType=KLRG2 == emFullformSpecType=KLRG2
-14616 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14617 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - emAcroSpecType=KLRG2 == emFullformSpecType=KLRG2
-14617 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: ITIM
-14626 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (ITIM [begin=789; end=793]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14630 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - 
-
-+++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++
-
-14631 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: killer cell lectin-like receptor G2
-EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14632 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 563
-   end: 568
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14640 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 679
-   end: 684
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14642 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: immunoreceptor tyrosine-based inhibitory motif
-EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14642 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: ITIM
-EntityMention
-   sofa: _InitialView
-   begin: 789
-   end: 793
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "ITIM"
-   head: <null>
-   mentionLevel: <null>
-
-14643 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: sHP-1
-EntityMention
-   sofa: _InitialView
-   begin: 843
-   end: 848
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "sHP-1"
-   head: <null>
-   mentionLevel: <null>
-
-14643 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testProcess() - testing process for APPROX matching (13 matches expected)...
-14828 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - initialize() - initializing GazetteerAnnotator...
-14828 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-14848 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-14849 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14849 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14849 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14850 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14851 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14859 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - process() - processing next document with GazetteerAnnotator...
-14958 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - all overlapping chunks:
-
-14958 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 563-569
-563-568:KLRG1@10.0	start=563 end=568 score=10.0
-563-569:KLRG2@100.0	start=563 end=569 score=100.0
-563-568:KLRG2@0.0	start=563 end=568 score=0.0
-562-568:KLRG2@100.0	start=562 end=568 score=100.0
-14958 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 563 - 568: 0.0
-14959 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 563-568:KLRG2@0.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14959 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 679-685
-679-684:KLRG1@10.0	start=679 end=684 score=10.0
-679-684:KLRG2@0.0	start=679 end=684 score=0.0
-679-685:KLRG2@100.0	start=679 end=685 score=100.0
-14959 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 679 - 684: 0.0
-14960 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 679-684:KLRG2@0.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14960 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 40-46
-41-46:KLRG1@0.0	start=41 end=46 score=0.0
-40-46:KLRG1@100.0	start=40 end=46 score=100.0
-41-47:KLRG1@100.0	start=41 end=47 score=100.0
-41-46:KLRG2@10.0	start=41 end=46 score=10.0
-14960 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 41 - 46: 0.0
-14963 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 41-46:KLRG1@0.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14963 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 526-561
-526-561:KLRG2@0.0	start=526 end=561 score=0.0
-14963 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 526 - 561: 0.0
-14964 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 526-561:KLRG2@0.0 chunk is not an abbreviation
-
-14964 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 741-787
-741-787:ITIM@0.0	start=741 end=787 score=0.0
-14964 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 741 - 787: 0.0
-14968 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 741-787:ITIM@0.0 chunk is not an abbreviation
-
-14968 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 4-39
-4-39:KLRG2@10.0	start=4 end=39 score=10.0
-14968 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 4 - 39: 10.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 4-39:KLRG2@10.0 chunk is not an abbreviation
-
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 829-834
-829-832:SHP-1@100.0	start=829 end=832 score=100.0
-829-834:SHP-1@10.0	start=829 end=834 score=10.0
-829-833:SHP-1@50.0	start=829 end=833 score=50.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 829 - 834: 10.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 829-834:SHP-1@10.0 chunk is not an abbreviation
-
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 843-848
-843-847:SHP-1@60.0	start=843 end=847 score=60.0
-843-848:SHP-1@10.0	start=843 end=848 score=10.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 843 - 848: 10.0
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 843-848:SHP-1@10.0 chunk is not an abbreviation
-
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 289-294
-289-294:KLRG2@10.0	start=289 end=294 score=10.0
-289-294:KLRG1@0.0	start=289 end=294 score=0.0
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 289 - 294: 0.0
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 289-294:KLRG1@0.0 chunk is an abbreviation but respective full form is EntityMention without same specificType
-
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 370-374
-370-374:KLRG2@100.0	start=370 end=374 score=100.0
-370-374:KLRG1@100.0	start=370 end=374 score=100.0
-14971 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 370 - 374: 100.0
-14973 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 370-374:KLRG2@100.0 chunk is not an abbreviation
-
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 818-824
-818-822:SHP-1@100.0	start=818 end=822 score=100.0
-818-824:SHP-1@100.0	start=818 end=824 score=100.0
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 818 - 824: 100.0
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 818-824:SHP-1@100.0 chunk is not an abbreviation
-
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 428-433
-428-433:KLRG1@0.0	start=428 end=433 score=0.0
-428-433:KLRG2@10.0	start=428 end=433 score=10.0
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 428 - 433: 0.0
-14983 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 428-433:KLRG1@0.0 chunk is an abbreviation but respective full form is EntityMention without same specificType
-
-14983 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14984 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG1 [begin=41; end=46]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14985 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14985 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG1 [begin=289; end=294]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14985 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14986 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG1 [begin=428; end=433]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14986 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14986 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG2 [begin=563; end=568]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14987 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14987 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG2 [begin=679; end=684]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14988 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: ITIM
-14989 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (ITIM [begin=789; end=793]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14990 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - 
-
-+++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++
-
-14990 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: killer cell lectin-like receptor G1
-EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14990 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG1
-EntityMention
-   sofa: _InitialView
-   begin: 41
-   end: 46
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG1"
-   head: <null>
-   mentionLevel: <null>
-
-14991 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG1
-EntityMention
-   sofa: _InitialView
-   begin: 289
-   end: 294
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG1"
-   head: <null>
-   mentionLevel: <null>
-
-14991 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG
-EntityMention
-   sofa: _InitialView
-   begin: 370
-   end: 374
-   confidence: "100.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG"
-   head: <null>
-   mentionLevel: <null>
-
-14991 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG1
-EntityMention
-   sofa: _InitialView
-   begin: 428
-   end: 433
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG1"
-   head: <null>
-   mentionLevel: <null>
-
-14992 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: killer cell lectin-like receptor G2
-EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14992 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 563
-   end: 568
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14992 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 679
-   end: 684
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14993 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: immunoreceptor tyrosine-based inhibitory motif
-EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14994 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: ITIM
-EntityMention
-   sofa: _InitialView
-   begin: 789
-   end: 793
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "ITIM"
-   head: <null>
-   mentionLevel: <null>
-
-14994 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: SHIP-1
-EntityMention
-   sofa: _InitialView
-   begin: 818
-   end: 824
-   confidence: "100.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerA0    [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testReadDictionary() - tests for errors when loading and initializing dictionary...
-375  [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-419  [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testReadDictionary() - building dictionary took: 0 secs
-2155 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - initialize() - initializing GazetteerAnnotator...
-2155 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-2321 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-2327 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2330 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2331 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2331 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2334 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2344 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testProcess() - testing process for EXACT matching (6 matches expected)...
-14407 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - initialize() - initializing GazetteerAnnotator...
-14416 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-14433 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-14434 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14435 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14436 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14437 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14440 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14466 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - process() - processing next document with GazetteerAnnotator...
-14594 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 41-46:KLRG1@1.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14595 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 289-294:KLRG1@1.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14596 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 428-433:KLRG1@1.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14597 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 526-561:KLRG2@1.0 chunk is not an abbreviation
-
-14605 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 563-568:KLRG2@1.0 chunk is an abbreviation and respective full form is EntityMention with same specificType
-
-14608 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 679-684:KLRG2@1.0 chunk is an abbreviation and respective full form is EntityMention with same specificType
-
-14609 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 741-787:ITIM@1.0 chunk is not an abbreviation
-
-14609 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 843-848:SHP-1@1.0 chunk is not an abbreviation
-
-14610 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14614 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation has no EntityMention
-
-14614 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation has no EntityMention
-
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation has no EntityMention
-
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14616 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - emAcroSpecType=KLRG2 == emFullformSpecType=KLRG2
-14616 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14617 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - emAcroSpecType=KLRG2 == emFullformSpecType=KLRG2
-14617 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: ITIM
-14626 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (ITIM [begin=789; end=793]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14630 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - 
-
-+++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++
-
-14631 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: killer cell lectin-like receptor G2
-EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14632 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 563
-   end: 568
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14640 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 679
-   end: 684
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14642 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: immunoreceptor tyrosine-based inhibitory motif
-EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14642 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: ITIM
-EntityMention
-   sofa: _InitialView
-   begin: 789
-   end: 793
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "ITIM"
-   head: <null>
-   mentionLevel: <null>
-
-14643 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: sHP-1
-EntityMention
-   sofa: _InitialView
-   begin: 843
-   end: 848
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "sHP-1"
-   head: <null>
-   mentionLevel: <null>
-
-14643 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testProcess() - testing process for APPROX matching (13 matches expected)...
-14828 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - initialize() - initializing GazetteerAnnotator...
-14828 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-14848 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-14849 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14849 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14849 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14850 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14851 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14859 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - process() - processing next document with GazetteerAnnotator...
-14958 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - all overlapping chunks:
-
-14958 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 563-569
-563-568:KLRG1@10.0	start=563 end=568 score=10.0
-563-569:KLRG2@100.0	start=563 end=569 score=100.0
-563-568:KLRG2@0.0	start=563 end=568 score=0.0
-562-568:KLRG2@100.0	start=562 end=568 score=100.0
-14958 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 563 - 568: 0.0
-14959 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 563-568:KLRG2@0.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14959 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 679-685
-679-684:KLRG1@10.0	start=679 end=684 score=10.0
-679-684:KLRG2@0.0	start=679 end=684 score=0.0
-679-685:KLRG2@100.0	start=679 end=685 score=100.0
-14959 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 679 - 684: 0.0
-14960 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 679-684:KLRG2@0.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14960 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 40-46
-41-46:KLRG1@0.0	start=41 end=46 score=0.0
-40-46:KLRG1@100.0	start=40 end=46 score=100.0
-41-47:KLRG1@100.0	start=41 end=47 score=100.0
-41-46:KLRG2@10.0	start=41 end=46 score=10.0
-14960 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 41 - 46: 0.0
-14963 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 41-46:KLRG1@0.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14963 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 526-561
-526-561:KLRG2@0.0	start=526 end=561 score=0.0
-14963 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 526 - 561: 0.0
-14964 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 526-561:KLRG2@0.0 chunk is not an abbreviation
-
-14964 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 741-787
-741-787:ITIM@0.0	start=741 end=787 score=0.0
-14964 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 741 - 787: 0.0
-14968 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 741-787:ITIM@0.0 chunk is not an abbreviation
-
-14968 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 4-39
-4-39:KLRG2@10.0	start=4 end=39 score=10.0
-14968 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 4 - 39: 10.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 4-39:KLRG2@10.0 chunk is not an abbreviation
-
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 829-834
-829-832:SHP-1@100.0	start=829 end=832 score=100.0
-829-834:SHP-1@10.0	start=829 end=834 score=10.0
-829-833:SHP-1@50.0	start=829 end=833 score=50.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 829 - 834: 10.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 829-834:SHP-1@10.0 chunk is not an abbreviation
-
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 843-848
-843-847:SHP-1@60.0	start=843 end=847 score=60.0
-843-848:SHP-1@10.0	start=843 end=848 score=10.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 843 - 848: 10.0
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 843-848:SHP-1@10.0 chunk is not an abbreviation
-
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 289-294
-289-294:KLRG2@10.0	start=289 end=294 score=10.0
-289-294:KLRG1@0.0	start=289 end=294 score=0.0
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 289 - 294: 0.0
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 289-294:KLRG1@0.0 chunk is an abbreviation but respective full form is EntityMention without same specificType
-
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 370-374
-370-374:KLRG2@100.0	start=370 end=374 score=100.0
-370-374:KLRG1@100.0	start=370 end=374 score=100.0
-14971 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 370 - 374: 100.0
-14973 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 370-374:KLRG2@100.0 chunk is not an abbreviation
-
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 818-824
-818-822:SHP-1@100.0	start=818 end=822 score=100.0
-818-824:SHP-1@100.0	start=818 end=824 score=100.0
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 818 - 824: 100.0
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 818-824:SHP-1@100.0 chunk is not an abbreviation
-
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 428-433
-428-433:KLRG1@0.0	start=428 end=433 score=0.0
-428-433:KLRG2@10.0	start=428 end=433 score=10.0
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 428 - 433: 0.0
-14983 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 428-433:KLRG1@0.0 chunk is an abbreviation but respective full form is EntityMention without same specificType
-
-14983 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14984 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG1 [begin=41; end=46]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14985 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14985 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG1 [begin=289; end=294]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14985 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14986 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG1 [begin=428; end=433]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14986 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14986 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG2 [begin=563; end=568]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14987 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14987 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG2 [begin=679; end=684]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14988 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: ITIM
-14989 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (ITIM [begin=789; end=793]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14990 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - 
-
-+++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++
-
-14990 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: killer cell lectin-like receptor G1
-EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14990 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG1
-EntityMention
-   sofa: _InitialView
-   begin: 41
-   end: 46
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG1"
-   head: <null>
-   mentionLevel: <null>
-
-14991 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG1
-EntityMention
-   sofa: _InitialView
-   begin: 289
-   end: 294
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG1"
-   head: <null>
-   mentionLevel: <null>
-
-14991 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG
-EntityMention
-   sofa: _InitialView
-   begin: 370
-   end: 374
-   confidence: "100.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG"
-   head: <null>
-   mentionLevel: <null>
-
-14991 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG1
-EntityMention
-   sofa: _InitialView
-   begin: 428
-   end: 433
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG1"
-   head: <null>
-   mentionLevel: <null>
-
-14992 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: killer cell lectin-like receptor G2
-EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14992 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 563
-   end: 568
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"0    [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testReadDictionary() - tests for errors when loading and initializing dictionary...
-375  [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-419  [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testReadDictionary() - building dictionary took: 0 secs
-2155 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - initialize() - initializing GazetteerAnnotator...
-2155 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-2321 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-2327 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2330 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2331 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2331 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2334 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2344 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testProcess() - testing process for EXACT matching (6 matches expected)...
-14407 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - initialize() - initializing GazetteerAnnotator...
-14416 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-14433 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-14434 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14435 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14436 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14437 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14440 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14466 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - process() - processing next document with GazetteerAnnotator...
-14594 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 41-46:KLRG1@1.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14595 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 289-294:KLRG1@1.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14596 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 428-433:KLRG1@1.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14597 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 526-561:KLRG2@1.0 chunk is not an abbreviation
-
-14605 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 563-568:KLRG2@1.0 chunk is an abbreviation and respective full form is EntityMention with same specificType
-
-14608 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 679-684:KLRG2@1.0 chunk is an abbreviation and respective full form is EntityMention with same specificType
-
-14609 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 741-787:ITIM@1.0 chunk is not an abbreviation
-
-14609 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 843-848:SHP-1@1.0 chunk is not an abbreviation
-
-14610 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14614 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation has no EntityMention
-
-14614 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation has no EntityMention
-
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation has no EntityMention
-
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14616 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - emAcroSpecType=KLRG2 == emFullformSpecType=KLRG2
-14616 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14617 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - emAcroSpecType=KLRG2 == emFullformSpecType=KLRG2
-14617 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: ITIM
-14626 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (ITIM [begin=789; end=793]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14630 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - 
-
-+++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++
-
-14631 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: killer cell lectin-like receptor G2
-EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14632 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 563
-   end: 568
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14640 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 679
-   end: 684
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14642 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: immunoreceptor tyrosine-based inhibitory motif
-EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14642 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: ITIM
-EntityMention
-   sofa: _InitialView
-   begin: 789
-   end: 793
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "ITIM"
-   head: <null>
-   mentionLevel: <null>
-
-14643 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: sHP-1
-EntityMention
-   sofa: _InitialView
-   begin: 843
-   end: 848
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "sHP-1"
-   head: <null>
-   mentionLevel: <null>
-
-14643 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testProcess() - testing process for APPROX matching (13 matches expected)...
-14828 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - initialize() - initializing GazetteerAnnotator...
-14828 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-14848 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-14849 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14849 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14849 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14850 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14851 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14859 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - process() - processing next document with GazetteerAnnotator...
-14958 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - all overlapping chunks:
-
-14958 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 563-569
-563-568:KLRG1@10.0	start=563 end=568 score=10.0
-563-569:KLRG2@100.0	start=563 end=569 score=100.0
-563-568:KLRG2@0.0	start=563 end=568 score=0.0
-562-568:KLRG2@100.0	start=562 end=568 score=100.0
-14958 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 563 - 568: 0.0
-14959 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 563-568:KLRG2@0.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14959 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 679-685
-679-684:KLRG1@10.0	start=679 end=684 score=10.0
-679-684:KLRG2@0.0	start=679 end=684 score=0.0
-679-685:KLRG2@100.0	start=679 end=685 score=100.0
-14959 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 679 - 684: 0.0
-14960 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 679-684:KLRG2@0.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14960 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 40-46
-41-46:KLRG1@0.0	start=41 end=46 score=0.0
-40-46:KLRG1@100.0	start=40 end=46 score=100.0
-41-47:KLRG1@100.0	start=41 end=47 score=100.0
-41-46:KLRG2@10.0	start=41 end=46 score=10.0
-14960 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 41 - 46: 0.0
-14963 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 41-46:KLRG1@0.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14963 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 526-561
-526-561:KLRG2@0.0	start=526 end=561 score=0.0
-14963 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 526 - 561: 0.0
-14964 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 526-561:KLRG2@0.0 chunk is not an abbreviation
-
-14964 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 741-787
-741-787:ITIM@0.0	start=741 end=787 score=0.0
-14964 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 741 - 787: 0.0
-14968 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 741-787:ITIM@0.0 chunk is not an abbreviation
-
-14968 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 4-39
-4-39:KLRG2@10.0	start=4 end=39 score=10.0
-14968 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 4 - 39: 10.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 4-39:KLRG2@10.0 chunk is not an abbreviation
-
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 829-834
-829-832:SHP-1@100.0	start=829 end=832 score=100.0
-829-834:SHP-1@10.0	start=829 end=834 score=10.0
-829-833:SHP-1@50.0	start=829 end=833 score=50.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 829 - 834: 10.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 829-834:SHP-1@10.0 chunk is not an abbreviation
-
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 843-848
-843-847:SHP-1@60.0	start=843 end=847 score=60.0
-843-848:SHP-1@10.0	start=843 end=848 score=10.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 843 - 848: 10.0
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 843-848:SHP-1@10.0 chunk is not an abbreviation
-
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 289-294
-289-294:KLRG2@10.0	start=289 end=294 score=10.0
-289-294:KLRG1@0.0	start=289 end=294 score=0.0
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 289 - 294: 0.0
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 289-294:KLRG1@0.0 chunk is an abbreviation but respective full form is EntityMention without same specificType
-
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 370-374
-370-374:KLRG2@100.0	start=370 end=374 score=100.0
-370-374:KLRG1@100.0	start=370 end=374 score=100.0
-14971 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 370 - 374: 100.0
-14973 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 370-374:KLRG2@100.0 chunk is not an abbreviation
-
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 818-824
-818-822:SHP-1@100.0	start=818 end=822 score=100.0
-818-824:SHP-1@100.0	start=818 end=824 score=100.0
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 818 - 824: 100.0
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 818-824:SHP-1@100.0 chunk is not an abbreviation
-
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 428-433
-428-433:KLRG1@0.0	start=428 end=433 score=0.0
-428-433:KLRG2@10.0	start=428 end=433 score=10.0
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 428 - 433: 0.0
-14983 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 428-433:KLRG1@0.0 chunk is an abbreviation but respective full form is EntityMention without same specificType
-
-14983 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14984 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG1 [begin=41; end=46]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14985 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14985 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG1 [begin=289; end=294]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14985 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14986 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG1 [begin=428; end=433]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14986 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14986 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG2 [begin=563; end=568]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14987 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14987 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG2 [begin=679; end=684]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14988 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: ITIM
-14989 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (ITIM [begin=789; end=793]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14990 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - 
-
-+++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++
-
-14990 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: killer cell lectin-like receptor G1
-EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14990 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG1
-EntityMention
-   sofa: _InitialView
-   begin: 41
-   end: 46
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG1"
-   head: <null>
-   mentionLevel: <null>
-
-14991 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG1
-EntityMention
-   sofa: _InitialView
-   begin: 289
-   end: 294
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG1"
-   head: <null>
-   mentionLevel: <null>
-
-14991 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG
-EntityMention
-   sofa: _InitialView
-   begin: 370
-   end: 374
-   confidence: "100.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG"
-   head: <null>
-   mentionLevel: <null>
-
-14991 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG1
-EntityMention
-   sofa: _InitialView
-   begin: 428
-   end: 433
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG1"
-   head: <null>
-   mentionLevel: <null>
-
-14992 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: killer cell lectin-like receptor G2
-EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14992 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 563
-   end: 568
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14992 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 679
-   end: 684
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14993 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: immunoreceptor tyrosine-based inhibitory motif
-EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14994 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: ITIM
-EntityMention
-   sofa: _InitialView
-   begin: 789
-   end: 793
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "ITIM"
-   head: <null>
-   mentionLevel: <null>
-
-14994 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: SHIP-1
-EntityMention
-   sofa: _InitialView
-   begin: 818
-   end: 824
-   confidence: "100.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>0    [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testReadDictionary() - tests for errors when loading and initializing dictionary...
-375  [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-419  [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testReadDictionary() - building dictionary took: 0 secs
-2155 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - initialize() - initializing GazetteerAnnotator...
-2155 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-2321 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-2327 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2330 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2331 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2331 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2334 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2344 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testProcess() - testing process for EXACT matching (6 matches expected)...
-14407 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - initialize() - initializing GazetteerAnnotator...
-14416 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-14433 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-14434 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14435 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14436 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14437 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14440 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14466 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - process() - processing next document with GazetteerAnnotator...
-14594 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 41-46:KLRG1@1.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14595 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 289-294:KLRG1@1.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14596 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 428-433:KLRG1@1.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14597 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 526-561:KLRG2@1.0 chunk is not an abbreviation
-
-14605 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 563-568:KLRG2@1.0 chunk is an abbreviation and respective full form is EntityMention with same specificType
-
-14608 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 679-684:KLRG2@1.0 chunk is an abbreviation and respective full form is EntityMention with same specificType
-
-14609 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 741-787:ITIM@1.0 chunk is not an abbreviation
-
-14609 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 843-848:SHP-1@1.0 chunk is not an abbreviation
-
-14610 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14614 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation has no EntityMention
-
-14614 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation has no EntityMention
-
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation has no EntityMention
-
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14616 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - emAcroSpecType=KLRG2 == emFullformSpecType=KLRG2
-14616 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14617 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - emAcroSpecType=KLRG2 == emFullformSpecType=KLRG2
-14617 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: ITIM
-14626 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (ITIM [begin=789; end=793]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14630 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - 
-
-+++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++
-
-14631 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: killer cell lectin-like receptor G2
-EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14632 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 563
-   end: 568
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14640 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 679
-   end: 684
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14642 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: immunoreceptor tyrosine-based inhibitory motif
-EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14642 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: ITIM
-EntityMention
-   sofa: _InitialView
-   begin: 789
-   end: 793
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "ITIM"
-   head: <null>
-   mentionLevel: <null>
-
-14643 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: sHP-1
-EntityMention
-   sofa: _InitialView
-   begin: 843
-   end: 848
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "sHP-1"
-   head: <null>
-   mentionLevel: <null>
-
-14643 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testProcess() - testing process for APPROX matching (13 matches expected)...
-14828 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - initialize() - initializing GazetteerAnnotator...
-14828 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-14848 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-14849 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14849 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14849 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14850 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14851 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14859 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - process() - processing next document with GazetteerAnnotator...
-14958 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - all overlapping chunks:
-
-14958 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 563-569
-563-568:KLRG1@10.0	start=563 end=568 score=10.0
-563-569:KLRG2@100.0	start=563 end=569 score=100.0
-563-568:KLRG2@0.0	start=563 end=568 score=0.0
-562-568:KLRG2@100.0	start=562 end=568 score=100.0
-14958 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 563 - 568: 0.0
-14959 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 563-568:KLRG2@0.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14959 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 679-685
-679-684:KLRG1@10.0	start=679 end=684 score=10.0
-679-684:KLRG2@0.0	start=679 end=684 score=0.0
-679-685:KLRG2@100.0	start=679 end=685 score=100.0
-14959 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 679 - 684: 0.0
-14960 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 679-684:KLRG2@0.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14960 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 40-46
-41-46:KLRG1@0.0	start=41 end=46 score=0.0
-40-46:KLRG1@100.0	start=40 end=46 score=100.0
-41-47:KLRG1@100.0	start=41 end=47 score=100.0
-41-46:KLRG2@10.0	start=41 end=46 score=10.0
-14960 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 41 - 46: 0.0
-14963 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 41-46:KLRG1@0.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14963 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 526-561
-526-561:KLRG2@0.0	start=526 end=561 score=0.0
-14963 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 526 - 561: 0.0
-14964 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 526-561:KLRG2@0.0 chunk is not an abbreviation
-
-14964 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 741-787
-741-787:ITIM@0.0	start=741 end=787 score=0.0
-14964 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 741 - 787: 0.0
-14968 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 741-787:ITIM@0.0 chunk is not an abbreviation
-
-14968 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 4-39
-4-39:KLRG2@10.0	start=4 end=39 score=10.0
-14968 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 4 - 39: 10.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 4-39:KLRG2@10.0 chunk is not an abbreviation
-
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 829-834
-829-832:SHP-1@100.0	start=829 end=832 score=100.0
-829-834:SHP-1@10.0	start=829 end=834 score=10.0
-829-833:SHP-1@50.0	start=829 end=833 score=50.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 829 - 834: 10.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 829-834:SHP-1@10.0 chunk is not an abbreviation
-
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 843-848
-843-847:SHP-1@60.0	start=843 end=847 score=60.0
-843-848:SHP-1@10.0	start=843 end=848 score=10.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 843 - 848: 10.0
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 843-848:SHP-1@10.0 chunk is not an abbreviation
-
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 289-294
-289-294:KLRG2@10.0	start=289 end=294 score=10.0
-289-294:KLRG1@0.0	start=289 end=294 score=0.0
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 289 - 294: 0.0
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 289-294:KLRG1@0.0 chunk is an abbreviation but respective full form is EntityMention without same specificType
-
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 370-374
-370-374:KLRG2@100.0	start=370 end=374 score=100.0
-370-374:KLRG1@100.0	start=370 end=374 score=100.0
-14971 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 370 - 374: 100.0
-14973 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 370-374:KLRG2@100.0 chunk is not an abbreviation
-
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 818-824
-818-822:SHP-1@100.0	start=818 end=822 score=100.0
-818-824:SHP-1@100.0	start=818 end=824 score=100.0
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 818 - 824: 100.0
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 818-824:SHP-1@100.0 chunk is not an abbreviation
-
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 428-433
-428-433:KLRG1@0.0	start=428 end=433 score=0.0
-428-433:KLRG2@10.0	start=428 end=433 score=10.0
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 428 - 433: 0.0
-14983 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 428-433:KLRG1@0.0 chunk is an abbreviation but respective full form is EntityMention without same specificType
-
-14983 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14984 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG1 [begin=41; end=46]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14985 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14985 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG1 [begin=289; end=294]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14985 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14986 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG1 [begin=428; end=433]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14986 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14986 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG2 [begin=563; end=568]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14987 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14987 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG2 [begin=679; end=684]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14988 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: ITIM
-14989 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (ITIM [begin=789; end=793]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14990 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - 
-
-+++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++
-
-14990 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: killer cell lectin-like receptor G1
-EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14990 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG1
-EntityMention
-   sofa: _InitialView
-   begin: 41
-   end: 46
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG1"
-   head: <null>
-   mentionLevel: <null>
-
-14991 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG1
-EntityMention
-   sofa: _InitialView
-   begin: 289
-   end: 294
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG1"
-   head: <null>
-   mentionLevel: <null>
-
-14991 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG
-EntityMention
-   sofa: _InitialView
-   begin: 370
-   end: 374
-   confidence: "100.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG"
-   head: <null>
-   mentionLevel: <null>
-
-14991 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG1
-EntityMention
-   sofa: _InitialView
-   begin: 428
-   end: 433
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG1"
-   head: <null>
-   mentionLevel: <null>
-
-14992 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: killer cell lectin-like receptor G2
-EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14992 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 563
-   end: 568
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14992 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 679
-   end: 684
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14993 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: immunoreceptor tyrosine-based inhibitory motif
-EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14994 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: ITIM
-EntityMention
-   sofa: _InitialView
-   begin: 789
-   end: 793
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "ITIM"
-   head: <null>
-   mentionLevel: <null>
-
-14994 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: SHIP-1
-EntityMention
-   sofa: _InitialView
-   begin: 818
-   end: 824
-   confidence: "100.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "SHIP-1"
-   head: <null>
-   mentionLevel: <null>
-
-14994 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: SHP-2
-EntityMention
-   sofa: _InitialView0    [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testReadDictionary() - tests for errors when loading and initializing dictionary...
-375  [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-419  [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testReadDictionary() - building dictionary took: 0 secs
-2155 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - initialize() - initializing GazetteerAnnotator...
-2155 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-2321 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-2327 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2330 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2331 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2331 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2334 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2344 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testProcess() - testing process for EXACT matching (6 matches expected)...
-14407 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - initialize() - initializing GazetteerAnnotator...
-14416 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-14433 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-14434 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14435 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14436 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14437 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14440 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14466 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - process() - processing next document with GazetteerAnnotator...
-14594 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 41-46:KLRG1@1.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14595 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 289-294:KLRG1@1.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14596 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 428-433:KLRG1@1.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14597 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 526-561:KLRG2@1.0 chunk is not an abbreviation
-
-14605 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 563-568:KLRG2@1.0 chunk is an abbreviation and respective full form is EntityMention with same specificType
-
-14608 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 679-684:KLRG2@1.0 chunk is an abbreviation and respective full form is EntityMention with same specificType
-
-14609 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 741-787:ITIM@1.0 chunk is not an abbreviation
-
-14609 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 843-848:SHP-1@1.0 chunk is not an abbreviation
-
-14610 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14614 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation has no EntityMention
-
-14614 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation has no EntityMention
-
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation has no EntityMention
-
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14616 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - emAcroSpecType=KLRG2 == emFullformSpecType=KLRG2
-14616 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14617 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - emAcroSpecType=KLRG2 == emFullformSpecType=KLRG2
-14617 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: ITIM
-14626 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (ITIM [begin=789; end=793]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14630 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - 
-
-+++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++
-
-14631 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: killer cell lectin-like receptor G2
-EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14632 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 563
-   end: 568
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14640 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 679
-   end: 684
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14642 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: immunoreceptor tyrosine-based inhibitory motif
-EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14642 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: ITIM
-EntityMention
-   sofa: _InitialView
-   begin: 789
-   end: 793
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "ITIM"
-   head: <null>
-   mentionLevel: <null>
-
-14643 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: sHP-1
-EntityMention
-   sofa: _InitialView
-   begin: 843
-   end: 848
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "sHP-1"
-   head: <null>
-   mentionLevel: <null>
-
-14643 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testProcess() - testing process for APPROX matching (13 matches expected)...
-14828 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - initialize() - initializing GazetteerAnnotator...
-14828 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-14848 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-14849 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14849 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14849 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14850 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14851 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14859 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - process() - processing next document with GazetteerAnnotator...
-14958 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - all overlapping chunks:
-
-14958 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 563-569
-563-568:KLRG1@10.0	start=563 end=568 score=10.0
-563-569:KLRG2@100.0	start=563 end=569 score=100.0
-563-568:KLRG2@0.0	start=563 end=568 score=0.0
-562-568:KLRG2@100.0	start=562 end=568 score=100.0
-14958 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 563 - 568: 0.0
-14959 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 563-568:KLRG2@0.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14959 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 679-685
-679-684:KLRG1@10.0	start=679 end=684 score=10.0
-679-684:KLRG2@0.0	start=679 end=684 score=0.0
-679-685:KLRG2@100.0	start=679 end=685 score=100.0
-14959 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 679 - 684: 0.0
-14960 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 679-684:KLRG2@0.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14960 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 40-46
-41-46:KLRG1@0.0	start=41 end=46 score=0.0
-40-46:KLRG1@100.0	start=40 end=46 score=100.0
-41-47:KLRG1@100.0	start=41 end=47 score=100.0
-41-46:KLRG2@10.0	start=41 end=46 score=10.0
-14960 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 41 - 46: 0.0
-14963 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 41-46:KLRG1@0.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14963 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 526-561
-526-561:KLRG2@0.0	start=526 end=561 score=0.0
-14963 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 526 - 561: 0.0
-14964 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 526-561:KLRG2@0.0 chunk is not an abbreviation
-
-14964 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 741-787
-741-787:ITIM@0.0	start=741 end=787 score=0.0
-14964 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 741 - 787: 0.0
-14968 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 741-787:ITIM@0.0 chunk is not an abbreviation
-
-14968 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 4-39
-4-39:KLRG2@10.0	start=4 end=39 score=10.0
-14968 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 4 - 39: 10.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 4-39:KLRG2@10.0 chunk is not an abbreviation
-
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 829-834
-829-832:SHP-1@100.0	start=829 end=832 score=100.0
-829-834:SHP-1@10.0	start=829 end=834 score=10.0
-829-833:SHP-1@50.0	start=829 end=833 score=50.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 829 - 834: 10.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 829-834:SHP-1@10.0 chunk is not an abbreviation
-
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 843-848
-843-847:SHP-1@60.0	start=843 end=847 score=60.0
-843-848:SHP-1@10.0	start=843 end=848 score=10.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 843 - 848: 10.0
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 843-848:SHP-1@10.0 chunk is not an abbreviation
-
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 289-294
-289-294:KLRG2@10.0	start=289 end=294 score=10.0
-289-294:KLRG1@0.0	start=289 end=294 score=0.0
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 289 - 294: 0.0
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 289-294:KLRG1@0.0 chunk is an abbreviation but respective full form is EntityMention without same specificType
-
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 370-374
-370-374:KLRG2@100.0	start=370 end=374 score=100.0
-370-374:KLRG1@100.0	start=370 end=374 score=100.0
-14971 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 370 - 374: 100.0
-14973 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 370-374:KLRG2@100.0 chunk is not an abbreviation
-
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 818-824
-818-822:SHP-1@100.0	start=818 end=822 score=100.0
-818-824:SHP-1@100.0	start=818 end=824 score=100.0
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 818 - 824: 100.0
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 818-824:SHP-1@100.0 chunk is not an abbreviation
-
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 428-433
-428-433:KLRG1@0.0	start=428 end=433 score=0.0
-428-433:KLRG2@10.0	start=428 end=433 score=10.0
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 428 - 433: 0.0
-14983 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 428-433:KLRG1@0.0 chunk is an abbreviation but respective full form is EntityMention without same specificType
-
-14983 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14984 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG1 [begin=41; end=46]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14985 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14985 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG1 [begin=289; end=294]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14985 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14986 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG1 [begin=428; end=433]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14986 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14986 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG2 [begin=563; end=568]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14987 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14987 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG2 [begin=679; end=684]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14988 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: ITIM
-14989 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (ITIM [begin=789; end=793]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14990 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - 
-
-+++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++
-
-14990 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: killer cell lectin-like receptor G1
-EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14990 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG1
-EntityMention
-   sofa: _InitialView
-   begin: 41
-   end: 46
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG1"
-   head: <null>
-   mentionLevel: <null>
-
-14991 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG1
-EntityMention
-   sofa: _InitialView
-   begin: 289
-   end: 294
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG1"
-   head: <null>
-   mentionLevel: <null>
-
-14991 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG
-EntityMention
-   sofa: _InitialView
-   begin: 370
-   end: 374
-   confidence: "100.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG"
-   head: <null>
-   mentionLevel: <null>
-
-14991 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG1
-EntityMention
-   sofa: _InitialView
-   begin: 428
-   end: 433
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG1"
-   head: <null>
-   mentionLevel: <null>
-
-14992 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: killer cell lectin-like receptor G2
-EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14992 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 563
-   end: 568
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14992 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 679
-   end: 684
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14993 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: immunoreceptor tyrosine-based inhibitory motif
-EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14994 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: ITIM
-EntityMention
-   sofa: _InitialView
-   begin: 789
-   end: 793
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "ITIM"
-   head: <null>
-   mentionLevel: <null>
-
-14994 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: SHIP-1
-EntityMention
-   sofa: _InitialView
-   begin: 818
-   end: 824
-   confidence: "100.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "SHIP-1"
-   head: <null>
-   mentionLevel: <null>
-0    [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testReadDictionary() - tests for errors when loading and initializing dictionary...
-375  [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-419  [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testReadDictionary() - building dictionary took: 0 secs
-2155 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - initialize() - initializing GazetteerAnnotator...
-2155 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-2321 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-2327 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2330 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2331 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2331 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2334 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-2344 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testProcess() - testing process for EXACT matching (6 matches expected)...
-14407 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - initialize() - initializing GazetteerAnnotator...
-14416 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-14433 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-14434 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14435 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14436 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14437 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14440 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14466 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - process() - processing next document with GazetteerAnnotator...
-14594 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 41-46:KLRG1@1.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14595 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 289-294:KLRG1@1.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14596 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 428-433:KLRG1@1.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14597 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 526-561:KLRG2@1.0 chunk is not an abbreviation
-
-14605 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 563-568:KLRG2@1.0 chunk is an abbreviation and respective full form is EntityMention with same specificType
-
-14608 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 679-684:KLRG2@1.0 chunk is an abbreviation and respective full form is EntityMention with same specificType
-
-14609 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 741-787:ITIM@1.0 chunk is not an abbreviation
-
-14609 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 843-848:SHP-1@1.0 chunk is not an abbreviation
-
-14610 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14614 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation has no EntityMention
-
-14614 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation has no EntityMention
-
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation has no EntityMention
-
-14615 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14616 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - emAcroSpecType=KLRG2 == emFullformSpecType=KLRG2
-14616 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14617 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - emAcroSpecType=KLRG2 == emFullformSpecType=KLRG2
-14617 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: ITIM
-14626 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (ITIM [begin=789; end=793]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14630 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - 
-
-+++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++
-
-14631 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: killer cell lectin-like receptor G2
-EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14632 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 563
-   end: 568
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14640 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 679
-   end: 684
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14642 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: immunoreceptor tyrosine-based inhibitory motif
-EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14642 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: ITIM
-EntityMention
-   sofa: _InitialView
-   begin: 789
-   end: 793
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "ITIM"
-   head: <null>
-   mentionLevel: <null>
-
-14643 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: sHP-1
-EntityMention
-   sofa: _InitialView
-   begin: 843
-   end: 848
-   confidence: "1.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "sHP-1"
-   head: <null>
-   mentionLevel: <null>
-
-14643 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - testProcess() - testing process for APPROX matching (13 matches expected)...
-14828 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - initialize() - initializing GazetteerAnnotator...
-14828 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-readDictionary() - adding entries from src/test/resources/general_english_words to dictionary...
-14848 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-readDictionary() - adding entries from src/test/resources/dictionary.tst to dictionary...
-14849 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14849 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14849 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14850 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14851 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - readDictionary() - make term variants and add them to dictionary (NOTE: this may take a while if dictionary is big!)
-14859 [main] INFO  de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - process() - processing next document with GazetteerAnnotator...
-14958 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - all overlapping chunks:
-
-14958 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 563-569
-563-568:KLRG1@10.0	start=563 end=568 score=10.0
-563-569:KLRG2@100.0	start=563 end=569 score=100.0
-563-568:KLRG2@0.0	start=563 end=568 score=0.0
-562-568:KLRG2@100.0	start=562 end=568 score=100.0
-14958 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 563 - 568: 0.0
-14959 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 563-568:KLRG2@0.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14959 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 679-685
-679-684:KLRG1@10.0	start=679 end=684 score=10.0
-679-684:KLRG2@0.0	start=679 end=684 score=0.0
-679-685:KLRG2@100.0	start=679 end=685 score=100.0
-14959 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 679 - 684: 0.0
-14960 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 679-684:KLRG2@0.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14960 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 40-46
-41-46:KLRG1@0.0	start=41 end=46 score=0.0
-40-46:KLRG1@100.0	start=40 end=46 score=100.0
-41-47:KLRG1@100.0	start=41 end=47 score=100.0
-41-46:KLRG2@10.0	start=41 end=46 score=10.0
-14960 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 41 - 46: 0.0
-14963 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 41-46:KLRG1@0.0 chunk is an abbreviation but respective full form is no EntityMention
-
-14963 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 526-561
-526-561:KLRG2@0.0	start=526 end=561 score=0.0
-14963 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 526 - 561: 0.0
-14964 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 526-561:KLRG2@0.0 chunk is not an abbreviation
-
-14964 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 741-787
-741-787:ITIM@0.0	start=741 end=787 score=0.0
-14964 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 741 - 787: 0.0
-14968 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 741-787:ITIM@0.0 chunk is not an abbreviation
-
-14968 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 4-39
-4-39:KLRG2@10.0	start=4 end=39 score=10.0
-14968 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 4 - 39: 10.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 4-39:KLRG2@10.0 chunk is not an abbreviation
-
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 829-834
-829-832:SHP-1@100.0	start=829 end=832 score=100.0
-829-834:SHP-1@10.0	start=829 end=834 score=10.0
-829-833:SHP-1@50.0	start=829 end=833 score=50.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 829 - 834: 10.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 829-834:SHP-1@10.0 chunk is not an abbreviation
-
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 843-848
-843-847:SHP-1@60.0	start=843 end=847 score=60.0
-843-848:SHP-1@10.0	start=843 end=848 score=10.0
-14969 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 843 - 848: 10.0
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 843-848:SHP-1@10.0 chunk is not an abbreviation
-
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 289-294
-289-294:KLRG2@10.0	start=289 end=294 score=10.0
-289-294:KLRG1@0.0	start=289 end=294 score=0.0
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 289 - 294: 0.0
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 289-294:KLRG1@0.0 chunk is an abbreviation but respective full form is EntityMention without same specificType
-
-14970 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 370-374
-370-374:KLRG2@100.0	start=370 end=374 score=100.0
-370-374:KLRG1@100.0	start=370 end=374 score=100.0
-14971 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 370 - 374: 100.0
-14973 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 370-374:KLRG2@100.0 chunk is not an abbreviation
-
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 818-824
-818-822:SHP-1@100.0	start=818 end=822 score=100.0
-818-824:SHP-1@100.0	start=818 end=824 score=100.0
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 818 - 824: 100.0
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 818-824:SHP-1@100.0 chunk is not an abbreviation
-
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - max span: 428-433
-428-433:KLRG1@0.0	start=428 end=433 score=0.0
-428-433:KLRG2@10.0	start=428 end=433 score=10.0
-14975 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - best chunk: 428 - 433: 0.0
-14983 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - 428-433:KLRG1@0.0 chunk is an abbreviation but respective full form is EntityMention without same specificType
-
-14983 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14984 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG1 [begin=41; end=46]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14985 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14985 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG1 [begin=289; end=294]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14985 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14986 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG1 [begin=428; end=433]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14986 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14986 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG2 [begin=563; end=568]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14987 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14987 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG2 [begin=679; end=684]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14988 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: ITIM
-14989 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (ITIM [begin=789; end=793]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14990 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - 
-
-+++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++
-
-14990 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: killer cell lectin-like receptor G1
-EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14990 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG1
-EntityMention
-   sofa: _InitialView
-   begin: 41
-   end: 46
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG1"
-   head: <null>
-   mentionLevel: <null>
-
-14991 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG1
-EntityMention
-   sofa: _InitialView
-   begin: 289
-   end: 294
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG1"
-   head: <null>
-   mentionLevel: <null>
-
-14991 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG
-EntityMention
-   sofa: _InitialView
-   begin: 370
-   end: 374
-   confidence: "100.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG"
-   head: <null>
-   mentionLevel: <null>
-
-14991 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG1
-EntityMention
-   sofa: _InitialView
-   begin: 428
-   end: 433
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG1"
-   head: <null>
-   mentionLevel: <null>
-
-14992 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: killer cell lectin-like receptor G2
-EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14992 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 563
-   end: 568
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14992 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 679
-   end: 684
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14993 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: immunoreceptor tyrosine-based inhibitory motif
-EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14994 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: ITIM
-EntityMention
-   sofa: _InitialView
-   begin: 789
-   end: 793
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "ITIM"
-   head: <null>
-   mentionLevel: <null>
-
-14994 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: SHIP-1
-EntityMention
-   sofa: _InitialView
-   begin: 818
-   end: 824
-   confidence: "100.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "SHIP-1"
-   head: <null>
-   mentionLevel: <null>
-
-14994 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: SHP-2
-EntityMention
-   sofa: _InitialView
-   begin: 829
-   end: 834
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "SHP-2"
-   head: <null>
-   mentionLevel: <null>
-
-14996 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: sHP-1
-EntityMention
-   sofa: _InitialView
-   begin: 843
-   end: 848
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "sHP-1"
-   head: <null>
-   mentionLevel: <null>
-
-
-14994 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: SHP-2
-EntityMention
-   sofa: _InitialView
-   begin: 829
-   end: 834
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "SHP-2"
-   head: <null>
-   mentionLevel: <null>
-
-14996 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: sHP-1
-EntityMention
-   sofa: _InitialView
-   begin: 843
-   end: 848
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "sHP-1"
-   head: <null>
-   mentionLevel: <null>
-
-   
-   begin: 829
-   end: 834
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "SHP-2"
-   head: <null>
-   mentionLevel: <null>
-
-14996 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: sHP-1
-EntityMention
-   sofa: _InitialView
-   begin: 843
-   end: 848
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "sHP-1"
-   head: <null>
-   mentionLevel: <null>
-
-   
-   resourceEntryList: <null>
-   textualRepresentation: "SHIP-1"
-   head: <null>
-   mentionLevel: <null>
-
-14994 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: SHP-2
-EntityMention
-   sofa: _InitialView
-   begin: 829
-   end: 834
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "SHP-2"
-   head: <null>
-   mentionLevel: <null>
-
-14996 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: sHP-1
-EntityMention
-   sofa: _InitialView
-   begin: 843
-   end: 848
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "sHP-1"
-   head: <null>
-   mentionLevel: <null>
-
-   
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14992 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 679
-   end: 684
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14993 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: immunoreceptor tyrosine-based inhibitory motif
-EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14994 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: ITIM
-EntityMention
-   sofa: _InitialView
-   begin: 789
-   end: 793
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "ITIM"
-   head: <null>
-   mentionLevel: <null>
-
-14994 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: SHIP-1
-EntityMention
-   sofa: _InitialView
-   begin: 818
-   end: 824
-   confidence: "100.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "SHIP-1"
-   head: <null>
-   mentionLevel: <null>
-
-14994 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: SHP-2
-EntityMention
-   sofa: _InitialView
-   begin: 829
-   end: 834
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "SHP-2"
-   head: <null>
-   mentionLevel: <null>
-
-14996 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: sHP-1
-EntityMention
-   sofa: _InitialView
-   begin: 843
-   end: 848
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "sHP-1"
-   head: <null>
-   mentionLevel: <null>
-
-   nnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "SHIP-1"
-   head: <null>
-   mentionLevel: <null>
-
-14994 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: SHP-2
-EntityMention
-   sofa: _InitialView
-   begin: 829
-   end: 834
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "SHP-2"
-   head: <null>
-   mentionLevel: <null>
-
-14996 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: sHP-1
-EntityMention
-   sofa: _InitialView
-   begin: 843
-   end: 848
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "sHP-1"
-   head: <null>
-   mentionLevel: <null>
-
-   
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14985 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14985 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG1 [begin=289; end=294]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14985 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG1
-14986 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG1 [begin=428; end=433]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14986 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14986 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG2 [begin=563; end=568]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14987 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: KLRG2
-14987 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (KLRG2 [begin=679; end=684]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14988 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - checking abbreviation: ITIM
-14989 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotator  - annotateAcronymsWithFullFormEntity() - fullform of abbreviation (ITIM [begin=789; end=793]) has EntityMention: EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14990 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - 
-
-+++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++ OUTPUTTING ENTITIES +++
-
-14990 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: killer cell lectin-like receptor G1
-EntityMention
-   sofa: _InitialView
-   begin: 4
-   end: 39
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G1"
-   head: <null>
-   mentionLevel: <null>
-
-14990 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG1
-EntityMention
-   sofa: _InitialView
-   begin: 41
-   end: 46
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG1"
-   head: <null>
-   mentionLevel: <null>
-
-14991 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG1
-EntityMention
-   sofa: _InitialView
-   begin: 289
-   end: 294
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG1"
-   head: <null>
-   mentionLevel: <null>
-
-14991 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG
-EntityMention
-   sofa: _InitialView
-   begin: 370
-   end: 374
-   confidence: "100.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG"
-   head: <null>
-   mentionLevel: <null>
-
-14991 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG1
-EntityMention
-   sofa: _InitialView
-   begin: 428
-   end: 433
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG1"
-   head: <null>
-   mentionLevel: <null>
-
-14992 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: killer cell lectin-like receptor G2
-EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14992 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 563
-   end: 568
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14992 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 679
-   end: 684
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14993 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: immunoreceptor tyrosine-based inhibitory motif
-EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14994 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: ITIM
-EntityMention
-   sofa: _InitialView
-   begin: 789
-   end: 793
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "ITIM"
-   head: <null>
-   mentionLevel: <null>
-
-14994 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: SHIP-1
-EntityMention
-   sofa: _InitialView
-   begin: 818
-   end: 824
-   confidence: "100.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "SHIP-1"
-   head: <null>
-   mentionLevel: <null>
-
-14994 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: SHP-2
-EntityMention
-   sofa: _InitialView
-   begin: 829
-   end: 834
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "SHP-2"
-   head: <null>
-   mentionLevel: <null>
-
-14996 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: sHP-1
-EntityMention
-   sofa: _InitialView
-   begin: 843
-   end: 848
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "sHP-1"
-   head: <null>
-   mentionLevel: <null>
-
-   
-
-14992 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: killer cell lectin-like receptor G2
-EntityMention
-   sofa: _InitialView
-   begin: 526
-   end: 561
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "killer cell lectin-like receptor G2"
-   head: <null>
-   mentionLevel: <null>
-
-14992 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 563
-   end: 568
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14992 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: KLRG2
-EntityMention
-   sofa: _InitialView
-   begin: 679
-   end: 684
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "KLRG2"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "KLRG2"
-   head: <null>
-   mentionLevel: <null>
-
-14993 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: immunoreceptor tyrosine-based inhibitory motif
-EntityMention
-   sofa: _InitialView
-   begin: 741
-   end: 787
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "immunoreceptor tyrosine-based inhibitory motif"
-   head: <null>
-   mentionLevel: <null>
-
-14994 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: ITIM
-EntityMention
-   sofa: _InitialView
-   begin: 789
-   end: 793
-   confidence: "0.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "ITIM"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "ITIM"
-   head: <null>
-   mentionLevel: <null>
-
-14994 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: SHIP-1
-EntityMention
-   sofa: _InitialView
-   begin: 818
-   end: 824
-   confidence: "100.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "SHIP-1"
-   head: <null>
-   mentionLevel: <null>
-
-14994 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: SHP-2
-EntityMention
-   sofa: _InitialView
-   begin: 829
-   end: 834
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "SHP-2"
-   head: <null>
-   mentionLevel: <null>
-
-14996 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: sHP-1
-EntityMention
-   sofa: _InitialView
-   begin: 843
-   end: 848
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "sHP-1"
-   head: <null>
-   mentionLevel: <null>
-
-
-14994 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: SHP-2
-EntityMention
-   sofa: _InitialView
-   begin: 829
-   end: 834
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "SHP-2"
-   head: <null>
-   mentionLevel: <null>
-
-14996 [main] DEBUG de.julielab.jules.lingpipegazetteer.GazetteerAnnotatorTest  - entity: sHP-1
-EntityMention
-   sofa: _InitialView
-   begin: 843
-   end: 848
-   confidence: "10.0"
-   componentId: "de.julielab.jules.lingpipegazetteer.GazetteerAnnotator"
-   id: <null>
-   specificType: "SHP-1"
-   ref: <null>
-   resourceEntryList: <null>
-   textualRepresentation: "sHP-1"
-   head: <null>
-   mentionLevel: <null>
-

From 281dc8ba60844d7a67884e98c9bee1590cc5ceb3 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 5 Aug 2020 11:42:15 +0200
Subject: [PATCH 018/269] Removing debug output.

---
 .../lingpipegazetteer/uima/GazetteerAnnotator.java |  2 --
 .../uima/GazetteerAnnotatorTest.java               | 14 +++++++-------
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotator.java b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotator.java
index dd0c68c20..1a9220007 100644
--- a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotator.java
+++ b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotator.java
@@ -359,8 +359,6 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException {
         if (provider.getNormalize()) {
             normalizedDocText = StringNormalizerForChunking.normalizeString(docText, normalizationTokenFactory,
                     transliterator);
-            System.out.println(normalizedDocText.getOffsetMap());
-            System.out.println(normalizedDocText.string);
         }
 
         IndexTermGenerator<Long> longOffsetTermGenerator = TermGenerators.longOffsetTermGenerator();
diff --git a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java
index 612e8c094..7134ae3e7 100644
--- a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java
+++ b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java
@@ -241,7 +241,7 @@ public void testProcessWithNormalizationAndApproximateMatching() throws Exceptio
 		TypeSystemDescription tsDesc = TypeSystemDescriptionFactory
 				.createTypeSystemDescription("de.julielab.jcore.types.jcore-semantics-mention-types");
 
-		AnalysisEngine gazetteerAnnotator = AnalysisEngineFactory.createPrimitive(GazetteerAnnotator.class, tsDesc,
+		AnalysisEngine gazetteerAnnotator = AnalysisEngineFactory.createEngine(GazetteerAnnotator.class, tsDesc,
 				GazetteerAnnotator.PARAM_CHECK_ACRONYMS, false, GazetteerAnnotator.PARAM_OUTPUT_TYPE,
 				"de.julielab.jcore.types.OntClassMention", GazetteerAnnotator.CHUNKER_RESOURCE_NAME, extDesc);
 
@@ -285,7 +285,7 @@ public void testAnnotatorWithTextNormalization()
 		TypeSystemDescription tsDesc = TypeSystemDescriptionFactory
 				.createTypeSystemDescription("de.julielab.jcore.types.jcore-semantics-mention-types");
 
-		AnalysisEngine annotator = AnalysisEngineFactory.createPrimitive(GazetteerAnnotator.class, tsDesc,
+		AnalysisEngine annotator = AnalysisEngineFactory.createEngine(GazetteerAnnotator.class, tsDesc,
 				GazetteerAnnotator.PARAM_OUTPUT_TYPE, "de.julielab.jcore.types.EntityMention",
 				GazetteerAnnotator.CHUNKER_RESOURCE_NAME, extDesc);
 		JCas jCas = annotator.newJCas();
@@ -366,7 +366,7 @@ public void testAnnotateAcronymsWithFullFormEntity() throws Exception {
 		TypeSystemDescription tsDesc = TypeSystemDescriptionFactory
 				.createTypeSystemDescription("de.julielab.jcore.types.jcore-semantics-mention-types");
 
-		AnalysisEngine annotator = AnalysisEngineFactory.createPrimitive(GazetteerAnnotator.class, tsDesc,
+		AnalysisEngine annotator = AnalysisEngineFactory.createEngine(GazetteerAnnotator.class, tsDesc,
 				GazetteerAnnotator.PARAM_OUTPUT_TYPE, "de.julielab.jcore.types.EntityMention",
 				GazetteerAnnotator.CHUNKER_RESOURCE_NAME, extDesc);
 		JCas jCas = annotator.newJCas();
@@ -440,7 +440,7 @@ public void testAnnotatorWithTextNormalizationMuh()
 		TypeSystemDescription tsDesc = TypeSystemDescriptionFactory
 				.createTypeSystemDescription("de.julielab.jcore.types.jcore-semantics-mention-types");
 
-		AnalysisEngine annotator = AnalysisEngineFactory.createPrimitive(GazetteerAnnotator.class, tsDesc,
+		AnalysisEngine annotator = AnalysisEngineFactory.createEngine(GazetteerAnnotator.class, tsDesc,
 				GazetteerAnnotator.PARAM_OUTPUT_TYPE, "de.julielab.jcore.types.EntityMention",
 				GazetteerAnnotator.CHUNKER_RESOURCE_NAME, extDesc);
 		JCas jCas = annotator.newJCas();
@@ -462,7 +462,7 @@ public void testSontesthalt() throws Exception {
 		TypeSystemDescription tsDesc = TypeSystemDescriptionFactory
 				.createTypeSystemDescription("de.julielab.jcore.types.jcore-semantics-mention-types");
 
-		AnalysisEngine annotator = AnalysisEngineFactory.createPrimitive(GazetteerAnnotator.class, tsDesc,
+		AnalysisEngine annotator = AnalysisEngineFactory.createEngine(GazetteerAnnotator.class, tsDesc,
 				GazetteerAnnotator.PARAM_OUTPUT_TYPE, "de.julielab.jcore.types.EntityMention",
 				GazetteerAnnotator.CHUNKER_RESOURCE_NAME, extDesc);
 
@@ -516,7 +516,7 @@ public void testApproximate() throws Exception {
 		TypeSystemDescription tsDesc = TypeSystemDescriptionFactory
 				.createTypeSystemDescription("de.julielab.jcore.types.jcore-semantics-mention-types");
 
-		AnalysisEngine annotator = AnalysisEngineFactory.createPrimitive(GazetteerAnnotator.class, tsDesc,
+		AnalysisEngine annotator = AnalysisEngineFactory.createEngine(GazetteerAnnotator.class, tsDesc,
 				GazetteerAnnotator.PARAM_OUTPUT_TYPE, "de.julielab.jcore.types.EntityMention",
 				GazetteerAnnotator.CHUNKER_RESOURCE_NAME, extDesc);
 
@@ -638,7 +638,7 @@ public void testReadCompressedDictionary() throws Exception {
 		TypeSystemDescription tsDesc = TypeSystemDescriptionFactory
 				.createTypeSystemDescription("de.julielab.jcore.types.jcore-semantics-mention-types");
 
-		AnalysisEngine annotator = AnalysisEngineFactory.createPrimitive(GazetteerAnnotator.class, tsDesc,
+		AnalysisEngine annotator = AnalysisEngineFactory.createEngine(GazetteerAnnotator.class, tsDesc,
 				GazetteerAnnotator.PARAM_OUTPUT_TYPE, "de.julielab.jcore.types.EntityMention",
 				GazetteerAnnotator.CHUNKER_RESOURCE_NAME, extDesc);
 

From c1182553fd403e03ffcf95694dc246393dae3395 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 14 Aug 2020 09:45:29 +0200
Subject: [PATCH 019/269] XMI Reader/Multiplier: Removed the jcore-all-types
 type system from the descriptors.

The complete inclusion of all types caused problems when other type systems needed to be included as well. This change could introduce issues in other places where types are now missing. If such cases appear, we will need to introduce a new way to add type systems, for example with a no-op component that only exists for type system imports.
---
 jcore-xmi-db-reader/README.md                                 | 4 +++-
 .../jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml         | 2 +-
 .../de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-reader.xml | 2 +-
 jedis-parent/pom.xml                                          | 2 +-
 4 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/jcore-xmi-db-reader/README.md b/jcore-xmi-db-reader/README.md
index d587fa8b1..af691dee8 100644
--- a/jcore-xmi-db-reader/README.md
+++ b/jcore-xmi-db-reader/README.md
@@ -1,8 +1,10 @@
 # JCoRe XMI Database Reader
 
-**Descriptor Path**:
+**Descriptor Paths**:
 ```
 de.julielab.jcore.reader.xmi.desc.jcore-xmi-db-reader
+de.julielab.jcore.reader.xmi.desc.jcore-xmi-db-multiplier-reader
+de.julielab.jcore.reader.xmi.desc.jcore-xmi-db-multiplier
 ```
 
 ### Objective
diff --git a/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml b/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml
index 081c3d6a8..992ed962a 100644
--- a/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml
+++ b/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml
@@ -29,7 +29,7 @@
         <typeSystemDescription>
             <imports>
                 <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types" />
-                <import name="de.julielab.jcore.types.jcore-all-types" />
+<!--                <import name="de.julielab.jcore.types.jcore-all-types" />-->
             </imports>
         </typeSystemDescription>
         <fsIndexCollection />
diff --git a/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-reader.xml b/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-reader.xml
index dd703d3d1..fb634e618 100644
--- a/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-reader.xml
+++ b/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-reader.xml
@@ -169,7 +169,7 @@
         <typeSystemDescription>
             <imports>
                 <import name="de.julielab.jcore.types.jcore-xmi-splitter-types" />
-                <import name="de.julielab.jcore.types.jcore-all-types" />
+<!--                <import name="de.julielab.jcore.types.jcore-all-types" />-->
             </imports>
         </typeSystemDescription>
         <fsIndexCollection />
diff --git a/jedis-parent/pom.xml b/jedis-parent/pom.xml
index f56a81be0..794ee5c9c 100644
--- a/jedis-parent/pom.xml
+++ b/jedis-parent/pom.xml
@@ -27,7 +27,7 @@
             <dependency>
                 <groupId>de.julielab</groupId>
                 <artifactId>jcore-xmi-splitter</artifactId>
-                <version>2.3.4</version>
+                <version>2.3.5-SNAPSHOT</version>
             </dependency>
         </dependencies>
     </dependencyManagement>

From d983891d47dd6b1623dc328221bb6859ddd2f10f Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 14 Aug 2020 09:45:56 +0200
Subject: [PATCH 020/269] DBCheckpointAE: Correcting descriptors paths in the
 README.md file.

---
 jcore-db-checkpoint-ae/README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/jcore-db-checkpoint-ae/README.md b/jcore-db-checkpoint-ae/README.md
index 6a4ed4f4b..a74f91d53 100644
--- a/jcore-db-checkpoint-ae/README.md
+++ b/jcore-db-checkpoint-ae/README.md
@@ -2,7 +2,8 @@
 
 **Descriptor Path**:
 ```
-de.julielab.desc.jcore-db-checkpoint-ae
+de.julielab.jcore.ae.checkpoint.desc.jcore-db-checkpoint-ae
+de.julielab.jcore.ae.checkpoint.desc.jcore-db-checkpoint-consumer
 ```
 
 This is a JeDiS[1] component. It can be used to set the 'last component' column in a subset table. This help to keep track of the pipeline status.

From 140d91d185387e96383be0fc143dc8c0df2c44d0 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 18 Aug 2020 08:22:20 +0200
Subject: [PATCH 021/269] Neo4jRelationsConsumer: Not sending empty relation
 documents.

---
 .../sharedresources/AbstractMapProvider.java  |  2 +-
 .../Neo4jRelationsConsumer.java               | 74 ++++++++++---------
 2 files changed, 39 insertions(+), 37 deletions(-)

diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/AbstractMapProvider.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/AbstractMapProvider.java
index 6491627cf..fdc15aaa1 100644
--- a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/AbstractMapProvider.java
+++ b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/AbstractMapProvider.java
@@ -49,7 +49,7 @@ public void load(DataResource aData) throws ResourceInitializationException {
                     map.put(getKey(split[0]), getValue(split[1]));
             }
             log.info("Finished reading resource {}", aData.getUri());
-            log.info("Copying {} values into a fresh HashMap of the exactly correct size", map.size());
+            log.info("Copying {} values into a fresh HashMap of the exact correct size", map.size());
             HashMap<K, V> tmp = new HashMap<>(map.size(), 1f);
             tmp.putAll(map);
             map = tmp;
diff --git a/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java b/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java
index 7ff69f9f8..0a1aaafff 100644
--- a/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java
+++ b/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java
@@ -157,45 +157,47 @@ public void collectionProcessComplete() throws AnalysisEngineProcessException {
 
     private void sendRelationsToNeo4j() throws AnalysisEngineProcessException {
         try {
-            URL url = URI.create(this.url).toURL();
-            HttpURLConnection urlConnection = (HttpURLConnection) url.openConnection();
-            urlConnection.addRequestProperty("Content-Type", "application/json");
-            String authorizationToken = neo4jUser != null && neo4jPassword != null
-                    ? "Basic " + Base64.encodeBase64URLSafeString((neo4jUser + ":" + neo4jPassword).getBytes())
-                    : null;
-            if (authorizationToken != null)
-                urlConnection.setRequestProperty("Authorization", authorizationToken);
-            urlConnection.setRequestMethod(HttpMethod.POST);
-            urlConnection.setDoOutput(true);
-            try (OutputStream outputStream = urlConnection.getOutputStream()) {
-                JsonFactory jf = new JsonFactory(om);
-                JsonGenerator g = jf.createGenerator(outputStream);
-                g.writeStartObject();
-                g.writeObjectField(ImportIERelations.NAME_ID_PROPERTY, idProperty);
-                g.writeObjectField(ImportIERelations.NAME_ID_SOURCE, globalSource);
-
-                List<ImportIERelationDocument> documents = importIERelations.getDocuments();
-                g.writeFieldName(ImportIERelations.NAME_DOCUMENTS);
-                g.writeStartArray();
-                log.debug("Converting {} relation documents to JSON.", documents.size());
-                for (ImportIERelationDocument document : (Iterable<ImportIERelationDocument>) documents::iterator) {
-                    g.writeObject(document);
+            if (!importIERelations.getDocuments().isEmpty()) {
+                URL url = URI.create(this.url).toURL();
+                HttpURLConnection urlConnection = (HttpURLConnection) url.openConnection();
+                urlConnection.addRequestProperty("Content-Type", "application/json");
+                String authorizationToken = neo4jUser != null && neo4jPassword != null
+                        ? "Basic " + Base64.encodeBase64URLSafeString((neo4jUser + ":" + neo4jPassword).getBytes())
+                        : null;
+                if (authorizationToken != null)
+                    urlConnection.setRequestProperty("Authorization", authorizationToken);
+                urlConnection.setRequestMethod(HttpMethod.POST);
+                urlConnection.setDoOutput(true);
+                try (OutputStream outputStream = urlConnection.getOutputStream()) {
+                    JsonFactory jf = new JsonFactory(om);
+                    JsonGenerator g = jf.createGenerator(outputStream);
+                    g.writeStartObject();
+                    g.writeObjectField(ImportIERelations.NAME_ID_PROPERTY, idProperty);
+                    g.writeObjectField(ImportIERelations.NAME_ID_SOURCE, globalSource);
+
+                    List<ImportIERelationDocument> documents = importIERelations.getDocuments();
+                    g.writeFieldName(ImportIERelations.NAME_DOCUMENTS);
+                    g.writeStartArray();
+                    log.debug("Converting {} relation documents to JSON.", documents.size());
+                    for (ImportIERelationDocument document : (Iterable<ImportIERelationDocument>) documents::iterator) {
+                        g.writeObject(document);
+                    }
+                    g.writeEndArray();
+                    g.writeEndObject();
+                    g.close();
                 }
-                g.writeEndArray();
-                g.writeEndObject();
-                g.close();
-            }
-            try (InputStream inputStream = urlConnection.getInputStream()) {
-                log.debug("Response from Neo4j: {}", IOUtils.toString(inputStream, UTF_8));
-            } catch (IOException e) {
-                log.error("Exception occurred while sending relation data to Neo4j server.");
-                try (InputStream inputStream = urlConnection.getErrorStream()) {
-                    if (inputStream != null)
-                        log.error("Error from Neo4j: {}", IOUtils.toString(inputStream, UTF_8));
+                try (InputStream inputStream = urlConnection.getInputStream()) {
+                    log.debug("Response from Neo4j: {}", IOUtils.toString(inputStream, UTF_8));
+                } catch (IOException e) {
+                    log.error("Exception occurred while sending relation data to Neo4j server.");
+                    try (InputStream inputStream = urlConnection.getErrorStream()) {
+                        if (inputStream != null)
+                            log.error("Error from Neo4j: {}", IOUtils.toString(inputStream, UTF_8));
+                    }
+                    throw e;
                 }
-                throw e;
+                importIERelations.clear();
             }
-            importIERelations.clear();
             log.debug("Releasing {} document IDs that have successfully been sent to Neo4j", documentIds.size());
             DocumentReleaseCheckpoint.get().release(Neo4jRelationsConsumer.class.getCanonicalName(), documentIds.stream());
             documentIds.clear();

From 23265383355a0fde824ba41ac733f7115d052863 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 8 Sep 2020 14:05:01 +0200
Subject: [PATCH 022/269] Adding an error log message to the FlairNerAnnotator.

---
 .../de/julielab/jcore/ae/flairner/FlairNerAnnotator.java   | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java b/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java
index 4aea01797..f09332fd0 100644
--- a/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java
+++ b/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java
@@ -140,7 +140,12 @@ public void process(final JCas aJCas) throws AnalysisEngineProcessException {
             for (TaggedEntity entity : taggedEntities) {
                 final Sentence sentence = sentenceMap.get(entity.getDocumentId());
                 EntityMention em = (EntityMention) JCoReAnnotationTools.getAnnotationByClassName(aJCas, entityClass);
-                helper.setAnnotationOffsetsRelativeToSentence(sentence, em, entity, adderConfig);
+                try {
+                    helper.setAnnotationOffsetsRelativeToSentence(sentence, em, entity, adderConfig);
+                } catch (AnnotationOffsetException e) {
+                    log.error("Cannot add entity {} to sentence: {}", entity, sentence.getCoveredText());
+                    throw e;
+                }
                 em.setSpecificType(entity.getTag());
                 em.setConfidence(String.valueOf(entity.getLabelConfidence()));
                 em.setComponentId(componentId);

From e89dae61eb1674bbfc8c8a85fd9ace67fce1c3db Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 8 Sep 2020 14:36:37 +0200
Subject: [PATCH 023/269] Flair NER AE: Token offset issue fix with flair 0.6.

We now explicitly use the SpaceTokenizer when creating a flair Sentence to make sure that the given tokenization is employed.
---
 .../jcore/ae/annotationadder/AnnotationAdderHelper.java   | 8 +++++++-
 .../de/julielab/jcore/ae/flairner/FlairNerAnnotator.java  | 7 +------
 .../de/julielab/jcore/ae/flairner/python/nerScript.py     | 6 ++++--
 .../julielab/jcore/ae/flairner/FlairNerAnnotatorTest.java | 2 +-
 4 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderHelper.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderHelper.java
index 831ecb280..97a2d8447 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderHelper.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderHelper.java
@@ -8,6 +8,8 @@
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -15,11 +17,13 @@
 import java.util.Map;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+import java.util.stream.Collectors;
 
 /**
  * Caches information for the current document.
  */
 public class AnnotationAdderHelper {
+    private final static Logger log = LoggerFactory.getLogger(AnnotationAdderHelper.class);
     // Required for token-offsets
     private List<Token> tokenList;
     private Map<Sentence, List<Token>> tokensBySentences;
@@ -68,8 +72,10 @@ public void setAnnotationOffsetsRelativeToSentence(Sentence sentence, Annotation
             List<Token> tokenList = tokensBySentences.get(sentence);
             int startTokenNum = a.getStart();
             int endTokenNum = a.getEnd();
-            if (startTokenNum < 1 || startTokenNum > tokenList.size())
+            if (startTokenNum < 1 || startTokenNum > tokenList.size()) {
+                log.error("Cannot create entity because of a token offset mismatch. The entity should tart at token {} and end at {}. But there are only {} tokens available: {}", startTokenNum, endTokenNum, tokenList.size(), tokenList.stream().map(Annotation::getCoveredText).collect(Collectors.joining(" ")));
                 throw new AnnotationOffsetException("The current annotation to add to the CAS starts at token " + startTokenNum + " which does not fit to the range of tokens in the sentence with ID " + sentence.getId() + " which is 1 - " + tokenList.size());
+            }
             if (endTokenNum < 1 || endTokenNum > tokenList.size())
                 throw new AnnotationOffsetException("The current annotation to add to the CAS ends at token " + endTokenNum + " which does not fit to the range of tokens in the sentence with ID " + sentence.getId() + " which is 1 - " + tokenList.size());
             if (endTokenNum < startTokenNum)
diff --git a/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java b/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java
index f09332fd0..4aea01797 100644
--- a/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java
+++ b/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java
@@ -140,12 +140,7 @@ public void process(final JCas aJCas) throws AnalysisEngineProcessException {
             for (TaggedEntity entity : taggedEntities) {
                 final Sentence sentence = sentenceMap.get(entity.getDocumentId());
                 EntityMention em = (EntityMention) JCoReAnnotationTools.getAnnotationByClassName(aJCas, entityClass);
-                try {
-                    helper.setAnnotationOffsetsRelativeToSentence(sentence, em, entity, adderConfig);
-                } catch (AnnotationOffsetException e) {
-                    log.error("Cannot add entity {} to sentence: {}", entity, sentence.getCoveredText());
-                    throw e;
-                }
+                helper.setAnnotationOffsetsRelativeToSentence(sentence, em, entity, adderConfig);
                 em.setSpecificType(entity.getTag());
                 em.setConfidence(String.valueOf(entity.getLabelConfidence()));
                 em.setComponentId(componentId);
diff --git a/jcore-flair-ner-ae/src/main/resources/de/julielab/jcore/ae/flairner/python/nerScript.py b/jcore-flair-ner-ae/src/main/resources/de/julielab/jcore/ae/flairner/python/nerScript.py
index d55859594..e405ea93b 100644
--- a/jcore-flair-ner-ae/src/main/resources/de/julielab/jcore/ae/flairner/python/nerScript.py
+++ b/jcore-flair-ner-ae/src/main/resources/de/julielab/jcore/ae/flairner/python/nerScript.py
@@ -4,6 +4,7 @@
 import torch
 from flair.data import Sentence
 from flair.models import SequenceTagger
+from flair.tokenization import SpaceTokenizer
 from struct import *
 
 
@@ -43,9 +44,10 @@ def decodeString(buffer):
     ba = bytearray()
     for sentenceToTag in sentenceTaggingRequests:
         sid      = sentenceToTag['sid']
-        sentence = Sentence(sentenceToTag['text'])
+        # Use the SpaceTokenizer to just use the tokenization given from UIMA
+        sentence = Sentence(sentenceToTag['text'], use_tokenizer=SpaceTokenizer())
         # NER tagging
-        embeddingStorageMode = "none" if sendEmbeddings == "NONE" else "cpu";
+        embeddingStorageMode = "none" if sendEmbeddings == "NONE" else "cpu"
         tagger.predict(sentence, embedding_storage_mode = embeddingStorageMode)
 
         for e in sentence.get_spans("ner"):
diff --git a/jcore-flair-ner-ae/src/test/java/de/julielab/jcore/ae/flairner/FlairNerAnnotatorTest.java b/jcore-flair-ner-ae/src/test/java/de/julielab/jcore/ae/flairner/FlairNerAnnotatorTest.java
index 2317e08e9..9c5171fd6 100644
--- a/jcore-flair-ner-ae/src/test/java/de/julielab/jcore/ae/flairner/FlairNerAnnotatorTest.java
+++ b/jcore-flair-ner-ae/src/test/java/de/julielab/jcore/ae/flairner/FlairNerAnnotatorTest.java
@@ -240,7 +240,7 @@ public void testAnnotator2() throws Exception {
     }
 
     @Test
-    public void testAnnotatorOnOffsetIsseDocument() throws Exception {
+    public void testAnnotatorOnOffsetIssueDocument() throws Exception {
         final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-meta-pubmed-types", "de.julielab.jcore.types.extensions.jcore-document-meta-extension-types");
         final AnalysisEngine engine = AnalysisEngineFactory.createEngine(FlairNerAnnotator.class, FlairNerAnnotator.PARAM_ANNOTATION_TYPE, Gene.class.getCanonicalName(), FlairNerAnnotator.PARAM_FLAIR_MODEL, "src/test/resources/genes-small-model.pt");
 

From c82db9a47632fb5b926cb8865ef2a93ced39a491 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 8 Sep 2020 15:09:53 +0200
Subject: [PATCH 024/269] Adapting the flair NER script to still support flair
 0.4.x in addition to newer versions.

---
 .../de/julielab/jcore/ae/flairner/python/nerScript.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/jcore-flair-ner-ae/src/main/resources/de/julielab/jcore/ae/flairner/python/nerScript.py b/jcore-flair-ner-ae/src/main/resources/de/julielab/jcore/ae/flairner/python/nerScript.py
index e405ea93b..f37fdab4a 100644
--- a/jcore-flair-ner-ae/src/main/resources/de/julielab/jcore/ae/flairner/python/nerScript.py
+++ b/jcore-flair-ner-ae/src/main/resources/de/julielab/jcore/ae/flairner/python/nerScript.py
@@ -4,7 +4,6 @@
 import torch
 from flair.data import Sentence
 from flair.models import SequenceTagger
-from flair.tokenization import SpaceTokenizer
 from struct import *
 
 
@@ -44,8 +43,14 @@ def decodeString(buffer):
     ba = bytearray()
     for sentenceToTag in sentenceTaggingRequests:
         sid      = sentenceToTag['sid']
-        # Use the SpaceTokenizer to just use the tokenization given from UIMA
-        sentence = Sentence(sentenceToTag['text'], use_tokenizer=SpaceTokenizer())
+        # In newer flair versions we need to specify the tokenizer in order to use
+        # the exact input tokenization and avoid token offset mismatches
+        if "0.4" in flair.__version__:
+            sentence = Sentence(sentenceToTag['text'])
+        else:
+            from flair.tokenization import SpaceTokenizer
+            # Use the SpaceTokenizer to just use the tokenization given from UIMA
+            sentence = Sentence(sentenceToTag['text'], use_tokenizer=SpaceTokenizer())
         # NER tagging
         embeddingStorageMode = "none" if sendEmbeddings == "NONE" else "cpu"
         tagger.predict(sentence, embedding_storage_mode = embeddingStorageMode)

From 3bdb8c7f7ad9ba62c16fc8060fc640e843cb3b80 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 8 Sep 2020 15:31:20 +0200
Subject: [PATCH 025/269] Updating BioC TextMining API to v1.0.3.

---
 jcore-ign-reader/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jcore-ign-reader/pom.xml b/jcore-ign-reader/pom.xml
index df7d561d4..423a3fbce 100644
--- a/jcore-ign-reader/pom.xml
+++ b/jcore-ign-reader/pom.xml
@@ -17,7 +17,7 @@
         <dependency>
             <groupId>com.pengyifan.bioc</groupId>
             <artifactId>pengyifan-bioc</artifactId>
-            <version>1.0.2</version>
+            <version>1.0.3</version>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>

From 84477bf167f74f80306fd0cfe64c8eb3fdbd3221 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 14 Oct 2020 13:22:06 +0200
Subject: [PATCH 026/269] PMC Reader: Had issues with file paths containing
 XML-style escaped characters. Now applying XML-unescaping on the input URLs.

---
 jcore-flair-ner-ae/pom.xml                         |  2 +-
 jcore-pmc-reader/pom.xml                           |  5 +++++
 .../de/julielab/jcore/reader/pmc/CasPopulator.java |  5 ++---
 .../julielab/jcore/reader/pmc/NXMLURIIterator.java |  3 ++-
 .../jcore/reader/pmc/NXMLURIIteratorTest.java      | 14 ++++++++++++++
 5 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/jcore-flair-ner-ae/pom.xml b/jcore-flair-ner-ae/pom.xml
index 9ad39de20..f608f17a3 100644
--- a/jcore-flair-ner-ae/pom.xml
+++ b/jcore-flair-ner-ae/pom.xml
@@ -21,7 +21,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>java-stdio-ipc</artifactId>
-            <version>1.0.1</version>
+            <version>1.0.2</version>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
diff --git a/jcore-pmc-reader/pom.xml b/jcore-pmc-reader/pom.xml
index 976a1b456..8325af177 100644
--- a/jcore-pmc-reader/pom.xml
+++ b/jcore-pmc-reader/pom.xml
@@ -14,6 +14,11 @@
     </parent>
 
     <dependencies>
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-text</artifactId>
+            <version>1.9</version>
+        </dependency>
         <dependency>
             <groupId>org.slf4j</groupId>
             <artifactId>slf4j-api</artifactId>
diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
index 481e4db4c..ff3a1e0f0 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
@@ -29,9 +29,8 @@ public void populateCas(URI nxmlUri, JCas cas) throws ElementParsingException {
                 nxmlDocumentParser.reset(currentUri, cas);
                 result = nxmlDocumentParser.parse();
             } catch (DocumentParsingException e) {
-                log.warn("Error occurred: {}. Skipping document.", e.getMessage());
-                if (nxmlIterator.hasNext())
-                    currentUri = nxmlIterator.next();
+                log.warn("Error occurred when trying to read from URI {} (ASCII string: {}): {}. Skipping document.", currentUri, currentUri.toASCIIString(), e.getMessage());
+                currentUri = nxmlIterator.next();
             }
         }
         StringBuilder sb = populateCas(result, new StringBuilder());
diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/NXMLURIIterator.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/NXMLURIIterator.java
index 02b5d7feb..5ef2dbe94 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/NXMLURIIterator.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/NXMLURIIterator.java
@@ -1,5 +1,6 @@
 package de.julielab.jcore.reader.pmc;
 
+import org.apache.commons.text.StringEscapeUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -94,7 +95,7 @@ private void setFilesAndSubDirectories(File directory, boolean recursiveCall) {
                     while (entries.hasMoreElements()) {
                         final ZipEntry e = entries.nextElement();
                         if (!e.isDirectory() && e.getName().contains(".nxml") && isInWhitelist(new File(e.getName()))) {
-                            final String urlStr = "jar:" + directory.toURI().toString() + "!/" + e.getName();
+                            final String urlStr = StringEscapeUtils.unescapeXml("jar:" + directory.toURI().toString() + "!/" + e.getName());
                             URL url = new URL(urlStr);
                             try {
                                 final URI uri = url.toURI();
diff --git a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/NXMLURIIteratorTest.java b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/NXMLURIIteratorTest.java
index 14faf27df..df967924b 100644
--- a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/NXMLURIIteratorTest.java
+++ b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/NXMLURIIteratorTest.java
@@ -1,16 +1,20 @@
 package de.julielab.jcore.reader.pmc;
 
+import org.apache.commons.text.StringEscapeUtils;
 import org.junit.Test;
 
 import java.io.File;
 import java.io.FileNotFoundException;
+import java.net.MalformedURLException;
 import java.net.URI;
+import java.net.URL;
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Set;
 
 import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatCode;
 import static org.junit.Assert.assertTrue;
 
 public class NXMLURIIteratorTest {
@@ -53,4 +57,14 @@ public void testGetPmcFiles() throws Exception {
         assertThat(expectedFiles).containsExactlyInAnyOrder("PMC2847692.nxml.gz", "PMC2758189.nxml.gz",
                 "PMC2970367.nxml.gz", "PMC3201365.nxml.gz", "PMC4257438.nxml.gz");
     }
+
+    @Test
+    public void testXmlEntities() throws MalformedURLException {
+        String s = "jar:file:/data/data_corpora/PMC/non_comm_use.O-Z.xml.zip!/P&#x000e4;diatrische_Gastroenterologie,_Hepatologie_und_Ern&#x000e4;hrung/PMC7498810.nxml";
+        s = StringEscapeUtils.unescapeXml(s);
+        assertThat(s).doesNotContain("&#x000e4;");
+        URL url = new URL(s);
+        assertThat(url).isNotNull();
+        assertThatCode(() -> url.toURI()).doesNotThrowAnyException();
+    }
 }

From 7685201d7ca91f5143b3fa91596f565bbd9dc7d0 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 14 Oct 2020 13:40:06 +0200
Subject: [PATCH 027/269] Now only falling back to XML unescaping if the first
 try throws an exception.

This is a safety measure to avoid other issues that could arise from always doing the unescaping.
---
 .../jcore/reader/pmc/NXMLURIIterator.java      | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/NXMLURIIterator.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/NXMLURIIterator.java
index 5ef2dbe94..652e9db17 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/NXMLURIIterator.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/NXMLURIIterator.java
@@ -95,7 +95,7 @@ private void setFilesAndSubDirectories(File directory, boolean recursiveCall) {
                     while (entries.hasMoreElements()) {
                         final ZipEntry e = entries.nextElement();
                         if (!e.isDirectory() && e.getName().contains(".nxml") && isInWhitelist(new File(e.getName()))) {
-                            final String urlStr = StringEscapeUtils.unescapeXml("jar:" + directory.toURI().toString() + "!/" + e.getName());
+                            final String urlStr ="jar:" + directory.toURI().toString() + "!/" + e.getName();
                             URL url = new URL(urlStr);
                             try {
                                 final URI uri = url.toURI();
@@ -106,7 +106,21 @@ private void setFilesAndSubDirectories(File directory, boolean recursiveCall) {
                                 logFileSearch.error("Putting URI for URL {} into the queue was interrupted", url);
                                 throw new UncheckedPmcReaderException(e1);
                             } catch (URISyntaxException e1) {
-                                logFileSearch.error("Could not convert URL {} to URI.", url, e);
+                                // This exception can happen when the path contains XML escaped characters, e.g.
+                                // non_comm_use.O-Z.xml.zip!/P&#x000e4;diatrische_Gastroenterologie,_Hepatologie_und_Ern&#x000e4;hrung/PMC7498810.nxml
+                                // Try to unescape it.
+                                try {
+                                    url = new URL(StringEscapeUtils.unescapeXml(urlStr));
+                                    final URI uri = url.toURI();
+                                    logFileSearch.trace("Waiting to put URI {} into queue", uri);
+                                    uris.put(uri);
+                                    logFileSearch.trace("Successfully put URI {} into queue", uri);
+                                } catch (URISyntaxException e2) {
+                                    logFileSearch.error("Could not convert URL {} to URI.", url, e);
+                                } catch (InterruptedException e2) {
+                                    logFileSearch.error("Putting URI for URL {} into the queue was interrupted", url);
+                                    throw new UncheckedPmcReaderException(e2);
+                                }
                             }
                         }
                     }

From d401ae110d9a1a21bf047cb6ada9ea9af535db76 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 15 Oct 2020 11:51:18 +0200
Subject: [PATCH 028/269] Now handling the issue correctly: By URLEncoding the
 file path.

---
 jcore-pmc-reader/pom.xml                      |  5 ---
 .../jcore/reader/pmc/NXMLURIIterator.java     | 39 +++++++++----------
 .../reader/pmc/parser/NxmlDocumentParser.java |  5 +--
 .../jcore/reader/pmc/NXMLURIIteratorTest.java | 22 ++++++-----
 4 files changed, 33 insertions(+), 38 deletions(-)

diff --git a/jcore-pmc-reader/pom.xml b/jcore-pmc-reader/pom.xml
index 8325af177..976a1b456 100644
--- a/jcore-pmc-reader/pom.xml
+++ b/jcore-pmc-reader/pom.xml
@@ -14,11 +14,6 @@
     </parent>
 
     <dependencies>
-        <dependency>
-            <groupId>org.apache.commons</groupId>
-            <artifactId>commons-text</artifactId>
-            <version>1.9</version>
-        </dependency>
         <dependency>
             <groupId>org.slf4j</groupId>
             <artifactId>slf4j-api</artifactId>
diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/NXMLURIIterator.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/NXMLURIIterator.java
index 652e9db17..7aa245057 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/NXMLURIIterator.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/NXMLURIIterator.java
@@ -1,6 +1,5 @@
 package de.julielab.jcore.reader.pmc;
 
-import org.apache.commons.text.StringEscapeUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -10,15 +9,19 @@
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.net.URL;
+import java.net.URLEncoder;
 import java.nio.file.Path;
 import java.util.*;
 import java.util.concurrent.ArrayBlockingQueue;
 import java.util.concurrent.BlockingQueue;
 import java.util.concurrent.CompletableFuture;
+import java.util.stream.Collectors;
 import java.util.stream.Stream;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipFile;
 
+import static java.nio.charset.StandardCharsets.UTF_8;
+
 public class NXMLURIIterator implements Iterator<URI> {
     private final static Logger log = LoggerFactory.getLogger(NXMLURIIterator.class);
     private final static Logger logFileSearch = LoggerFactory.getLogger(NXMLURIIterator.class.getCanonicalName() + ".FileSearch");
@@ -84,49 +87,45 @@ private void setFilesAndSubDirectories(File directory, boolean recursiveCall) {
                         throw new UncheckedPmcReaderException(e);
                     }
                 }
+                // Save the subdirectories and potentially ZIP files for a recursive reading call further below
                 Stream.of(directory.listFiles(f -> f.isDirectory())).forEach(pendingSubdirs::push);
                 if (searchZip)
                     Stream.of(directory.listFiles(f -> f.isFile() && isZipFile(f))).forEach(pendingSubdirs::push);
+                logFileSearch.trace("Added subdirectories and/or ZIP files to the list of pending directories and archives. There are now {} pending.", pendingSubdirs.size());
             } else if (searchZip && isZipFile(directory)) {
                 logFileSearch.debug("Identified {} as a ZIP archive, retrieving its inventory", directory);
                 logFileSearch.debug("Searching ZIP archive {} for eligible documents", directory);
                 try (ZipFile zf = new ZipFile(directory)) {
                     final Enumeration<? extends ZipEntry> entries = zf.entries();
+                    int numEntries = 0;
                     while (entries.hasMoreElements()) {
                         final ZipEntry e = entries.nextElement();
                         if (!e.isDirectory() && e.getName().contains(".nxml") && isInWhitelist(new File(e.getName()))) {
-                            final String urlStr ="jar:" + directory.toURI().toString() + "!/" + e.getName();
-                            URL url = new URL(urlStr);
+                            final String urlStr = "jar:" + directory.toURI().toString() + "!/" + e.getName();
+                            int exclamationIndex = urlStr.indexOf('!');
+                            final String urlEncodedStr = urlStr.substring(0, exclamationIndex + 2) + Stream.of(urlStr.substring(exclamationIndex + 2).split("/")).map(x -> URLEncoder.encode(x, UTF_8)).collect(Collectors.joining("/"));
+                            URL url = new URL(urlEncodedStr);
                             try {
                                 final URI uri = url.toURI();
                                 logFileSearch.trace("Waiting to put URI {} into queue", uri);
                                 uris.put(uri);
-                                logFileSearch.trace("Successfully put URI {} into queue", uri);
+                                ++numEntries;
+                                logFileSearch.trace("Successfully put URI {} into queue. Queue size: {}", uri, uris.size());
                             } catch (InterruptedException e1) {
                                 logFileSearch.error("Putting URI for URL {} into the queue was interrupted", url);
                                 throw new UncheckedPmcReaderException(e1);
                             } catch (URISyntaxException e1) {
-                                // This exception can happen when the path contains XML escaped characters, e.g.
-                                // non_comm_use.O-Z.xml.zip!/P&#x000e4;diatrische_Gastroenterologie,_Hepatologie_und_Ern&#x000e4;hrung/PMC7498810.nxml
-                                // Try to unescape it.
-                                try {
-                                    url = new URL(StringEscapeUtils.unescapeXml(urlStr));
-                                    final URI uri = url.toURI();
-                                    logFileSearch.trace("Waiting to put URI {} into queue", uri);
-                                    uris.put(uri);
-                                    logFileSearch.trace("Successfully put URI {} into queue", uri);
-                                } catch (URISyntaxException e2) {
-                                    logFileSearch.error("Could not convert URL {} to URI.", url, e);
-                                } catch (InterruptedException e2) {
-                                    logFileSearch.error("Putting URI for URL {} into the queue was interrupted", url);
-                                    throw new UncheckedPmcReaderException(e2);
-                                }
+                                logFileSearch.error("Could not convert URL {} to URI.", url, e);
+                                throw new UncheckedPmcReaderException(e1);
                             }
                         }
                     }
+                    logFileSearch.trace("Finished retrieving files from ZIP archive {}. {} eligible documents were read.", directory, numEntries);
                 } catch (IOException e) {
                     logFileSearch.error("Could not read from {}", directory);
                     throw new UncheckedPmcReaderException(e);
+                } catch (Throwable t) {
+                    logFileSearch.error("Unexpected error:", t);
                 }
             } else {
                 logFileSearch.debug("Recursive search is deactivated, skipping subdirectory {}", directory);
@@ -179,7 +178,7 @@ private boolean isInWhitelist(File file) {
     private boolean isInWhitelist(String name) {
         boolean inWhitelist = whitelist.contains(name) || (whitelist.size() == 1 && whitelist.contains("all"));
         if (!inWhitelist)
-            log.trace("Skipping document with name/id {} because it is not contained in the white list.", name);
+            logFileSearch.trace("Skipping document with name/id {} because it is not contained in the white list.", name);
         return inWhitelist;
     }
 
diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/NxmlDocumentParser.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/NxmlDocumentParser.java
index d85e133c2..069d038f1 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/NxmlDocumentParser.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/NxmlDocumentParser.java
@@ -40,16 +40,15 @@ public class NxmlDocumentParser extends NxmlParser {
     private DefaultElementParser defaultElementParser;
     private Map<String, Map<String, Object>> tagProperties;
     private Tagset tagset;
-    private URI uri;
 
     public void reset(File nxmlFile, JCas cas) throws DocumentParsingException {
         reset(nxmlFile.toURI(), cas);
     }
 
     public void reset(URI uri, JCas cas) throws DocumentParsingException {
-        this.uri = uri;
-        boolean gzipped = uri.toString().endsWith(".gz") || this.uri.toString().endsWith(".gzip");
+        boolean gzipped = uri.toString().endsWith(".gz") || uri.toString().endsWith(".gzip");
         try {
+            log.debug("Reading from URL {}", uri.toURL());
             InputStream is = uri.toURL().openStream();
             if (gzipped)
                 is = new GZIPInputStream(is);
diff --git a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/NXMLURIIteratorTest.java b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/NXMLURIIteratorTest.java
index df967924b..8c328c2ac 100644
--- a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/NXMLURIIteratorTest.java
+++ b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/NXMLURIIteratorTest.java
@@ -1,18 +1,18 @@
 package de.julielab.jcore.reader.pmc;
 
-import org.apache.commons.text.StringEscapeUtils;
 import org.junit.Test;
 
 import java.io.File;
 import java.io.FileNotFoundException;
-import java.net.MalformedURLException;
-import java.net.URI;
-import java.net.URL;
+import java.net.*;
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
+import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.assertj.core.api.Assertions.assertThatCode;
 import static org.junit.Assert.assertTrue;
@@ -59,12 +59,14 @@ public void testGetPmcFiles() throws Exception {
     }
 
     @Test
-    public void testXmlEntities() throws MalformedURLException {
-        String s = "jar:file:/data/data_corpora/PMC/non_comm_use.O-Z.xml.zip!/P&#x000e4;diatrische_Gastroenterologie,_Hepatologie_und_Ern&#x000e4;hrung/PMC7498810.nxml";
-        s = StringEscapeUtils.unescapeXml(s);
-        assertThat(s).doesNotContain("&#x000e4;");
-        URL url = new URL(s);
+    public void testXmlEntities() throws MalformedURLException, URISyntaxException {
+        String inputPath = "jar:file:/data/data_corpora/PMC/non_comm_use.O-Z.xml.zip!/P&#x000e4;diatrische_Gastroenterologie,_Hepatologie_und_Ern&#x000e4;hrung/PMC7498810.nxml";
+        int exclamationIndex = inputPath.indexOf('!');
+        String encoded = inputPath.substring(0, exclamationIndex + 2) + Stream.of(inputPath.substring(exclamationIndex+2).split("/")).map(x -> URLEncoder.encode(x, UTF_8)).collect(Collectors.joining("/"));
+        URL url = new URL(encoded);
         assertThat(url).isNotNull();
-        assertThatCode(() -> url.toURI()).doesNotThrowAnyException();
+        assertThatCode(() -> url.toURI().toASCIIString()).doesNotThrowAnyException();
+        String outputPath = Stream.of(url.toURI().toASCIIString().split("/")).map(x -> URLDecoder.decode(x, UTF_8)).collect(Collectors.joining("/"));
+        assertThat(inputPath).isEqualTo(outputPath);
     }
 }

From d4739984e1a7974291818a60b8acb028ea2181b1 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 20 Oct 2020 09:59:22 +0200
Subject: [PATCH 029/269] `JCoReOverlapAnnotationIndex`: When searching,
 returning a list instead of a stream from that list. Added getters for the
 internal index lists. Fixes #117.

---
 .../index/JCoReOverlapAnnotationIndex.java    | 286 +++++++++---------
 1 file changed, 141 insertions(+), 145 deletions(-)

diff --git a/jcore-utilities/src/main/java/de/julielab/jcore/utility/index/JCoReOverlapAnnotationIndex.java b/jcore-utilities/src/main/java/de/julielab/jcore/utility/index/JCoReOverlapAnnotationIndex.java
index ea919ae06..7a44dedee 100644
--- a/jcore-utilities/src/main/java/de/julielab/jcore/utility/index/JCoReOverlapAnnotationIndex.java
+++ b/jcore-utilities/src/main/java/de/julielab/jcore/utility/index/JCoReOverlapAnnotationIndex.java
@@ -1,11 +1,10 @@
-/** 
- * 
+/**
  * Copyright (c) 2017, JULIE Lab.
- * All rights reserved. This program and the accompanying materials 
+ * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the BSD-2-Clause License
- *
- * Author: 
- * 
+ * <p>
+ * Author:
+ * <p>
  * Description:
  **/
 package de.julielab.jcore.utility.index;
@@ -19,7 +18,6 @@
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
-import java.util.stream.Stream;
 
 /**
  * <p>
@@ -46,145 +44,143 @@
  * elements. Those are - in the case a lies in the middle of the index elements
  * - n/2.
  * </p>
- * 
- * @author faessler
  *
- * @param <E>
- *            The annotation type the index should be over.
+ * @param <E> The annotation type the index should be over.
+ * @author faessler
  */
 public class JCoReOverlapAnnotationIndex<E extends Annotation> implements JCoReAnnotationIndex<E> {
-	private List<E> beginIndex;
-	private List<E> endIndex;
-	private boolean frozen;
-
-	public JCoReOverlapAnnotationIndex() {
-		beginIndex = new ArrayList<>();
-		endIndex = new ArrayList<>();
-	}
-
-	public JCoReOverlapAnnotationIndex(JCas jcas, int type) {
-		this(jcas, jcas.getCasType(type));
-	}
-
-	public JCoReOverlapAnnotationIndex(JCas jcas, Type type) {
-		this();
-		index(jcas, type);
-		freeze();
-	}
-
-	/**
-	 * Indexes the whole contents of the CAS annotation index of type
-	 * <tt>type</tt>. For each annotation, the {@link #indexTermGenerator} is
-	 * used to create terms with which the annotation will be associated in the
-	 * index and can be retrieved by a <code>search</code> method.
-	 * 
-	 * @param jCas
-	 *            A CAS instance.
-	 * @param type
-	 *            The annotation type to index.
-	 */
-	public void index(JCas jCas, int type) {
-		index(jCas, jCas.getCasType(type));
-	}
-
-	/**
-	 * Indexes the whole contents of the CAS annotation index of type
-	 * <tt>type</tt>. For each annotation, the {@link #indexTermGenerator} is
-	 * used to create terms with which the annotation will be associated in the
-	 * index and can be retrieved by a <code>search</code> method.
-	 * 
-	 * @param jCas
-	 *            A CAS instance.
-	 * @param type
-	 *            The annotation type to index.
-	 */
-	@SuppressWarnings("unchecked")
-	public void index(JCas jCas, Type type) {
-		FSIterator<Annotation> it = jCas.getAnnotationIndex(type).iterator();
-		while (it.hasNext()) {
-			Annotation annotation = (Annotation) it.next();
-			index((E) annotation);
-		}
-	}
-
-	public void index(E annotation) {
-		if (frozen)
-			throw new IllegalStateException("This index is frozen and cannot except further items.");
-		beginIndex.add(annotation);
-		endIndex.add(annotation);
-	}
-
-	public void freeze() {
-		frozen = true;
-		Collections.sort(beginIndex, Comparators.beginOffsetComparator());
-		Collections.sort(endIndex, Comparators.endOffsetComparator());
-	}
-
-	/**
-	 * Returns all annotation in the index overlapping in any way with a
-	 * (embedded, covering, partial overlappings). The resulting list is either
-	 * sorted by begin or end offset. It is not easily predictable which case it
-	 * is (could be added as a return value if that would be useful in any way).
-	 * 
-	 * @param a
-	 *            The annotation to retrieve overlapping annotations from the
-	 *            index for.
-	 * @return All annotations in the index overlapping a.
-	 */
-	public <T extends Annotation> Stream<E> search(T a) {
-		if (!frozen)
-			throw new IllegalStateException(
-					"This index is not frozen and cannot be used yet. Freeze the index before searching.");
-		if (beginIndex.isEmpty())
-			return Stream.empty();
-		// The following is rather difficult to understand from the code. The
-		// idea is the following:
-		// We search annotations overlapping with a. Thus, we can rule out those
-		// annotations that end before a or start after a.
-		// In the next 4 lines, we determine how many annotations can be ruled
-		// out because they start after a and how many end before a.
-		int begin = a.getBegin();
-		int end = a.getEnd();
-		int indexBeginAfterEnd = insertionPoint(JCoReTools.binarySearch(beginIndex, an -> an.getBegin(), end));
-		int indexEndBeforeBegin = insertionPoint(JCoReTools.binarySearch(endIndex, an -> an.getEnd(), begin));
-
-		// Depending on which case rules out more annotations - ending before a
-		// or starting after a - we look at the case that leaves us with the
-		// fewest annotations. If those were the annotations that started after
-		// a, then we keep those that start before a ends. Those are than
-		// filtered for annotations that end before a starts.
-		if (indexBeginAfterEnd < endIndex.size() - indexEndBeforeBegin) {
-			List<E> beginBeforeEnd = new ArrayList<>(beginIndex.subList(0, indexBeginAfterEnd));
-			ArrayList<E> result = new ArrayList<>();
-			for (E e : beginBeforeEnd) {
-				if (e.getEnd() > begin)
-					result.add(e);
-			}
-			return result.stream();
-		} else {
-			List<E> endAfterBegin = new ArrayList<>(endIndex.subList(indexEndBeforeBegin, endIndex.size()));
-			ArrayList<E> result = new ArrayList<>();
-			for (E e : endAfterBegin) {
-				if (e.getBegin() < end)
-					result.add(e);
-			}
-			return result.stream();
-		}
-	}
-
-	private int insertionPoint(int i) {
-		return i < 0 ? -(i + 1) : i;
-	}
-
-	/**
-	 * Un-freeze the index to allow new elements to be added.
-	 */
-	public void melt() {
-		frozen = false;
-	}
-
-	@Override
-	public void add(E a) {
-		index(a);
-	}
+    private List<E> beginIndex;
+    private List<E> endIndex;
+    private boolean frozen;
+
+    public JCoReOverlapAnnotationIndex() {
+        beginIndex = new ArrayList<>();
+        endIndex = new ArrayList<>();
+    }
+
+    public JCoReOverlapAnnotationIndex(JCas jcas, int type) {
+        this(jcas, jcas.getCasType(type));
+    }
+
+    public JCoReOverlapAnnotationIndex(JCas jcas, Type type) {
+        this();
+        index(jcas, type);
+        freeze();
+    }
+
+    public void index(JCas jCas, int type) {
+        index(jCas, jCas.getCasType(type));
+    }
+
+    public void index(JCas jCas, Type type) {
+        FSIterator<Annotation> it = jCas.getAnnotationIndex(type).iterator();
+        while (it.hasNext()) {
+            Annotation annotation = it.next();
+            index((E) annotation);
+        }
+    }
+
+    public void index(E annotation) {
+        if (frozen)
+            throw new IllegalStateException("This index is frozen and cannot accept further items.");
+        beginIndex.add(annotation);
+        endIndex.add(annotation);
+    }
+
+    public void freeze() {
+        frozen = true;
+        Collections.sort(beginIndex, Comparators.beginOffsetComparator());
+        Collections.sort(endIndex, Comparators.endOffsetComparator());
+    }
+
+    /**
+     * Returns all annotation in the index overlapping in any way with a
+     * (embedded, covering, partial overlappings). The resulting list is either
+     * sorted by begin or end offset. It is not easily predictable which case it
+     * is (could be added as a return value if that would be useful in any way).
+     *
+     * @param a The annotation to retrieve overlapping annotations from the
+     *          index for.
+     * @return All annotations in the index overlapping a.
+     */
+    public <T extends Annotation> List<E> search(T a) {
+        if (!frozen)
+            throw new IllegalStateException(
+                    "This index is not frozen and cannot be used yet. Freeze the index before searching.");
+        if (beginIndex.isEmpty())
+            return Collections.emptyList();
+        // The following is rather difficult to understand from the code. The
+        // idea is the following:
+        // We search annotations overlapping with a. Thus, we can rule out those
+        // annotations that end before a or start after a.
+        // In the next 4 lines, we determine how many annotations can be ruled
+        // out because they start after a and how many end before a.
+        int begin = a.getBegin();
+        int end = a.getEnd();
+        int indexBeginAfterEnd = insertionPoint(JCoReTools.binarySearch(beginIndex, an -> an.getBegin(), end));
+        int indexEndBeforeBegin = insertionPoint(JCoReTools.binarySearch(endIndex, an -> an.getEnd(), begin));
+
+        // Depending on which case rules out more annotations - ending before a
+        // or starting after a - we look at the case that leaves us with the
+        // fewest annotations. If those were the annotations that started after
+        // a, then we keep those that start before a ends. Those are than
+        // filtered for annotations that end before a starts.
+        if (indexBeginAfterEnd < endIndex.size() - indexEndBeforeBegin) {
+            List<E> beginBeforeEnd = new ArrayList<>(beginIndex.subList(0, indexBeginAfterEnd));
+            List<E> result = new ArrayList<>();
+            for (E e : beginBeforeEnd) {
+                if (e.getEnd() > begin)
+                    result.add(e);
+            }
+            return result;
+        } else {
+            List<E> endAfterBegin = new ArrayList<>(endIndex.subList(indexEndBeforeBegin, endIndex.size()));
+            List<E> result = new ArrayList<>();
+            for (E e : endAfterBegin) {
+                if (e.getBegin() < end)
+                    result.add(e);
+            }
+            return result;
+        }
+    }
+
+    private int insertionPoint(int i) {
+        return i < 0 ? -(i + 1) : i;
+    }
+
+    /**
+     * Un-freeze the index to allow new elements to be added.
+     */
+    public void melt() {
+        frozen = false;
+    }
+
+    @Override
+    public void add(E a) {
+        index(a);
+    }
+
+    /**
+     * <p>Returns the internal list where the indexed annotations are sorted by begin offset. External changes to
+     * this list might break the index.</p>
+     *
+     * @return The indexed annotations sorted bei their begin offset.
+     */
+    public List<E> getBeginIndex() {
+        return beginIndex;
+    }
+
+    /**
+     * <p>Returns the internal list where the indexed annotations are sorted by end offset. External changes to
+     * this list might break the index.</p>
+     *
+     * @return The indexed annotations sorted bei their end offset.
+     */
+    public List<E> getEndIndex() {
+        return endIndex;
+    }
+
+    public boolean isFrozen() {
+        return frozen;
+    }
 }

From dce28b13dc75b3077b96d26b0a83ea3c77017f71 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 20 Oct 2020 10:00:47 +0200
Subject: [PATCH 030/269] Adapted the index test to the fact that we now return
 the list.

---
 .../utility/index/JCoReOverlapAnnotationIndexTest.java   | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/JCoReOverlapAnnotationIndexTest.java b/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/JCoReOverlapAnnotationIndexTest.java
index ef0a044c9..e2f7a39b2 100644
--- a/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/JCoReOverlapAnnotationIndexTest.java
+++ b/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/JCoReOverlapAnnotationIndexTest.java
@@ -16,7 +16,6 @@
 import org.junit.Test;
 
 import java.util.List;
-import java.util.stream.Collectors;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
@@ -39,23 +38,23 @@ public void testOverlapAnnotationIndex() throws Exception {
 		t6.addToIndexes();
 
 		JCoReOverlapAnnotationIndex<Token> index = new JCoReOverlapAnnotationIndex<>(jcas, Token.type);
-		List<Token> result = index.search(t2).collect(Collectors.toList());
+		List<Token> result = index.search(t2);
 		assertTrue(result.contains(t1));
 		assertTrue(result.contains(t2));
 		assertTrue(result.contains(t3));
 		assertEquals(3, result.size());
 		
-		result = index.search(t1).collect(Collectors.toList());
+		result = index.search(t1);
 		assertTrue(result.contains(t1));
 		assertTrue(result.contains(t2));
 		assertEquals(2, result.size());
 		
-		result = index.search(t4).collect(Collectors.toList());
+		result = index.search(t4);
 		assertTrue(result.contains(t4));
 		assertTrue(result.contains(t5));
 		assertEquals(2, result.size());
 		
-		result = index.search(t6).collect(Collectors.toList());
+		result = index.search(t6);
 		assertTrue(result.contains(t6));
 		assertEquals(1, result.size());
 	}

From e66ebf2ef0f32ddb1ae21c336e7552032c77a99e Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 20 Oct 2020 10:07:38 +0200
Subject: [PATCH 031/269] Updating to jcore-parent 2.5.2-SNAPSHOT which updates
 the JCoRe version properties to 2.6.0-SNAPSHOT.

---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index 7f4011b1e..f401caac7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -9,7 +9,7 @@
             
     <artifactId>jcore-parent</artifactId>
             
-    <version>2.5.1</version>
+    <version>2.5.2-SNAPSHOT</version>
         
   </parent>
       

From 36b225278f1c847a416889d3887dc011df17f265 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 20 Oct 2020 12:32:48 +0200
Subject: [PATCH 032/269] JeDIS: Bumping xmi splitter version to 2.3.5.

---
 jedis-parent/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jedis-parent/pom.xml b/jedis-parent/pom.xml
index 794ee5c9c..71ffa5ceb 100644
--- a/jedis-parent/pom.xml
+++ b/jedis-parent/pom.xml
@@ -27,7 +27,7 @@
             <dependency>
                 <groupId>de.julielab</groupId>
                 <artifactId>jcore-xmi-splitter</artifactId>
-                <version>2.3.5-SNAPSHOT</version>
+                <version>2.3.5</version>
             </dependency>
         </dependencies>
     </dependencyManagement>

From f2f60d8d82f6cf55904d0c6e84c3a306a2d93d33 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 29 Oct 2020 10:15:05 +0100
Subject: [PATCH 033/269] PMC Reader: PMC IDs now have the "PMC" prefix.

This is how PubMed and PMC handle it so one can recognize full text IDs immediately.
---
 .../java/de/julielab/jcore/reader/pmc/parser/FrontParser.java   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FrontParser.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FrontParser.java
index 4823fed54..6548e00ea 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FrontParser.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FrontParser.java
@@ -109,7 +109,7 @@ else if (xPathExists(String.format(pubDateFmt, "pmc-release")))
 			Header header = new Header(nxmlDocumentParser.cas);
 			header.setComponentId(PMCReader.class.getName());
 
-			pmcid.ifPresent(header::setDocId);
+			pmcid.ifPresent(id -> header.setDocId("PMC" + id));
 			pmid.ifPresent(p -> {
 				OtherID otherID = new OtherID(nxmlDocumentParser.cas);
 				otherID.setComponentId(PMCReader.class.getName());

From 59c020c31f644d14fb7e5f45e5ad8891a14c5803 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 29 Oct 2020 10:37:18 +0100
Subject: [PATCH 034/269] Fixing the PMC reader test with regards to the PMC
 prefix for the PMC IDs.

---
 .../julielab/jcore/reader/pmc/PMCMultiplierTest.java   |  8 ++++----
 .../de/julielab/jcore/reader/pmc/PMCReaderTest.java    | 10 +++++-----
 .../jcore/reader/pmc/parser/FrontParserTest.java       |  2 +-
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCMultiplierTest.java b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCMultiplierTest.java
index b411afc46..8a8527930 100644
--- a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCMultiplierTest.java
+++ b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCMultiplierTest.java
@@ -47,8 +47,8 @@ public void testMultiplier() throws UIMAException, IOException {
             }
             ++numBatches;
         }
-        assertThat(receivedDocIds).containsExactlyInAnyOrder("2847692", "2758189",
-                "2970367", "3201365", "4257438");
+        assertThat(receivedDocIds).containsExactlyInAnyOrder("PMC2847692", "PMC2758189",
+                "PMC2970367", "PMC3201365", "PMC4257438");
         assertThat(numBatches).isEqualTo(3);
     }
 
@@ -78,8 +78,8 @@ public void testMultiplierFromDescriptors() throws UIMAException, IOException {
             }
             ++numBatches;
         }
-        assertThat(receivedDocIds).containsExactlyInAnyOrder("2847692", "2758189",
-                "2970367", "3201365", "4257438");
+        assertThat(receivedDocIds).containsExactlyInAnyOrder("PMC2847692", "PMC2758189",
+                "PMC2970367", "PMC3201365", "PMC4257438");
         assertThat(numBatches).isEqualTo(3);
     }
 }
diff --git a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCReaderTest.java b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCReaderTest.java
index 308f950d2..9d5d91007 100644
--- a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCReaderTest.java
+++ b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCReaderTest.java
@@ -83,7 +83,7 @@ public void testPmcReader2() throws Exception {
 
             cas.reset();
         }
-        assertThat(foundDocuments).containsExactlyInAnyOrder("2847692", "3201365", "4257438", "2758189", "2970367");
+        assertThat(foundDocuments).containsExactlyInAnyOrder("PMC2847692", "PMC3201365", "PMC4257438", "PMC2758189", "PMC2970367");
     }
 
     @Test
@@ -122,7 +122,7 @@ public void testPmcReaderRecursiveZip() throws Exception {
 
             cas.reset();
         }
-        assertThat(foundDocuments).containsExactlyInAnyOrder("2847692", "3201365", "4257438", "2758189", "2970367");
+        assertThat(foundDocuments).containsExactlyInAnyOrder("PMC2847692", "PMC3201365", "PMC4257438", "PMC2758189", "PMC2970367");
     }
 
     @Test
@@ -146,7 +146,7 @@ public void testPmcReaderWhitelist() throws Exception {
             foundDocuments.add(header.getDocId());
             cas.reset();
         }
-        assertThat(foundDocuments).containsExactlyInAnyOrder("2847692", "2758189");
+        assertThat(foundDocuments).containsExactlyInAnyOrder("PMC2847692", "PMC2758189");
     }
 
     @Test
@@ -176,7 +176,7 @@ public void testHeader() throws Exception {
         Header header = (Header) CasUtil.selectSingle(cas.getCas(),
                 CasUtil.getAnnotationType(cas.getCas(), Header.class));
         assertNotNull(header);
-        assertEquals("2847692", header.getDocId());
+        assertEquals("PMC2847692", header.getDocId());
         assertNotNull(header.getPubTypeList());
         assertTrue(header.getPubTypeList().size() > 0);
         assertEquals("Ambio", ((Journal) header.getPubTypeList(0)).getTitle());
@@ -378,7 +378,7 @@ public void testPmcReaderDescriptor() throws Exception {
 
             cas.reset();
         }
-        assertThat(foundDocuments).containsExactlyInAnyOrder("2847692", "3201365", "4257438", "2758189", "2970367");
+        assertThat(foundDocuments).containsExactlyInAnyOrder("PMC2847692", "PMC3201365", "PMC4257438", "PMC2758189", "PMC2970367");
     }
 
     @Test
diff --git a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/parser/FrontParserTest.java b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/parser/FrontParserTest.java
index c09fc6313..c5ac41078 100644
--- a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/parser/FrontParserTest.java
+++ b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/parser/FrontParserTest.java
@@ -38,7 +38,7 @@ public void testParser() throws Exception {
 		Annotation annotation = frontResult.getAnnotation();
 		assertTrue(annotation instanceof Header);
 		Header header = (Header) annotation;
-		assertEquals("2847692", header.getDocId());
+		assertEquals("PMC2847692", header.getDocId());
 		assertEquals("10.1007/s13280-009-0005-8", header.getDoi());
 		assertNotNull(header.getOtherIDs());
 		assertTrue(header.getOtherIDs().size() > 0);

From 429ef777337a6dd5cf91fb2b3f0d7ce4af29d294 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 29 Oct 2020 15:10:31 +0100
Subject: [PATCH 035/269] Adding the source "PubMed Central" to the header
 created by the PMC reader.

---
 .../java/de/julielab/jcore/reader/pmc/parser/FrontParser.java    | 1 +
 1 file changed, 1 insertion(+)

diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FrontParser.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FrontParser.java
index 6548e00ea..b21a66aec 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FrontParser.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FrontParser.java
@@ -107,6 +107,7 @@ else if (xPathExists(String.format(pubDateFmt, "pmc-release")))
 			assert volume.isPresent();
 
 			Header header = new Header(nxmlDocumentParser.cas);
+			header.setSource("PubMed Central");
 			header.setComponentId(PMCReader.class.getName());
 
 			pmcid.ifPresent(id -> header.setDocId("PMC" + id));

From 0f8211ba29a5e7c3ec95929c5073f793417a7d18 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 29 Oct 2020 15:57:58 +0100
Subject: [PATCH 036/269] Updating flair in travis to 0.6.1.

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 57daeceac..172756b0e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -31,7 +31,7 @@ before_install:
     if ! find "$HOME/pip-cache" -mindepth 1 -print -quit 2>/dev/null | grep -q .; then
       $PYTHON -m pip download --destination-directory="$HOME/pip-cache" flair
     fi
-    sudo -H $PYTHON -m pip install --find-links="$HOME/pip-cache" flair==0.4.5
+    sudo -H $PYTHON -m pip install --find-links="$HOME/pip-cache" flair==0.6.1
   - #./travis-deployment/install-flair-nightly.sh
   - export BOTO_CONFIG=/dev/null
 install: mvn install -DskipTests=true -Dmaven.javadoc.skip=true -B -V

From 02857425ded3855bdfe5fb2498d7e913eb8dc50a Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 19 Feb 2021 13:47:57 +0100
Subject: [PATCH 037/269] Fixed the parent of the acronym writer

---
 jcore-acronym-writer/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jcore-acronym-writer/pom.xml b/jcore-acronym-writer/pom.xml
index e01349996..69f995886 100644
--- a/jcore-acronym-writer/pom.xml
+++ b/jcore-acronym-writer/pom.xml
@@ -10,7 +10,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.5.0-SNAPSHOT</version>
+        <version>2.6.0-SNAPSHOT</version>
     </parent>
 
     <dependencies>

From 754e2983bfbe5355662719c74ca4e7f48e855df1 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 23 Feb 2021 15:19:43 +0100
Subject: [PATCH 038/269] Trying to add a coreference writer and made some
 small changes. Have issues with not found classes in IntelliJ which are
 obviously present, don't know.

---
 jcore-acronym-writer/pom.xml                  |  3 +-
 .../acronyms/desc/jcore-acronym-writer.xml    |  2 +-
 .../consumer/acronyms/AcronymWriterTest.java  |  2 +-
 jcore-coreference-writer/LICENSE              | 26 ++++++
 jcore-coreference-writer/README.md            | 26 ++++++
 jcore-coreference-writer/component.meta       | 20 +++++
 jcore-coreference-writer/pom.xml              | 61 +++++++++++++
 .../coreference/CoreferenceWriter.java        | 86 +++++++++++++++++++
 .../coreference/desc/jcore-acronym-writer.xml | 33 +++++++
 .../coreference/CoreferenceWriterTest.java    | 10 +++
 jcore-neo4j-relations-consumer/pom.xml        | 12 +--
 .../jcore/types/jcore-discourse-types.xml     |  2 +-
 .../jcore/utility/index/JCoReCoverIndex.java  |  6 +-
 pom.xml                                       |  6 +-
 14 files changed, 279 insertions(+), 16 deletions(-)
 create mode 100644 jcore-coreference-writer/LICENSE
 create mode 100644 jcore-coreference-writer/README.md
 create mode 100644 jcore-coreference-writer/component.meta
 create mode 100644 jcore-coreference-writer/pom.xml
 create mode 100644 jcore-coreference-writer/src/main/java/de/julielab/jcore/consumer/coreference/CoreferenceWriter.java
 create mode 100644 jcore-coreference-writer/src/main/resources/de/julielab/jcore/consumer/coreference/desc/jcore-acronym-writer.xml
 create mode 100644 jcore-coreference-writer/src/test/java/de/julielab/jcore/consumer/coreference/CoreferenceWriterTest.java

diff --git a/jcore-acronym-writer/pom.xml b/jcore-acronym-writer/pom.xml
index 69f995886..035774709 100644
--- a/jcore-acronym-writer/pom.xml
+++ b/jcore-acronym-writer/pom.xml
@@ -5,7 +5,6 @@
     <modelVersion>4.0.0</modelVersion>
     <artifactId>jcore-acronym-writer</artifactId>
     <packaging>jar</packaging>
-    <groupId>de.julielab.jcore.consumer.acronyms</groupId>
 
     <parent>
         <groupId>de.julielab</groupId>
@@ -58,5 +57,5 @@
         </license>
     </licenses>
     <url>https://github.com/JULIELab/jcore-base/tree/master/jcore-acronym-writer</url>
-    <description>Writes acronyms annotations from the CAS to a text file format.</description>
+    <description>Writes acronym annotations from the CAS to a text file format.</description>
 </project>
diff --git a/jcore-acronym-writer/src/main/resources/de/julielab/jcore/consumer/acronyms/desc/jcore-acronym-writer.xml b/jcore-acronym-writer/src/main/resources/de/julielab/jcore/consumer/acronyms/desc/jcore-acronym-writer.xml
index 6659cbf31..26840e7c6 100644
--- a/jcore-acronym-writer/src/main/resources/de/julielab/jcore/consumer/acronyms/desc/jcore-acronym-writer.xml
+++ b/jcore-acronym-writer/src/main/resources/de/julielab/jcore/consumer/acronyms/desc/jcore-acronym-writer.xml
@@ -2,7 +2,7 @@
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <primitive>true</primitive>
-    <annotatorImplementationName>de.julielab.jcore.consumer.acronyms.AcronymWriter</annotatorImplementationName>
+    <annotatorImplementationName>de.julielab.jcore.consumer.coreference.AcronymWriter</annotatorImplementationName>
     <analysisEngineMetaData>
         <name>JCoRe Acronym Writer</name>
         <description>Writes acronym annotation to a text file.</description>
diff --git a/jcore-acronym-writer/src/test/java/de/julielab/jcore/consumer/acronyms/AcronymWriterTest.java b/jcore-acronym-writer/src/test/java/de/julielab/jcore/consumer/acronyms/AcronymWriterTest.java
index 243f4481a..c63bfd442 100644
--- a/jcore-acronym-writer/src/test/java/de/julielab/jcore/consumer/acronyms/AcronymWriterTest.java
+++ b/jcore-acronym-writer/src/test/java/de/julielab/jcore/consumer/acronyms/AcronymWriterTest.java
@@ -1,5 +1,5 @@
 
-package de.julielab.jcore.consumer.acronyms;
+package de.julielab.jcore.consumer.coreference;
 
 /**
  * Unit tests for jcore-acronym-writer.
diff --git a/jcore-coreference-writer/LICENSE b/jcore-coreference-writer/LICENSE
new file mode 100644
index 000000000..7190118b3
--- /dev/null
+++ b/jcore-coreference-writer/LICENSE
@@ -0,0 +1,26 @@
+BSD 2-Clause License
+
+Copyright (c) 2021, JULIE Lab
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/jcore-coreference-writer/README.md b/jcore-coreference-writer/README.md
new file mode 100644
index 000000000..da767a4d1
--- /dev/null
+++ b/jcore-coreference-writer/README.md
@@ -0,0 +1,26 @@
+# JCoRe Acronym Writer
+
+**Descriptor Path**:
+```
+de.julielab.jcore.consumer.acronyms.desc.jcore-acronym-writer
+```
+
+Writes acronyms annotations from the CAS to a text file format.
+
+
+
+**1. Parameters**
+
+| Parameter Name | Parameter Type | Mandatory | Multivalued | Description |
+|----------------|----------------|-----------|-------------|-------------|
+| OutputFile | string | true | false | Path to the ourput file. |
+
+
+**2. Capabilities**
+
+| Type | Input | Output |
+|------|:-----:|:------:|
+| de.julielab.jcore.types.Abbreviation | `+` | |
+
+
+
diff --git a/jcore-coreference-writer/component.meta b/jcore-coreference-writer/component.meta
new file mode 100644
index 000000000..b0999bc38
--- /dev/null
+++ b/jcore-coreference-writer/component.meta
@@ -0,0 +1,20 @@
+{
+    "categories": [
+        "consumer"
+    ],
+    "description": "Writes acronyms annotations from the CAS to a text file format.",
+    "descriptors": [
+        {
+            "category": "consumer",
+            "location": "de.julielab.jcore.consumer.acronyms.desc.jcore-acronym-writer"
+        }
+    ],
+    "exposable": true,
+    "group": "general",
+    "maven-artifact": {
+        "artifactId": "jcore-acronym-writer",
+        "groupId": "de.julielab.jcore.consumer.acronyms",
+        "version": "2.5.0-SNAPSHOT"
+    },
+    "name": "JCoRe Acronym Writer"
+}
diff --git a/jcore-coreference-writer/pom.xml b/jcore-coreference-writer/pom.xml
new file mode 100644
index 000000000..ee4c26044
--- /dev/null
+++ b/jcore-coreference-writer/pom.xml
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>jcore-coreference-writer</artifactId>
+    <packaging>jar</packaging>
+
+    <parent>
+        <groupId>de.julielab</groupId>
+        <artifactId>jcore-base</artifactId>
+        <version>2.6.0-SNAPSHOT</version>
+    </parent>
+
+    <dependencies>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-descriptor-creator</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-utilities</artifactId>
+            <version>${jcore-utilities-version}</version>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>julielab-java-utilities</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-types</artifactId>
+            <version>${jcore-version}</version>
+        </dependency>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+        </dependency>
+    </dependencies>
+    <name>JCoRe Coreference Writer</name>
+    <organization>
+        <name>JULIE Lab Jena, Germany</name>
+        <url>http://www.julielab.de</url>
+    </organization>
+    <licenses>
+        <license>
+            <name>BSD-2-Clause</name>
+            <url>https://opensource.org/licenses/BSD-2-Clause</url>
+        </license>
+    </licenses>
+    <url>https://github.com/JULIELab/jcore-base/tree/master/jcore-coreference-writer</url>
+    <description>Writes coreference annotations from the CAS to a text file format.</description>
+</project>
diff --git a/jcore-coreference-writer/src/main/java/de/julielab/jcore/consumer/coreference/CoreferenceWriter.java b/jcore-coreference-writer/src/main/java/de/julielab/jcore/consumer/coreference/CoreferenceWriter.java
new file mode 100644
index 000000000..27eb28de2
--- /dev/null
+++ b/jcore-coreference-writer/src/main/java/de/julielab/jcore/consumer/coreference/CoreferenceWriter.java
@@ -0,0 +1,86 @@
+
+package de.julielab.jcore.consumer.coreference;
+
+import de.julielab.java.utilities.FileUtilities;
+import de.julielab.jcore.types.Abbreviation;
+import de.julielab.jcore.utility.JCoReTools;
+import org.apache.commons.io.IOUtils;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.CASRuntimeException;
+import org.apache.uima.cas.FSIterator;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.descriptor.ResourceMetaData;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.HashMap;
+import java.util.Map;
+
+@ResourceMetaData(name = "JCoRe Coreference Writer", description = "Writes co-reference annotation to a text file.")
+public class CoreferenceWriter extends JCasAnnotator_ImplBase {
+
+	public static final String PARAM_OUTPUTFILE = "OutputFile";
+
+	@ConfigurationParameter(name = PARAM_OUTPUTFILE)
+	private String outputFile;
+	private OutputStream os;
+
+	@Override
+	public void initialize(UimaContext aContext) throws ResourceInitializationException {
+		super.initialize(aContext);
+		outputFile = (String) aContext.getConfigParameterValue(PARAM_OUTPUTFILE);
+		try {
+			os = FileUtilities.getOutputStreamToFile(new File(outputFile));
+		} catch (IOException e) {
+			throw new ResourceInitializationException(e);
+		}
+	}
+
+	@Override
+	public void process(JCas jcas) throws AnalysisEngineProcessException {
+		try {
+			String pubmedId = JCoReTools.getDocId(jcas);
+			FSIterator<Annotation> it = jcas.getAnnotationIndex(Abbreviation.type).iterator();
+
+			Map<de.julielab.jcore.types.Annotation, String> fullForms = new HashMap<>();
+			int abbrCount = 0;
+			while (it.hasNext()) {
+				Abbreviation abbr = (Abbreviation) it.next();
+				de.julielab.jcore.types.Annotation textReference = abbr.getTextReference();
+
+				String abbrId = "A" + abbrCount;
+
+				String fullformId = fullForms.get(textReference);
+				if (fullformId == null) {
+					fullformId = "F" + abbrCount;
+					fullForms.put(textReference, fullformId);
+					IOUtils.write(String.join("\t", pubmedId, fullformId, String.valueOf(textReference.getBegin()),
+							String.valueOf(textReference.getEnd())) + "\n", os, "UTF-8");
+				}
+
+				IOUtils.write(String.join("\t", pubmedId, abbrId, String.valueOf(abbr.getBegin()),
+						String.valueOf(abbr.getEnd()), fullformId) + "\n", os, "UTF-8");
+
+				++abbrCount;
+			}
+		} catch (CASRuntimeException | IOException e) {
+			throw new AnalysisEngineProcessException(e);
+		}
+	}
+
+	@Override
+	public void collectionProcessComplete() throws AnalysisEngineProcessException {
+		try {
+			os.close();
+		} catch (IOException e) {
+			throw new AnalysisEngineProcessException(e);
+		}
+	}
+
+}
diff --git a/jcore-coreference-writer/src/main/resources/de/julielab/jcore/consumer/coreference/desc/jcore-acronym-writer.xml b/jcore-coreference-writer/src/main/resources/de/julielab/jcore/consumer/coreference/desc/jcore-acronym-writer.xml
new file mode 100644
index 000000000..71991a2ce
--- /dev/null
+++ b/jcore-coreference-writer/src/main/resources/de/julielab/jcore/consumer/coreference/desc/jcore-acronym-writer.xml
@@ -0,0 +1,33 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+    <primitive>true</primitive>
+    <annotatorImplementationName>de.julielab.jcore.consumer.acronyms.CoreferenceWriter</annotatorImplementationName>
+    <analysisEngineMetaData>
+        <name>JCoRe Acronym Writer</name>
+        <description>Writes acronym annotation to a text file.</description>
+        <version>2.6.0-SNAPSHOT</version>
+        <configurationParameters>
+            <configurationParameter>
+                <name>OutputFile</name>
+                <description />
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>true</mandatory>
+            </configurationParameter>
+        </configurationParameters>
+        <configurationParameterSettings />
+        <typeSystemDescription>
+            <imports>
+                <import name="de.julielab.jcore.types.jcore-morpho-syntax-types" />
+            </imports>
+        </typeSystemDescription>
+        <fsIndexCollection />
+        <capabilities />
+        <operationalProperties>
+            <modifiesCas>true</modifiesCas>
+            <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+            <outputsNewCASes>false</outputsNewCASes>
+        </operationalProperties>
+    </analysisEngineMetaData>
+</analysisEngineDescription>
\ No newline at end of file
diff --git a/jcore-coreference-writer/src/test/java/de/julielab/jcore/consumer/coreference/CoreferenceWriterTest.java b/jcore-coreference-writer/src/test/java/de/julielab/jcore/consumer/coreference/CoreferenceWriterTest.java
new file mode 100644
index 000000000..9e3e8e14a
--- /dev/null
+++ b/jcore-coreference-writer/src/test/java/de/julielab/jcore/consumer/coreference/CoreferenceWriterTest.java
@@ -0,0 +1,10 @@
+
+package de.julielab.jcore.consumer.acronyms;
+
+/**
+ * Unit tests for jcore-acronym-writer.
+ *
+ */
+public class CoreferenceWriterTest {
+// TODO
+}
diff --git a/jcore-neo4j-relations-consumer/pom.xml b/jcore-neo4j-relations-consumer/pom.xml
index 6b0d0060c..92fc5f29b 100644
--- a/jcore-neo4j-relations-consumer/pom.xml
+++ b/jcore-neo4j-relations-consumer/pom.xml
@@ -29,11 +29,6 @@
             <artifactId>jcore-types</artifactId>
             <version>${jcore-types-version}</version>
         </dependency>
-        <dependency>
-            <groupId>de.julielab</groupId>
-            <artifactId>julielab-neo4j-plugins-concepts-representation</artifactId>
-            <version>3.0.0-SNAPSHOT</version>
-        </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-utilities</artifactId>
@@ -50,10 +45,15 @@
             <version>4.0.4</version>
             <scope>test</scope>
         </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>julielab-neo4j-plugins-concepts-representation</artifactId>
+            <version>3.0.1-SNAPSHOT</version>
+        </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>julielab-neo4j-plugins-concepts</artifactId>
-            <version>3.0.0-SNAPSHOT</version>
+            <version>3.0.1-SNAPSHOT</version>
             <scope>test</scope>
         </dependency>
         <dependency>
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-discourse-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-discourse-types.xml
index ab4888c8c..01d7e272e 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-discourse-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-discourse-types.xml
@@ -34,7 +34,7 @@
     </typeDescription>
     <typeDescription>
       <name>de.julielab.jcore.types.CorefExpression</name>
-      <description>A 'coreference expression' might by any span of text that is part of a set of text expressions refering to the same entity. Speaking in the anaphora framework, coreference expressions are either anaphors - mostly pronouns and definite noun phrases - or their antecedents - the original, first mention of an entity or already an anaphoric expression referring itself to a reference to the original entity mention.</description>
+      <description>A 'coreference expression' might be any span of text that is part of a set of text expressions referring to the same entity. Speaking in the anaphora framework, co-reference expressions are either anaphors - mostly pronouns and definite noun phrases - or their antecedents - the original, first mention of an entity or already an anaphoric expression referring itself to a reference to the original entity mention.</description>
       <supertypeName>de.julielab.jcore.types.Annotation</supertypeName>
       <features>
         <featureDescription>
diff --git a/jcore-utilities/src/main/java/de/julielab/jcore/utility/index/JCoReCoverIndex.java b/jcore-utilities/src/main/java/de/julielab/jcore/utility/index/JCoReCoverIndex.java
index 02d192b73..ef6c6588b 100644
--- a/jcore-utilities/src/main/java/de/julielab/jcore/utility/index/JCoReCoverIndex.java
+++ b/jcore-utilities/src/main/java/de/julielab/jcore/utility/index/JCoReCoverIndex.java
@@ -106,10 +106,8 @@ public void index(JCas jCas, Type type) {
 	 * indexed annotations, first {@link #freeze()} the index and then
 	 * {@link #search(int, int)} it.
 	 * 
-	 * @param jCas
-	 *            A CAS instance.
-	 * @param type
-	 *            The annotation type to index.
+	 * @param annotation
+	 *            A UIMA annotation
 	 */
 	public void index(E annotation) {
 		if (frozen)
diff --git a/pom.xml b/pom.xml
index f401caac7..8768f3a25 100644
--- a/pom.xml
+++ b/pom.xml
@@ -74,6 +74,8 @@
     <module>jcore-ace-reader</module>
             
     <module>jcore-acronym-ae</module>
+
+    <module>jcore-acronym-writer</module>
             
     <module>jcore-banner-ae</module>
             
@@ -88,7 +90,9 @@
     <module>jcore-conll-consumer</module>
             
     <module>jcore-coordination-baseline-ae</module>
-            
+
+    <module>jcore-coreference-writer</module>
+
     <module>jcore-ct-reader</module>
             
     <module>jcore-descriptor-creator</module>

From 9f80444f9df180c3ee5bb5b0a2c381c071d1c440 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 23 Feb 2021 15:37:46 +0100
Subject: [PATCH 039/269] First sketch of the coreference writer

---
 jcore-acronym-writer/component.meta           |  6 ++--
 jcore-coreference-writer/component.meta       | 12 +++----
 .../coreference/CoreferenceWriter.java        | 36 +++++++++++--------
 .../coreference/CoreferenceWriterTest.java    |  4 +--
 4 files changed, 32 insertions(+), 26 deletions(-)

diff --git a/jcore-acronym-writer/component.meta b/jcore-acronym-writer/component.meta
index b0999bc38..6869b7664 100644
--- a/jcore-acronym-writer/component.meta
+++ b/jcore-acronym-writer/component.meta
@@ -2,7 +2,7 @@
     "categories": [
         "consumer"
     ],
-    "description": "Writes acronyms annotations from the CAS to a text file format.",
+    "description": "Writes acronym annotations from the CAS to a text file format.",
     "descriptors": [
         {
             "category": "consumer",
@@ -13,8 +13,8 @@
     "group": "general",
     "maven-artifact": {
         "artifactId": "jcore-acronym-writer",
-        "groupId": "de.julielab.jcore.consumer.acronyms",
-        "version": "2.5.0-SNAPSHOT"
+        "groupId": "de.julielab",
+        "version": "2.6.0-SNAPSHOT"
     },
     "name": "JCoRe Acronym Writer"
 }
diff --git a/jcore-coreference-writer/component.meta b/jcore-coreference-writer/component.meta
index b0999bc38..ec5fe6810 100644
--- a/jcore-coreference-writer/component.meta
+++ b/jcore-coreference-writer/component.meta
@@ -2,19 +2,19 @@
     "categories": [
         "consumer"
     ],
-    "description": "Writes acronyms annotations from the CAS to a text file format.",
+    "description": "Writes coreference annotations from the CAS to a text file format.",
     "descriptors": [
         {
             "category": "consumer",
-            "location": "de.julielab.jcore.consumer.acronyms.desc.jcore-acronym-writer"
+            "location": "de.julielab.jcore.consumer.coreference.desc.jcore-acronym-writer"
         }
     ],
     "exposable": true,
     "group": "general",
     "maven-artifact": {
-        "artifactId": "jcore-acronym-writer",
-        "groupId": "de.julielab.jcore.consumer.acronyms",
-        "version": "2.5.0-SNAPSHOT"
+        "artifactId": "jcore-coreference-writer",
+        "groupId": "de.julielab",
+        "version": "2.6.0-SNAPSHOT"
     },
-    "name": "JCoRe Acronym Writer"
+    "name": "JCoRe Coreference Writer"
 }
diff --git a/jcore-coreference-writer/src/main/java/de/julielab/jcore/consumer/coreference/CoreferenceWriter.java b/jcore-coreference-writer/src/main/java/de/julielab/jcore/consumer/coreference/CoreferenceWriter.java
index 27eb28de2..0884f6509 100644
--- a/jcore-coreference-writer/src/main/java/de/julielab/jcore/consumer/coreference/CoreferenceWriter.java
+++ b/jcore-coreference-writer/src/main/java/de/julielab/jcore/consumer/coreference/CoreferenceWriter.java
@@ -3,6 +3,8 @@
 
 import de.julielab.java.utilities.FileUtilities;
 import de.julielab.jcore.types.Abbreviation;
+import de.julielab.jcore.types.CorefExpression;
+import de.julielab.jcore.types.CorefRelation;
 import de.julielab.jcore.utility.JCoReTools;
 import org.apache.commons.io.IOUtils;
 import org.apache.uima.UimaContext;
@@ -10,9 +12,11 @@
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.CASRuntimeException;
 import org.apache.uima.cas.FSIterator;
+import org.apache.uima.cas.FeatureStructure;
 import org.apache.uima.fit.descriptor.ConfigurationParameter;
 import org.apache.uima.fit.descriptor.ResourceMetaData;
 import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSArray;
 import org.apache.uima.jcas.tcas.Annotation;
 import org.apache.uima.resource.ResourceInitializationException;
 
@@ -20,6 +24,7 @@
 import java.io.IOException;
 import java.io.OutputStream;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.Map;
 
 @ResourceMetaData(name = "JCoRe Coreference Writer", description = "Writes co-reference annotation to a text file.")
@@ -46,28 +51,29 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
 	public void process(JCas jcas) throws AnalysisEngineProcessException {
 		try {
 			String pubmedId = JCoReTools.getDocId(jcas);
-			FSIterator<Annotation> it = jcas.getAnnotationIndex(Abbreviation.type).iterator();
+			FSIterator<CorefRelation> it = jcas.<CorefRelation>getAnnotationIndex(CorefRelation.type).iterator();
 
-			Map<de.julielab.jcore.types.Annotation, String> fullForms = new HashMap<>();
-			int abbrCount = 0;
+			int relcount = 0;
 			while (it.hasNext()) {
-				Abbreviation abbr = (Abbreviation) it.next();
-				de.julielab.jcore.types.Annotation textReference = abbr.getTextReference();
+				CorefRelation rel = it.next();
+				de.julielab.jcore.types.Annotation anaphora = rel.getAnaphora();
 
-				String abbrId = "A" + abbrCount;
+				String abbrId = "Ana" + relcount;
 
-				String fullformId = fullForms.get(textReference);
-				if (fullformId == null) {
-					fullformId = "F" + abbrCount;
-					fullForms.put(textReference, fullformId);
-					IOUtils.write(String.join("\t", pubmedId, fullformId, String.valueOf(textReference.getBegin()),
-							String.valueOf(textReference.getEnd())) + "\n", os, "UTF-8");
+				IOUtils.write(String.join("\t", pubmedId, abbrId, String.valueOf(anaphora.getBegin()),
+						String.valueOf(anaphora.getEnd())) + "\n", os, "UTF-8");
+
+				Iterator<FeatureStructure> antecedentsIt = rel.getAntecedents().iterator();
+				while (antecedentsIt.hasNext()) {
+					CorefExpression antecedent = (CorefExpression) antecedentsIt.next();
+
+					String antecedentGroup = "Ant" + relcount;
+					IOUtils.write(String.join("\t", pubmedId, antecedentGroup, String.valueOf(antecedent.getBegin()),
+							String.valueOf(antecedent.getEnd())) + "\n", os, "UTF-8");
 				}
 
-				IOUtils.write(String.join("\t", pubmedId, abbrId, String.valueOf(abbr.getBegin()),
-						String.valueOf(abbr.getEnd()), fullformId) + "\n", os, "UTF-8");
 
-				++abbrCount;
+				++relcount;
 			}
 		} catch (CASRuntimeException | IOException e) {
 			throw new AnalysisEngineProcessException(e);
diff --git a/jcore-coreference-writer/src/test/java/de/julielab/jcore/consumer/coreference/CoreferenceWriterTest.java b/jcore-coreference-writer/src/test/java/de/julielab/jcore/consumer/coreference/CoreferenceWriterTest.java
index 9e3e8e14a..7b7bf0429 100644
--- a/jcore-coreference-writer/src/test/java/de/julielab/jcore/consumer/coreference/CoreferenceWriterTest.java
+++ b/jcore-coreference-writer/src/test/java/de/julielab/jcore/consumer/coreference/CoreferenceWriterTest.java
@@ -1,8 +1,8 @@
 
-package de.julielab.jcore.consumer.acronyms;
+package de.julielab.jcore.consumer.coreference;
 
 /**
- * Unit tests for jcore-acronym-writer.
+ * Unit tests for jcore-coreference-writer.
  *
  */
 public class CoreferenceWriterTest {

From cf54d884c5677b8ef2f0e64bb02bdef2b474ab5a Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 23 Feb 2021 15:56:24 +0100
Subject: [PATCH 040/269] Bug fixing

---
 ...re-acronym-writer.xml => jcore-coreference-writer.xml} | 8 ++++----
 .../ae/jsbd/desc/SentenceAnnotator_with-scope_Test.xml    | 1 +
 2 files changed, 5 insertions(+), 4 deletions(-)
 rename jcore-coreference-writer/src/main/resources/de/julielab/jcore/consumer/coreference/desc/{jcore-acronym-writer.xml => jcore-coreference-writer.xml} (77%)

diff --git a/jcore-coreference-writer/src/main/resources/de/julielab/jcore/consumer/coreference/desc/jcore-acronym-writer.xml b/jcore-coreference-writer/src/main/resources/de/julielab/jcore/consumer/coreference/desc/jcore-coreference-writer.xml
similarity index 77%
rename from jcore-coreference-writer/src/main/resources/de/julielab/jcore/consumer/coreference/desc/jcore-acronym-writer.xml
rename to jcore-coreference-writer/src/main/resources/de/julielab/jcore/consumer/coreference/desc/jcore-coreference-writer.xml
index 71991a2ce..855be5b78 100644
--- a/jcore-coreference-writer/src/main/resources/de/julielab/jcore/consumer/coreference/desc/jcore-acronym-writer.xml
+++ b/jcore-coreference-writer/src/main/resources/de/julielab/jcore/consumer/coreference/desc/jcore-coreference-writer.xml
@@ -2,10 +2,10 @@
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <primitive>true</primitive>
-    <annotatorImplementationName>de.julielab.jcore.consumer.acronyms.CoreferenceWriter</annotatorImplementationName>
+    <annotatorImplementationName>de.julielab.jcore.consumer.coreference.CoreferenceWriter</annotatorImplementationName>
     <analysisEngineMetaData>
-        <name>JCoRe Acronym Writer</name>
-        <description>Writes acronym annotation to a text file.</description>
+        <name>JCoRe Coreference Writer</name>
+        <description>Writes coreference annotation to a text file.</description>
         <version>2.6.0-SNAPSHOT</version>
         <configurationParameters>
             <configurationParameter>
@@ -19,7 +19,7 @@
         <configurationParameterSettings />
         <typeSystemDescription>
             <imports>
-                <import name="de.julielab.jcore.types.jcore-morpho-syntax-types" />
+                <import name="de.julielab.jcore.types.jcore-discourse-types" />
             </imports>
         </typeSystemDescription>
         <fsIndexCollection />
diff --git a/jcore-jsbd-ae/src/test/resources/de/julielab/jcore/ae/jsbd/desc/SentenceAnnotator_with-scope_Test.xml b/jcore-jsbd-ae/src/test/resources/de/julielab/jcore/ae/jsbd/desc/SentenceAnnotator_with-scope_Test.xml
index 28c03ebe8..835faf684 100644
--- a/jcore-jsbd-ae/src/test/resources/de/julielab/jcore/ae/jsbd/desc/SentenceAnnotator_with-scope_Test.xml
+++ b/jcore-jsbd-ae/src/test/resources/de/julielab/jcore/ae/jsbd/desc/SentenceAnnotator_with-scope_Test.xml
@@ -54,6 +54,7 @@
     <typeSystemDescription>
       <imports>
         <import name="de.julielab.jcore.types.jcore-morpho-syntax-types" />
+        <import name="de.julielab.jcore.types.jcore-document-meta-types" />
         <import location="paragraph-scope-type.xml" />
       </imports>
     </typeSystemDescription>

From a45cb71e67bb1284b989bc4d0b92ed79258d408a Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 23 Feb 2021 16:02:10 +0100
Subject: [PATCH 041/269] Meta descriptor update

---
 jcore-coreference-writer/component.meta | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jcore-coreference-writer/component.meta b/jcore-coreference-writer/component.meta
index ec5fe6810..77f18d497 100644
--- a/jcore-coreference-writer/component.meta
+++ b/jcore-coreference-writer/component.meta
@@ -6,7 +6,7 @@
     "descriptors": [
         {
             "category": "consumer",
-            "location": "de.julielab.jcore.consumer.coreference.desc.jcore-acronym-writer"
+            "location": "de.julielab.jcore.consumer.coreference.desc.jcore-coreference-writer"
         }
     ],
     "exposable": true,

From 9878a3979d4a62dd58816e695286dbd541dae8f0 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 1 Apr 2021 13:06:34 +0200
Subject: [PATCH 042/269] Adding the jcore-semantics-mention-types type system
 to the lingpipe gazetteer configurable descriptor.

---
 .../coreference/CoreferenceWriter.java        | 119 +++++++++---------
 .../jcore/misc/DescriptorCreator.java         |  30 ++---
 .../de.julielab.jcore.ae.testae.TestAE.xml    |  62 ++++-----
 ...ore.consumer.testconsumer.Testconsumer.xml |  62 ++++-----
 ...ltiplier.testmultiplier.TestMultiplier.xml |  62 ++++-----
 ...lab.jcore.reader.testreader.TestReader.xml |  62 ++++-----
 ...-0.json-Eriks-MacBook-Air-2.local-2-2.json |   3 -
 .../EntityEvaluatorConsumer.java              |   2 +-
 ...ipe-gazetteer-ae-configurable-resource.xml |   3 +
 9 files changed, 204 insertions(+), 201 deletions(-)
 delete mode 100644 jcore-elasticsearch-consumer/src/test/resources/onefile-output/thefile-Eriks-MacBook-Air-2.local-1-0.json-Eriks-MacBook-Air-2.local-2-2.json

diff --git a/jcore-coreference-writer/src/main/java/de/julielab/jcore/consumer/coreference/CoreferenceWriter.java b/jcore-coreference-writer/src/main/java/de/julielab/jcore/consumer/coreference/CoreferenceWriter.java
index 0884f6509..32613e57d 100644
--- a/jcore-coreference-writer/src/main/java/de/julielab/jcore/consumer/coreference/CoreferenceWriter.java
+++ b/jcore-coreference-writer/src/main/java/de/julielab/jcore/consumer/coreference/CoreferenceWriter.java
@@ -1,4 +1,3 @@
-
 package de.julielab.jcore.consumer.coreference;
 
 import de.julielab.java.utilities.FileUtilities;
@@ -26,67 +25,69 @@
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Map;
+import java.util.Spliterators;
 
 @ResourceMetaData(name = "JCoRe Coreference Writer", description = "Writes co-reference annotation to a text file.")
 public class CoreferenceWriter extends JCasAnnotator_ImplBase {
 
-	public static final String PARAM_OUTPUTFILE = "OutputFile";
-
-	@ConfigurationParameter(name = PARAM_OUTPUTFILE)
-	private String outputFile;
-	private OutputStream os;
-
-	@Override
-	public void initialize(UimaContext aContext) throws ResourceInitializationException {
-		super.initialize(aContext);
-		outputFile = (String) aContext.getConfigParameterValue(PARAM_OUTPUTFILE);
-		try {
-			os = FileUtilities.getOutputStreamToFile(new File(outputFile));
-		} catch (IOException e) {
-			throw new ResourceInitializationException(e);
-		}
-	}
-
-	@Override
-	public void process(JCas jcas) throws AnalysisEngineProcessException {
-		try {
-			String pubmedId = JCoReTools.getDocId(jcas);
-			FSIterator<CorefRelation> it = jcas.<CorefRelation>getAnnotationIndex(CorefRelation.type).iterator();
-
-			int relcount = 0;
-			while (it.hasNext()) {
-				CorefRelation rel = it.next();
-				de.julielab.jcore.types.Annotation anaphora = rel.getAnaphora();
-
-				String abbrId = "Ana" + relcount;
-
-				IOUtils.write(String.join("\t", pubmedId, abbrId, String.valueOf(anaphora.getBegin()),
-						String.valueOf(anaphora.getEnd())) + "\n", os, "UTF-8");
-
-				Iterator<FeatureStructure> antecedentsIt = rel.getAntecedents().iterator();
-				while (antecedentsIt.hasNext()) {
-					CorefExpression antecedent = (CorefExpression) antecedentsIt.next();
-
-					String antecedentGroup = "Ant" + relcount;
-					IOUtils.write(String.join("\t", pubmedId, antecedentGroup, String.valueOf(antecedent.getBegin()),
-							String.valueOf(antecedent.getEnd())) + "\n", os, "UTF-8");
-				}
-
-
-				++relcount;
-			}
-		} catch (CASRuntimeException | IOException e) {
-			throw new AnalysisEngineProcessException(e);
-		}
-	}
-
-	@Override
-	public void collectionProcessComplete() throws AnalysisEngineProcessException {
-		try {
-			os.close();
-		} catch (IOException e) {
-			throw new AnalysisEngineProcessException(e);
-		}
-	}
+    public static final String PARAM_OUTPUTFILE = "OutputFile";
+
+    @ConfigurationParameter(name = PARAM_OUTPUTFILE)
+    private String outputFile;
+    private OutputStream os;
+
+    @Override
+    public void initialize(UimaContext aContext) throws ResourceInitializationException {
+        super.initialize(aContext);
+        outputFile = (String) aContext.getConfigParameterValue(PARAM_OUTPUTFILE);
+        try {
+            os = FileUtilities.getOutputStreamToFile(new File(outputFile));
+        } catch (IOException e) {
+            throw new ResourceInitializationException(e);
+        }
+    }
+
+    @Override
+    public void process(JCas jcas) throws AnalysisEngineProcessException {
+        try {
+            String pubmedId = JCoReTools.getDocId(jcas);
+            FSIterator<CorefRelation> it = jcas.<CorefRelation>getAnnotationIndex(CorefRelation.type).iterator();
+
+            int relcount = 0;
+            while (it.hasNext()) {
+                CorefRelation rel = it.next();
+                de.julielab.jcore.types.Annotation anaphora = rel.getAnaphora();
+
+                String abbrId = "Ana" + relcount;
+
+                IOUtils.write(String.join("\t", pubmedId, abbrId, String.valueOf(anaphora.getBegin()),
+                        String.valueOf(anaphora.getEnd())) + "\n", os, "UTF-8");
+
+                Iterator<FeatureStructure> antecedentsIt = rel.getAntecedents() != null ? rel.getAntecedents().iterator() : null;
+                while (antecedentsIt != null && antecedentsIt.hasNext()) {
+                    CorefExpression antecedent = (CorefExpression) antecedentsIt.next();
+                    if (antecedent != null) {
+                        String antecedentGroup = "Ant" + relcount;
+                        IOUtils.write(String.join("\t", pubmedId, antecedentGroup, String.valueOf(antecedent.getBegin()),
+                                String.valueOf(antecedent.getEnd())) + "\n", os, "UTF-8");
+                    }
+                }
+
+
+                ++relcount;
+            }
+        } catch (CASRuntimeException | IOException e) {
+            throw new AnalysisEngineProcessException(e);
+        }
+    }
+
+    @Override
+    public void collectionProcessComplete() throws AnalysisEngineProcessException {
+        try {
+            os.close();
+        } catch (IOException e) {
+            throw new AnalysisEngineProcessException(e);
+        }
+    }
 
 }
diff --git a/jcore-descriptor-creator/src/main/java/de/julielab/jcore/misc/DescriptorCreator.java b/jcore-descriptor-creator/src/main/java/de/julielab/jcore/misc/DescriptorCreator.java
index 3f5ca368a..92c3178a1 100644
--- a/jcore-descriptor-creator/src/main/java/de/julielab/jcore/misc/DescriptorCreator.java
+++ b/jcore-descriptor-creator/src/main/java/de/julielab/jcore/misc/DescriptorCreator.java
@@ -38,33 +38,35 @@ public class DescriptorCreator {
     private static final String DESC = "desc";
 
     public static void main(String[] args) throws Exception {
+        String basePackage = "de.julielab.jcore";
+        if (args.length > 0)
+            basePackage = args[0];
         DescriptorCreator creator = new DescriptorCreator();
-        creator.run();
+        creator.run(basePackage);
     }
 
     public static String getComponentName() {
         return new File(".").getAbsoluteFile().getParentFile().getName();
     }
 
-    public void run() throws Exception {
-        run(DEFAULT_OUTPUT_ROOT);
+    public void run(String basePackage) throws Exception {
+        run(basePackage, DEFAULT_OUTPUT_ROOT);
     }
 
-    public void run(String outputRoot) throws Exception {
-        List<Class<? extends CollectionReader>> readers;
-        List<Class<? extends AnalysisComponent>> aes;
-        readers = findSubclasses(CollectionReader.class.getCanonicalName());
-        aes = findSubclasses(AnalysisComponent.class.getCanonicalName());
+    public void run(String basePackage, String outputRoot) throws Exception {
+        List<Class<? extends CollectionReader>> readers = findSubclasses(CollectionReader.class.getCanonicalName());
+        List<Class<? extends AnalysisComponent>> aes = findSubclasses(AnalysisComponent.class.getCanonicalName());
 
-        readers = readers.stream().filter(c -> c.getPackage().getName().contains("de.julielab.jcore.reader"))
+        readers = readers.stream().filter(c -> c.getPackage().getName().startsWith(basePackage) && (c.getPackage().getName().endsWith("reader") || c.getName().toLowerCase().endsWith("reader")))
                 .collect(toList());
-        // Since consumers and also multipliers can be or are AnalysisComponents, were may list all component categories here.
+        // Since consumers and also multipliers can be or are AnalysisComponents, we may list all component categories here.
         // Also, remove abstract classes
         aes = aes.stream().filter(c -> !Modifier.isAbstract(c.getModifiers())).
-                filter(c -> c.getPackage().getName().contains("de.julielab.jcore.ae")
-                        || c.getPackage().getName().contains("de.julielab.jcore.consumer")
-                        || c.getPackage().getName().contains("de.julielab.jcore.multiplier")
-                        || c.getPackage().getName().contains("de.julielab.jcore.reader")).collect(toList());
+                filter(c -> c.getPackage().getName().startsWith(basePackage) &&
+                          (c.getPackage().getName().endsWith("ae") || c.getName().toLowerCase().endsWith("ae") || c.getName().toLowerCase().endsWith("annotator")
+                        || c.getPackage().getName().endsWith("consumer") || c.getName().toLowerCase().endsWith("consumer") || c.getName().toLowerCase().endsWith("writer")
+                        || c.getPackage().getName().endsWith("multiplier") || c.getName().toLowerCase().endsWith("multiplier"))
+                ).collect(toList());
 
         if (readers.isEmpty() && aes.isEmpty()) {
             log.warn("No JCoRe UIMA component classes were found.");
diff --git a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/ae/testae/desc/de.julielab.jcore.ae.testae.TestAE.xml b/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/ae/testae/desc/de.julielab.jcore.ae.testae.TestAE.xml
index 558a62b57..3cf0a3a39 100644
--- a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/ae/testae/desc/de.julielab.jcore.ae.testae.TestAE.xml
+++ b/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/ae/testae/desc/de.julielab.jcore.ae.testae.TestAE.xml
@@ -1,4 +1,4 @@
-<?xml version='1.0' encoding='UTF-8'?>
+<?xml version="1.0" encoding="UTF-8"?>
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <primitive>true</primitive>
@@ -6,41 +6,41 @@
     <analysisEngineMetaData>
         <name>de.julielab.jcore.ae.testae.TestAE</name>
         <description>Descriptor automatically generated by uimaFIT</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>unknown</version>
         <vendor>de.julielab.jcore.ae.testae</vendor>
-        <configurationParameters />
-        <configurationParameterSettings />
+        <configurationParameters/>
+        <configurationParameterSettings/>
         <typeSystemDescription>
             <imports>
-                <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-bootstrep-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-muc7-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-wikipedia-types" />
-                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types" />
-                <import name="de.julielab.jcore.types.jcore-document-structure-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-stemnet-types" />
-                <import name="de.julielab.jcore.types.jcore-semantics-biology-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-ace-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-mantra-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-ace-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-dta-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-evaluation-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-mmax-types" />
-                <import name="de.julielab.jcore.types.jcore-basic-types" />
-                <import name="de.julielab.jcore.types.jcore-morpho-syntax-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types" />
-                <import name="de.julielab.jcore.types.jcore-semantics-concept-types" />
-                <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types" />
-                <import name="de.julielab.jcore.types.jcore-semantics-mention-types" />
-                <import name="de.julielab.jcore.types.jcore-discourse-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-medical-types" />
-                <import name="de.julielab.jcore.types.jcore-document-meta-types" />
-                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-mantra-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-meta-types"/>
+                <import name="de.julielab.jcore.types.jcore-semantics-biology-types"/>
+                <import name="de.julielab.jcore.types.jcore-semantics-concept-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-mmax-types"/>
+                <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-muc7-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-ace-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-structure-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
+                <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-wikipedia-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types"/>
+                <import name="de.julielab.jcore.types.jcore-semantics-mention-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-medical-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-dta-types"/>
+                <import name="de.julielab.jcore.types.jcore-discourse-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-ace-types"/>
+                <import name="de.julielab.jcore.types.jcore-morpho-syntax-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-stemnet-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-bootstrep-types"/>
+                <import name="de.julielab.jcore.types.jcore-basic-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-evaluation-types"/>
             </imports>
         </typeSystemDescription>
-        <fsIndexCollection />
-        <capabilities />
+        <fsIndexCollection/>
+        <capabilities/>
         <operationalProperties>
             <modifiesCas>true</modifiesCas>
             <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
diff --git a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/consumer/testconsumer/desc/de.julielab.jcore.consumer.testconsumer.Testconsumer.xml b/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/consumer/testconsumer/desc/de.julielab.jcore.consumer.testconsumer.Testconsumer.xml
index 3bf9a16c1..cf47fdd0f 100644
--- a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/consumer/testconsumer/desc/de.julielab.jcore.consumer.testconsumer.Testconsumer.xml
+++ b/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/consumer/testconsumer/desc/de.julielab.jcore.consumer.testconsumer.Testconsumer.xml
@@ -1,4 +1,4 @@
-<?xml version='1.0' encoding='UTF-8'?>
+<?xml version="1.0" encoding="UTF-8"?>
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <primitive>true</primitive>
@@ -6,41 +6,41 @@
     <analysisEngineMetaData>
         <name>de.julielab.jcore.consumer.testconsumer.Testconsumer</name>
         <description>Descriptor automatically generated by uimaFIT</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>unknown</version>
         <vendor>de.julielab.jcore.consumer.testconsumer</vendor>
-        <configurationParameters />
-        <configurationParameterSettings />
+        <configurationParameters/>
+        <configurationParameterSettings/>
         <typeSystemDescription>
             <imports>
-                <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-bootstrep-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-muc7-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-wikipedia-types" />
-                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types" />
-                <import name="de.julielab.jcore.types.jcore-document-structure-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-stemnet-types" />
-                <import name="de.julielab.jcore.types.jcore-semantics-biology-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-ace-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-mantra-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-ace-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-dta-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-evaluation-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-mmax-types" />
-                <import name="de.julielab.jcore.types.jcore-basic-types" />
-                <import name="de.julielab.jcore.types.jcore-morpho-syntax-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types" />
-                <import name="de.julielab.jcore.types.jcore-semantics-concept-types" />
-                <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types" />
-                <import name="de.julielab.jcore.types.jcore-semantics-mention-types" />
-                <import name="de.julielab.jcore.types.jcore-discourse-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-medical-types" />
-                <import name="de.julielab.jcore.types.jcore-document-meta-types" />
-                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-mantra-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-meta-types"/>
+                <import name="de.julielab.jcore.types.jcore-semantics-biology-types"/>
+                <import name="de.julielab.jcore.types.jcore-semantics-concept-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-mmax-types"/>
+                <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-muc7-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-ace-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-structure-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
+                <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-wikipedia-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types"/>
+                <import name="de.julielab.jcore.types.jcore-semantics-mention-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-medical-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-dta-types"/>
+                <import name="de.julielab.jcore.types.jcore-discourse-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-ace-types"/>
+                <import name="de.julielab.jcore.types.jcore-morpho-syntax-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-stemnet-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-bootstrep-types"/>
+                <import name="de.julielab.jcore.types.jcore-basic-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-evaluation-types"/>
             </imports>
         </typeSystemDescription>
-        <fsIndexCollection />
-        <capabilities />
+        <fsIndexCollection/>
+        <capabilities/>
         <operationalProperties>
             <modifiesCas>true</modifiesCas>
             <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
diff --git a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/multiplier/testmultiplier/desc/de.julielab.jcore.multiplier.testmultiplier.TestMultiplier.xml b/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/multiplier/testmultiplier/desc/de.julielab.jcore.multiplier.testmultiplier.TestMultiplier.xml
index 8ef78db33..703b7b436 100644
--- a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/multiplier/testmultiplier/desc/de.julielab.jcore.multiplier.testmultiplier.TestMultiplier.xml
+++ b/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/multiplier/testmultiplier/desc/de.julielab.jcore.multiplier.testmultiplier.TestMultiplier.xml
@@ -1,4 +1,4 @@
-<?xml version='1.0' encoding='UTF-8'?>
+<?xml version="1.0" encoding="UTF-8"?>
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <primitive>true</primitive>
@@ -6,41 +6,41 @@
     <analysisEngineMetaData>
         <name>de.julielab.jcore.multiplier.testmultiplier.TestMultiplier</name>
         <description>Descriptor automatically generated by uimaFIT</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>unknown</version>
         <vendor>de.julielab.jcore.multiplier.testmultiplier</vendor>
-        <configurationParameters />
-        <configurationParameterSettings />
+        <configurationParameters/>
+        <configurationParameterSettings/>
         <typeSystemDescription>
             <imports>
-                <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-bootstrep-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-muc7-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-wikipedia-types" />
-                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types" />
-                <import name="de.julielab.jcore.types.jcore-document-structure-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-stemnet-types" />
-                <import name="de.julielab.jcore.types.jcore-semantics-biology-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-ace-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-mantra-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-ace-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-dta-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-evaluation-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-mmax-types" />
-                <import name="de.julielab.jcore.types.jcore-basic-types" />
-                <import name="de.julielab.jcore.types.jcore-morpho-syntax-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types" />
-                <import name="de.julielab.jcore.types.jcore-semantics-concept-types" />
-                <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types" />
-                <import name="de.julielab.jcore.types.jcore-semantics-mention-types" />
-                <import name="de.julielab.jcore.types.jcore-discourse-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-medical-types" />
-                <import name="de.julielab.jcore.types.jcore-document-meta-types" />
-                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-mantra-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-meta-types"/>
+                <import name="de.julielab.jcore.types.jcore-semantics-biology-types"/>
+                <import name="de.julielab.jcore.types.jcore-semantics-concept-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-mmax-types"/>
+                <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-muc7-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-ace-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-structure-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
+                <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-wikipedia-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types"/>
+                <import name="de.julielab.jcore.types.jcore-semantics-mention-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-medical-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-dta-types"/>
+                <import name="de.julielab.jcore.types.jcore-discourse-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-ace-types"/>
+                <import name="de.julielab.jcore.types.jcore-morpho-syntax-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-stemnet-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-bootstrep-types"/>
+                <import name="de.julielab.jcore.types.jcore-basic-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-evaluation-types"/>
             </imports>
         </typeSystemDescription>
-        <fsIndexCollection />
-        <capabilities />
+        <fsIndexCollection/>
+        <capabilities/>
         <operationalProperties>
             <modifiesCas>true</modifiesCas>
             <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
diff --git a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/reader/testreader/desc/de.julielab.jcore.reader.testreader.TestReader.xml b/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/reader/testreader/desc/de.julielab.jcore.reader.testreader.TestReader.xml
index bd482d6ee..24cc9ac66 100644
--- a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/reader/testreader/desc/de.julielab.jcore.reader.testreader.TestReader.xml
+++ b/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/reader/testreader/desc/de.julielab.jcore.reader.testreader.TestReader.xml
@@ -1,45 +1,45 @@
-<?xml version='1.0' encoding='UTF-8'?>
+<?xml version="1.0" encoding="UTF-8"?>
 <collectionReaderDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <implementationName>de.julielab.jcore.reader.testreader.TestReader</implementationName>
     <processingResourceMetaData>
         <name>de.julielab.jcore.reader.testreader.TestReader</name>
         <description>Descriptor automatically generated by uimaFIT</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>unknown</version>
         <vendor>de.julielab.jcore.reader.testreader</vendor>
-        <configurationParameters />
-        <configurationParameterSettings />
+        <configurationParameters/>
+        <configurationParameterSettings/>
         <typeSystemDescription>
             <imports>
-                <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-bootstrep-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-muc7-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-wikipedia-types" />
-                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types" />
-                <import name="de.julielab.jcore.types.jcore-document-structure-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-stemnet-types" />
-                <import name="de.julielab.jcore.types.jcore-semantics-biology-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-ace-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-mantra-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-ace-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-dta-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-evaluation-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-mmax-types" />
-                <import name="de.julielab.jcore.types.jcore-basic-types" />
-                <import name="de.julielab.jcore.types.jcore-morpho-syntax-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types" />
-                <import name="de.julielab.jcore.types.jcore-semantics-concept-types" />
-                <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types" />
-                <import name="de.julielab.jcore.types.jcore-semantics-mention-types" />
-                <import name="de.julielab.jcore.types.jcore-discourse-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-medical-types" />
-                <import name="de.julielab.jcore.types.jcore-document-meta-types" />
-                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-mantra-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-meta-types"/>
+                <import name="de.julielab.jcore.types.jcore-semantics-biology-types"/>
+                <import name="de.julielab.jcore.types.jcore-semantics-concept-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-mmax-types"/>
+                <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-muc7-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-ace-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-structure-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
+                <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-wikipedia-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types"/>
+                <import name="de.julielab.jcore.types.jcore-semantics-mention-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-medical-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-dta-types"/>
+                <import name="de.julielab.jcore.types.jcore-discourse-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-ace-types"/>
+                <import name="de.julielab.jcore.types.jcore-morpho-syntax-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-stemnet-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-bootstrep-types"/>
+                <import name="de.julielab.jcore.types.jcore-basic-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-evaluation-types"/>
             </imports>
         </typeSystemDescription>
-        <fsIndexCollection />
-        <capabilities />
+        <fsIndexCollection/>
+        <capabilities/>
         <operationalProperties>
             <modifiesCas>true</modifiesCas>
             <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
diff --git a/jcore-elasticsearch-consumer/src/test/resources/onefile-output/thefile-Eriks-MacBook-Air-2.local-1-0.json-Eriks-MacBook-Air-2.local-2-2.json b/jcore-elasticsearch-consumer/src/test/resources/onefile-output/thefile-Eriks-MacBook-Air-2.local-1-0.json-Eriks-MacBook-Air-2.local-2-2.json
deleted file mode 100644
index 5a085e8d3..000000000
--- a/jcore-elasticsearch-consumer/src/test/resources/onefile-output/thefile-Eriks-MacBook-Air-2.local-1-0.json-Eriks-MacBook-Air-2.local-2-2.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{"documentText":"This is one line that should not be interrupted."}
-{"documentText":"This is one line that should not be interrupted."}
-{"documentText":"This is one line that should not be interrupted."}
diff --git a/jcore-julielab-entity-evaluator-consumer/src/main/java/de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumer.java b/jcore-julielab-entity-evaluator-consumer/src/main/java/de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumer.java
index b92b32ad1..bffd2311d 100644
--- a/jcore-julielab-entity-evaluator-consumer/src/main/java/de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumer.java
+++ b/jcore-julielab-entity-evaluator-consumer/src/main/java/de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumer.java
@@ -249,7 +249,7 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
         super.initialize(aContext);
 
         outputColumnNamesArray = (String[]) aContext.getConfigParameterValue(PARAM_OUTPUT_COLUMNS);
-        columnDefinitionDescriptions = (String[]) aContext.getConfigParameterValue(PARAM_COLUMN_DEFINITIONS);
+        columnDefinitionDescriptions = Optional.ofNullable((String[]) aContext.getConfigParameterValue(PARAM_COLUMN_DEFINITIONS)).orElse(new String[0]);
         typePrefix = (String) aContext.getConfigParameterValue(PARAM_TYPE_PREFIX);
 
         featureFilterDefinitions = (String[]) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_FEATURE_FILTERS)).orElse(new String[0]);
diff --git a/jcore-lingpipegazetteer-ae/src/main/resources/de/julielab/jcore/ae/lingpipegazetteer/desc/jcore-lingpipe-gazetteer-ae-configurable-resource.xml b/jcore-lingpipegazetteer-ae/src/main/resources/de/julielab/jcore/ae/lingpipegazetteer/desc/jcore-lingpipe-gazetteer-ae-configurable-resource.xml
index e8895177a..16a94eb70 100644
--- a/jcore-lingpipegazetteer-ae/src/main/resources/de/julielab/jcore/ae/lingpipegazetteer/desc/jcore-lingpipe-gazetteer-ae-configurable-resource.xml
+++ b/jcore-lingpipegazetteer-ae/src/main/resources/de/julielab/jcore/ae/lingpipegazetteer/desc/jcore-lingpipe-gazetteer-ae-configurable-resource.xml
@@ -50,6 +50,9 @@
             </nameValuePair>
         </configurationParameterSettings>
         <typeSystemDescription>
+            <imports>
+                <import name="de.julielab.jcore.types.jcore-semantics-mention-types"/>
+            </imports>
         </typeSystemDescription>
         <typePriorities />
         <fsIndexCollection />

From af82f3dc5104362fa247fa27351fb12b68421a4d Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 9 Apr 2021 09:35:16 +0200
Subject: [PATCH 043/269] Adding optional plural normalization. Requires
 PennBioIEPOSTags to be set.

---
 .../chunking/ChunkerProvider.java             |   2 +
 .../chunking/ChunkerProviderImpl.java         |   5 +
 .../chunking/ChunkerProviderImplAlt.java      |  17 +-
 .../ConfigurableChunkerProviderImplAlt.java   | 587 +++++++++---------
 .../uima/GazetteerAnnotator.java              |  64 +-
 .../utils/StringNormalizerForChunking.java    |  25 +-
 .../StringNormalizerForChunkingTest.java      |  12 +-
 7 files changed, 360 insertions(+), 352 deletions(-)

diff --git a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ChunkerProvider.java b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ChunkerProvider.java
index 0395da7c8..0e43d4cd4 100644
--- a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ChunkerProvider.java
+++ b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ChunkerProvider.java
@@ -13,6 +13,8 @@ public interface ChunkerProvider {
 	public boolean getUseApproximateMatching();
 	
 	public boolean getNormalize();
+
+	public boolean getNormalizePlural();
 	
 	public boolean getTransliterate();
 	
diff --git a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ChunkerProviderImpl.java b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ChunkerProviderImpl.java
index dc5613755..06171ed03 100644
--- a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ChunkerProviderImpl.java
+++ b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ChunkerProviderImpl.java
@@ -428,6 +428,11 @@ public boolean getNormalize() {
 		return false;
 	}
 
+	@Override
+	public boolean getNormalizePlural() {
+		return false;
+	}
+
 	@Override
 	public boolean getTransliterate() {
 		return false;
diff --git a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ChunkerProviderImplAlt.java b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ChunkerProviderImplAlt.java
index 7e3daa924..23f4800d6 100644
--- a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ChunkerProviderImplAlt.java
+++ b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ChunkerProviderImplAlt.java
@@ -42,6 +42,12 @@ public class ChunkerProviderImplAlt implements ChunkerProvider, SharedResourceOb
 	 * switched on in the descriptor for the annotator itself!
 	 */
 	public final static String PARAM_NORMALIZE_TEXT = "NormalizeText";
+	/**
+	 * Only in effect when {@link #PARAM_NORMALIZE_TEXT} is set to <tt>true</tt>. If so, will normalize plurals
+	 * found in the text by removing the trailing 's'. Requires annotations of the type {@link de.julielab.jcore.types.PennBioIEPOSTag}
+	 * to be present in the CAS.
+	 */
+	public static final String PARAM_NORMALIZE_PLURAL = "NormalizePlural";
 	/**
 	 * Parameter to indicate whether text - dictionary entries for this class - should be transliterated, i.e. whether
 	 * accents and other character variations should be stripped. If this is switched on here, it must also be switched
@@ -54,6 +60,7 @@ public class ChunkerProviderImplAlt implements ChunkerProvider, SharedResourceOb
 	private boolean useApproximateMatching;
 	private boolean transliterate;
 	private boolean normalize;
+	private boolean normalizePlural;
 	private InputStream dictFile;
 	private InputStream stopFile;
 
@@ -71,6 +78,10 @@ public Chunker getChunker() {
 		return dictChunker;
 	}
 
+	public boolean getNormalizePlural() {
+		return normalizePlural;
+	}
+
 	public void load(DataResource resource) throws ResourceInitializationException {
 		LOGGER.info("Loading configuration file from URI \"{}\" (URL: \"{}\").", resource.getUri(), resource.getUrl());
 		Properties properties = new Properties();
@@ -118,7 +129,11 @@ public void load(DataResource resource) throws ResourceInitializationException {
 		normalize = false;
 		if (normalizeString != null)
 			normalize = new Boolean(normalizeString);
-		LOGGER.info("Normalize dictionary entries (i.e. completely strip dashes, parenthesis etc): {}", normalize);
+		LOGGER.info("Normalize dictionary entries and text (i.e. completely strip dashes, parenthesis etc): {}", normalize);
+
+		normalizePlural = Boolean.parseBoolean(properties.getProperty(PARAM_NORMALIZE_PLURAL, "false")) && normalize;
+		if (normalize)
+		LOGGER.info("Also normalize plural forms to singular: {}", normalizePlural);
 
 		String transliterateString = properties.getProperty(PARAM_TRANSLITERATE_TEXT);
 		transliterate = false;
diff --git a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ConfigurableChunkerProviderImplAlt.java b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ConfigurableChunkerProviderImplAlt.java
index f0ae88711..aa1c07623 100644
--- a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ConfigurableChunkerProviderImplAlt.java
+++ b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ConfigurableChunkerProviderImplAlt.java
@@ -1,4 +1,3 @@
-
 package de.julielab.jcore.ae.lingpipegazetteer.chunking;
 
 import com.aliasi.chunk.Chunker;
@@ -21,6 +20,7 @@
 import java.io.*;
 import java.net.URI;
 import java.util.HashSet;
+import java.util.Optional;
 import java.util.Set;
 import java.util.zip.GZIPInputStream;
 
@@ -29,317 +29,328 @@
  * Also, this implementation expects a configurableDataResourceSpecifier for the external resource,
  * specifying the dictionary directly and providing the parameters via the normal UIMA resource meta data
  * mechanism.
- * 
+ *
  * @author faessler
- * 
  */
 public class ConfigurableChunkerProviderImplAlt implements ChunkerProvider, SharedResourceObject {
 
-	private static final Logger LOGGER = LoggerFactory.getLogger(ConfigurableChunkerProviderImplAlt.class);
-	public final static String PARAM_USE_APPROXIMATE_MATCHING = "UseApproximateMatching";
-	public final static String PARAM_CASE_SENSITIVE = "CaseSensitive";
-	public final static String PARAM_MAKE_VARIANTS = "MakeVariants";
-	public final static String PARAM_STOPWORD_FILE = "StopWordFile";
-	/**
-	 * Parameter to indicate whether text - dictionary entries for this class - should be normalized by completely
-	 * removing dashes, parenthesis, genitive 's and perhaps more. This is meant to replace the generation of term
-	 * variants and cannot be used together with variation generation. If this is switched on here, it must also be
-	 * switched on in the descriptor for the annotator itself!
-	 */
-	public final static String PARAM_NORMALIZE_TEXT = "NormalizeText";
-	/**
-	 * Parameter to indicate whether text - dictionary entries for this class - should be transliterated, i.e. whether
-	 * accents and other character variations should be stripped. If this is switched on here, it must also be switched
-	 * on in the descriptor of the annotator itself!
-	 */
-	public final static String PARAM_TRANSLITERATE_TEXT = "TransliterateText";
-
-	private Boolean generateVariants;
-	private Boolean caseSensitive;
-	private Boolean useApproximateMatching;
-	private Boolean transliterate;
-	private Boolean normalize;
-	private InputStream dictFile;
-	private InputStream stopFile;
-
-	private AbstractDictionary<String> dict;
-	private Chunker dictChunker = null;
-	private final double CHUNK_SCORE = 1.0;
-
-	private final int MIN_TERM_LENGTH = 3;
-	private final double APPROX_MATCH_THRESHOLD_SCORE = 100;
-	private Set<String> stopWords = new HashSet<String>();
-	private String stopwordFilePath;
+    public final static String PARAM_USE_APPROXIMATE_MATCHING = "UseApproximateMatching";
+    public final static String PARAM_CASE_SENSITIVE = "CaseSensitive";
+    public final static String PARAM_MAKE_VARIANTS = "MakeVariants";
+    public final static String PARAM_STOPWORD_FILE = "StopWordFile";
+    /**
+     * Parameter to indicate whether text - dictionary entries for this class - should be normalized by completely
+     * removing dashes, parenthesis, genitive 's and perhaps more. This is meant to replace the generation of term
+     * variants and cannot be used together with variation generation. If this is switched on here, it must also be
+     * switched on in the descriptor for the annotator itself!
+     */
+    public final static String PARAM_NORMALIZE_TEXT = "NormalizeText";
+    /**
+     * Only in effect when {@link #PARAM_NORMALIZE_TEXT} is set to <tt>true</tt>. If so, will normalize plurals
+     * found in the text by removing the trailing 's'. Requires annotations of the type {@link de.julielab.jcore.types.PennBioIEPOSTag}
+     * to be present in the CAS.
+     */
+    public static final String PARAM_NORMALIZE_PLURAL = "NormalizePlural";
+    /**
+     * Parameter to indicate whether text - dictionary entries for this class - should be transliterated, i.e. whether
+     * accents and other character variations should be stripped. If this is switched on here, it must also be switched
+     * on in the descriptor of the annotator itself!
+     */
+    public final static String PARAM_TRANSLITERATE_TEXT = "TransliterateText";
+    private static final Logger LOGGER = LoggerFactory.getLogger(ConfigurableChunkerProviderImplAlt.class);
+    private final double CHUNK_SCORE = 1.0;
+    private final int MIN_TERM_LENGTH = 3;
+    private final double APPROX_MATCH_THRESHOLD_SCORE = 100;
+    private Boolean generateVariants;
+    private Boolean caseSensitive;
+    private Boolean useApproximateMatching;
+    private Boolean transliterate;
+    private Boolean normalize;
+    private Boolean normalizePlural;
+    private InputStream dictFile;
+    private InputStream stopFile;
+    private AbstractDictionary<String> dict;
+    private Chunker dictChunker = null;
+    private Set<String> stopWords = new HashSet<String>();
+    private String stopwordFilePath;
     private URI resourceUri;
 
     public Chunker getChunker() {
-		return dictChunker;
-	}
+        return dictChunker;
+    }
 
-	public void load(DataResource resource) throws ResourceInitializationException {
+    public void load(DataResource resource) throws ResourceInitializationException {
 
         resourceUri = resource.getUri();
         LOGGER.info("Creating dictionary chunker with dictionary loaded from " + resourceUri);
 
-		ConfigurationParameterSettings settings = resource.getMetaData().getConfigurationParameterSettings();
-		stopwordFilePath = (String) settings.getParameterValue(PARAM_STOPWORD_FILE);
-		if (stopwordFilePath == null)
-			throw new ResourceInitializationException(ResourceInitializationException.CONFIG_SETTING_ABSENT,
-					new Object[] { PARAM_STOPWORD_FILE });
+        ConfigurationParameterSettings settings = resource.getMetaData().getConfigurationParameterSettings();
+        stopwordFilePath = (String) settings.getParameterValue(PARAM_STOPWORD_FILE);
+        if (stopwordFilePath == null)
+            throw new ResourceInitializationException(ResourceInitializationException.CONFIG_SETTING_ABSENT,
+                    new Object[]{PARAM_STOPWORD_FILE});
 
-		generateVariants = (Boolean) settings.getParameterValue(PARAM_MAKE_VARIANTS);
-		LOGGER.info("Generate variants: {}", generateVariants);
+        generateVariants = (Boolean) settings.getParameterValue(PARAM_MAKE_VARIANTS);
+        LOGGER.info("Generate variants: {}", generateVariants);
 
-		normalize = (Boolean) settings.getParameterValue(PARAM_NORMALIZE_TEXT);
-		LOGGER.info("Normalize dictionary entries (i.e. completely strip dashes, parenthesis etc): {}", normalize);
+        normalize = (Boolean) settings.getParameterValue(PARAM_NORMALIZE_TEXT);
+        LOGGER.info("Normalize dictionary entries (i.e. completely strip dashes, parenthesis etc): {}", normalize);
+        normalizePlural = Optional.ofNullable((Boolean) settings.getParameterValue(PARAM_NORMALIZE_PLURAL)).orElse(false) && normalize;
+        if (normalize)
+            LOGGER.info("Also normalize plural forms to singular: {}", normalizePlural);
 
-		transliterate = (Boolean) settings.getParameterValue(PARAM_TRANSLITERATE_TEXT);
-		LOGGER.info("Transliterate dictionary entries (i.e. transform accented characters to their base forms): {}",
-				transliterate);
+        transliterate = (Boolean) settings.getParameterValue(PARAM_TRANSLITERATE_TEXT);
+        LOGGER.info("Transliterate dictionary entries (i.e. transform accented characters to their base forms): {}",
+                transliterate);
 
-		caseSensitive = (Boolean) settings.getParameterValue(PARAM_CASE_SENSITIVE);
-		LOGGER.info("Case sensitive: {}", caseSensitive);
+        caseSensitive = (Boolean) settings.getParameterValue(PARAM_CASE_SENSITIVE);
+        LOGGER.info("Case sensitive: {}", caseSensitive);
 
-		useApproximateMatching = (Boolean) settings.getParameterValue(PARAM_USE_APPROXIMATE_MATCHING);
-		LOGGER.info("Use approximate matching: {}", useApproximateMatching);
+        useApproximateMatching = (Boolean) settings.getParameterValue(PARAM_USE_APPROXIMATE_MATCHING);
+        LOGGER.info("Use approximate matching: {}", useApproximateMatching);
 
-		if (normalize && generateVariants)
-			throw new ResourceInitializationException(
-					new IllegalStateException(
-							"MakeVariants and NormalizeText are both activated which is invalid. The two options work towards the same goal in two different ways, i.e. to recognize dictionary entry variants not given explicitly. However, the approaches are not compatible and you have to choose a single one."));
+        if (normalize && generateVariants)
+            throw new ResourceInitializationException(
+                    new IllegalStateException(
+                            "MakeVariants and NormalizeText are both activated which is invalid. The two options work towards the same goal in two different ways, i.e. to recognize dictionary entry variants not given explicitly. However, the approaches are not compatible and you have to choose a single one."));
 
 
-		try {
+        try {
             try {
                 dictFile = UriUtilities.getInputStreamFromUri(resource.getUri());
             } catch (Exception e) {
                 LOGGER.error("Could not load the dictionary from {}, see the following exception for details.", resource.getUri());
                 throw e;
             }
-			stopFile = readStreamFromFileSystemOrClassPath(stopwordFilePath);
-			initStopWords(stopFile);
-			readDictionary(dictFile);
-
-			LOGGER.info("Now creating chunker.");
-			long time = System.currentTimeMillis();
-			if (useApproximateMatching) {
-				final Set<Character> charsToDelete = new HashSet<>();
-				charsToDelete.add('-');
-				// charsToDelete.add('+');
-				// charsToDelete.add(',');
-				// charsToDelete.add('.');
-				// charsToDelete.add(':');
-				// charsToDelete.add(';');
-				// charsToDelete.add('?');
-				// charsToDelete.add('!');
-				// charsToDelete.add('*');
-				// charsToDelete.add('§');
-				// charsToDelete.add('$');
-				// charsToDelete.add('%');
-				// charsToDelete.add('&');
-				// charsToDelete.add('/');
-				// charsToDelete.add('\\');
-				// charsToDelete.add('(');
-				// charsToDelete.add(')');
-				// charsToDelete.add('<');
-				// charsToDelete.add('>');
-				// charsToDelete.add('[');
-				// charsToDelete.add(']');
-				// charsToDelete.add('=');
-				// charsToDelete.add('\'');
-				// charsToDelete.add('`');
-				// charsToDelete.add('´');
-				// charsToDelete.add('"');
-				// charsToDelete.add('#');
-
-				WeightedEditDistance editDistance = ApproxDictionaryChunker.TT_DISTANCE;
-				editDistance = new WeightedEditDistance() {
-
-					@Override
-					public double deleteWeight(char cDeleted) {
-						double ret;
-						if (cDeleted == '-')
-							ret = -5.0;
-						else if (cDeleted == ' ' || charsToDelete.contains(cDeleted))
-							ret = -10.0;
-						else
-							ret = -110.0;
-						return ret;
-					}
-
-					@Override
-					public double insertWeight(char cInserted) {
-						return deleteWeight(cInserted);
-					}
-
-					@Override
-					public double matchWeight(char cMatched) {
-						return 0.0;
-					}
-
-					@Override
-					public double substituteWeight(char cDeleted, char cInserted) {
-						if (cDeleted == ' ' && cInserted == '-')
-							return -2.0;
-						if (cDeleted == '-' && cInserted == ' ')
-							return -2.0;
-						if (cDeleted == ' ' && charsToDelete.contains(cInserted))
-							return -10.0;
-						if (charsToDelete.contains(cDeleted) && cInserted == ' ')
-							return -10.0;
-						return -110.0;
-					}
-
-					@Override
-					public double transposeWeight(char c1, char c2) {
-						return Double.NEGATIVE_INFINITY;
-					}
-				};
-
-				dictChunker =
-						new ApproxDictionaryChunker((TrieDictionary<String>) dict,
-								IndoEuropeanTokenizerFactory.INSTANCE, editDistance, APPROX_MATCH_THRESHOLD_SCORE);
-			} else {
-				dictChunker =
-						new ExactDictionaryChunker(dict, IndoEuropeanTokenizerFactory.INSTANCE, false, caseSensitive);
-			}
-			time = System.currentTimeMillis() - time;
-			LOGGER.info("Building the actual chunker from the dictionary took {}ms ({}s).", time, time / 1000);
-
-		} catch (Exception e) {
-			LOGGER.error("Exception while creating chunker instance from dictionary file {} with stopwords from {}", resource.getUri(), stopwordFilePath, e);
-		}
-	}
-
-	private void readDictionary(InputStream dictFileStream) throws IOException, AnalysisEngineProcessException {
-		long time = System.currentTimeMillis();
-		if (useApproximateMatching) {
-			dict = new TrieDictionary<String>();
-		} else {
-			dict = new MapDictionary<String>();
-		}
-		// now read from file and add entries
-		LOGGER.info("readDictionary() - adding entries from " + resourceUri.toString() + " to dictionary...");
-		BufferedReader bf = null;
-		try {
-			bf = new BufferedReader(new InputStreamReader(dictFileStream));
-			String line = "";
-
-			Transliterator transliterator = null;
-			if (transliterate)
-				transliterator = Transliterator.getInstance("NFD; [:Nonspacing Mark:] Remove; NFC; Lower");
-
-			TokenizerFactory tokenizerFactory = null;
-			if (normalize)
-				tokenizerFactory = new IndoEuropeanTokenizerFactory();
-			while ((line = bf.readLine()) != null) {
-				String[] values = line.split("\t");
-				if (values.length != 2) {
-					LOGGER.error("readDictionary() - wrong format of line: " + line);
-					throw new AnalysisEngineProcessException(AnalysisEngineProcessException.ANNOTATOR_EXCEPTION, null);
-				}
-
-				String term = values[0].trim();
-
-				if (stopWords.contains(term.toLowerCase()))
-					continue;
-
-				if (normalize) {
-					term = StringNormalizerForChunking.normalizeString(term, tokenizerFactory).string;
-				}
-				if (transliterate)
-					term = transliterator.transform(term);
-				if (useApproximateMatching && !caseSensitive && !transliterate)
-					term = term.toLowerCase();
-
-				String label = values[1].trim();
-				if (term.length() < MIN_TERM_LENGTH)
-					continue;
-
-				if (generateVariants) {
-					if (true)
-						throw new NotImplementedException(
-								"In this alternative ChunkerProvider, generating variants will currently fail to adequately filter out stop words due to the transliteration and/or normalization algorithms. If you don't need those algorithms, just stick to the original ChunkerProviderImpl. Otherwise, this issue must be fixed (shouldnt be too difficult). Variants are also currently not treated with normalization/transliteration (but this is deemed to be two alternative ways to achieve a similar thing anyway)");
-				} else {
-					// This is a second stop-word-check but here the term has been transliterated and/or normalized. If
-					// somehow the result of this was a stop word, ignore it.
-					if (!stopWords.contains(term.toLowerCase()))
-						dict.addEntry(new DictionaryEntry<String>(term, label, CHUNK_SCORE));
-				}
-			}
-
-			time = System.currentTimeMillis() - time;
-			LOGGER.info("Reading dictionary took {}ms ({}s)", time, time / 1000);
-		} finally {
-			if (null != bf)
-				bf.close();
-		}
-	}
-
-	private void initStopWords(InputStream stopFileStream) throws IOException {
-		stopWords = new HashSet<String>();
-
-		LOGGER.info("readDictionary() - adding entries from " + stopwordFilePath + " to dictionary...");
-		BufferedReader bf = new BufferedReader(new InputStreamReader(stopFileStream));
-		String line = "";
-
-		try {
-			while ((line = bf.readLine()) != null) {
-				if (line.startsWith("#")) {
-					continue;
-				}
-				stopWords.add(line.trim().toLowerCase());
-			}
-			bf.close();
-		} catch (IOException e) {
-			e.printStackTrace();
-		}
-	}
-
-	@Override
-	public Set<String> getStopWords() {
-		return stopWords;
-	}
-
-	@Override
-	public boolean getUseApproximateMatching() {
-		return useApproximateMatching;
-	}
-
-	@Override
-	public boolean getNormalize() {
-		return normalize;
-	}
-
-	@Override
-	public boolean getTransliterate() {
-		return transliterate;
-	}
-
-	@Override
-	public boolean getCaseSensitive() {
-		return caseSensitive;
-
-	}
-
-	private InputStream readStreamFromFileSystemOrClassPath(String filePath) {
-		InputStream is = null;
-		File file = new File(filePath);
-		if (file.exists()) {
-			try {
-				is = new FileInputStream(file);
-			} catch (FileNotFoundException e) {
-				e.printStackTrace();
-			}
-		} else {
-			is = getClass().getResourceAsStream(filePath.startsWith("/") ? filePath : "/" + filePath);
-		}
-		if (filePath.endsWith(".gz") || filePath.endsWith(".gzip"))
-			try {
-				is = new GZIPInputStream(is);
-			} catch (IOException e) {
-				e.printStackTrace();
-			}
-		return is;
-	}
+            stopFile = readStreamFromFileSystemOrClassPath(stopwordFilePath);
+            initStopWords(stopFile);
+            readDictionary(dictFile);
+
+            LOGGER.info("Now creating chunker.");
+            long time = System.currentTimeMillis();
+            if (useApproximateMatching) {
+                final Set<Character> charsToDelete = new HashSet<>();
+                charsToDelete.add('-');
+                // charsToDelete.add('+');
+                // charsToDelete.add(',');
+                // charsToDelete.add('.');
+                // charsToDelete.add(':');
+                // charsToDelete.add(';');
+                // charsToDelete.add('?');
+                // charsToDelete.add('!');
+                // charsToDelete.add('*');
+                // charsToDelete.add('§');
+                // charsToDelete.add('$');
+                // charsToDelete.add('%');
+                // charsToDelete.add('&');
+                // charsToDelete.add('/');
+                // charsToDelete.add('\\');
+                // charsToDelete.add('(');
+                // charsToDelete.add(')');
+                // charsToDelete.add('<');
+                // charsToDelete.add('>');
+                // charsToDelete.add('[');
+                // charsToDelete.add(']');
+                // charsToDelete.add('=');
+                // charsToDelete.add('\'');
+                // charsToDelete.add('`');
+                // charsToDelete.add('´');
+                // charsToDelete.add('"');
+                // charsToDelete.add('#');
+
+                WeightedEditDistance editDistance = ApproxDictionaryChunker.TT_DISTANCE;
+                editDistance = new WeightedEditDistance() {
+
+                    @Override
+                    public double deleteWeight(char cDeleted) {
+                        double ret;
+                        if (cDeleted == '-')
+                            ret = -5.0;
+                        else if (cDeleted == ' ' || charsToDelete.contains(cDeleted))
+                            ret = -10.0;
+                        else
+                            ret = -110.0;
+                        return ret;
+                    }
+
+                    @Override
+                    public double insertWeight(char cInserted) {
+                        return deleteWeight(cInserted);
+                    }
+
+                    @Override
+                    public double matchWeight(char cMatched) {
+                        return 0.0;
+                    }
+
+                    @Override
+                    public double substituteWeight(char cDeleted, char cInserted) {
+                        if (cDeleted == ' ' && cInserted == '-')
+                            return -2.0;
+                        if (cDeleted == '-' && cInserted == ' ')
+                            return -2.0;
+                        if (cDeleted == ' ' && charsToDelete.contains(cInserted))
+                            return -10.0;
+                        if (charsToDelete.contains(cDeleted) && cInserted == ' ')
+                            return -10.0;
+                        return -110.0;
+                    }
+
+                    @Override
+                    public double transposeWeight(char c1, char c2) {
+                        return Double.NEGATIVE_INFINITY;
+                    }
+                };
+
+                dictChunker =
+                        new ApproxDictionaryChunker((TrieDictionary<String>) dict,
+                                IndoEuropeanTokenizerFactory.INSTANCE, editDistance, APPROX_MATCH_THRESHOLD_SCORE);
+            } else {
+                dictChunker =
+                        new ExactDictionaryChunker(dict, IndoEuropeanTokenizerFactory.INSTANCE, false, caseSensitive);
+            }
+            time = System.currentTimeMillis() - time;
+            LOGGER.info("Building the actual chunker from the dictionary took {}ms ({}s).", time, time / 1000);
+
+        } catch (Exception e) {
+            LOGGER.error("Exception while creating chunker instance from dictionary file {} with stopwords from {}", resource.getUri(), stopwordFilePath, e);
+        }
+    }
+
+    /**
+     * Populates the dictionary from a stream of tab-separated term/label lines. Each term is run through
+     * the configured normalization, transliteration and lower-casing; stop words and short terms are skipped.
+     *
+     * @param dictFileStream stream with one tab-separated term/label pair per line
+     * @throws IOException                    if reading from or closing the stream fails
+     * @throws AnalysisEngineProcessException if a line does not consist of exactly two tab-separated columns
+     */
+    private void readDictionary(InputStream dictFileStream) throws IOException, AnalysisEngineProcessException {
+        long time = System.currentTimeMillis();
+        // Approximate matching is backed by a trie dictionary, exact matching by a map dictionary.
+        dict = useApproximateMatching ? new TrieDictionary<String>() : new MapDictionary<String>();
+        LOGGER.info("readDictionary() - adding entries from " + resourceUri.toString() + " to dictionary...");
+
+        BufferedReader reader = null;
+        try {
+            reader = new BufferedReader(new InputStreamReader(dictFileStream));
+            // The helpers are only instantiated when the corresponding option is switched on.
+            final Transliterator transliterator = transliterate
+                    ? Transliterator.getInstance("NFD; [:Nonspacing Mark:] Remove; NFC; Lower") : null;
+            final TokenizerFactory tokenizerFactory = normalize ? new IndoEuropeanTokenizerFactory() : null;
+
+            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
+                final String[] columns = line.split("\t");
+                if (columns.length != 2) {
+                    LOGGER.error("readDictionary() - wrong format of line: " + line);
+                    throw new AnalysisEngineProcessException(AnalysisEngineProcessException.ANNOTATOR_EXCEPTION, null);
+                }
+
+                String term = columns[0].trim();
+                // First stop word check, on the term exactly as it is written in the file.
+                if (stopWords.contains(term.toLowerCase()))
+                    continue;
+
+                if (normalize)
+                    term = StringNormalizerForChunking.normalizeString(term, tokenizerFactory).string;
+                if (transliterate)
+                    term = transliterator.transform(term);
+                if (useApproximateMatching && !caseSensitive && !transliterate)
+                    term = term.toLowerCase();
+
+                if (term.length() < MIN_TERM_LENGTH)
+                    continue;
+
+                // Variant generation is not supported by this provider and aborts the whole read.
+                if (generateVariants)
+                    throw new NotImplementedException(
+                            "In this alternative ChunkerProvider, generating variants will currently fail to adequately filter out stop words due to the transliteration and/or normalization algorithms. If you don't need those algorithms, just stick to the original ChunkerProviderImpl. Otherwise, this issue must be fixed (shouldnt be too difficult). Variants are also currently not treated with normalization/transliteration (but this is deemed to be two alternative ways to achieve a similar thing anyway)");
+
+                // Second stop word check: normalization and/or transliteration may have turned the term
+                // into a stop word, in which case it is dropped as well.
+                final String label = columns[1].trim();
+                if (!stopWords.contains(term.toLowerCase()))
+                    dict.addEntry(new DictionaryEntry<String>(term, label, CHUNK_SCORE));
+            }
+
+            time = System.currentTimeMillis() - time;
+            LOGGER.info("Reading dictionary took {}ms ({}s)", time, time / 1000);
+        } finally {
+            // Close the reader whether or not reading succeeded.
+            if (reader != null)
+                reader.close();
+        }
+    }
+
+    /**
+     * Reads the stop word list: one entry per line, trimmed and lower-cased; lines starting with '#' are
+     * skipped. A read error is logged and leaves the set with the entries read up to that point.
+     */
+    private void initStopWords(InputStream stopFileStream) throws IOException {
+        stopWords = new HashSet<String>();
+
+        LOGGER.info("initStopWords() - reading stop words from " + stopwordFilePath + "...");
+        // try-with-resources closes the reader even if reading fails half-way through.
+        try (BufferedReader bf = new BufferedReader(new InputStreamReader(stopFileStream))) {
+            for (String line = bf.readLine(); line != null; line = bf.readLine()) {
+                if (!line.startsWith("#"))
+                    stopWords.add(line.trim().toLowerCase());
+            }
+        } catch (IOException e) {
+            // Still best effort (no rethrow), but reported through the logger instead of stderr.
+            LOGGER.error("Could not completely read the stop word file {}", stopwordFilePath, e);
+        }
+    }
+
+    @Override
+    public Set<String> getStopWords() {
+        return stopWords;
+    }
+
+    @Override
+    public boolean getUseApproximateMatching() {
+        return useApproximateMatching;
+    }
+
+    @Override
+    public boolean getNormalize() {
+        return normalize;
+    }
+
+    @Override
+    public boolean getNormalizePlural() {
+        return normalizePlural;
+    }
+
+    @Override
+    public boolean getTransliterate() {
+        return transliterate;
+    }
+
+    @Override
+    public boolean getCaseSensitive() {
+        // Exposes the caseSensitive setting this provider passes to the exact dictionary chunker.
+        return caseSensitive;
+    }
+
+    /**
+     * Opens filePath from the file system or, if no such file exists, from the classpath. Paths ending
+     * in .gz or .gzip are wrapped in a GZIPInputStream. Failures are logged; the result may be null.
+     */
+    private InputStream readStreamFromFileSystemOrClassPath(String filePath) {
+        InputStream is = null;
+        try {
+            File file = new File(filePath);
+            if (file.exists())
+                is = new FileInputStream(file);
+            else
+                is = getClass().getResourceAsStream(filePath.startsWith("/") ? filePath : "/" + filePath);
+            // GZIPInputStream throws a NullPointerException on a null stream, so only wrap a found one.
+            if (is != null && (filePath.endsWith(".gz") || filePath.endsWith(".gzip")))
+                is = new GZIPInputStream(is);
+        } catch (IOException e) {
+            LOGGER.error("Could not open an input stream for {}", filePath, e);
+        }
+        return is;
+    }
 }
diff --git a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotator.java b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotator.java
index 1a9220007..afec25926 100644
--- a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotator.java
+++ b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotator.java
@@ -25,6 +25,7 @@
 import com.aliasi.tokenizer.IndoEuropeanTokenizerFactory;
 import com.aliasi.tokenizer.TokenizerFactory;
 import com.ibm.icu.text.Transliterator;
+import de.julielab.java.utilities.spanutils.OffsetSet;
 import de.julielab.jcore.ae.lingpipegazetteer.chunking.ChunkerProvider;
 import de.julielab.jcore.ae.lingpipegazetteer.chunking.OverlappingChunk;
 import de.julielab.jcore.ae.lingpipegazetteer.utils.StringNormalizerForChunking;
@@ -32,12 +33,14 @@
 import de.julielab.jcore.types.Abbreviation;
 import de.julielab.jcore.types.AbbreviationLongform;
 import de.julielab.jcore.types.ConceptMention;
+import de.julielab.jcore.types.PennBioIEPOSTag;
 import de.julielab.jcore.types.mantra.Entity;
 import de.julielab.jcore.utility.JCoReAnnotationTools;
 import de.julielab.jcore.utility.index.IndexTermGenerator;
 import de.julielab.jcore.utility.index.JCoReHashMapAnnotationIndex;
 import de.julielab.jcore.utility.index.TermGenerators;
 import de.julielab.jcore.utility.index.TermGenerators.LongOffsetIndexTermGenerator;
+import org.apache.commons.lang3.Range;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -53,6 +56,8 @@
 import org.slf4j.LoggerFactory;
 
 import java.util.*;
+import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
 
 public class GazetteerAnnotator extends JCasAnnotator_ImplBase {
 
@@ -213,52 +218,6 @@ public int compare(Chunk o1, Chunk o2) {
         return overlappingChunks;
     }
 
-    // enum ParenthesesType {
-    // ROUND_CLOSED {
-    // @Override
-    // boolean isOpen() {
-    // return false;
-    // }
-    //
-    // },
-    // BRACKET_CLOSED {
-    // @Override
-    // boolean isOpen() {
-    // return false;
-    // }
-    // },
-    // CURLY_CLOSED {
-    // @Override
-    // boolean isOpen() {
-    // return false;
-    // }
-    //
-    // },
-    // ROUND_OPENED {
-    // @Override
-    // boolean isOpen() {
-    // return true;
-    // }
-    // },
-    // BRACKET_OPENED {
-    // @Override
-    // boolean isOpen() {
-    // return true;
-    // }
-    // },
-    // CURLY_OPENED {
-    // @Override
-    // boolean isOpen() {
-    // return true;
-    // }
-    // };
-    // abstract boolean isOpen();
-    //
-    // boolean isClose() {
-    // return !isOpen();
-    // };
-    // }
-
     public void initialize(UimaContext aContext) throws ResourceInitializationException {
         LOGGER.info("calls to initialize: " + initializeCount);
 
@@ -314,8 +273,7 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
                 checkAcronyms);
         // filter stop words
 
-        Boolean normalizeBoolean = provider.getNormalize();// (Boolean)
-        // aContext.getConfigParameterValue(PARAM_NORMALIZE_TEXT);
+        Boolean normalizeBoolean = provider.getNormalize();
         if (normalizeBoolean) {
             normalizationTokenFactory = new IndoEuropeanTokenizerFactory();
         }
@@ -357,8 +315,14 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException {
             docText = transliterator.transform(docText);
         NormalizedString normalizedDocText = null;
         if (provider.getNormalize()) {
-            normalizedDocText = StringNormalizerForChunking.normalizeString(docText, normalizationTokenFactory,
-                    transliterator);
+            boolean hasPosTags = aJCas.getAnnotationIndex(PennBioIEPOSTag.type).iterator().hasNext();
+            if (provider.getNormalizePlural()) {
+                OffsetSet pluralOffsets = StreamSupport.stream(Spliterators.spliterator(aJCas.<PennBioIEPOSTag>getAnnotationIndex(PennBioIEPOSTag.type).iterator(), 0, 0), false).filter(tag -> tag.getValue().equals("NNS")).map(tag -> Range.between(tag.getBegin(), tag.getEnd())).collect(Collectors.toCollection(OffsetSet::new));
+                normalizedDocText = StringNormalizerForChunking.normalizeString(docText, normalizationTokenFactory, true, pluralOffsets, transliterator);
+            }else {
+                normalizedDocText = StringNormalizerForChunking.normalizeString(docText, normalizationTokenFactory,
+                        transliterator);
+            }
         }
 
         IndexTermGenerator<Long> longOffsetTermGenerator = TermGenerators.longOffsetTermGenerator();
diff --git a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/utils/StringNormalizerForChunking.java b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/utils/StringNormalizerForChunking.java
index e51c41eb9..a081858fd 100644
--- a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/utils/StringNormalizerForChunking.java
+++ b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/utils/StringNormalizerForChunking.java
@@ -1,8 +1,11 @@
 package de.julielab.jcore.ae.lingpipegazetteer.utils;
 
+import com.aliasi.tokenizer.PorterStemmerTokenizerFactory;
 import com.aliasi.tokenizer.Tokenizer;
 import com.aliasi.tokenizer.TokenizerFactory;
 import com.ibm.icu.text.Transliterator;
+import de.julielab.java.utilities.spanutils.OffsetSet;
+import org.apache.commons.lang3.Range;
 
 import java.util.*;
 
@@ -88,10 +91,10 @@ public static NormalizedString normalizeString(String str) {
      * @param tokenizerFactory
      * @return
      */
-    public static NormalizedString normalizeString(String str, TokenizerFactory tokenizerFactory,
+    public static NormalizedString normalizeString(String str, TokenizerFactory tokenizerFactory, boolean normalizePlural, OffsetSet pluralPositions,
                                                    Transliterator transliterator) {
-        // boolean stemming = tokenizerFactory instanceof
-        // PorterStemmerTokenizerFactory;
+        boolean stemming = tokenizerFactory instanceof
+                PorterStemmerTokenizerFactory;
 
         NormalizedString ns = new NormalizedString();
 
@@ -141,8 +144,10 @@ public static NormalizedString normalizeString(String str, TokenizerFactory toke
                 if (transliterator != null)
                     token = transliterator.transform(token);
                 // plural s, only when no stemming is done
-                // if (!stemming && token.endsWith("s"))
-                // token = token.substring(0, token.length() - 1);
+                // an even better normalization would be to use the lemma, of course
+                Range<Integer> tokenOffsets = Range.between(tokenizer.lastTokenStartPosition(), tokenizer.lastTokenEndPosition());
+                if (normalizePlural && !stemming && token.endsWith("s") && pluralPositions.locate(tokenOffsets).isOverlappedBy(tokenOffsets))
+                    token = token.substring(0, token.length() - 1);
                 sb.append(token);
                 int newStartOffset = sb.length() - token.length();
                 int newEndOffset = sb.length();
@@ -162,8 +167,16 @@ private static int sumOfStack(Deque<String> stack) {
         return sum;
     }
 
+    public static NormalizedString normalizeString(String str, TokenizerFactory tokenizerFactory, Transliterator transliterator) {
+        return normalizeString(str, tokenizerFactory, false, null, transliterator);
+    }
+
     public static NormalizedString normalizeString(String str, TokenizerFactory tokenizerFactory) {
-        return normalizeString(str, tokenizerFactory, null);
+        return normalizeString(str, tokenizerFactory, false, null, null);
+    }
+
+    public static NormalizedString normalizeString(String str, boolean normalizePlural, OffsetSet pluralPositions, TokenizerFactory tokenizerFactory) {
+        return normalizeString(str, tokenizerFactory, normalizePlural, pluralPositions, null);
     }
 
     public enum Mode {
diff --git a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/StringNormalizerForChunkingTest.java b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/StringNormalizerForChunkingTest.java
index fef412a2e..a1bbadf8c 100644
--- a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/StringNormalizerForChunkingTest.java
+++ b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/StringNormalizerForChunkingTest.java
@@ -5,11 +5,14 @@
 import com.aliasi.tokenizer.PorterStemmerTokenizerFactory;
 import com.aliasi.tokenizer.TokenizerFactory;
 import com.ibm.icu.text.Transliterator;
+import de.julielab.java.utilities.spanutils.OffsetSet;
 import de.julielab.jcore.ae.lingpipegazetteer.utils.StringNormalizerForChunking;
 import de.julielab.jcore.ae.lingpipegazetteer.utils.StringNormalizerForChunking.NormalizedString;
-import org.junit.Ignore;
+import org.apache.commons.lang3.Range;
 import org.junit.Test;
 
+import java.util.List;
+
 import static org.junit.Assert.*;
 
 public class StringNormalizerForChunkingTest {
@@ -154,16 +157,11 @@ public void testNewlines() {
 	}
 
 	@Test
-	@Ignore
-	/**
-	 * Ignored because the plural ignore introduced too much errors on test data
-	 * so it was removed from the algorithm.
-	 */
 	public void testNormalizePlural() {
 		String str;
 		str = "glutathione transferases are evil";
 		TokenizerFactory tokenizerFactory = new IndoEuropeanTokenizerFactory();
-		NormalizedString ns = StringNormalizerForChunking.normalizeString(str, tokenizerFactory);
+		NormalizedString ns = StringNormalizerForChunking.normalizeString(str, tokenizerFactory, true, new OffsetSet(List.of(Range.between(12, 24))), null);
 		assertEquals("glutathione transferase are evil", ns.string);
 	}
 }

From 1e3472eab5a0c02c73b70fe0f7cf3c5d10ab29e3 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 9 Apr 2021 09:43:37 +0200
Subject: [PATCH 044/269] The stop words given to the chunker provider are now
 also used by the annotator to filter chunks (this was previously a hard-coded
 list of words).

---
 .../uima/GazetteerAnnotator.java              |  70 ++--
 .../resources/normalizegazetteer.properties   |   3 +-
 .../test/resources/reducedStopWordList.txt    | 320 ++++++++++++++++++
 3 files changed, 357 insertions(+), 36 deletions(-)
 create mode 100644 jcore-lingpipegazetteer-ae/src/test/resources/reducedStopWordList.txt

diff --git a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotator.java b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotator.java
index afec25926..539c0a918 100644
--- a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotator.java
+++ b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotator.java
@@ -227,41 +227,41 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
         try {
             provider = (ChunkerProvider) getContext().getResourceObject(CHUNKER_RESOURCE_NAME);
             gazetteer = provider.getChunker();
-//			stopWords = provider.getStopWords();
-            String[] stopwordArray = {"a", "about", "above", "across", "after", "afterwards", "again", "against",
-                    "all", "almost", "alone", "along", "already", "also", "although", "always", "am", "among",
-                    "amongst", "amoungst", "amount", "an", "and", "another", "any", "anyhow", "anyone", "anything",
-                    "anyway", "anywhere", "are", "around", "as", "at", "back", "be", "became", "because", "become",
-                    "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside",
-                    "besides", "between", "beyond", "bill", "both", "bottom", "but", "by", "call", "can", "cannot",
-                    "cant", "co", "computer", "con", "could", "couldnt", "cry", "de", "describe", "detail", "do",
-                    "done", "down", "due", "during", "each", "eg", "eight", "either", "eleven", "else", "elsewhere",
-                    "empty", "enough", "etc", "even", "ever", "every", "everyone", "everything", "everywhere", "except",
-                    "few", "fifteen", "fify", "fill", "find", "fire", "first", "five", "for", "former", "formerly",
-                    "forty", "found", "four", "from", "front", "full", "further", "get", "give", "go", "had", "has",
-                    "hasnt", "have", "he", "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers",
-                    "herself", "high", "him", "himself", "his", "how", "however", "hundred", "i", "ie", "if", "in",
-                    "inc", "indeed", "interest", "into", "is", "it", "its", "itself", "keep", "last", "latter",
-                    "latterly", "least", "less", "ltd", "made", "many", "may", "me", "meanwhile", "might", "mill",
-                    "mine", "more", "moreover", "most", "mostly", "move", "much", "must", "my", "myself", "name",
-                    "namely", "neither", "never", "nevertheless", "next", "nine", "no", "nobody", "none", "noone",
-                    "nor", "not", "nothing", "now", "nowhere", "of", "off", "often", "on", "once", "one", "only",
-                    "onto", "or", "other", "others", "otherwise", "our", "ours", "ourselves", "out", "over", "own",
-                    "part", "per", "perhaps", "please", "put", "rather", "re", "same", "see", "seem", "seemed",
-                    "seeming", "seems", "serious", "several", "she", "should", "show", "side", "since", "sincere",
-                    "six", "sixty", "so", "some", "somehow", "someone", "something", "sometime", "sometimes",
-                    "somewhere", "still", "such", "system", "take", "ten", "than", "that", "the", "their", "them",
-                    "themselves", "then", "thence", "there", "thereafter", "thereby", "therefore", "therein",
-                    "thereupon", "these", "they", "thick", "thin", "third", "this", "those", "though", "three",
-                    "through", "throughout", "thru", "thus", "to", "together", "too", "top", "toward", "towards",
-                    "twelve", "twenty", "two", "un", "under", "until", "up", "upon", "us", "very", "via", "was", "we",
-                    "well", "were", "what", "whatever", "when", "whence", "whenever", "where", "whereafter", "whereas",
-                    "whereby", "wherein", "whereupon", "wherever", "whether", "which", "while", "whither", "who",
-                    "whoever", "whole", "whom", "whose", "why", "will", "with", "within", "without", "would", "yet",
-                    "you", "your", "yours", "yourself", "yourselves",};
-            stopWords = new HashSet<>();
-            for (String sw : stopwordArray)
-                stopWords.add(sw);
+			stopWords = provider.getStopWords();
+//            String[] stopwordArray = {"a", "about", "above", "across", "after", "afterwards", "again", "against",
+//                    "all", "almost", "alone", "along", "already", "also", "although", "always", "am", "among",
+//                    "amongst", "amoungst", "amount", "an", "and", "another", "any", "anyhow", "anyone", "anything",
+//                    "anyway", "anywhere", "are", "around", "as", "at", "back", "be", "became", "because", "become",
+//                    "becomes", "becoming", "been", "before", "beforehand", "behind", "being", "below", "beside",
+//                    "besides", "between", "beyond", "bill", "both", "bottom", "but", "by", "call", "can", "cannot",
+//                    "cant", "co", "computer", "con", "could", "couldnt", "cry", "de", "describe", "detail", "do",
+//                    "done", "down", "due", "during", "each", "eg", "eight", "either", "eleven", "else", "elsewhere",
+//                    "empty", "enough", "etc", "even", "ever", "every", "everyone", "everything", "everywhere", "except",
+//                    "few", "fifteen", "fify", "fill", "find", "fire", "first", "five", "for", "former", "formerly",
+//                    "forty", "found", "four", "from", "front", "full", "further", "get", "give", "go", "had", "has",
+//                    "hasnt", "have", "he", "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers",
+//                    "herself", "high", "him", "himself", "his", "how", "however", "hundred", "i", "ie", "if", "in",
+//                    "inc", "indeed", "interest", "into", "is", "it", "its", "itself", "keep", "last", "latter",
+//                    "latterly", "least", "less", "ltd", "made", "many", "may", "me", "meanwhile", "might", "mill",
+//                    "mine", "more", "moreover", "most", "mostly", "move", "much", "must", "my", "myself", "name",
+//                    "namely", "neither", "never", "nevertheless", "next", "nine", "no", "nobody", "none", "noone",
+//                    "nor", "not", "nothing", "now", "nowhere", "of", "off", "often", "on", "once", "one", "only",
+//                    "onto", "or", "other", "others", "otherwise", "our", "ours", "ourselves", "out", "over", "own",
+//                    "part", "per", "perhaps", "please", "put", "rather", "re", "same", "see", "seem", "seemed",
+//                    "seeming", "seems", "serious", "several", "she", "should", "show", "side", "since", "sincere",
+//                    "six", "sixty", "so", "some", "somehow", "someone", "something", "sometime", "sometimes",
+//                    "somewhere", "still", "such", "system", "take", "ten", "than", "that", "the", "their", "them",
+//                    "themselves", "then", "thence", "there", "thereafter", "thereby", "therefore", "therein",
+//                    "thereupon", "these", "they", "thick", "thin", "third", "this", "those", "though", "three",
+//                    "through", "throughout", "thru", "thus", "to", "together", "too", "top", "toward", "towards",
+//                    "twelve", "twenty", "two", "un", "under", "until", "up", "upon", "us", "very", "via", "was", "we",
+//                    "well", "were", "what", "whatever", "when", "whence", "whenever", "where", "whereafter", "whereas",
+//                    "whereby", "wherein", "whereupon", "wherever", "whether", "which", "while", "whither", "who",
+//                    "whoever", "whole", "whom", "whose", "why", "will", "with", "within", "without", "would", "yet",
+//                    "you", "your", "yours", "yourself", "yourselves",};
+//            stopWords = new HashSet<>();
+//            for (String sw : stopwordArray)
+//                stopWords.add(sw);
         } catch (ResourceAccessException e) {
             LOGGER.error("Exception while initializing", e);
         }
diff --git a/jcore-lingpipegazetteer-ae/src/test/resources/normalizegazetteer.properties b/jcore-lingpipegazetteer-ae/src/test/resources/normalizegazetteer.properties
index 88c7883d4..91ac661e7 100644
--- a/jcore-lingpipegazetteer-ae/src/test/resources/normalizegazetteer.properties
+++ b/jcore-lingpipegazetteer-ae/src/test/resources/normalizegazetteer.properties
@@ -1,5 +1,6 @@
 DictionaryFile=src/test/resources/dictionary.tst
-StopWordFile=src/test/resources/general_english_words
+#StopWordFile=src/test/resources/general_english_words
+StopWordFile=src/test/resources/reducedStopWordList.txt
 NormalizeText=true
 UseApproximateMatching=true
 MakeVariants=false
diff --git a/jcore-lingpipegazetteer-ae/src/test/resources/reducedStopWordList.txt b/jcore-lingpipegazetteer-ae/src/test/resources/reducedStopWordList.txt
new file mode 100644
index 000000000..b0385b7e1
--- /dev/null
+++ b/jcore-lingpipegazetteer-ae/src/test/resources/reducedStopWordList.txt
@@ -0,0 +1,320 @@
+about
+above
+across
+after
+afterwards
+again
+against
+almost
+alone
+along
+already
+also
+although
+always
+am
+among
+amoungst
+amount
+an
+and
+another
+any
+anyhow
+anyone
+anything
+anywhere
+are
+around
+as
+at
+back
+be
+became
+because
+become
+becoming
+been
+before
+beforehand
+behind
+being
+below
+beside
+between
+beyond
+bill
+both
+bottom
+but
+by
+call
+can
+cannot
+co
+computer
+con
+could
+couldnt
+cry
+de
+describe
+detail
+do
+down
+due
+during
+each
+eg
+eight
+either
+eleven
+else
+elsewhere
+enough
+etc
+even
+ever
+every
+everyone
+everything
+everywhere
+except
+fifteen
+fify
+fill
+find
+fire
+first
+five
+for
+former
+formerly
+found
+four
+from
+front
+full
+further
+get
+give
+go
+had
+has
+have
+he
+hence
+her
+here
+hereafter
+hereby
+herein
+hereupon
+hers
+high
+him
+himself
+his
+how
+however
+hundred
+i
+ie
+if
+in
+indeed
+interest
+into
+is
+it
+its
+itself
+keep
+last
+latter
+least
+less
+ltd
+made
+many
+may
+me
+meanwhile
+might
+mill
+more
+moreover
+most
+mostly
+move
+much
+must
+my
+myself
+name
+neither
+never
+nevertheless
+next
+nine
+no
+nobody
+none
+noone
+not
+nothing
+now
+nowhere
+of
+off
+often
+on
+once
+one
+only
+or
+other
+others
+otherwise
+our
+ours
+ourselves
+out
+over
+own
+per
+perhaps
+please
+put
+rather
+re
+same
+see
+seem
+seemed
+seems
+serious
+several
+she
+should
+show
+side
+since
+sincere
+sixty
+so
+some
+somehow
+someone
+something
+sometime
+sometimes
+still
+such
+system
+take
+ten
+than
+that
+the
+their
+them
+then
+thence
+there
+thereafter
+thereby
+therefore
+therein
+these
+they
+thick
+thin
+third
+this
+those
+though
+three
+throughout
+thru
+thus
+to
+together
+too
+top
+toward
+towards
+twenty
+two
+un
+under
+until
+up
+upon
+us
+very
+via
+was
+we
+were
+what
+whatever
+when
+whence
+whenever
+where
+whereafter
+whereas
+wherein
+whereupon
+wherever
+whether
+which
+while
+whither
+who
+whole
+whom
+whose
+why
+will
+with
+within
+without
+would
+yet
+your
+yours
+yourself
+yourselves
+a
+all
+amongst
+anyway
+becomes
+besides
+cant
+done
+empty
+few
+forty
+hasnt
+herself
+inc
+latterly
+mine
+namely
+nor
+onto
+part
+seeming
+six
+somewhere
+themselves
+thereupon
+through
+twelve
+well
+whereby
+whoever
+you

From 5b9aef6cf2b10dca5dcd17dec285686dd25a263f Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 9 Apr 2021 10:17:44 +0200
Subject: [PATCH 045/269] Travis build hangs after the tests. Removing
 multithreading to check if that's the cause.

---
 .travis.yml                                   |  2 +-
 .../uima/GazetteerAnnotatorTest.java          | 28 +++++++++++++++++--
 .../src/test/resources/normalizePlural.dict   |  1 +
 .../normalizepluralgazetteer.properties       |  9 ++++++
 4 files changed, 36 insertions(+), 4 deletions(-)
 create mode 100644 jcore-lingpipegazetteer-ae/src/test/resources/normalizePlural.dict
 create mode 100644 jcore-lingpipegazetteer-ae/src/test/resources/normalizepluralgazetteer.properties

diff --git a/.travis.yml b/.travis.yml
index 172756b0e..3b3b4c4e0 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -35,7 +35,7 @@ before_install:
   - #./travis-deployment/install-flair-nightly.sh
   - export BOTO_CONFIG=/dev/null
 install: mvn install -DskipTests=true -Dmaven.javadoc.skip=true -B -V
-script: mvn -T 1C test -B
+script: mvn test -B
 
 cache:
   directories:
diff --git a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java
index 7134ae3e7..556b4f0ee 100644
--- a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java
+++ b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java
@@ -30,6 +30,7 @@
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
 import org.apache.uima.fit.factory.ExternalResourceFactory;
 import org.apache.uima.fit.factory.TypeSystemDescriptionFactory;
+import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.JFSIndexRepository;
 import org.apache.uima.resource.ExternalResourceDescription;
@@ -45,10 +46,8 @@
 import org.xml.sax.SAXException;
 
 import java.io.*;
-import java.util.ArrayList;
-import java.util.HashSet;
 import java.util.List;
-import java.util.Set;
+import java.util.*;
 
 import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.assertj.core.api.Assertions.assertThat;
@@ -359,6 +358,29 @@ public void testAnnotatorWithTextNormalization()
 
 	}
 
+	@Test
+	public void testAnnotatorWithPluralNormalization()
+			throws ResourceInitializationException, AnalysisEngineProcessException {
+		ExternalResourceDescription extDesc = ExternalResourceFactory.createExternalResourceDescription(
+				ChunkerProviderImplAlt.class, new File("src/test/resources/normalizepluralgazetteer.properties"));
+		TypeSystemDescription tsDesc = TypeSystemDescriptionFactory
+				.createTypeSystemDescription("de.julielab.jcore.types.jcore-semantics-mention-types");
+
+		AnalysisEngine annotator = AnalysisEngineFactory.createEngine(GazetteerAnnotator.class, tsDesc,
+				GazetteerAnnotator.PARAM_OUTPUT_TYPE, "de.julielab.jcore.types.EntityMention",
+				GazetteerAnnotator.CHUNKER_RESOURCE_NAME, extDesc);
+		JCas jCas = annotator.newJCas();
+
+		jCas.setDocumentText("High-density lipoprotein (HDL) is one of the five major groups of lipoproteins.");
+		PennBioIEPOSTag tag = new PennBioIEPOSTag(jCas, 74, 86);
+		tag.setValue("NNS");
+		tag.addToIndexes();
+		annotator.process(jCas);
+
+		Collection<EntityMention> entityMentions = JCasUtil.select(jCas, EntityMention.class);
+		assertEquals("Expected a single entity", 1, entityMentions.size());
+	}
+
 	@Test
 	public void testAnnotateAcronymsWithFullFormEntity() throws Exception {
 		ExternalResourceDescription extDesc = ExternalResourceFactory.createExternalResourceDescription(
diff --git a/jcore-lingpipegazetteer-ae/src/test/resources/normalizePlural.dict b/jcore-lingpipegazetteer-ae/src/test/resources/normalizePlural.dict
new file mode 100644
index 000000000..713dbb370
--- /dev/null
+++ b/jcore-lingpipegazetteer-ae/src/test/resources/normalizePlural.dict
@@ -0,0 +1 @@
+lipoproteins	Group
diff --git a/jcore-lingpipegazetteer-ae/src/test/resources/normalizepluralgazetteer.properties b/jcore-lingpipegazetteer-ae/src/test/resources/normalizepluralgazetteer.properties
new file mode 100644
index 000000000..2100ebeaf
--- /dev/null
+++ b/jcore-lingpipegazetteer-ae/src/test/resources/normalizepluralgazetteer.properties
@@ -0,0 +1,9 @@
+DictionaryFile=src/test/resources/dictionary.tst
+#StopWordFile=src/test/resources/general_english_words
+StopWordFile=src/test/resources/reducedStopWordList.txt
+NormalizeText=true
+NormalizePlural=true
+UseApproximateMatching=true
+MakeVariants=false
+CaseSensitive=false
+

From c32844dbc5d0e2f927cea05479afee81f3faf8df Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 9 Apr 2021 10:25:29 +0200
Subject: [PATCH 046/269] Added a test for the plural normalization.

---
 .../lingpipegazetteer/utils/StringNormalizerForChunking.java | 2 +-
 .../ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java    | 5 ++++-
 .../src/test/resources/normalizePlural.dict                  | 2 +-
 .../src/test/resources/normalizepluralgazetteer.properties   | 3 +--
 4 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/utils/StringNormalizerForChunking.java b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/utils/StringNormalizerForChunking.java
index a081858fd..e1c997196 100644
--- a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/utils/StringNormalizerForChunking.java
+++ b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/utils/StringNormalizerForChunking.java
@@ -146,7 +146,7 @@ public static NormalizedString normalizeString(String str, TokenizerFactory toke
                 // plural s, only when no stemming is done
                 // an even better normalization would be to use the lemma, of course
                 Range<Integer> tokenOffsets = Range.between(tokenizer.lastTokenStartPosition(), tokenizer.lastTokenEndPosition());
-                if (normalizePlural && !stemming && token.endsWith("s") && pluralPositions.locate(tokenOffsets).isOverlappedBy(tokenOffsets))
+                if (normalizePlural && !stemming && token.endsWith("s") && !pluralPositions.isEmpty() && pluralPositions.locate(tokenOffsets).isOverlappedBy(tokenOffsets))
                     token = token.substring(0, token.length() - 1);
                 sb.append(token);
                 int newStartOffset = sb.length() - token.length();
diff --git a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java
index 556b4f0ee..f7b1a6e8f 100644
--- a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java
+++ b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java
@@ -378,7 +378,10 @@ public void testAnnotatorWithPluralNormalization()
 		annotator.process(jCas);
 
 		Collection<EntityMention> entityMentions = JCasUtil.select(jCas, EntityMention.class);
-		assertEquals("Expected a single entity", 1, entityMentions.size());
+		assertEquals("Expected a single entity", 2, entityMentions.size());
+		Iterator<EntityMention> iterator = entityMentions.iterator();
+		assertEquals("Unexpected covered entity text", "lipoprotein", iterator.next().getCoveredText());
+		assertEquals("Unexpected covered entity text", "lipoproteins", iterator.next().getCoveredText());
 	}
 
 	@Test
diff --git a/jcore-lingpipegazetteer-ae/src/test/resources/normalizePlural.dict b/jcore-lingpipegazetteer-ae/src/test/resources/normalizePlural.dict
index 713dbb370..a59e0435f 100644
--- a/jcore-lingpipegazetteer-ae/src/test/resources/normalizePlural.dict
+++ b/jcore-lingpipegazetteer-ae/src/test/resources/normalizePlural.dict
@@ -1 +1 @@
-lipoproteins	Group
+lipoprotein	Group
diff --git a/jcore-lingpipegazetteer-ae/src/test/resources/normalizepluralgazetteer.properties b/jcore-lingpipegazetteer-ae/src/test/resources/normalizepluralgazetteer.properties
index 2100ebeaf..025fd2fa7 100644
--- a/jcore-lingpipegazetteer-ae/src/test/resources/normalizepluralgazetteer.properties
+++ b/jcore-lingpipegazetteer-ae/src/test/resources/normalizepluralgazetteer.properties
@@ -1,5 +1,4 @@
-DictionaryFile=src/test/resources/dictionary.tst
-#StopWordFile=src/test/resources/general_english_words
+DictionaryFile=src/test/resources/normalizePlural.dict
 StopWordFile=src/test/resources/reducedStopWordList.txt
 NormalizeText=true
 NormalizePlural=true

From bbbae04639257793bea92172f6b7c46b7c0333b9 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 9 Apr 2021 10:51:09 +0200
Subject: [PATCH 047/269] The hanging build was due to the flair NER component
 having an error. This was probably because torch 1.7 has issues with python
 3.6. So here we bumped python to 3.7.

---
 .travis.yml                                          |  6 +++---
 ...e-lingpipe-gazetteer-ae-configurable-resource.xml | 12 ++++++++++++
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 3b3b4c4e0..51738bd09 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,8 +6,8 @@ addons:
     sources:
       - deadsnakes
     packages:
-      - python3.6
-      - python3.6-dev
+      - python3.7
+      - python3.7-dev
 
 env:
   global:
@@ -35,7 +35,7 @@ before_install:
   - #./travis-deployment/install-flair-nightly.sh
   - export BOTO_CONFIG=/dev/null
 install: mvn install -DskipTests=true -Dmaven.javadoc.skip=true -B -V
-script: mvn test -B
+script: mvn -T 2C test -B
 
 cache:
   directories:
diff --git a/jcore-lingpipegazetteer-ae/src/main/resources/de/julielab/jcore/ae/lingpipegazetteer/desc/jcore-lingpipe-gazetteer-ae-configurable-resource.xml b/jcore-lingpipegazetteer-ae/src/main/resources/de/julielab/jcore/ae/lingpipegazetteer/desc/jcore-lingpipe-gazetteer-ae-configurable-resource.xml
index 16a94eb70..c070abd9e 100644
--- a/jcore-lingpipegazetteer-ae/src/main/resources/de/julielab/jcore/ae/lingpipegazetteer/desc/jcore-lingpipe-gazetteer-ae-configurable-resource.xml
+++ b/jcore-lingpipegazetteer-ae/src/main/resources/de/julielab/jcore/ae/lingpipegazetteer/desc/jcore-lingpipe-gazetteer-ae-configurable-resource.xml
@@ -108,6 +108,12 @@
                                 <multiValued>false</multiValued>
                                 <mandatory>true</mandatory>
                             </configurationParameter>
+                            <configurationParameter>
+                                <name>NormalizePlural</name>
+                                <type>Boolean</type>
+                                <multiValued>false</multiValued>
+                                <mandatory>true</mandatory>
+                            </configurationParameter>
                             <configurationParameter>
                                 <name>TransliterateText</name>
                                 <type>Boolean</type>
@@ -152,6 +158,12 @@
                                     <boolean>true</boolean>
                                 </value>
                             </nameValuePair>
+                            <nameValuePair>
+                                <name>NormalizePlural</name>
+                                <value>
+                                    <boolean>false</boolean>
+                                </value>
+                            </nameValuePair>
                             <nameValuePair>
                                 <name>TransliterateText</name>
                                 <value>

From 49472f4874c6fb7893014a151248e12c846d98d2 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 9 Apr 2021 10:54:28 +0200
Subject: [PATCH 048/269] Fixing the python version in the PYTHON variable in
 .travis.yml.

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 51738bd09..f376d31aa 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -19,7 +19,7 @@ env:
   - # GPG_KEY_NAME
   - secure: pxYxmA/9xS/9DO6rUAhlbAtYQMmG633jSwG8OIVCnnoQSXS4UILJgNl7Q6dQsAuT27tk+/fin0kXTnxWqCe0URb3c3XgNQwfGAuz1JIYVPHvezoDQLLRQA6LRgqd7GuvBDsyXJvBANozGKJYJVfoeT9gqFosFuMdRZ88eQm+ltX7zVKyMiz2rqKYPoSFInNxDGMOaIQ+RZdf8ai8rLY3E11PxsMC0LgypEDbuC7d9Q+Tu89YfUeuRly0hAuxmW++RrMgeeAs/7BndmZqcHVpkrcX6Drq8nZ2cj0ev4IDJelV/Nd17Vjfg7HgfJ4/d9S+PCg4KhvOY/y9Xad8geIIzXLFD9ZgcaK7MT9+BFGYXj7ExizFSc+Ico5Q822RJA1XZWfc/EgnY+7jEZCCMz/ceHx8oSh0ce1VbPl7c+O+jMXUMQC69Gpys57XC48rdPn0bbjc4/jpSOq46Xv7YdcGuA2BcWEEeQ0WAbi9IDcevpCXiZ7kng5hHTCpfaYVhn63KAIAMKf7tu6C78wFZR63F8Gf4x/jKE37QqvHV3uOzD7ar6nTAuy/ukZK0p4zyeIYe25PnS9K4kpolT1I12i7/l/7MO9NPFdB0aOCBHUNPBEkifwceltX6RP4PDIKdtCEQ4vcqrRNvhtAhO9Vo1udkyaeFx5swbY3j11CjzcfrBE=
   - # GPG_PASSPHRASE
-  - PYTHON=/usr/bin/python3.6
+  - PYTHON=/usr/bin/python3.7
 
 before_install:
   - |

From 4669d5e2d267307b346fc668d02cf3cebc56e87a Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 9 Apr 2021 11:23:32 +0200
Subject: [PATCH 049/269] The original error with flair seems fixed, still the
 build hangs again. Removing multithreading once more to find the cause.

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index f376d31aa..d15ad0a30 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -35,7 +35,7 @@ before_install:
   - #./travis-deployment/install-flair-nightly.sh
   - export BOTO_CONFIG=/dev/null
 install: mvn install -DskipTests=true -Dmaven.javadoc.skip=true -B -V
-script: mvn -T 2C test -B
+script: mvn test -B
 
 cache:
   directories:

From 943c6cfe941c25f18ef33955bd1b8b348c2dc184 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 9 Apr 2021 11:52:46 +0200
Subject: [PATCH 050/269] Turns out, still had a flair issue because now torch
 1.8 was used which changed something with LSTMs. Now explicitly installing
 flair 0.6.1 and torch 1.7.1.

---
 .travis.yml                  | 2 +-
 jcore-flair-ner-ae/README.md | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index d15ad0a30..2a924b36b 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -31,7 +31,7 @@ before_install:
     if ! find "$HOME/pip-cache" -mindepth 1 -print -quit 2>/dev/null | grep -q .; then
       $PYTHON -m pip download --destination-directory="$HOME/pip-cache" flair
     fi
-    sudo -H $PYTHON -m pip install --find-links="$HOME/pip-cache" flair==0.6.1
+    sudo -H $PYTHON -m pip install --find-links="$HOME/pip-cache" flair==0.6.1 torch==1.7.1
   - #./travis-deployment/install-flair-nightly.sh
   - export BOTO_CONFIG=/dev/null
 install: mvn install -DskipTests=true -Dmaven.javadoc.skip=true -B -V
diff --git a/jcore-flair-ner-ae/README.md b/jcore-flair-ner-ae/README.md
index a06e8a4d7..69d4b0ee0 100644
--- a/jcore-flair-ner-ae/README.md
+++ b/jcore-flair-ner-ae/README.md
@@ -12,6 +12,8 @@ The python executable lookup works as follows:
 2. Otherwise, if the environment variable `PYTHON` is set, this value is used.
 3. Otherwise, the `python` command is used.
 
+Tested with flair 0.6.1 and PyTorch 1.7.1.
+
 **1. Parameters**
 
 | Parameter Name | Parameter Type | Mandatory | Multivalued | Description |

From dcea03bc951f992bfe2c3862ada5732e6a8eb4d9 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 9 Apr 2021 12:21:37 +0200
Subject: [PATCH 051/269] Travis build passed, enabling multithreading again.

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 2a924b36b..bce762cc1 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -35,7 +35,7 @@ before_install:
   - #./travis-deployment/install-flair-nightly.sh
   - export BOTO_CONFIG=/dev/null
 install: mvn install -DskipTests=true -Dmaven.javadoc.skip=true -B -V
-script: mvn test -B
+script: mvn -T 2C test -B
 
 cache:
   directories:

From 9bbba6a38ac40576624c7f19264574c375a66465 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 9 Apr 2021 14:36:37 +0200
Subject: [PATCH 052/269] NPE check

---
 .../jcore/ae/lingpipegazetteer/uima/GazetteerAnnotator.java     | 1 -
 .../ae/lingpipegazetteer/utils/StringNormalizerForChunking.java | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotator.java b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotator.java
index 539c0a918..e663228b3 100644
--- a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotator.java
+++ b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotator.java
@@ -315,7 +315,6 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException {
             docText = transliterator.transform(docText);
         NormalizedString normalizedDocText = null;
         if (provider.getNormalize()) {
-            boolean hasPosTags = aJCas.getAnnotationIndex(PennBioIEPOSTag.type).iterator().hasNext();
             if (provider.getNormalizePlural()) {
                 OffsetSet pluralOffsets = StreamSupport.stream(Spliterators.spliterator(aJCas.<PennBioIEPOSTag>getAnnotationIndex(PennBioIEPOSTag.type).iterator(), 0, 0), false).filter(tag -> tag.getValue().equals("NNS")).map(tag -> Range.between(tag.getBegin(), tag.getEnd())).collect(Collectors.toCollection(OffsetSet::new));
                 normalizedDocText = StringNormalizerForChunking.normalizeString(docText, normalizationTokenFactory, true, pluralOffsets, transliterator);
diff --git a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/utils/StringNormalizerForChunking.java b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/utils/StringNormalizerForChunking.java
index e1c997196..9e50f845a 100644
--- a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/utils/StringNormalizerForChunking.java
+++ b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/utils/StringNormalizerForChunking.java
@@ -146,7 +146,7 @@ public static NormalizedString normalizeString(String str, TokenizerFactory toke
                 // plural s, only when no stemming is done
                 // an even better normalization would be to use the lemma, of course
                 Range<Integer> tokenOffsets = Range.between(tokenizer.lastTokenStartPosition(), tokenizer.lastTokenEndPosition());
-                if (normalizePlural && !stemming && token.endsWith("s") && !pluralPositions.isEmpty() && pluralPositions.locate(tokenOffsets).isOverlappedBy(tokenOffsets))
+                if (normalizePlural && !stemming && token.endsWith("s") && pluralPositions != null && !pluralPositions.isEmpty() && pluralPositions.locate(tokenOffsets).isOverlappedBy(tokenOffsets))
                     token = token.substring(0, token.length() - 1);
                 sb.append(token);
                 int newStartOffset = sb.length() - token.length();

From a41c662a1ea88b6492aebdb94bc52cfa9c73174d Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 4 May 2021 14:31:50 +0200
Subject: [PATCH 053/269] Emergency fix of invalid offsets. Not fixing the
 actual issue (multi byte encoding) but just avoiding offsets outside of the
 range of the document text.

---
 .../lingpipegazetteer/uima/GazetteerAnnotator.java |  7 +++++--
 .../utils/StringNormalizerForChunking.java         | 14 ++++++++++++--
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotator.java b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotator.java
index e663228b3..35e02f576 100644
--- a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotator.java
+++ b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotator.java
@@ -532,8 +532,11 @@ private void add2Cas(JCas aJCas, Chunk chunk, NormalizedString normalizedDocText
             return;
         }
 
-        int start = provider.getNormalize() ? normalizedDocText.getOriginalOffset(chunk.start()) : chunk.start();
-        int end = provider.getNormalize() ? normalizedDocText.getOriginalOffset(chunk.end()) : chunk.end();
+        // The Math.min(, Math.max(0, )) application is a security measure. In rare cases there are issues with multi
+        // byte character encodings. This security measure won't correct the underlying error but avoids errors
+        // due to invalid offsets.
+        int start = Math.min(aJCas.getDocumentText().length(), Math.max(0, provider.getNormalize() ? normalizedDocText.getOriginalOffset(chunk.start()) : chunk.start()));
+        int end = Math.min(aJCas.getDocumentText().length(), Math.max(0,provider.getNormalize() ? normalizedDocText.getOriginalOffset(chunk.end()) : chunk.end()));
 
         try {
             if (mantraMode) {
diff --git a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/utils/StringNormalizerForChunking.java b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/utils/StringNormalizerForChunking.java
index 9e50f845a..b12b5de39 100644
--- a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/utils/StringNormalizerForChunking.java
+++ b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/utils/StringNormalizerForChunking.java
@@ -146,8 +146,18 @@ public static NormalizedString normalizeString(String str, TokenizerFactory toke
                 // plural s, only when no stemming is done
                 // an even better normalization would be to use the lemma, of course
                 Range<Integer> tokenOffsets = Range.between(tokenizer.lastTokenStartPosition(), tokenizer.lastTokenEndPosition());
-                if (normalizePlural && !stemming && token.endsWith("s") && pluralPositions != null && !pluralPositions.isEmpty() && pluralPositions.locate(tokenOffsets).isOverlappedBy(tokenOffsets))
-                    token = token.substring(0, token.length() - 1);
+                try {
+                    if (normalizePlural && !stemming && token.endsWith("s") && pluralPositions != null && !pluralPositions.isEmpty() && Optional.ofNullable(pluralPositions.locate(tokenOffsets)).orElse(Range.between(0, 0)).isOverlappedBy(tokenOffsets))
+                        token = token.substring(0, token.length() - 1);
+                } catch (Exception e) {
+                    System.out.println("normalizePlural: " + normalizePlural);
+                    System.out.println("stemming: " + stemming);
+                    System.out.println("Token: " + token);
+                    System.out.println("PluralPositions: " + pluralPositions);
+                    System.out.println("TokenOffsets: " + tokenOffsets);
+                    System.out.println("pluralPositions.locate(tokenOffsets): " + pluralPositions.locate(tokenOffsets));
+                    e.printStackTrace();
+                }
                 sb.append(token);
                 int newStartOffset = sb.length() - token.length();
                 int newEndOffset = sb.length();

From 5f6a218044dd222cc9ce67595792bca9d1db643a Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 3 Jun 2021 15:37:57 +0200
Subject: [PATCH 054/269] Created code to fetch the hashes of existing XMI
 documents in the XMI table.

However, the code is currently in the wrong place. It must go to the XML reader. We need to compute the hash directly after parsing the document text from XML so we can then compare it to the hashes in the database. If we used the document text in the XMI reader, it would always be the same as the database hash because we have read the document from the very same database table.
So, next up is code movement to the XML reader.
---
 jcore-types/pom.xml                           |   3 +
 .../jcore/types/jcore-casflow-types.xml       |  28 +++++
 .../flowcontroller/AnnotationDefinedFlow.java |  39 ++++++
 .../AnnotationDefinedFlowController.java      |  23 ++++
 .../xmi/flowcontroller/FixedInnerFlow.java    |  40 ++++++
 .../HashComparisonFlowController.java         | 117 ++++++++++++++++++
 .../HashComparisonOuterFlow.java              |  72 +++++++++++
 .../flowcontroller/FlowControllerTest.java    | 101 +++++++++++++++
 .../src/test/resources/logback-test.xml       |   1 +
 .../jcore/consumer/xmi/XMIDBWriter.java       |   2 +
 .../jcore/consumer/xmi/XmiDataInserter.java   |  17 +--
 11 files changed, 436 insertions(+), 7 deletions(-)
 create mode 100644 jcore-types/src/main/resources/de/julielab/jcore/types/jcore-casflow-types.xml
 create mode 100644 jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/AnnotationDefinedFlow.java
 create mode 100644 jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/AnnotationDefinedFlowController.java
 create mode 100644 jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/FixedInnerFlow.java
 create mode 100644 jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/HashComparisonFlowController.java
 create mode 100644 jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/HashComparisonOuterFlow.java
 create mode 100644 jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/flowcontroller/FlowControllerTest.java

diff --git a/jcore-types/pom.xml b/jcore-types/pom.xml
index e9571839f..99b9f0134 100644
--- a/jcore-types/pom.xml
+++ b/jcore-types/pom.xml
@@ -36,6 +36,9 @@
                                 <typeSystemInclude>
                                     src/main/resources/de/julielab/jcore/types/casmultiplier/jcore-dbtable-multiplier-types.xml
                                 </typeSystemInclude>
+								<typeSystemInclude>
+									src/main/resources/de/julielab/jcore/types/jcore-casflow-types.xml
+								</typeSystemInclude>
 							</typeSystemIncludes>
 
 							<!-- OPTIONAL -->
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-casflow-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-casflow-types.xml
new file mode 100644
index 000000000..6d3e20b4c
--- /dev/null
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-casflow-types.xml
@@ -0,0 +1,28 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <name>JCoRe CAS Flow Types</name>
+    <description>This is a type system to facilitate the routing of CASes through AggregateAnalysisEngines via
+        FlowControllers. The types herein serve to indicate which components should be visited for the CAS
+        carrying annotations of this type.
+    </description>
+    <version>2.6.0-SNAPSHOT</version>
+        <vendor>JULIE Lab Jena, Germany</vendor>
+    <types>
+        <typeDescription>
+            <name>de.julielab.jcore.types.casflow.ToVisit</name>
+            <description>Contains a list of delegate analysis engine names that the CAS, having this annotation, should
+                visit. Other components will be skipped. The names must be the delegate keys specified in the aggregate
+                descriptor.
+            </description>
+            <supertypeName>uima.tcas.Annotation</supertypeName>
+            <features>
+                <featureDescription>
+                    <name>delegateKeys</name>
+                    <description>The keys of the delegates to visit. The keys are the names given to the delegate analysis engines in the aggregate.</description>
+                    <rangeTypeName>uima.cas.StringArray</rangeTypeName>
+                    <elementType>uima.cas.String</elementType>
+                </featureDescription>
+            </features>
+        </typeDescription>
+    </types>
+</typeSystemDescription>
\ No newline at end of file
diff --git a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/AnnotationDefinedFlow.java b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/AnnotationDefinedFlow.java
new file mode 100644
index 000000000..c48c75193
--- /dev/null
+++ b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/AnnotationDefinedFlow.java
@@ -0,0 +1,39 @@
+package de.julielab.jcore.reader.xmi.flowcontroller;
+
+import de.julielab.jcore.types.casflow.ToVisit;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.analysis_engine.metadata.FixedFlow;
+import org.apache.uima.analysis_engine.metadata.FlowConstraints;
+import org.apache.uima.flow.JCasFlow_ImplBase;
+import org.apache.uima.flow.SimpleStep;
+import org.apache.uima.flow.Step;
+
+/**
+ * <p>Returns steps according an existing {@link ToVisit} annotation of the CAS or, if not present, the default aggregate flow.</p>
+ */
+public class AnnotationDefinedFlow extends JCasFlow_ImplBase {
+    private String[] toVisitKeys;
+    private String[] fixedFlow;
+    private int currentPos;
+
+    public AnnotationDefinedFlow(ToVisit toVisit, FlowConstraints flowConstraints) throws AnalysisEngineProcessException {
+        if (!(flowConstraints instanceof FixedFlow))
+            throw new AnalysisEngineProcessException(new IllegalArgumentException("This flow requires the FixedFlow to determine the default processing order. However, the flow constraints are of type " + flowConstraints.getClass().getCanonicalName()));
+        this.fixedFlow = toVisit != null ? ((FixedFlow) flowConstraints).getFixedFlow() : null;
+        this.toVisitKeys = toVisit.getDelegateKeys().toArray();
+        this.currentPos = 0;
+    }
+
+    /**
+     * <p>Routes the CAS to the next component defined by the CAS'es {@link ToVisit} annotation or,
+     * if <tt>ToVisit</tt> was not found, to the next component as defined by the default fixed flow.</p>
+     *
+     * @return The next component to visit or the next default flow component.
+     */
+    @Override
+    public Step next() {
+        String nextAEKey = toVisitKeys != null ? toVisitKeys[currentPos] : fixedFlow[currentPos];
+        ++currentPos;
+        return new SimpleStep(nextAEKey);
+    }
+}
diff --git a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/AnnotationDefinedFlowController.java b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/AnnotationDefinedFlowController.java
new file mode 100644
index 000000000..359d8eb7d
--- /dev/null
+++ b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/AnnotationDefinedFlowController.java
@@ -0,0 +1,23 @@
+package de.julielab.jcore.reader.xmi.flowcontroller;
+
+import de.julielab.jcore.types.casflow.ToVisit;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.flow.Flow;
+import org.apache.uima.flow.JCasFlowController_ImplBase;
+import org.apache.uima.jcas.JCas;
+
+/**
+ * <p>Sends each CAS through the delegates of an aggregate analysis engine as named by the CAS'es {@link ToVisit} annotation.</p>
+ * <p>If there is no <tt>ToVisit</tt> annotation, the default (fixed) flow is intended to be used. Thus, the fixed flow
+ * constraint must be set on the aggregate engine.</p>
+ */
+public class AnnotationDefinedFlowController extends JCasFlowController_ImplBase {
+    @Override
+    public Flow computeFlow(JCas jCas) throws AnalysisEngineProcessException {
+        // Look up the single ToVisit annotation of the CAS. If there is none, null is handed on, which is
+        // meant to request the default, fixed flow.
+        ToVisit toVisit = JCasUtil.exists(jCas, ToVisit.class) ? JCasUtil.selectSingle(jCas, ToVisit.class) : null;
+        return new AnnotationDefinedFlow(toVisit, getContext().getAggregateMetadata().getFlowConstraints());
+    }
+}
diff --git a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/FixedInnerFlow.java b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/FixedInnerFlow.java
new file mode 100644
index 000000000..21d84a60d
--- /dev/null
+++ b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/FixedInnerFlow.java
@@ -0,0 +1,40 @@
+package de.julielab.jcore.reader.xmi.flowcontroller;
+
+import org.apache.uima.flow.FinalStep;
+import org.apache.uima.flow.JCasFlow_ImplBase;
+import org.apache.uima.flow.SimpleStep;
+import org.apache.uima.flow.Step;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * <p>Flow for the CASes emitted by the {@link de.julielab.jcore.reader.xmi.XmiDBMultiplier}: they visit the delegates of the aggregate
+ * engine in the fixed, sequential order, except for the first delegate - the multiplier itself - which is skipped.</p>
+ */
+public class FixedInnerFlow extends JCasFlow_ImplBase {
+    private final static Logger log = LoggerFactory.getLogger(FixedInnerFlow.class);
+    private String[] fixedFlow;
+    private int currentPosition;
+
+    public FixedInnerFlow(String[] fixedFlow) {
+        this.currentPosition = 0;
+        this.fixedFlow = fixedFlow;
+    }
+
+    public Step next() {
+        // The first analysis engine is the multiplier, so routing never targets position 0.
+        if (currentPosition == 0) {
+            currentPosition = 1;
+        }
+        if (currentPosition >= fixedFlow.length) {
+            // no appropriate AEs to call - end of flow
+            log.trace("Inner flow Complete.");
+            return new FinalStep();
+        }
+        String delegateKey = fixedFlow[currentPosition];
+        log.trace("Inner next AE is: " + delegateKey);
+        Step delegateStep = new SimpleStep(delegateKey);
+        currentPosition++;
+        return delegateStep;
+    }
+}
diff --git a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/HashComparisonFlowController.java b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/HashComparisonFlowController.java
new file mode 100644
index 000000000..717566675
--- /dev/null
+++ b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/HashComparisonFlowController.java
@@ -0,0 +1,117 @@
+package de.julielab.jcore.reader.xmi.flowcontroller;
+
+import de.julielab.costosys.configuration.FieldConfig;
+import de.julielab.costosys.dbconnection.CoStoSysConnection;
+import de.julielab.costosys.dbconnection.DataBaseConnector;
+import de.julielab.jcore.reader.db.DBReader;
+import de.julielab.jcore.types.casmultiplier.RowBatch;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.FeatureStructure;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.descriptor.ResourceMetaData;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.flow.Flow;
+import org.apache.uima.flow.FlowControllerContext;
+import org.apache.uima.flow.JCasFlowController_ImplBase;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.StringArray;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.FileNotFoundException;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.*;
+import java.util.stream.Collectors;
+
+/**
+ * <p>Prerequisite: the aggregate engine uses a fixed flow whose first delegate is the multiplier that reads the batch.</p>
+ * <p>Expects a jCas as being output by the {@link de.julielab.jcore.reader.xmi.XmiDBMultiplierReader}, i.e. the CAS
+ * should contain a {@link de.julielab.jcore.types.casmultiplier.RowBatch} annotation. Then, retrieves the sha256 hashes for
+ * the passed documents from the database.</p>
+ */
+@ResourceMetaData(name = "JCoRe Hash Comparison Flow Controller", description = "This flow controller aims to skip processing for CASes that already exist in the database and haven't changed with regards to a newly read version. For this purpose, the sha256 hash of the CAS document text is compared to the the existing hash in the database for the same document ID. If the hashes match, the text is the same and, thus, the annotations will be the same.")
+public class HashComparisonFlowController extends JCasFlowController_ImplBase {
+    public static final String PARAM_ADD_SHA_HASH = "AddShaHash";
+    public static final String PARAM_TABLE_DOCUMENT = "DocumentTable";
+    private final static Logger log = LoggerFactory.getLogger(HashComparisonFlowController.class);
+    @ConfigurationParameter(name = DBReader.PARAM_COSTOSYS_CONFIG_NAME, description = "Path to the CoStoSys configuration XML file that specifies the database this pipeline writes to, i.e. the same file that the DB XMI Writer is using. If there is no DB Writer in use, this flow controller is not applicable.")
+    private String costosysConfig;
+    @ConfigurationParameter(name = PARAM_ADD_SHA_HASH, description = "Possible values: document_text, defaults to 'document_text' and thus doesn't need to be specified manually at the moment. This parameter needs to match the value for the same parameter given to the XMIDBWriter in this pipeline. Then, a comparison between the existing hash in the database and the new hash of the CAS read in this pipeline can be made. In case the hashes match, the CAS skips all component except the DBCheckpointAE to mark the document as processed.")
+    private String documentItemToHash;
+    @ConfigurationParameter(name = PARAM_TABLE_DOCUMENT, description = "String parameter indicating the name of the " +
+            "table where the XMI data will be stored. The name must be schema qualified.")
+    private String docTableParamValue;
+
+    private DataBaseConnector dbc;
+
+    @Override
+    public void initialize(FlowControllerContext aContext) throws ResourceInitializationException {
+        // The base class stores the context; without this call getContext() returns null in computeFlow().
+        super.initialize(aContext);
+        this.costosysConfig = (String) aContext.getConfigParameterValue(DBReader.PARAM_COSTOSYS_CONFIG_NAME);
+        this.documentItemToHash = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_ADD_SHA_HASH)).orElse("document_text");
+        try {
+            dbc = new DataBaseConnector(this.costosysConfig);
+        } catch (FileNotFoundException e) {
+            log.error("Could not create the CoStoSys DatabaseConnector:", e);
+            throw new ResourceInitializationException(e);
+        }
+    }
+
+    @Override
+    public Flow computeFlow(JCas jCas) throws AnalysisEngineProcessException {
+        RowBatch rowBatch;
+        try {
+            rowBatch = JCasUtil.selectSingle(jCas, RowBatch.class);
+        } catch (IllegalArgumentException e) {
+            log.error("Could not select the RowBatch annotation from the JCas:", e);
+            throw new AnalysisEngineProcessException(e);
+        }
+        return new HashComparisonOuterFlow(fetchCurrentHashesFromDatabase(rowBatch), documentItemToHash, getContext().getAggregateMetadata().getFlowConstraints());
+    }
+
+    /**
+     * <p>Fetches the hashes of the currently stored documents in the database.</p>
+     * @param rowBatch The annotation specifying which documents should be fetched by the multiplier and then be processed by the aggregate.
+     * @return A map from the comma-separated primary key values of each document found in the database to its stored hash. Empty if the batch lists no IDs.
+     * @throws AnalysisEngineProcessException If the SQL request fails.
+     */
+    private Map<String, String> fetchCurrentHashesFromDatabase(RowBatch rowBatch) throws AnalysisEngineProcessException {
+        String dataTable = dbc.getNextDataTable(rowBatch.getTableName());
+        String hashColumn = documentItemToHash + "_sha256";
+        // Extract the document IDs in this RowBatch. The IDs could be composite keys.
+        List<String[]> documentIds = new ArrayList<>(rowBatch.getIdentifiers().size());
+        for (Iterator<FeatureStructure> documentIDsIt = rowBatch.getIdentifiers().iterator(); documentIDsIt.hasNext(); ) {
+            documentIds.add(((StringArray) documentIDsIt.next()).toStringArray());
+        }
+        Map<String, String> id2hash = new HashMap<>(documentIds.size());
+        if (documentIds.isEmpty()) {
+            // Without IDs the WHERE clause would be empty and the statement invalid; there is nothing to look up.
+            return id2hash;
+        }
+        String sql = null;
+        try (CoStoSysConnection conn = dbc.obtainOrReserveConnection(); java.sql.Statement stmt = conn.createStatement()) {
+            FieldConfig activeTableFieldConfiguration = dbc.getActiveTableFieldConfiguration();
+            int pkLength = activeTableFieldConfiguration.getPrimaryKey().length;
+            String idQuery = documentIds.stream()
+                    .map(key -> Arrays.stream(key).map(part -> "%s='" + String.valueOf(part).replace("'", "''") + "'").toArray(String[]::new))
+                    .map(activeTableFieldConfiguration::expandPKNames).map(expandedKeys -> String.join(" AND ", expandedKeys))
+                    .collect(Collectors.joining(" OR "));
+            sql = String.format("SELECT %s,%s FROM %s WHERE %s", activeTableFieldConfiguration.getPrimaryKeyString(), hashColumn, dataTable, idQuery);
+            ResultSet rs = stmt.executeQuery(sql);
+            while (rs.next()) {
+                // JDBC column indexes start at 1: columns 1..pkLength hold the key parts, the hash column follows them.
+                StringJoiner pk = new StringJoiner(",");
+                for (int i = 1; i <= pkLength; i++)
+                    pk.add(rs.getString(i));
+                id2hash.put(pk.toString(), rs.getString(pkLength + 1));
+            }
+        } catch (SQLException e) {
+            log.error("Could not retrieve hashes from the database. SQL query was {}:", sql, e);
+            throw new AnalysisEngineProcessException(e);
+        }
+        return id2hash;
+    }
+}
diff --git a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/HashComparisonOuterFlow.java b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/HashComparisonOuterFlow.java
new file mode 100644
index 000000000..09178fa29
--- /dev/null
+++ b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/HashComparisonOuterFlow.java
@@ -0,0 +1,72 @@
+package de.julielab.jcore.reader.xmi.flowcontroller;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.analysis_engine.metadata.FixedFlow;
+import org.apache.uima.analysis_engine.metadata.FlowConstraints;
+import org.apache.uima.flow.*;
+import org.apache.uima.jcas.JCas;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Map;
+
+/**
+ * <p>Note: This flow is only applicable to aggregate analysis engines whose first component is the {@link de.julielab.jcore.reader.xmi.XmiDBMultiplier}.</p>
+ * <p>The {@link HashComparisonFlowController} creates this flow to route the CAS filled by the {@link de.julielab.jcore.reader.xmi.XmiDBMultiplierReader}.
+ * That CAS carries a {@link de.julielab.jcore.types.casmultiplier.RowBatch} instance telling which documents to read
+ * from which database table.</p>
+ * <p>This flow hands the reader CAS to the first component, the multiplier, and to nothing else. Each CAS the multiplier
+ * creates is announced via {@link #newCasProduced(JCas, String)}, which returns a separate flow that determines the
+ * processing order of that CAS within the aggregate.</p>
+ */
+public class HashComparisonOuterFlow extends JCasFlow_ImplBase {
+    private final static Logger log = LoggerFactory.getLogger(HashComparisonOuterFlow.class);
+    // Delegate keys in the order of the aggregate's fixed flow; position 0 is expected to be the multiplier.
+    private String[] fixedFlow;
+    private int currentPosition;
+    private Map<String, String> id2hash;
+    private String documentItemToHash;
+
+    public HashComparisonOuterFlow(Map<String, String> id2hash, String documentItemToHash, FlowConstraints flowConstraints) throws AnalysisEngineProcessException {
+        if (!(flowConstraints instanceof FixedFlow)) {
+            throw new AnalysisEngineProcessException(new IllegalArgumentException("This flow requires the original FixedFlow to know the order of the delegate engines but the given flow is of type " + flowConstraints.getClass()));
+        }
+        // The delegate order of the fixed flow is the basis for the outer and the inner routing.
+        this.fixedFlow = ((FixedFlow) flowConstraints).getFixedFlow();
+        this.currentPosition = 0;
+        this.id2hash = id2hash;
+        this.documentItemToHash = documentItemToHash;
+    }
+
+    @Override
+    protected Flow newCasProduced(JCas newCas, String producedBy) throws AnalysisEngineProcessException {
+        // The hash of the new CAS is computed here, but its value does not influence the routing in this version:
+        // every CAS created by the multiplier receives the fixed inner flow.
+        getHash(newCas);
+        return new FixedInnerFlow(fixedFlow);
+    }
+
+    private String getHash(JCas newCas) {
+        final byte[] digest = DigestUtils.sha256(newCas.getDocumentText().getBytes());
+        return Base64.encodeBase64String(digest);
+    }
+
+    public Step next() {
+        // The outer flow only passes the CAS to the CAS multiplier. The multiplier creates more CASes which
+        // are then passed to newCasProduced() and are then routed by the InnerFlow.
+        if (currentPosition == 0 && fixedFlow.length > 0) {
+            String multiplierKey = fixedFlow[0];
+            log.trace("Outer next AE is: " + multiplierKey);
+            Step multiplierStep = new SimpleStep(multiplierKey);
+            currentPosition = 1;
+            return multiplierStep;
+        }
+        // All remaining delegates are skipped for the reader CAS.
+        currentPosition = Math.max(currentPosition, fixedFlow.length);
+        // no appropriate AEs to call - end of flow
+        log.trace("Outer flow Complete.");
+        return new FinalStep();
+    }
+}
diff --git a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/flowcontroller/FlowControllerTest.java b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/flowcontroller/FlowControllerTest.java
new file mode 100644
index 000000000..5c3d69e64
--- /dev/null
+++ b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/flowcontroller/FlowControllerTest.java
@@ -0,0 +1,101 @@
+package de.julielab.jcore.reader.xmi.flowcontroller;
+
+import de.julielab.jcore.types.Header;
+import de.julielab.jcore.types.casmultiplier.RowBatch;
+import de.julielab.jcore.utility.JCoReTools;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.JCasMultiplier_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.AbstractCas;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.factory.FlowControllerFactory;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.flow.FlowControllerDescription;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.StringArray;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class FlowControllerTest {
+    @Test
+    public void testFlowController() throws Exception {
+        JCas batchCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types", "de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types");
+        RowBatch batch = new RowBatch(batchCas);
+        for (int docNumber = 0; docNumber < 10; docNumber++) {
+            StringArray primaryKey = new StringArray(batchCas, 1);
+            primaryKey.set(0, String.valueOf(docNumber));
+            batch.setIdentifiers(JCoReTools.addToFSArray(batch.getIdentifiers(), primaryKey));
+        }
+        batch.addToIndexes();
+
+        // Aggregate under test: the flow controller, then the multiplier as first delegate, then two logging engines.
+        AnalysisEngineDescription aggregateDesc = AnalysisEngineFactory.createEngineDescription(
+                FlowControllerFactory.createFlowControllerDescription(HashComparisonFlowController.class),
+                AnalysisEngineFactory.createEngineDescription(TestMultiplier.class),
+                AnalysisEngineFactory.createEngineDescription(TestAE.class, "name", "TestAE 1"),
+                AnalysisEngineFactory.createEngineDescription(TestAE.class, "name", "TestAE 2"));
+        AnalysisEngine aggregate = AnalysisEngineFactory.createEngine(aggregateDesc);
+        aggregate.process(batchCas);
+    }
+
+    public static class TestAE extends JCasAnnotator_ImplBase {
+        private final static Logger log = LoggerFactory.getLogger(TestAE.class);
+        @ConfigurationParameter(name = "name")
+        private String name;
+
+        @Override
+        public void initialize(UimaContext context) throws ResourceInitializationException {
+            this.name = (String) context.getConfigParameterValue("name");
+        }
+
+        @Override
+        public void process(JCas jCas) throws AnalysisEngineProcessException {
+            log.debug("Running AE: {}", name);
+            final String documentText = jCas.getDocumentText();
+            log.debug("JCas text: " + documentText);
+        }
+    }
+
+    public static class TestMultiplier extends JCasMultiplier_ImplBase {
+        private List<String> idsToRead = new ArrayList<>();
+        private int currentIndex;
+        @Override
+        public void process(JCas jCas) throws AnalysisEngineProcessException {
+            RowBatch batch = JCasUtil.selectSingle(jCas, RowBatch.class);
+            currentIndex = 0;
+            idsToRead.clear();
+            for (int row = 0; row < batch.getIdentifiers().size() && batch.getIdentifiers(row) != null; row++) {
+                // The document IDs of this test are single-element keys, so only element 0 is read
+                idsToRead.add(batch.getIdentifiers(row).get(0));
+            }
+        }
+
+        @Override
+        public boolean hasNext() throws AnalysisEngineProcessException {
+            return idsToRead.size() > currentIndex;
+        }
+
+        @Override
+        public AbstractCas next() throws AnalysisEngineProcessException {
+            JCas documentCas = getEmptyJCas();
+            Header docHeader = new Header(documentCas);
+            String currentDocId = idsToRead.get(currentIndex);
+            docHeader.setDocId(currentDocId);
+            docHeader.addToIndexes();
+            documentCas.setDocumentText("ID: " + currentDocId);
+            currentIndex++;
+            return documentCas;
+        }
+    }
+
+}
diff --git a/jcore-xmi-db-reader/src/test/resources/logback-test.xml b/jcore-xmi-db-reader/src/test/resources/logback-test.xml
index 37c8a721c..b8337ca9b 100644
--- a/jcore-xmi-db-reader/src/test/resources/logback-test.xml
+++ b/jcore-xmi-db-reader/src/test/resources/logback-test.xml
@@ -10,6 +10,7 @@
     </appender>
     <logger name="de.julielab.jcore.reader.xmi" level="INFO"/>
     <logger name="de.julielab.xml.binary.BinaryJeDISNodeDecoder" level="DEBUG"/>
+    <logger name="de.julielab.jcore.reader.xmi.flowcontroller" level="TRACE"/>
     <root level="INFO">
         <appender-ref ref="STDOUT" />
     </root>
diff --git a/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XMIDBWriter.java b/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XMIDBWriter.java
index 380c0b232..004c085d9 100644
--- a/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XMIDBWriter.java
+++ b/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XMIDBWriter.java
@@ -115,6 +115,7 @@ public class XMIDBWriter extends JCasAnnotator_ImplBase {
     public static final String PARAM_FEATURES_TO_MAP_DRYRUN = "BinaryFeaturesToMapDryRun";
     public static final String PARAM_BINARY_FEATURES_BLACKLIST = "BinaryFeaturesBlacklist";
     public static final String PARAM_ADD_SHA_HASH = "AddShaHash";
+    public static final String PARAM_SKIP_MATCHING_HASH = "SkipMatchingHash";
     private static final Logger log = LoggerFactory.getLogger(XMIDBWriter.class);
     // The mappings are keyed by the costosys.xml path and the table schema, see 'mappingCacheKey'.
     // The idea is to save costly database connections by sharing updating mapping across threads.
@@ -249,6 +250,7 @@ public class XMIDBWriter extends JCasAnnotator_ImplBase {
     private String[] binaryFeaturesBlacklistParameter;
     @ConfigurationParameter(name = PARAM_ADD_SHA_HASH, mandatory = false, description = "Possible values: document_text. If this parameter is set to a valid value, the SHA256 hash for the given value will be calculated, base64 encoded and added to each document as a new column in the document table. The column will be named after the parameter value, suffixed by '_sha256'.")
     private String documentItemToHash;
+    @ConfigurationParameter(name =PARAM_SKIP_MATCHING_HASH, mandatory = false, description = "Only in effect, if: " + PARAM_ADD_SHA_HASH + " is active; if the target XMI table has also been read from by the XMI DB reader and the reader has been configured to read the document's current hash value. Then, compares the hash value retrieved and relied by the XMI DB reader to the  ")
     private Map<DocumentId, String> shaMap;
     private String mappingCacheKey;
     private DocumentReleaseCheckpoint docReleaseCheckpoint;
diff --git a/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XmiDataInserter.java b/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XmiDataInserter.java
index 31fb146ef..080ffd613 100644
--- a/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XmiDataInserter.java
+++ b/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XmiDataInserter.java
@@ -57,19 +57,20 @@ public XmiDataInserter(Set<String> annotationModuleColumnNames,
      * update. It will just be inserted otherwise (throwing an error if there
      * will be a primary key constraint violation, i.e. duplicates).
      *
-     * @param serializedCASes
+     * @param annotationModules
      * @param storeBaseDocument
      * @param deleteObsolete
      * @param shaMap
      * @throws XmiDataInsertionException
      * @throws AnalysisEngineProcessException
      */
-    public void sendXmiDataToDatabase(String xmiTableName, List<XmiData> serializedCASes, String subsetTableName, Boolean storeBaseDocument, Boolean deleteObsolete, Map<DocumentId, String> shaMap) throws XmiDataInsertionException {
+    public void sendXmiDataToDatabase(String xmiTableName, List<XmiData> annotationModules, String subsetTableName, Boolean storeBaseDocument, Boolean deleteObsolete, Map<DocumentId, String> shaMap) throws XmiDataInsertionException {
         if (log.isTraceEnabled()) {
-            log.trace("Sending XMI data for {} tables to the database", serializedCASes.size());
-            log.trace("Sending {} XMI data items", serializedCASes.size());
+            log.trace("Sending XMI data for {} tables to the database", annotationModules.size());
+            log.trace("Sending {} XMI data items", annotationModules.size());
         }
-        final Map<DocumentId, List<XmiData>> dataByDoc = serializedCASes.stream().collect(Collectors.groupingBy(XmiData::getDocId));
+        final Map<DocumentId, List<XmiData>> dataByDoc = annotationModules.stream().collect(Collectors.groupingBy(XmiData::getDocId));
+        // Collect all document IDs we want to add something for into the database. This can be annotations or the hash.
         final Set<DocumentId> documentIdsWithValues = shaMap != null ? Sets.union(dataByDoc.keySet(), shaMap.keySet()) : dataByDoc.keySet();
         class RowIterator implements Iterator<Map<String, Object>> {
 
@@ -163,15 +164,17 @@ public void remove() {
         try (CoStoSysConnection conn = dbc.obtainOrReserveConnection()) {
             conn.setAutoCommit(false);
 
+            // This is the private in-line defined class from above. All values are already contained in the class
+            // definition.
             RowIterator iterator = new RowIterator();
             try {
                 if (updateMode) {
                     log.debug("Updating {} XMI CAS data in database table '{}'.",
-                            serializedCASes.size(), xmiTableName);
+                            annotationModules.size(), xmiTableName);
                     dbc.updateFromRowIterator(iterator, xmiTableName, false, storeBaseDocument, schemaDocument);
                 } else {
                     log.debug("Inserting {} XMI CAS data into database table '{}'.",
-                            serializedCASes.size(), xmiTableName);
+                            annotationModules.size(), xmiTableName);
                     dbc.importFromRowIterator(iterator, xmiTableName, false, schemaDocument);
                 }
             } catch (Exception e) {

From 8d0b7a4dd57971e521ce9375a215abb4bd4d33bd Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 4 Jun 2021 10:19:52 +0200
Subject: [PATCH 055/269] Adding DB hash equality check to the
 `XMLDBMultiplier`, the `AnnotationDefinedFlowController` and flow controller
 support for the `DescriptorCreator`.

All those changes head towards the possibility to skip most of the pipeline in case the document is already present in the database without changes (as determined by the hash value).
---
 jcore-descriptor-creator/component.meta       |  20 +++
 .../jcore/misc/DescriptorCreator.java         |  49 +++---
 jcore-flow-controllers/pom.xml                |  71 +++++++++
 .../AnnotationDefinedFlow.java                |  64 ++++++++
 .../AnnotationDefinedFlowController.java      |   4 +-
 .../annotationdefined}/FixedInnerFlow.java    |   2 +-
 .../HashComparisonFlowController.java         | 117 ++++++++++++++
 .../HashComparisonOuterFlow.java              |  72 +++++++++
 ...core-annotation-defined-flowcontroller.xml |  19 +++
 .../AnnotationDefinedFlowControllerTest.java  | 143 ++++++++++++++++++
 .../jcore/types/jcore-casflow-types.xml       |   2 +-
 .../flowcontroller/AnnotationDefinedFlow.java |  39 -----
 .../HashComparisonFlowController.java         | 117 --------------
 .../HashComparisonOuterFlow.java              |  72 ---------
 .../flowcontroller/FlowControllerTest.java    | 101 -------------
 .../jcore/reader/xml/XMLDBMultiplier.java     | 125 ++++++++++++++-
 pom.xml                                       |  50 +++---
 scripts/createMetaDescriptors.py              |  13 +-
 18 files changed, 695 insertions(+), 385 deletions(-)
 create mode 100644 jcore-descriptor-creator/component.meta
 create mode 100644 jcore-flow-controllers/pom.xml
 create mode 100644 jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlow.java
 rename {jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller => jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined}/AnnotationDefinedFlowController.java (62%)
 rename {jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller => jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined}/FixedInnerFlow.java (96%)
 create mode 100644 jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/HashComparisonFlowController.java
 create mode 100644 jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/HashComparisonOuterFlow.java
 create mode 100644 jcore-flow-controllers/src/main/resources/de/julielab/jcore/flow/annotationdefined/desc/jcore-annotation-defined-flowcontroller.xml
 create mode 100644 jcore-flow-controllers/src/test/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlowControllerTest.java
 delete mode 100644 jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/AnnotationDefinedFlow.java
 delete mode 100644 jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/HashComparisonFlowController.java
 delete mode 100644 jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/HashComparisonOuterFlow.java
 delete mode 100644 jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/flowcontroller/FlowControllerTest.java

diff --git a/jcore-descriptor-creator/component.meta b/jcore-descriptor-creator/component.meta
new file mode 100644
index 000000000..6eae55fd0
--- /dev/null
+++ b/jcore-descriptor-creator/component.meta
@@ -0,0 +1,20 @@
+{
+    "categories": [
+        "reader"
+    ],
+    "description": "A simple project for the automatic creation of descriptors for UIMAfit-enabled components.",
+    "descriptors": [
+        {
+            "category": "reader",
+            "location": "de.julielab.jcore.reader.testreader.desc.de.julielab.jcore.reader.testreader.TestReader"
+        }
+    ],
+    "exposable": true,
+    "group": "general",
+    "maven-artifact": {
+        "artifactId": "jcore-descriptor-creator",
+        "groupId": "de.julielab",
+        "version": "2.6.0-SNAPSHOT"
+    },
+    "name": "JCoRe Descriptor Creator"
+}
diff --git a/jcore-descriptor-creator/src/main/java/de/julielab/jcore/misc/DescriptorCreator.java b/jcore-descriptor-creator/src/main/java/de/julielab/jcore/misc/DescriptorCreator.java
index 92c3178a1..69253935b 100644
--- a/jcore-descriptor-creator/src/main/java/de/julielab/jcore/misc/DescriptorCreator.java
+++ b/jcore-descriptor-creator/src/main/java/de/julielab/jcore/misc/DescriptorCreator.java
@@ -1,17 +1,6 @@
 package de.julielab.jcore.misc;
 
-import static java.util.stream.Collectors.joining;
-import static java.util.stream.Collectors.toList;
-
-import java.io.*;
-import java.lang.reflect.Modifier;
-import java.nio.charset.StandardCharsets;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.function.Predicate;
-import java.util.stream.Stream;
-
+import de.julielab.java.utilities.FileUtilities;
 import io.github.classgraph.ClassGraph;
 import io.github.classgraph.ScanResult;
 import org.apache.commons.lang.StringUtils;
@@ -21,14 +10,26 @@
 import org.apache.uima.collection.CollectionReaderDescription;
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
 import org.apache.uima.fit.factory.CollectionReaderFactory;
+import org.apache.uima.fit.factory.FlowControllerFactory;
 import org.apache.uima.fit.factory.TypeSystemDescriptionFactory;
+import org.apache.uima.flow.FlowController;
+import org.apache.uima.flow.FlowControllerDescription;
 import org.apache.uima.resource.ResourceCreationSpecifier;
 import org.apache.uima.resource.metadata.TypeSystemDescription;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.xml.sax.SAXException;
 
-import de.julielab.java.utilities.FileUtilities;
+import java.io.*;
+import java.lang.reflect.Modifier;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Stream;
+
+import static java.util.stream.Collectors.joining;
+import static java.util.stream.Collectors.toList;
 
 public class DescriptorCreator {
 
@@ -56,19 +57,27 @@ public void run(String basePackage) throws Exception {
     public void run(String basePackage, String outputRoot) throws Exception {
         List<Class<? extends CollectionReader>> readers = findSubclasses(CollectionReader.class.getCanonicalName());
         List<Class<? extends AnalysisComponent>> aes = findSubclasses(AnalysisComponent.class.getCanonicalName());
+        List<Class<? extends FlowController>> flowControllers = findSubclasses(FlowController.class.getCanonicalName());
 
-        readers = readers.stream().filter(c -> c.getPackage().getName().startsWith(basePackage) && (c.getPackage().getName().endsWith("reader") || c.getName().toLowerCase().endsWith("reader")))
+        // Now filter all found classes for being in the target package and adhering to the naming conventions.
+        readers = readers.stream().filter(c -> c.getPackage().getName().startsWith(basePackage) && (c.getPackage().getName().contains("reader") || c.getName().toLowerCase().contains("reader")))
                 .collect(toList());
         // Since consumers and also multipliers can be or are AnalysisComponents, we may list all component categories here.
         // Also, remove abstract classes
         aes = aes.stream().filter(c -> !Modifier.isAbstract(c.getModifiers())).
                 filter(c -> c.getPackage().getName().startsWith(basePackage) &&
-                          (c.getPackage().getName().endsWith("ae") || c.getName().toLowerCase().endsWith("ae") || c.getName().toLowerCase().endsWith("annotator")
-                        || c.getPackage().getName().endsWith("consumer") || c.getName().toLowerCase().endsWith("consumer") || c.getName().toLowerCase().endsWith("writer")
-                        || c.getPackage().getName().endsWith("multiplier") || c.getName().toLowerCase().endsWith("multiplier"))
+                          (c.getPackage().getName().contains("ae") || c.getName().toLowerCase().contains("ae") || c.getName().toLowerCase().contains("annotator")
+                        || c.getPackage().getName().contains("consumer") || c.getName().toLowerCase().contains("consumer") || c.getName().toLowerCase().contains("writer")
+                        || c.getPackage().getName().contains("multiplier") || c.getName().toLowerCase().contains("multiplier"))
                 ).collect(toList());
 
-        if (readers.isEmpty() && aes.isEmpty()) {
+        flowControllers = flowControllers.stream().filter(c -> !Modifier.isAbstract(c.getModifiers())).
+                filter(c -> c.getPackage().getName().startsWith(basePackage) &&
+                        (c.getPackage().getName().toLowerCase().contains("flow") || c.getName().toLowerCase().contains("flow")))
+                .collect(toList()); // keep concrete flow controllers whose package OR class name mentions "flow", mirroring the reader/AE filters
+
+
+        if (readers.isEmpty() && aes.isEmpty() && flowControllers.isEmpty()) {
             log.warn("No JCoRe UIMA component classes were found.");
         } else {
             Stream<String> typeDescNamesStream = Stream.of(TypeSystemDescriptionFactory.scanTypeDescriptors()).
@@ -88,6 +97,10 @@ public void run(String basePackage, String outputRoot) throws Exception {
                 AnalysisEngineDescription d = AnalysisEngineFactory.createEngineDescription(cls, tsd);
                 writeComponentDescriptor(outputRoot, cls, d, "analysis engine / consumer");
             }
+            for (Class<? extends FlowController> cls : flowControllers) {
+                FlowControllerDescription d = FlowControllerFactory.createFlowControllerDescription(cls);
+                writeComponentDescriptor(outputRoot, cls, d, "flow controller");
+            }
         }
     }
 
diff --git a/jcore-flow-controllers/pom.xml b/jcore-flow-controllers/pom.xml
new file mode 100644
index 000000000..a316b81ad
--- /dev/null
+++ b/jcore-flow-controllers/pom.xml
@@ -0,0 +1,71 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>jcore-base</artifactId>
+        <groupId>de.julielab</groupId>
+        <version>2.6.0-SNAPSHOT</version>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>jcore-flow-controllers</artifactId>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-types</artifactId>
+            <version>${jcore-types-version}</version>
+        </dependency>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+            <scope>provided</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.assertj</groupId>
+            <artifactId>assertj-core</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-utilities</artifactId>
+            <version>${jcore-utilities-version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.jetbrains</groupId>
+            <artifactId>annotations</artifactId>
+            <version>RELEASE</version>
+            <scope>compile</scope>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-descriptor-creator</artifactId>
+        </dependency>
+    </dependencies>
+
+    <name>JCoRe Flow Controllers</name>
+    <organization>
+        <name>JULIE Lab Jena, Germany</name>
+        <url>http://www.julielab.de</url>
+    </organization>
+    <url>https://github.com/JULIELab/jcore-base/tree/master/jcore-flow-controllers</url>
+    <description>Flow controllers can be used to control the route a (J)CAS takes through an aggregate analysis engine.
+        This project contains Flow Controllers developed at the JULIE Lab.
+    </description>
+    <licenses>
+        <license>
+            <name>BSD-2-Clause</name>
+            <url>https://opensource.org/licenses/BSD-2-Clause</url>
+        </license>
+    </licenses>
+</project>
\ No newline at end of file
diff --git a/jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlow.java b/jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlow.java
new file mode 100644
index 000000000..0243a7f36
--- /dev/null
+++ b/jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlow.java
@@ -0,0 +1,64 @@
+package de.julielab.jcore.flow.annotationdefined;
+
+import de.julielab.jcore.types.casflow.ToVisit;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.analysis_engine.metadata.FixedFlow;
+import org.apache.uima.analysis_engine.metadata.FlowConstraints;
+import org.apache.uima.flow.FinalStep;
+import org.apache.uima.flow.JCasFlow_ImplBase;
+import org.apache.uima.flow.SimpleStep;
+import org.apache.uima.flow.Step;
+import org.jetbrains.annotations.Nullable;
+
+/**
+ * <p>Returns steps according to an existing {@link ToVisit} annotation of the CAS or, if not present, the default aggregate flow.</p>
+ * <p>This is, for example, used by the <tt>XMLDBMultiplier</tt> to let CASes skip large parts of the pipeline when
+ * the currently read document already exists in the database.</p>
+ */
+public class AnnotationDefinedFlow extends JCasFlow_ImplBase {
+    private final String[] toVisitKeys;
+    private final String[] fixedFlow;
+    private int currentPos;
+
+    /**
+     * <p>Creates a flow that follows the entries in {@link ToVisit#getDelegateKeys()} of <tt>toVisit</tt> or, if
+     * <tt>toVisit</tt> is null, falls back to the default fixed flow.</p>
+     * <p>If <tt>toVisit</tt> is not null but the <tt>delegateKeys</tt> are null or empty, no component in the aggregate using this flow will process the respective CAS.</p>
+     * @param toVisit An annotation containing the keys of the delegate AEs to visit. May be null, in which case the default fixed flow will be used.
+     * @param flowConstraints The default fixed flow of the aggregate analysis engine.
+     * @throws AnalysisEngineProcessException If <tt>flowConstraints</tt> is null or not a fixed flow.
+     */
+    public AnnotationDefinedFlow(@Nullable ToVisit toVisit, FlowConstraints flowConstraints) throws AnalysisEngineProcessException {
+        if (!(flowConstraints instanceof FixedFlow))
+            throw new AnalysisEngineProcessException(new IllegalArgumentException("This flow requires the FixedFlow to determine the default processing order. However, the flow constraints are of type " + (flowConstraints == null ? null : flowConstraints.getClass().getCanonicalName())));
+        this.fixedFlow = ((FixedFlow) flowConstraints).getFixedFlow();
+        // We have the following cases:
+        // 1. There are given keys to visit, use them.
+        // 2. There are no keys given but the ToVisit annotation is not null, skip all components.
+        // 3. There is no ToVisit annotation at all, use the default fixed flow.
+        if (toVisit != null && toVisit.getDelegateKeys() != null)
+            toVisitKeys = toVisit.getDelegateKeys().toArray();
+        else if (toVisit != null)
+            toVisitKeys = new String[0];
+        else
+            toVisitKeys = null;
+        this.currentPos = 0;
+    }
+
+    /**
+     * <p>Routes the CAS to the next component defined by the CAS's {@link ToVisit} annotation or,
+     * if <tt>ToVisit</tt> was not found, to the next component as defined by the default fixed flow.</p>
+     *
+     * @return A {@link SimpleStep} for the next delegate key or a {@link FinalStep} once all keys have been returned.
+     */
+    @Override
+    public Step next() {
+        // If toVisitKeys was not given (null), we just use the fixedFlow; an empty toVisitKeys array skips everything.
+        final String[] route = toVisitKeys != null ? toVisitKeys : fixedFlow;
+        if (currentPos < route.length) {
+            return new SimpleStep(route[currentPos++]);
+        }
+        // All keys of the chosen route have been handed out, so processing of this CAS ends here.
+        return new FinalStep();
+    }
+}
diff --git a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/AnnotationDefinedFlowController.java b/jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlowController.java
similarity index 62%
rename from jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/AnnotationDefinedFlowController.java
rename to jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlowController.java
index 359d8eb7d..77a803e23 100644
--- a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/AnnotationDefinedFlowController.java
+++ b/jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlowController.java
@@ -1,7 +1,8 @@
-package de.julielab.jcore.reader.xmi.flowcontroller;
+package de.julielab.jcore.flow.annotationdefined;
 
 import de.julielab.jcore.types.casflow.ToVisit;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.descriptor.ResourceMetaData;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.flow.Flow;
 import org.apache.uima.flow.JCasFlowController_ImplBase;
@@ -12,6 +13,7 @@
  * <p>If there is not <tt>ToVisit</tt> annotation, the default (fixed) flow will be used. Thus, the fixed flow constraint
  * must be set on the aggregate engine.</p>
  */
+@ResourceMetaData(name = "JCoRe Annotation Defined Flow Controller", description = "This flow controller relies on an annotation of type ToVisit to be present in the CAS. If there is no such annotation, the default fixed flow of the aggregate engine using this flow controller is used. Otherwise, die names of the components to pass the CAS to are taken from the annotation. If the annotation exists but defines to components to be visited by the CAS, no components are visited at all.", vendor = "JULIE Lab, Germany", version = "placeholder")
 public class AnnotationDefinedFlowController extends JCasFlowController_ImplBase {
     @Override
     public Flow computeFlow(JCas jCas) throws AnalysisEngineProcessException {
diff --git a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/FixedInnerFlow.java b/jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/FixedInnerFlow.java
similarity index 96%
rename from jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/FixedInnerFlow.java
rename to jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/FixedInnerFlow.java
index 21d84a60d..eeae85f0a 100644
--- a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/FixedInnerFlow.java
+++ b/jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/FixedInnerFlow.java
@@ -1,4 +1,4 @@
-package de.julielab.jcore.reader.xmi.flowcontroller;
+package de.julielab.jcore.flow.annotationdefined;
 
 import org.apache.uima.flow.FinalStep;
 import org.apache.uima.flow.JCasFlow_ImplBase;
diff --git a/jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/HashComparisonFlowController.java b/jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/HashComparisonFlowController.java
new file mode 100644
index 000000000..bdbf88c9c
--- /dev/null
+++ b/jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/HashComparisonFlowController.java
@@ -0,0 +1,117 @@
+//package de.julielab.jcore.flow.annotationdefined;
+//
+//import de.julielab.costosys.configuration.FieldConfig;
+//import de.julielab.costosys.dbconnection.CoStoSysConnection;
+//import de.julielab.costosys.dbconnection.DataBaseConnector;
+//import de.julielab.jcore.reader.db.DBReader;
+//import de.julielab.jcore.types.casmultiplier.RowBatch;
+//import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+//import org.apache.uima.cas.FeatureStructure;
+//import org.apache.uima.fit.descriptor.ConfigurationParameter;
+//import org.apache.uima.fit.descriptor.ResourceMetaData;
+//import org.apache.uima.fit.util.JCasUtil;
+//import org.apache.uima.flow.Flow;
+//import org.apache.uima.flow.FlowControllerContext;
+//import org.apache.uima.flow.JCasFlowController_ImplBase;
+//import org.apache.uima.jcas.JCas;
+//import org.apache.uima.jcas.cas.StringArray;
+//import org.apache.uima.resource.ResourceInitializationException;
+//import org.slf4j.Logger;
+//import org.slf4j.LoggerFactory;
+//
+//import java.io.FileNotFoundException;
+//import java.sql.ResultSet;
+//import java.sql.SQLException;
+//import java.util.*;
+//import java.util.stream.Collectors;
+//
+///**
+// * <p>Prereque</p>
+// * <p>Expects a jCas as being output by the {@link de.julielab.jcore.reader.xmi.XmiDBMultiplierReader}, i.e. the CAS
+// * should contain a {@link de.julielab.jcore.types.casmultiplier.RowBatch} annotation. Then, Retrieves the sha256 hashes for
+// * the passed documents from the database.</p>
+// */
+//@ResourceMetaData(name = "JCoRe Hash Comparison Flow Controller", description = "This flow controller aims to skip processing for CASes that already exist in the database and haven't changed with regards to a newly read version. For this purpose, the sha256 hash of the CAS document text is compared to the the existing hash in the database for the same document ID. If the hashes match, the text is the same and, thus, the annotations will be the same.")
+//public class HashComparisonFlowController extends JCasFlowController_ImplBase {
+//    public static final String PARAM_ADD_SHA_HASH = "AddShaHash";
+//    public static final String PARAM_TABLE_DOCUMENT = "DocumentTable";
+//    private final static Logger log = LoggerFactory.getLogger(HashComparisonFlowController.class);
+//    @ConfigurationParameter(name = DBReader.PARAM_COSTOSYS_CONFIG_NAME, description = "Path to the CoStoSys configuration XML file that specifies the database this pipeline writes to, i.e. the same file that the DB XMI Writer is using. If there is no DB Writer in use, this flow controller is not applicable.")
+//    private String costosysConfig;
+//    @ConfigurationParameter(name = PARAM_ADD_SHA_HASH, description = "Possible values: document_text, defaults to 'document_text' and thus doesn't need to be specified manually at the moment. This parameter needs to match the value for the same parameter given to the XMIDBWriter in this pipeline. Then, a comparison between the existing hash in the database and the new hash of the CAS read in this pipeline can be made. In case the hashes match, the CAS skips all component except the DBCheckpointAE to mark the document as processed.")
+//    private String documentItemToHash;
+//    @ConfigurationParameter(name = PARAM_TABLE_DOCUMENT, description = "String parameter indicating the name of the " +
+//            "table where the XMI data will be stored. The name must be schema qualified.")
+//    private String docTableParamValue;
+//
+//    private DataBaseConnector dbc;
+//
+//    @Override
+//    public void initialize(FlowControllerContext aContext) throws ResourceInitializationException {
+//        this.costosysConfig = (String) aContext.getConfigParameterValue(DBReader.PARAM_COSTOSYS_CONFIG_NAME);
+//        this.documentItemToHash = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_ADD_SHA_HASH)).orElse("document_text");
+//        try {
+//            dbc = new DataBaseConnector(this.costosysConfig);
+//        } catch (FileNotFoundException e) {
+//            log.error("Could not create the CoStoSys DatabaseConnector:", e);
+//            throw new ResourceInitializationException(e);
+//        }
+//    }
+//
+//    @Override
+//    public Flow computeFlow(JCas jCas) throws AnalysisEngineProcessException {
+//        RowBatch rowBatch;
+//        try {
+//            rowBatch = JCasUtil.selectSingle(jCas, RowBatch.class);
+//        } catch (IllegalArgumentException e) {
+//            log.error("Could not select the RowBatch annotation from the JCas:", e);
+//            throw new AnalysisEngineProcessException(e);
+//        }
+//        Map<String, String> id2hash = fetchCurrentHashesFromDatabase(rowBatch);
+//        return new HashComparisonOuterFlow(id2hash, documentItemToHash, getContext().getAggregateMetadata().getFlowConstraints());
+//    }
+//
+//    /**
+//     * <p>Fetches the hashes of the currently stored documents in the database.</p>
+//     * @param rowBatch The annotation specifying which documents should be fetched by the multiplier and then be processed by the aggregate.
+//     * @return A map from a string representation of the RowBatches document IDs to the hashes for the respective IDs.
+//     * @throws AnalysisEngineProcessException If the SQL request fails.
+//     */
+//    private Map<String, String> fetchCurrentHashesFromDatabase(RowBatch rowBatch) throws AnalysisEngineProcessException {
+//        String dataTable = dbc.getNextDataTable(rowBatch.getTableName());
+//        String hashColumn = documentItemToHash + "_sha256";
+//        // Extract the document IDs in this RowBatch. The IDs could be composite keys.
+//        List<String[]> documentIds = new ArrayList<>(rowBatch.getIdentifiers().size());
+//        Iterator<FeatureStructure> documentIDsIt = rowBatch.getIdentifiers().iterator();
+//        while (documentIDsIt.hasNext()) {
+//            StringArray pkArray = (StringArray) documentIDsIt.next();
+//            documentIds.add(pkArray.toStringArray());
+//        }
+//        Map<String, String> id2hash = new HashMap<>(documentIds.size());
+//        // This is the map we want to fill that lets us look up the hash of the document text by document ID.
+//        String sql = null;
+//        // Query the database for the document IDs in the current RowBatch and retrieve hashes.
+//        try (CoStoSysConnection conn = dbc.obtainOrReserveConnection()) {
+//            FieldConfig activeTableFieldConfiguration = dbc.getActiveTableFieldConfiguration();
+//            String idQuery = documentIds.stream()
+//                    .map(key -> Arrays.stream(key).map(part -> "%s='" + part + '"').toArray(String[]::new))
+//                    .map(activeTableFieldConfiguration::expandPKNames).map(expandedKeys -> String.join(" AND ", expandedKeys))
+//                    .collect(Collectors.joining(" OR "));
+//            sql = String.format("SELECT %s,%s FROM %s WHERE %s", activeTableFieldConfiguration.getPrimaryKeyString(), hashColumn, dataTable, idQuery);
+//            ResultSet rs = conn.createStatement().executeQuery(sql);
+//            while (rs.next()) {
+//                StringBuilder pkSb = new StringBuilder();
+//                for (int i = 0; i < activeTableFieldConfiguration.getPrimaryKey().length; i++)
+//                    pkSb.append(rs.getString(i)).append(',');
+//                // Remove trailing comma
+//                pkSb.deleteCharAt(pkSb.length());
+//                String hash = rs.getString(activeTableFieldConfiguration.getPrimaryKey().length);
+//                id2hash.put(pkSb.toString(), hash);
+//            }
+//        } catch (SQLException e) {
+//            log.error("Could not retrieve hashes from the database. SQL query was {}:", sql, e);
+//            throw new AnalysisEngineProcessException(e);
+//        }
+//        return id2hash;
+//    }
+//}
diff --git a/jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/HashComparisonOuterFlow.java b/jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/HashComparisonOuterFlow.java
new file mode 100644
index 000000000..896b52892
--- /dev/null
+++ b/jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/HashComparisonOuterFlow.java
@@ -0,0 +1,72 @@
+//package de.julielab.jcore.flow.annotationdefined;
+//
+//import org.apache.commons.codec.binary.Base64;
+//import org.apache.commons.codec.digest.DigestUtils;
+//import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+//import org.apache.uima.analysis_engine.metadata.FixedFlow;
+//import org.apache.uima.analysis_engine.metadata.FlowConstraints;
+//import org.apache.uima.flow.*;
+//import org.apache.uima.jcas.JCas;
+//import org.slf4j.Logger;
+//import org.slf4j.LoggerFactory;
+//
+//import java.util.Map;
+//
+///**
+// * <p>Note: This flow can only be used in an aggregate analysis engine where the {@link de.julielab.jcore.reader.xmi.XmiDBMultiplier} is the first component.</p>
+// * <p>This flow is created by the {@link HashComparisonFlowController} and routes the CAS that was filled by the {@link de.julielab.jcore.reader.xmi.XmiDBMultiplierReader}.
+// * This CAS contains an instance of {@link de.julielab.jcore.types.casmultiplier.RowBatch} which contains the information which documents should be read
+// * from which database table.</p>
+// * <p>Within this flow, the reader CAS is passed to the multiplier, the first component. For CASes created by the multiplier,
+// * the method {@link #newCasProduced(JCas, String)} is called for which a new flow concerning the processing order of the
+// * multiplier-created CASes within the aggregate is determined.</p>
+// */
+//public class HashComparisonOuterFlow extends JCasFlow_ImplBase {
+//    private final static Logger log = LoggerFactory.getLogger(HashComparisonOuterFlow.class);
+//    private String[] fixedFlow;
+//    private int currentPosition;
+//    private Map<String, String> id2hash;
+//    private String documentItemToHash;
+//
+//    public HashComparisonOuterFlow(Map<String, String> id2hash, String documentItemToHash, FlowConstraints flowConstraints) throws AnalysisEngineProcessException {
+//        this.id2hash = id2hash;
+//        this.documentItemToHash = documentItemToHash;
+//        if (!(flowConstraints instanceof FixedFlow)) {
+//            throw new AnalysisEngineProcessException(new IllegalArgumentException("This flow requires the original FixedFlow to know the order of the delegate engines but the given flow is of type " + flowConstraints.getClass()));
+//        }
+//        FixedFlow fixedFlow = (FixedFlow) flowConstraints;
+//        this.fixedFlow = fixedFlow.getFixedFlow();
+//        this.currentPosition = 0;
+//    }
+//
+//    @Override
+//    protected Flow newCasProduced(JCas newCas, String producedBy) throws AnalysisEngineProcessException {
+//        String newHash = getHash(newCas);
+//        return new FixedInnerFlow(fixedFlow);
+//    }
+//
+//    private String getHash(JCas newCas) {
+//        final String documentText = newCas.getDocumentText();
+//        final byte[] sha = DigestUtils.sha256(documentText.getBytes());
+//        return Base64.encodeBase64String(sha);
+//    }
+//
+//    public Step next() {
+//        Step step = null;
+//        for (; currentPosition < fixedFlow.length && step == null; currentPosition++) {
+//            String aeKey = fixedFlow[currentPosition];
+//
+//            // The outer flow only passes the CAS to the CAS multiplier. The multiplier creates more CASes which
+//            // are then passed to newCasProduced() and are then routed by the InnerFlow.
+//            if (currentPosition == 0) {
+//                log.trace("Outer next AE is: " + aeKey);
+//                step = new SimpleStep(aeKey);
+//            }
+//        }
+//        if (step == null) {
+//            // no appropriate AEs to call - end of flow
+//            log.trace("Outer flow Complete.");
+//        }
+//        return step == null ? new FinalStep() : step;
+//    }
+//}
diff --git a/jcore-flow-controllers/src/main/resources/de/julielab/jcore/flow/annotationdefined/desc/jcore-annotation-defined-flowcontroller.xml b/jcore-flow-controllers/src/main/resources/de/julielab/jcore/flow/annotationdefined/desc/jcore-annotation-defined-flowcontroller.xml
new file mode 100644
index 000000000..2babe5cd5
--- /dev/null
+++ b/jcore-flow-controllers/src/main/resources/de/julielab/jcore/flow/annotationdefined/desc/jcore-annotation-defined-flowcontroller.xml
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<flowControllerDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+    <implementationName>de.julielab.jcore.flow.annotationdefined.AnnotationDefinedFlowController</implementationName>
+    <processingResourceMetaData>
+        <name>JCoRe Annotation Defined Flow Controller</name>
+        <description>This flow controller relies on an annotation of type ToVisit to be present in the CAS. If there is no such annotation, the default fixed flow of the aggregate engine using this flow controller is used. Otherwise, the names of the components to pass the CAS to are taken from the annotation. If the annotation exists but defines no components to be visited by the CAS, no components are visited at all.</description>
+        <version>placeholder</version>
+        <vendor>JULIE Lab, Germany</vendor>
+        <configurationParameters/>
+        <configurationParameterSettings/>
+        <capabilities/>
+        <operationalProperties>
+            <modifiesCas>false</modifiesCas>
+            <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+            <outputsNewCASes>false</outputsNewCASes>
+        </operationalProperties>
+    </processingResourceMetaData>
+</flowControllerDescription>
\ No newline at end of file
diff --git a/jcore-flow-controllers/src/test/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlowControllerTest.java b/jcore-flow-controllers/src/test/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlowControllerTest.java
new file mode 100644
index 000000000..f5daabd0f
--- /dev/null
+++ b/jcore-flow-controllers/src/test/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlowControllerTest.java
@@ -0,0 +1,143 @@
+package de.julielab.jcore.flow.annotationdefined;
+
+import de.julielab.jcore.types.Token;
+import de.julielab.jcore.types.casflow.ToVisit;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.cas.FSIterator;
+import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.factory.FlowControllerFactory;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.flow.FlowControllerDescription;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.StringArray;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.junit.Test;
+
+import static java.util.Arrays.asList;
+import static org.assertj.core.api.Assertions.assertThat;
+public class AnnotationDefinedFlowControllerTest { // Exercises AnnotationDefinedFlowController inside a three-delegate aggregate built by createTestAAE().
+    @Test
+    public void testFlowControllerSingleKey() throws Exception { // ToVisit names only "TestAE 2": exactly that delegate must leave a Token.
+        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types", "de.julielab.jcore.types.jcore-casflow-types");
+        ToVisit toVisit = new ToVisit(jCas);
+        StringArray toVisitKeys = new StringArray(jCas, 1);
+        toVisitKeys.set(0, "TestAE 2");
+        toVisit.setDelegateKeys(toVisitKeys);
+        toVisit.addToIndexes();
+
+        AnalysisEngine aae = createTestAAE();
+
+        aae.process(jCas);
+
+        FSIterator<Token> it = jCas.<Token>getAnnotationIndex(Token.type).iterator();
+        assertThat(it).toIterable().extracting(Token::getComponentId).containsExactly("TestAE 2");
+    }
+
+    @Test
+    public void testFlowControllerNoKey() throws Exception { // ToVisit carries an empty key array: no Token is expected afterwards.
+        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types", "de.julielab.jcore.types.jcore-casflow-types");
+        ToVisit toVisit = new ToVisit(jCas);
+        StringArray toVisitKeys = new StringArray(jCas, 0);
+        toVisit.setDelegateKeys(toVisitKeys);
+        toVisit.addToIndexes();
+
+        AnalysisEngine aae = createTestAAE();
+
+        aae.process(jCas);
+
+        FSIterator<Token> it = jCas.<Token>getAnnotationIndex(Token.type).iterator();
+        assertThat(it).isExhausted();
+    }
+
+    @Test
+    public void testFlowControllerNullKey() throws Exception { // ToVisit is indexed without delegate keys being set: no Token is expected afterwards.
+        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types", "de.julielab.jcore.types.jcore-casflow-types");
+        ToVisit toVisit = new ToVisit(jCas);
+        toVisit.addToIndexes();
+
+        AnalysisEngine aae = createTestAAE();
+
+        aae.process(jCas);
+
+        FSIterator<Token> it = jCas.<Token>getAnnotationIndex(Token.type).iterator();
+        assertThat(it).isExhausted();
+    }
+
+    @Test
+    public void testFlowControllerNoVisitAnnotation() throws Exception { // No ToVisit annotation at all: Tokens of all three delegates are expected in order.
+        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types", "de.julielab.jcore.types.jcore-casflow-types");
+
+        AnalysisEngine aae = createTestAAE();
+
+        aae.process(jCas);
+
+        FSIterator<Token> it = jCas.<Token>getAnnotationIndex(Token.type).iterator();
+        assertThat(it).toIterable().extracting(Token::getComponentId).containsExactly("TestAE 1", "TestAE 2", "TestAE 3");
+    }
+
+    private AnalysisEngine createTestAAE() throws ResourceInitializationException { // Builds the aggregate under test: three TestAE delegates plus the flow controller.
+        FlowControllerDescription flowControllerDescription = FlowControllerFactory.createFlowControllerDescription(AnnotationDefinedFlowController.class);
+        AnalysisEngineDescription testAeDesc1 = AnalysisEngineFactory.createEngineDescription(TestAE.class, "name", "TestAE 1");
+        AnalysisEngineDescription testAeDesc2 = AnalysisEngineFactory.createEngineDescription(TestAE.class, "name", "TestAE 2");
+        AnalysisEngineDescription testAeDesc3 = AnalysisEngineFactory.createEngineDescription(TestAE.class, "name", "TestAE 3");
+        AnalysisEngineDescription aaeWithFlowController = AnalysisEngineFactory.createEngineDescription(asList(testAeDesc1, testAeDesc2, testAeDesc3), asList("TestAE 1", "TestAE 2", "TestAE 3"), null, null, // delegate keys equal the configured "name" values
+                flowControllerDescription);
+        AnalysisEngine aae = AnalysisEngineFactory.createEngine(aaeWithFlowController);
+        return aae;
+    }
+
+    public static class TestAE extends JCasAnnotator_ImplBase { // Marker annotator: records its configured name in the CAS when it is run.
+        @ConfigurationParameter(name = "name")
+        private String name;
+
+        @Override
+        public void initialize(UimaContext context) { // Reads the "name" parameter directly from the context.
+            name = (String) context.getConfigParameterValue("name");
+        }
+
+        @Override
+        public void process(JCas jCas) {
+            // Leave a Token whose componentId is this component's name as evidence that this jCas was processed by it.
+            Token token = new Token(jCas);
+            token.setComponentId(name);
+            token.addToIndexes();
+        }
+    }
+
+//    public static class TestMultiplier extends JCasMultiplier_ImplBase {
+//        private List<String> idsToRead = new ArrayList<>();
+//        private int currentIndex;
+//        @Override
+//        public void process(JCas jCas) throws AnalysisEngineProcessException {
+//            RowBatch rowbatch = JCasUtil.selectSingle(jCas, RowBatch.class);
+//            idsToRead.clear();
+//            currentIndex = 0;
+//            for (int i = 0; i < rowbatch.getIdentifiers().size() && rowbatch.getIdentifiers(i) != null; i++) {
+//                // In this test, the document IDs consist only of a single element
+//                idsToRead.add(rowbatch.getIdentifiers(i).get(0));
+//            }
+//        }
+//
+//        @Override
+//        public boolean hasNext() throws AnalysisEngineProcessException {
+//            return currentIndex < idsToRead.size();
+//        }
+//
+//        @Override
+//        public AbstractCas next() throws AnalysisEngineProcessException {
+//            JCas emptyJCas = getEmptyJCas();
+//            Header header = new Header(emptyJCas);
+//            String docId = idsToRead.get(currentIndex);
+//            header.setDocId(docId);
+//            header.addToIndexes();
+//            emptyJCas.setDocumentText("ID: " + docId);
+//            ++currentIndex;
+//            return emptyJCas;
+//        }
+//    }
+
+}
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-casflow-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-casflow-types.xml
index 6d3e20b4c..1f371bdf6 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-casflow-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-casflow-types.xml
@@ -18,7 +18,7 @@
             <features>
                 <featureDescription>
                     <name>delegateKeys</name>
-                    <description>The keys of the delegates to visit. The keys are the names given to the delegate analysis engines in the aggregate.</description>
+                    <description>The keys of the delegates to visit. The keys are the names given to the delegate analysis engines in the aggregate. An empty or null array indicates that no component should be visited.</description>
                     <rangeTypeName>uima.cas.StringArray</rangeTypeName>
                     <elementType>uima.cas.String</elementType>
                 </featureDescription>
diff --git a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/AnnotationDefinedFlow.java b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/AnnotationDefinedFlow.java
deleted file mode 100644
index c48c75193..000000000
--- a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/AnnotationDefinedFlow.java
+++ /dev/null
@@ -1,39 +0,0 @@
-package de.julielab.jcore.reader.xmi.flowcontroller;
-
-import de.julielab.jcore.types.casflow.ToVisit;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.analysis_engine.metadata.FixedFlow;
-import org.apache.uima.analysis_engine.metadata.FlowConstraints;
-import org.apache.uima.flow.JCasFlow_ImplBase;
-import org.apache.uima.flow.SimpleStep;
-import org.apache.uima.flow.Step;
-
-/**
- * <p>Returns steps according an existing {@link ToVisit} annotation of the CAS or, if not present, the default aggregate flow.</p>
- */
-public class AnnotationDefinedFlow extends JCasFlow_ImplBase {
-    private String[] toVisitKeys;
-    private String[] fixedFlow;
-    private int currentPos;
-
-    public AnnotationDefinedFlow(ToVisit toVisit, FlowConstraints flowConstraints) throws AnalysisEngineProcessException {
-        if (!(flowConstraints instanceof FixedFlow))
-            throw new AnalysisEngineProcessException(new IllegalArgumentException("This flow requires the FixedFlow to determine the default processing order. However, the flow constraints are of type " + flowConstraints.getClass().getCanonicalName()));
-        this.fixedFlow = toVisit != null ? ((FixedFlow) flowConstraints).getFixedFlow() : null;
-        this.toVisitKeys = toVisit.getDelegateKeys().toArray();
-        this.currentPos = 0;
-    }
-
-    /**
-     * <p>Routes the CAS to the next component defined by the CAS'es {@link ToVisit} annotation or,
-     * if <tt>ToVisit</tt> was not found, to the next component as defined by the default fixed flow.</p>
-     *
-     * @return The next component to visit or the next default flow component.
-     */
-    @Override
-    public Step next() {
-        String nextAEKey = toVisitKeys != null ? toVisitKeys[currentPos] : fixedFlow[currentPos];
-        ++currentPos;
-        return new SimpleStep(nextAEKey);
-    }
-}
diff --git a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/HashComparisonFlowController.java b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/HashComparisonFlowController.java
deleted file mode 100644
index 717566675..000000000
--- a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/HashComparisonFlowController.java
+++ /dev/null
@@ -1,117 +0,0 @@
-package de.julielab.jcore.reader.xmi.flowcontroller;
-
-import de.julielab.costosys.configuration.FieldConfig;
-import de.julielab.costosys.dbconnection.CoStoSysConnection;
-import de.julielab.costosys.dbconnection.DataBaseConnector;
-import de.julielab.jcore.reader.db.DBReader;
-import de.julielab.jcore.types.casmultiplier.RowBatch;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.cas.FeatureStructure;
-import org.apache.uima.fit.descriptor.ConfigurationParameter;
-import org.apache.uima.fit.descriptor.ResourceMetaData;
-import org.apache.uima.fit.util.JCasUtil;
-import org.apache.uima.flow.Flow;
-import org.apache.uima.flow.FlowControllerContext;
-import org.apache.uima.flow.JCasFlowController_ImplBase;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.cas.StringArray;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.FileNotFoundException;
-import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.util.*;
-import java.util.stream.Collectors;
-
-/**
- * <p>Prereque</p>
- * <p>Expects a jCas as being output by the {@link de.julielab.jcore.reader.xmi.XmiDBMultiplierReader}, i.e. the CAS
- * should contain a {@link de.julielab.jcore.types.casmultiplier.RowBatch} annotation. Then, Retrieves the sha256 hashes for
- * the passed documents from the database.</p>
- */
-@ResourceMetaData(name = "JCoRe Hash Comparison Flow Controller", description = "This flow controller aims to skip processing for CASes that already exist in the database and haven't changed with regards to a newly read version. For this purpose, the sha256 hash of the CAS document text is compared to the the existing hash in the database for the same document ID. If the hashes match, the text is the same and, thus, the annotations will be the same.")
-public class HashComparisonFlowController extends JCasFlowController_ImplBase {
-    public static final String PARAM_ADD_SHA_HASH = "AddShaHash";
-    public static final String PARAM_TABLE_DOCUMENT = "DocumentTable";
-    private final static Logger log = LoggerFactory.getLogger(HashComparisonFlowController.class);
-    @ConfigurationParameter(name = DBReader.PARAM_COSTOSYS_CONFIG_NAME, description = "Path to the CoStoSys configuration XML file that specifies the database this pipeline writes to, i.e. the same file that the DB XMI Writer is using. If there is no DB Writer in use, this flow controller is not applicable.")
-    private String costosysConfig;
-    @ConfigurationParameter(name = PARAM_ADD_SHA_HASH, description = "Possible values: document_text, defaults to 'document_text' and thus doesn't need to be specified manually at the moment. This parameter needs to match the value for the same parameter given to the XMIDBWriter in this pipeline. Then, a comparison between the existing hash in the database and the new hash of the CAS read in this pipeline can be made. In case the hashes match, the CAS skips all component except the DBCheckpointAE to mark the document as processed.")
-    private String documentItemToHash;
-    @ConfigurationParameter(name = PARAM_TABLE_DOCUMENT, description = "String parameter indicating the name of the " +
-            "table where the XMI data will be stored. The name must be schema qualified.")
-    private String docTableParamValue;
-
-    private DataBaseConnector dbc;
-
-    @Override
-    public void initialize(FlowControllerContext aContext) throws ResourceInitializationException {
-        this.costosysConfig = (String) aContext.getConfigParameterValue(DBReader.PARAM_COSTOSYS_CONFIG_NAME);
-        this.documentItemToHash = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_ADD_SHA_HASH)).orElse("document_text");
-        try {
-            dbc = new DataBaseConnector(this.costosysConfig);
-        } catch (FileNotFoundException e) {
-            log.error("Could not create the CoStoSys DatabaseConnector:", e);
-            throw new ResourceInitializationException(e);
-        }
-    }
-
-    @Override
-    public Flow computeFlow(JCas jCas) throws AnalysisEngineProcessException {
-        RowBatch rowBatch;
-        try {
-            rowBatch = JCasUtil.selectSingle(jCas, RowBatch.class);
-        } catch (IllegalArgumentException e) {
-            log.error("Could not select the RowBatch annotation from the JCas:", e);
-            throw new AnalysisEngineProcessException(e);
-        }
-        Map<String, String> id2hash = fetchCurrentHashesFromDatabase(rowBatch);
-        return new HashComparisonOuterFlow(id2hash, documentItemToHash, getContext().getAggregateMetadata().getFlowConstraints());
-    }
-
-    /**
-     * <p>Fetches the hashes of the currently stored documents in the database.</p>
-     * @param rowBatch The annotation specifying which documents should be fetched by the multiplier and then be processed by the aggregate.
-     * @return A map from a string representation of the RowBatches document IDs to the hashes for the respective IDs.
-     * @throws AnalysisEngineProcessException If the SQL request fails.
-     */
-    private Map<String, String> fetchCurrentHashesFromDatabase(RowBatch rowBatch) throws AnalysisEngineProcessException {
-        String dataTable = dbc.getNextDataTable(rowBatch.getTableName());
-        String hashColumn = documentItemToHash + "_sha256";
-        // Extract the document IDs in this RowBatch. The IDs could be composite keys.
-        List<String[]> documentIds = new ArrayList<>(rowBatch.getIdentifiers().size());
-        Iterator<FeatureStructure> documentIDsIt = rowBatch.getIdentifiers().iterator();
-        while (documentIDsIt.hasNext()) {
-            StringArray pkArray = (StringArray) documentIDsIt.next();
-            documentIds.add(pkArray.toStringArray());
-        }
-        Map<String, String> id2hash = new HashMap<>(documentIds.size());
-        // This is the map we want to fill that lets us look up the hash of the document text by document ID.
-        String sql = null;
-        // Query the database for the document IDs in the current RowBatch and retrieve hashes.
-        try (CoStoSysConnection conn = dbc.obtainOrReserveConnection()) {
-            FieldConfig activeTableFieldConfiguration = dbc.getActiveTableFieldConfiguration();
-            String idQuery = documentIds.stream()
-                    .map(key -> Arrays.stream(key).map(part -> "%s='" + part + '"').toArray(String[]::new))
-                    .map(activeTableFieldConfiguration::expandPKNames).map(expandedKeys -> String.join(" AND ", expandedKeys))
-                    .collect(Collectors.joining(" OR "));
-            sql = String.format("SELECT %s,%s FROM %s WHERE %s", activeTableFieldConfiguration.getPrimaryKeyString(), hashColumn, dataTable, idQuery);
-            ResultSet rs = conn.createStatement().executeQuery(sql);
-            while (rs.next()) {
-                StringBuilder pkSb = new StringBuilder();
-                for (int i = 0; i < activeTableFieldConfiguration.getPrimaryKey().length; i++)
-                    pkSb.append(rs.getString(i)).append(',');
-                // Remove training comma
-                pkSb.deleteCharAt(pkSb.length());
-                String hash = rs.getString(activeTableFieldConfiguration.getPrimaryKey().length);
-                id2hash.put(pkSb.toString(), hash);
-            }
-        } catch (SQLException e) {
-            log.error("Could not retrieve hashes from the database. SQL query was {}:", sql, e);
-            throw new AnalysisEngineProcessException(e);
-        }
-        return id2hash;
-    }
-}
diff --git a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/HashComparisonOuterFlow.java b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/HashComparisonOuterFlow.java
deleted file mode 100644
index 09178fa29..000000000
--- a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/flowcontroller/HashComparisonOuterFlow.java
+++ /dev/null
@@ -1,72 +0,0 @@
-package de.julielab.jcore.reader.xmi.flowcontroller;
-
-import org.apache.commons.codec.binary.Base64;
-import org.apache.commons.codec.digest.DigestUtils;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.analysis_engine.metadata.FixedFlow;
-import org.apache.uima.analysis_engine.metadata.FlowConstraints;
-import org.apache.uima.flow.*;
-import org.apache.uima.jcas.JCas;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.Map;
-
-/**
- * <p>Note: This flow can only be used in an aggregate analysis engine where the {@link de.julielab.jcore.reader.xmi.XmiDBMultiplier} is the first component.</p>
- * <p>This flow is created by the {@link HashComparisonFlowController} and routes the CAS that was filled by the {@link de.julielab.jcore.reader.xmi.XmiDBMultiplierReader}.
- * This CAS contains an instance of {@link de.julielab.jcore.types.casmultiplier.RowBatch} which contains the information which documents should be read
- * from which database table.</p>
- * <p>Within this flow, the reader CAS is passed to the multiplier, the first component. For CASes created by the multiplier,
- * the method {@link #newCasProduced(JCas, String)} is called for which a new flow concerning the processing order of the
- * multiplier-created CASes within the aggregate is determined.</p>
- */
-public class HashComparisonOuterFlow extends JCasFlow_ImplBase {
-    private final static Logger log = LoggerFactory.getLogger(HashComparisonOuterFlow.class);
-    private String[] fixedFlow;
-    private int currentPosition;
-    private Map<String, String> id2hash;
-    private String documentItemToHash;
-
-    public HashComparisonOuterFlow(Map<String, String> id2hash, String documentItemToHash, FlowConstraints flowConstraints) throws AnalysisEngineProcessException {
-        this.id2hash = id2hash;
-        this.documentItemToHash = documentItemToHash;
-        if (!(flowConstraints instanceof FixedFlow)) {
-            throw new AnalysisEngineProcessException(new IllegalArgumentException("This flow requires the original FixedFlow to know the order of the delegate engines but the given flow is of type " + flowConstraints.getClass()));
-        }
-        FixedFlow fixedFlow = (FixedFlow) flowConstraints;
-        this.fixedFlow = fixedFlow.getFixedFlow();
-        this.currentPosition = 0;
-    }
-
-    @Override
-    protected Flow newCasProduced(JCas newCas, String producedBy) throws AnalysisEngineProcessException {
-        String newHash = getHash(newCas);
-        return new FixedInnerFlow(fixedFlow);
-    }
-
-    private String getHash(JCas newCas) {
-        final String documentText = newCas.getDocumentText();
-        final byte[] sha = DigestUtils.sha256(documentText.getBytes());
-        return Base64.encodeBase64String(sha);
-    }
-
-    public Step next() {
-        Step step = null;
-        for (; currentPosition < fixedFlow.length && step == null; currentPosition++) {
-            String aeKey = fixedFlow[currentPosition];
-
-            // The outer flow only passes the CAS to the CAS multiplier. The multiplier creates more CASes which
-            // are then passed to newCasProduced() and are then routed by the InnerFlow.
-            if (currentPosition == 0) {
-                log.trace("Outer next AE is: " + aeKey);
-                step = new SimpleStep(aeKey);
-            }
-        }
-        if (step == null) {
-            // no appropriate AEs to call - end of flow
-            log.trace("Outer flow Complete.");
-        }
-        return step == null ? new FinalStep() : step;
-    }
-}
diff --git a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/flowcontroller/FlowControllerTest.java b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/flowcontroller/FlowControllerTest.java
deleted file mode 100644
index 5c3d69e64..000000000
--- a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/flowcontroller/FlowControllerTest.java
+++ /dev/null
@@ -1,101 +0,0 @@
-package de.julielab.jcore.reader.xmi.flowcontroller;
-
-import de.julielab.jcore.types.Header;
-import de.julielab.jcore.types.casmultiplier.RowBatch;
-import de.julielab.jcore.utility.JCoReTools;
-import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_component.JCasMultiplier_ImplBase;
-import org.apache.uima.analysis_engine.AnalysisEngine;
-import org.apache.uima.analysis_engine.AnalysisEngineDescription;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.cas.AbstractCas;
-import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
-import org.apache.uima.fit.descriptor.ConfigurationParameter;
-import org.apache.uima.fit.factory.AnalysisEngineFactory;
-import org.apache.uima.fit.factory.FlowControllerFactory;
-import org.apache.uima.fit.factory.JCasFactory;
-import org.apache.uima.fit.util.JCasUtil;
-import org.apache.uima.flow.FlowControllerDescription;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.cas.StringArray;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.junit.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.ArrayList;
-import java.util.List;
-
-public class FlowControllerTest {
-    @Test
-    public void testFlowController() throws Exception {
-        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types", "de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types");
-        RowBatch rowBatch = new RowBatch(jCas);
-        for (int i = 0; i < 10; i++) {
-            StringArray id = new StringArray(jCas, 1);
-            id.set(0, String.valueOf(i));
-            rowBatch.setIdentifiers(JCoReTools.addToFSArray(rowBatch.getIdentifiers(), id));
-        }
-        rowBatch.addToIndexes();
-
-        FlowControllerDescription flowControllerDescription = FlowControllerFactory.createFlowControllerDescription(HashComparisonFlowController.class);
-        AnalysisEngineDescription multiplierDesc = AnalysisEngineFactory.createEngineDescription(TestMultiplier.class);
-        AnalysisEngineDescription testAeDesc1 = AnalysisEngineFactory.createEngineDescription(TestAE.class, "name", "TestAE 1");
-        AnalysisEngineDescription testAeDesc2 = AnalysisEngineFactory.createEngineDescription(TestAE.class, "name", "TestAE 2");
-        AnalysisEngineDescription aaeWithFlowController = AnalysisEngineFactory.createEngineDescription(flowControllerDescription, multiplierDesc, testAeDesc1, testAeDesc2);
-        AnalysisEngine aae = AnalysisEngineFactory.createEngine(aaeWithFlowController);
-
-        aae.process(jCas);
-    }
-
-    public static class TestAE extends JCasAnnotator_ImplBase {
-        private final static Logger log = LoggerFactory.getLogger(TestAE.class);
-
-        @ConfigurationParameter(name = "name")
-        private String name;
-
-        @Override
-        public void initialize(UimaContext context) throws ResourceInitializationException {
-            name = (String) context.getConfigParameterValue("name");
-        }
-
-        @Override
-        public void process(JCas jCas) throws AnalysisEngineProcessException {
-            log.debug("Running AE: {}", name);
-            log.debug("JCas text: " + jCas.getDocumentText());
-        }
-    }
-
-    public static class TestMultiplier extends JCasMultiplier_ImplBase {
-        private List<String> idsToRead = new ArrayList<>();
-        private int currentIndex;
-        @Override
-        public void process(JCas jCas) throws AnalysisEngineProcessException {
-            RowBatch rowbatch = JCasUtil.selectSingle(jCas, RowBatch.class);
-            idsToRead.clear();
-            currentIndex = 0;
-            for (int i = 0; i < rowbatch.getIdentifiers().size() && rowbatch.getIdentifiers(i) != null; i++) {
-                // In this test, the document IDs consist only of a single element
-                idsToRead.add(rowbatch.getIdentifiers(i).get(0));
-            }
-        }
-
-        @Override
-        public boolean hasNext() throws AnalysisEngineProcessException {
-            return currentIndex < idsToRead.size();
-        }
-
-        @Override
-        public AbstractCas next() throws AnalysisEngineProcessException {
-            JCas emptyJCas = getEmptyJCas();
-            Header header = new Header(emptyJCas);
-            String docId = idsToRead.get(currentIndex);
-            header.setDocId(docId);
-            header.addToIndexes();
-            emptyJCas.setDocumentText("ID: " + docId);
-            ++currentIndex;
-            return emptyJCas;
-        }
-    }
-
-}
diff --git a/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java b/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java
index b7e041f2d..d310158cf 100644
--- a/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java
+++ b/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java
@@ -1,23 +1,32 @@
 package de.julielab.jcore.reader.xml;
 
+import de.julielab.costosys.configuration.FieldConfig;
+import de.julielab.costosys.dbconnection.CoStoSysConnection;
 import de.julielab.jcore.reader.db.DBMultiplier;
 import de.julielab.jcore.reader.db.DBReader;
 import de.julielab.jcore.reader.xmlmapper.mapper.XMLMapper;
+import de.julielab.jcore.types.casflow.ToVisit;
+import de.julielab.jcore.types.casmultiplier.RowBatch;
+import de.julielab.jcore.types.ext.DBProcessingMetaData;
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.digest.DigestUtils;
 import org.apache.commons.lang3.tuple.Pair;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.AbstractCas;
+import org.apache.uima.cas.FeatureStructure;
 import org.apache.uima.fit.descriptor.ConfigurationParameter;
 import org.apache.uima.fit.descriptor.ResourceMetaData;
+import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.StringArray;
 import org.apache.uima.resource.ResourceInitializationException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.*;
 import java.util.stream.Collectors;
 
 @ResourceMetaData(name = "JCoRe XML Database Multiplier", description = "This CAS multiplier receives information about " +
@@ -29,9 +38,13 @@
         "JeDIS."
         , vendor = "JULIE Lab Jena, Germany", copyright = "JULIE Lab Jena, Germany")
 public class XMLDBMultiplier extends DBMultiplier {
-private final static Logger log = LoggerFactory.getLogger(XMLDBMultiplier.class);
     public static final String PARAM_ROW_MAPPING = Initializer.PARAM_ROW_MAPPING;
     public static final String PARAM_MAPPING_FILE = Initializer.PARAM_MAPPING_FILE;
+    public static final String PARAM_ADD_SHA_HASH = "AddShaHash";
+    public static final String PARAM_TABLE_DOCUMENT = "DocumentTable";
+    public static final String PARAM_TO_VISIT_KEYS = "ToVisitKeys";
+
+    private final static Logger log = LoggerFactory.getLogger(XMLDBMultiplier.class);
     /**
      * Mapper which maps medline XML to a CAS with the specified UIMA type system
      * via an XML configuration file.
@@ -41,8 +54,18 @@ public class XMLDBMultiplier extends DBMultiplier {
     protected String[] rowMappingArray;
     @ConfigurationParameter(name = PARAM_MAPPING_FILE, description = XMLDBReader.DESC_MAPPING_FILE)
     protected String mappingFileStr;
+    @ConfigurationParameter(name = PARAM_ADD_SHA_HASH, mandatory = false, description = "For use with AnnotationDefinedFlowController. Possible values: document_text, defaults to 'document_text' and thus doesn't need to be specified manually at the moment. This parameter needs to match the value for the same parameter given to the XMIDBWriter in this pipeline. Then, a comparison between the existing hash in the database and the new hash of the CAS read in this pipeline can be made. In case the hashes match, the CAS skips all component except the DBCheckpointAE to mark the document as processed.")
+    private String documentItemToHash;
+    @ConfigurationParameter(name = PARAM_TABLE_DOCUMENT, mandatory = false, description = "For use with AnnotationDefinedFlowController. String parameter indicating the name of the " +
+            "table where the XMI data and, thus, the hash is stored. The name must be schema qualified. Note that in this component, only the ToVisit annotation is created that determines which components to apply to a CAS with matching (unchanged) hash. The logic to actually control the CAS flow is contained in the AnnotationDefinedFlowController.")
+    private String xmiStorageDataTable;
+    @ConfigurationParameter(name = PARAM_TO_VISIT_KEYS, mandatory = false, description = "For use with AnnotationDefinedFlowController. The delegate AE keys of the AEs this CAS should still applied on although the hash has not changed. Can be null or empty indicating that no component should be applied to the CAS. This is, however, the task of the AnnotationDefinedFlowController.")
+    private String[] toVisitKeys;
+
+
     private Row2CasMapper row2CasMapper;
     private CasPopulator casPopulator;
+    private Map<String, String> docId2HashMap;
     private boolean initialized;
 
 
@@ -51,7 +74,9 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
         super.initialize(aContext);
         mappingFileStr = (String) aContext.getConfigParameterValue(PARAM_MAPPING_FILE);
         rowMappingArray = (String[]) aContext.getConfigParameterValue(PARAM_ROW_MAPPING);
-
+        xmiStorageDataTable = (String) aContext.getConfigParameterValue(PARAM_TABLE_DOCUMENT);
+        documentItemToHash = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_ADD_SHA_HASH)).orElse("document_text");
+        toVisitKeys = (String[]) aContext.getConfigParameterValue(PARAM_TO_VISIT_KEYS);
         // We don't know yet which tables to read. Thus, we leave the row mapping out.
         // We will now once the DBMultiplier#process(JCas) will have been run.
         Initializer initializer = new Initializer(mappingFileStr, null, null);
@@ -59,6 +84,11 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
         initialized = false;
     }
 
+    @Override
+    public void process(JCas aJCas) throws AnalysisEngineProcessException {
+        super.process(aJCas); // the inherited processing of the incoming CAS runs first
+        docId2HashMap = fetchCurrentHashesFromDatabase(JCasUtil.selectSingle(aJCas, RowBatch.class)); // read later by setToVisitAnnotation(JCas)
+    }
 
     @Override
     public AbstractCas next() throws AnalysisEngineProcessException {
@@ -78,6 +108,7 @@ public AbstractCas next() throws AnalysisEngineProcessException {
                 }
                 byte[][] documentData = documentDataIterator.next();
                 populateCas(jCas, documentData);
+                setToVisitAnnotation(jCas);
             }
         } catch (Exception e) {
             log.error("Exception occurred: ", e);
@@ -86,6 +117,41 @@ public AbstractCas next() throws AnalysisEngineProcessException {
         return jCas;
     }
 
+    /**
+     * <p>Indexes a {@link ToVisit} annotation for CASes whose document text hash equals the hash stored in the database.</p>
+     * <p>Nothing happens unless an XMI storage table is configured. The stored hash is looked up by the comma-joined
+     * primary key of the CAS. Only if a stored hash exists and equals the hash of the newly read CAS, a <tt>ToVisit</tt>
+     * annotation is created; it receives the toVisitKeys passed in the configuration of this component, if there are any.</p>
+     *
+     * @param jCas The newly read JCas.
+     */
+    private void setToVisitAnnotation(JCas jCas) {
+        if (xmiStorageDataTable == null)
+            return;
+        StringArray primaryKey = JCasUtil.selectSingle(jCas, DBProcessingMetaData.class).getPrimaryKey();
+        String storedHash = docId2HashMap.get(String.join(",", primaryKey.toArray()));
+        // No annotation is created without a stored hash or when the hashes differ.
+        if (storedHash == null)
+            return;
+        if (!storedHash.equals(getHash(jCas)))
+            return;
+        ToVisit visitMarker = new ToVisit(jCas);
+        boolean keysConfigured = toVisitKeys != null && toVisitKeys.length != 0;
+        if (keysConfigured) {
+            StringArray delegateKeys = new StringArray(jCas, toVisitKeys.length);
+            delegateKeys.copyFromArray(toVisitKeys, 0, 0, toVisitKeys.length);
+            visitMarker.setDelegateKeys(delegateKeys);
+        }
+        // The annotation is indexed even without delegate keys.
+        visitMarker.addToIndexes();
+    }
+
+    private String getHash(JCas newCas) {
+        // Base64 encoding of the SHA-256 digest of the document text; getBytes() uses the platform default charset.
+        final byte[] textBytes = newCas.getDocumentText().getBytes();
+        return Base64.encodeBase64String(DigestUtils.sha256(textBytes));
+    }
+
     private void populateCas(JCas jCas, byte[][] documentData) throws AnalysisEngineProcessException {
         try {
             casPopulator.populateCas(jCas, documentData,
@@ -100,4 +166,51 @@ protected List<Map<String, Object>> getAllRetrievedColumns() {
         Pair<Integer, List<Map<String, String>>> numColumnsAndFields = dbc.getNumColumnsAndFields(tables.length > 1, schemaNames);
         return numColumnsAndFields.getRight().stream().map(HashMap<String, Object>::new).collect(Collectors.toList());
     }
+
+    /**
+     * <p>Fetches the hashes of the currently stored documents in the database.</p>
+     *
+     * @param rowBatch The annotation specifying which documents should be fetched by the multiplier and then be processed by the aggregate.
+     * @return A map from the comma-joined primary key values of the RowBatch documents to their stored hashes. Empty if no document table is configured or the batch has no IDs.
+     * @throws AnalysisEngineProcessException If the SQL request fails.
+     */
+    private Map<String, String> fetchCurrentHashesFromDatabase(RowBatch rowBatch) throws AnalysisEngineProcessException {
+        if (xmiStorageDataTable == null)
+            return Collections.emptyMap();
+        String hashColumn = documentItemToHash + "_sha256";
+        // Extract the document IDs in this RowBatch. The IDs could be composite keys.
+        List<String[]> documentIds = new ArrayList<>(rowBatch.getIdentifiers().size());
+        Iterator<FeatureStructure> documentIDsIt = rowBatch.getIdentifiers().iterator();
+        while (documentIDsIt.hasNext()) {
+            StringArray pkArray = (StringArray) documentIDsIt.next();
+            documentIds.add(pkArray.toStringArray());
+        }
+        // This is the map we want to fill that lets us look up the hash of the document text by document ID.
+        Map<String, String> id2hash = new HashMap<>(documentIds.size());
+        if (documentIds.isEmpty())
+            return id2hash; // Without IDs the WHERE clause would be empty and the query invalid.
+        String sql = null;
+        // Query the database for the document IDs in the current RowBatch and retrieve hashes.
+        try (CoStoSysConnection conn = dbc.obtainOrReserveConnection()) {
+            FieldConfig activeTableFieldConfiguration = dbc.getActiveTableFieldConfiguration();
+            int pkLength = activeTableFieldConfiguration.getPrimaryKey().length;
+            String idQuery = documentIds.stream() // values become SQL string literals: double embedded quotes, close with '
+                    .map(key -> Arrays.stream(key).map(part -> "%s='" + String.valueOf(part).replace("'", "''") + "'").toArray(String[]::new))
+                    .map(activeTableFieldConfiguration::expandPKNames).map(expandedKeys -> String.join(" AND ", expandedKeys))
+                    .collect(Collectors.joining(" OR "));
+            sql = String.format("SELECT %s,%s FROM %s WHERE %s", activeTableFieldConfiguration.getPrimaryKeyString(), hashColumn, xmiStorageDataTable, idQuery);
+            try (java.sql.Statement stmt = conn.createStatement(); ResultSet rs = stmt.executeQuery(sql)) {
+                while (rs.next()) {
+                    String[] pk = new String[pkLength];
+                    for (int i = 0; i < pkLength; i++)
+                        pk[i] = rs.getString(i + 1); // JDBC columns are 1-based; the primary key columns come first
+                    id2hash.put(String.join(",", pk), rs.getString(pkLength + 1)); // the hash column follows the key
+                }
+            }
+        } catch (SQLException e) {
+            log.error("Could not retrieve hashes from the database. SQL query was {}:", sql, e);
+            throw new AnalysisEngineProcessException(e);
+        }
+        return id2hash;
+    }
 }
diff --git a/pom.xml b/pom.xml
index 8768f3a25..87deb6229 100644
--- a/pom.xml
+++ b/pom.xml
@@ -70,7 +70,9 @@
   </dependencies>
       
   <modules>
-            
+
+    <module>jcore-annotation-adder-ae</module>
+
     <module>jcore-ace-reader</module>
             
     <module>jcore-acronym-ae</module>
@@ -78,7 +80,11 @@
     <module>jcore-acronym-writer</module>
             
     <module>jcore-banner-ae</module>
-            
+
+    <module>jcore-bc2gm-reader</module>
+
+    <module>jcore-bc2gmformat-writer</module>
+
     <module>jcore-biolemmatizer-ae</module>
             
     <module>jcore-bionlpformat-consumer</module>
@@ -91,12 +97,16 @@
             
     <module>jcore-coordination-baseline-ae</module>
 
+    <module>jcore-cord19-reader</module>
+
     <module>jcore-coreference-writer</module>
 
     <module>jcore-ct-reader</module>
-            
+
+    <module>jcore-db-checkpoint-ae</module>
+
     <module>jcore-descriptor-creator</module>
-            
+
     <module>jcore-dta-reader</module>
             
     <module>jcore-ec-code-ae</module>
@@ -112,6 +122,10 @@
     <module>jcore-file-reader</module>
             
     <module>jcore-flair-ner-ae</module>
+
+    <module>jcore-flair-token-embedding-ae</module>
+
+    <module>jcore-flow-controllers</module>
             
     <module>jcore-iexml-consumer</module>
             
@@ -134,7 +148,9 @@
     <module>jcore-likelihood-assignment-ae</module>
             
     <module>jcore-likelihood-detection-ae</module>
-            
+
+    <module>jcore-line-multiplier</module>
+
     <module>jcore-lingpipegazetteer-ae</module>
             
     <module>jcore-lingpipe-porterstemmer-ae</module>
@@ -154,6 +170,8 @@
     <module>jcore-muc7-reader</module>
             
     <module>jcore-mutationfinder-ae</module>
+
+    <module>jcore-neo4j-relations-consumer</module>
             
     <module>jcore-opennlp-chunk-ae</module>
             
@@ -164,7 +182,9 @@
     <module>jcore-opennlp-sentence-ae</module>
             
     <module>jcore-opennlp-token-ae</module>
-            
+
+    <module>jcore-ppd-writer</module>
+
     <module>jcore-pmc-reader</module>
             
     <module>jcore-pubtator-reader</module>
@@ -191,24 +211,6 @@
             
     <module>jedis-parent</module>
             
-    <module>jcore-db-checkpoint-ae</module>
-            
-    <module>jcore-ppd-writer</module>
-            
-    <module>jcore-bc2gmformat-writer</module>
-            
-    <module>jcore-bc2gm-reader</module>
-            
-    <module>jcore-annotation-adder-ae</module>
-            
-    <module>jcore-flair-token-embedding-ae</module>
-            
-    <module>jcore-line-multiplier</module>
-            
-    <module>jcore-cord19-reader</module>
-          
-    <module>jcore-neo4j-relations-consumer</module>
-      
   </modules>
       
   <scm>
diff --git a/scripts/createMetaDescriptors.py b/scripts/createMetaDescriptors.py
index 2da6f7de0..6c940f20b 100755
--- a/scripts/createMetaDescriptors.py
+++ b/scripts/createMetaDescriptors.py
@@ -12,15 +12,14 @@
 		-v: The version of the repository
 		-u: If the repository does not yet exist: If is updateable or not
 """
+import fnmatch
+import json
 import os
-import sys
 import re
-from os.path import expanduser
-import json
-import fnmatch
+import sys
 import xml.etree.ElementTree as ET
+from os.path import expanduser
 from xml.etree.ElementTree import ParseError
-from collections import Counter
 
 # For testing we define in and out names so we can create new versions and compare
 META_DESC_IN_NAME = "component.meta"
@@ -66,6 +65,8 @@ def getArtifactInfo(pomFile):
 			category = "consumer"
 		if (artifactId.endswith("writer")):
 			category = "consumer"
+		if (artifactId.endswith("flowcontroller")):
+			category = "flowcontroller"
 
 		artifact = {}
 		artifact["artifactId"]  = artifactId
@@ -111,6 +112,8 @@ def getDescriptors(projectpath):
 						category = "consumer"
 				if descriptorRoot.tag.endswith("casConsumerDescription"):
 					category = "consumer"
+				if descriptorRoot.tag.endswith("flowControllerDescription"):
+					category = "flowcontroller"
 				if category != None:
 					# From the complete file name, exclude the system dependent part. That is, make the path relative to the
 					# project directory's src/main/resources directory.

From f1488a0cc9e764c3755f9c018efe1dec1d9eca80 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 4 Jun 2021 15:49:54 +0200
Subject: [PATCH 056/269] Currently adding a test for the hash value
 comparison. Not finished yet.

---
 jcore-flow-controllers/component.meta         |  20 ++++
 jcore-flow-controllers/pom.xml                |   4 +-
 .../AnnotationDefinedFlowControllerTest.java  |  35 +-----
 jcore-xml-db-reader/pom.xml                   |  13 ++-
 .../jcore/reader/xml/XMLDBMultiplierTest.java | 107 ++++++++++++++++++
 jedis-parent/pom.xml                          |   4 +-
 6 files changed, 143 insertions(+), 40 deletions(-)
 create mode 100644 jcore-flow-controllers/component.meta
 create mode 100644 jcore-xml-db-reader/src/test/java/de/julielab/jcore/reader/xml/XMLDBMultiplierTest.java

diff --git a/jcore-flow-controllers/component.meta b/jcore-flow-controllers/component.meta
new file mode 100644
index 000000000..d8f783bd8
--- /dev/null
+++ b/jcore-flow-controllers/component.meta
@@ -0,0 +1,20 @@
+{
+    "categories": [
+        "flowcontroller"
+    ],
+    "description": "Flow controllers can be used to control the route a (J)CAS takes through an aggregate analysis engine. This project contains Flow Controllers developed at the JULIE Lab.",
+    "descriptors": [
+        {
+            "category": "flowcontroller",
+            "location": "de.julielab.jcore.flow.annotationdefined.desc.jcore-annotation-defined-flowcontroller"
+        }
+    ],
+    "exposable": true,
+    "group": "general",
+    "maven-artifact": {
+        "artifactId": "jcore-flow-controllers",
+        "groupId": "de.julielab",
+        "version": "2.6.0-SNAPSHOT"
+    },
+    "name": "JCoRe Flow Controllers"
+}
diff --git a/jcore-flow-controllers/pom.xml b/jcore-flow-controllers/pom.xml
index a316b81ad..d31933489 100644
--- a/jcore-flow-controllers/pom.xml
+++ b/jcore-flow-controllers/pom.xml
@@ -22,8 +22,8 @@
             <version>${jcore-types-version}</version>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
             <scope>test</scope>
         </dependency>
         <dependency>
diff --git a/jcore-flow-controllers/src/test/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlowControllerTest.java b/jcore-flow-controllers/src/test/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlowControllerTest.java
index f5daabd0f..228e94a49 100644
--- a/jcore-flow-controllers/src/test/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlowControllerTest.java
+++ b/jcore-flow-controllers/src/test/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlowControllerTest.java
@@ -15,7 +15,7 @@
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.StringArray;
 import org.apache.uima.resource.ResourceInitializationException;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import static java.util.Arrays.asList;
 import static org.assertj.core.api.Assertions.assertThat;
@@ -107,37 +107,4 @@ public void process(JCas jCas) {
             token.addToIndexes();
         }
     }
-
-//    public static class TestMultiplier extends JCasMultiplier_ImplBase {
-//        private List<String> idsToRead = new ArrayList<>();
-//        private int currentIndex;
-//        @Override
-//        public void process(JCas jCas) throws AnalysisEngineProcessException {
-//            RowBatch rowbatch = JCasUtil.selectSingle(jCas, RowBatch.class);
-//            idsToRead.clear();
-//            currentIndex = 0;
-//            for (int i = 0; i < rowbatch.getIdentifiers().size() && rowbatch.getIdentifiers(i) != null; i++) {
-//                // In this test, the document IDs consist only of a single element
-//                idsToRead.add(rowbatch.getIdentifiers(i).get(0));
-//            }
-//        }
-//
-//        @Override
-//        public boolean hasNext() throws AnalysisEngineProcessException {
-//            return currentIndex < idsToRead.size();
-//        }
-//
-//        @Override
-//        public AbstractCas next() throws AnalysisEngineProcessException {
-//            JCas emptyJCas = getEmptyJCas();
-//            Header header = new Header(emptyJCas);
-//            String docId = idsToRead.get(currentIndex);
-//            header.setDocId(docId);
-//            header.addToIndexes();
-//            emptyJCas.setDocumentText("ID: " + docId);
-//            ++currentIndex;
-//            return emptyJCas;
-//        }
-//    }
-
 }
diff --git a/jcore-xml-db-reader/pom.xml b/jcore-xml-db-reader/pom.xml
index 145fcc69a..8447584a9 100644
--- a/jcore-xml-db-reader/pom.xml
+++ b/jcore-xml-db-reader/pom.xml
@@ -64,8 +64,17 @@
             <version>${jcore-types-version}</version>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-db-test-utilities</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.assertj</groupId>
+            <artifactId>assertj-core</artifactId>
         </dependency>
     </dependencies>
     <url>https://github.com/JULIELab/jcore-base/jcore-xml-db-reader</url>
diff --git a/jcore-xml-db-reader/src/test/java/de/julielab/jcore/reader/xml/XMLDBMultiplierTest.java b/jcore-xml-db-reader/src/test/java/de/julielab/jcore/reader/xml/XMLDBMultiplierTest.java
new file mode 100644
index 000000000..910c682ba
--- /dev/null
+++ b/jcore-xml-db-reader/src/test/java/de/julielab/jcore/reader/xml/XMLDBMultiplierTest.java
@@ -0,0 +1,107 @@
+package de.julielab.jcore.reader.xml;
+
+
+import de.julielab.costosys.dbconnection.CoStoSysConnection;
+import de.julielab.costosys.dbconnection.DataBaseConnector;
+import de.julielab.jcore.db.test.DBTestUtils;
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.configuration2.ex.ConfigurationException;
+import org.apache.uima.UIMAException;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.testcontainers.containers.PostgreSQLContainer;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Path;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.util.List;
+
+import static org.assertj.core.api.Assertions.assertThat;
+public class XMLDBMultiplierTest {
+
+    public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer("postgres:11.12");
+    private static String costosysConfig;
+    private static final String SOURCE_XML_TABLE = "source_xml_table";
+    private static final String TARGET_XMI_TABLE = "target_xmi_table";
+    private static final String PMID_FIELD_NAME = "pmid";
+    private static final String DOCID_FIELD_NAME = "docid";
+    private static final String XML_FIELD_NAME = "xml";
+    private static final String BASE_DOCUMENT_FIELD_NAME = "base_document";
+    private static final String HASH_FIELD_NAME = "documentText_sha256";
+    private static final String MAX_XMI_ID_FIELD_NAME = "max_xmi_id";
+    private static final String SOFA_MAPPING_FIELD_NAME = "sofa_mapping";
+
+    @BeforeAll
+    public static void setup() throws SQLException, UIMAException, IOException, ConfigurationException {
+        postgres.start();
+        DBTestUtils.createAndSetHiddenConfig(Path.of("src", "test", "resources", "hiddenConfig").toString(), postgres);
+
+        DataBaseConnector dbc = DBTestUtils.getDataBaseConnector(postgres);
+        dbc.setActiveTableSchema("medline_2016_nozip");
+        costosysConfig = DBTestUtils.createTestCostosysConfig("medline_2016_nozip", 1, postgres);
+        new File(costosysConfig).deleteOnExit();
+        try(CoStoSysConnection conn = dbc.obtainOrReserveConnection()) {
+            prepareSourceXMLTable(dbc, conn);
+            prepareTargetXMITable(dbc, conn);
+        }
+        assertThat(dbc.getNumRows(SOURCE_XML_TABLE)).isEqualTo(10);
+        assertThat(dbc.getNumRows(TARGET_XMI_TABLE)).isEqualTo(5);
+        dbc.close();
+    }
+
+    private static void prepareSourceXMLTable(DataBaseConnector dbc, CoStoSysConnection conn) throws SQLException {
+        String xmlFmt = "<xml><docid>%d</docid></xml>";
+        dbc.createTable(SOURCE_XML_TABLE, "Test table for hash comparison test.");
+        String sql = String.format("INSERT INTO %s (%s,%s) VALUES (?,?)", SOURCE_XML_TABLE, PMID_FIELD_NAME, XML_FIELD_NAME);
+        PreparedStatement ps = conn.prepareStatement(sql);
+        for (int i = 0; i < 10; i++) {
+            String xml = String.format(xmlFmt, i);
+            ps.setString(1, String.valueOf(i));
+            ps.setString(2, xml);
+            ps.addBatch();
+        }
+        ps.executeBatch();
+    }
+
+    private static void prepareTargetXMITable(DataBaseConnector dbc, CoStoSysConnection conn) throws SQLException {
+        // Note that the root is "xmi" and not "xml"
+        String xmlFmt = "<xmi><docid>%d</docid></xmi>";
+        dbc.createTable(TARGET_XMI_TABLE, "xmi_text", "Test table for hash comparison test.");
+        dbc.assureColumnsExist(TARGET_XMI_TABLE, List.of(HASH_FIELD_NAME), "text");
+        String sql = String.format("INSERT INTO %s (%s,%s,%s,%s,%s) VALUES (?,XMLPARSE(CONTENT ?),?,?,?)", TARGET_XMI_TABLE, DOCID_FIELD_NAME, BASE_DOCUMENT_FIELD_NAME, HASH_FIELD_NAME, MAX_XMI_ID_FIELD_NAME, SOFA_MAPPING_FIELD_NAME);
+        PreparedStatement ps = conn.prepareStatement(sql);
+        // Note that we only add half of the documents compared to the source XML import. This way we test
+        // if the code behaves right when the target document does not yet exist at all.
+        for (int i = 0; i < 5; i++) {
+            String xml = String.format(xmlFmt, i);
+            ps.setString(1, String.valueOf(i));
+            ps.setString(2, xml);
+            ps.setString(3, getHash(xml));
+            ps.setInt(4, 0);
+            ps.setString(5, "dummy");
+            ps.addBatch();
+        }
+        ps.executeBatch();
+    }
+
+    @AfterAll
+    public static void tearDown() {
+        postgres.stop();
+    }
+
+    private static String getHash(String str) {
+        final byte[] sha = DigestUtils.sha256(str.getBytes());
+        return Base64.encodeBase64String(sha);
+    }
+
+    @Test
+    public void testHashComparison() {
+
+    }
+
+
+}
diff --git a/jedis-parent/pom.xml b/jedis-parent/pom.xml
index 71ffa5ceb..48d783b6d 100644
--- a/jedis-parent/pom.xml
+++ b/jedis-parent/pom.xml
@@ -17,12 +17,12 @@
             <dependency>
                 <groupId>de.julielab</groupId>
                 <artifactId>costosys</artifactId>
-                <version>1.5.1</version>
+                <version>1.5.2-SNAPSHOT</version>
             </dependency>
             <dependency>
                 <groupId>de.julielab</groupId>
                 <artifactId>jcore-db-test-utilities</artifactId>
-                <version>2.5.0</version>
+                <version>2.5.1-SNAPSHOT</version>
             </dependency>
             <dependency>
                 <groupId>de.julielab</groupId>

From 50c442f00b60eaf64f8ef6fa58fa9a9ffc41c4a7 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Sun, 6 Jun 2021 11:52:48 +0200
Subject: [PATCH 057/269] XML Mapper: Adding documentation regarding the
 mapping file structure.

---
 jcore-xml-mapper/README.md                    | 99 ++++++++++++++++++-
 .../medlineMappingFileStructuredAbstract.xml  |  3 +-
 2 files changed, 100 insertions(+), 2 deletions(-)

diff --git a/jcore-xml-mapper/README.md b/jcore-xml-mapper/README.md
index 5bcf986c3..6eacab174 100644
--- a/jcore-xml-mapper/README.md
+++ b/jcore-xml-mapper/README.md
@@ -3,7 +3,7 @@ NOTE: This is not a UIMA component but rather a library used by some JCoRe compo
 This is a generic XML mapper to create CAS instances reflecting contents of XML documents.
 
 ### Objective
-The JULIE Lab XMLMapper is a mapper which maps XML elements from an XML document onto (UIMA) Types or Type Features. For that task it uses a mapping file, which comes as an input.
+The JULIE Lab XMLMapper is a mapper which maps XML elements from an XML document onto (UIMA) types or type features. For that task it uses a mapping file, which comes as an input.
 Examples for mapping files are found in some [jcore-projects](https://github.com/JULIELab/jcore-projects) components,
 for example the [jcore-pubmed-reader](https://github.com/JULIELab/jcore-projects/tree/master/jcore-pubmed-reader), its
 MEDLINE-pendant or the database versions of both. 
@@ -14,4 +14,101 @@ The input and output of an AE is done via annotation objects. The classes corres
 ### Using the AE - Descriptor Configuration
 In UIMA, each component is configured by a descriptor in XML. Such a preconfigured descriptor is available under `src/main/resources/de/julielab/jcore/ ` but it can be further edited if so desired; see [UIMA SDK User's Guide](https://uima.apache.org/downloads/releaseDocs/2.1.0-incubating/docs/html/tools/tools.html#ugr.tools.cde) for further information.
 
+### Mapping File Syntax
+Please note that this section is incomplete. The mapping file of the [jcore-pubmed-reader](https://github.com/JULIELab/jcore-projects/tree/master/jcore-pubmed-reader) includes examples for all supported features.
+
+The basic structure of the mapping file consists of the `<mappings>` root element, a `<documentText>` root child element and an arbitrary number of `<tsType>` ('type system type', referring to the UIMA type system to be employed) root child elements:
+
+```xml
+<mappings>
+    <documentText>
+        ...
+    </documentText>
+    <tsType>
+        ...        
+    </tsType>
+    <tsType>
+        ...
+    </tsType>
+    ...
+</mappings>
+```
+
+## Document Text
+The CAS document text is populated with the `<documentText>` mapping element. It defines an arbitrary number of `<documentPart>` elements of whose mapping values the document text will be comprised, in the order of the `<documentPart>` elements in the mapping file. Each document part is given a manually defined ID which can be referred to in order to create a UIMA annotation covering the respective document part text. The location of the actual character data in the mapped document XML files is specified via XPath.
+
+```xml
+<documentText>
+	<partOfDocumentText id="0">
+		<xPath>/MedlineCitation/Article/ArticleTitle</xPath>
+	</partOfDocumentText>
+	<partOfDocumentText id="1">
+		<xPath>/MedlineCitation/Article/Abstract</xPath>
+	</partOfDocumentText>
+</documentText>
+```
+
+This example collects the article title, and the abstract of a MEDLINE XML document for the CAS document text.
+
+The `<partOfDocumentText>` element may have an optional child element named `<externalParser>`. This is useful, or even necessary, when the document structure for this element is not static, i.e. has a varying number of children. In such a case, a user-provided class on the classpath can be specified. This class must implement the `de.julielab.jcore.reader.xmlmapper.mapper.DocumentTextPartParser` interface. It receives the document XML element that the XPath in the mapping file points to and returns a list of strings that make up the respective part of the document text:
+
+```xml
+<partOfDocumentText id="1">
+	<xPath>/MedlineCitation/Article/Abstract</xPath>
+	<externalParser>
+		de.julielab.jcore.reader.xmlmapper.mapper.StructuredAbstractParser
+	</externalParser>
+</partOfDocumentText>
+```
+
+The `StructuredAbstractParser` is able to parse the child elements of `/MedlineCitation/Article/Abstract`, namely `AbstractText` elements, which also have the attributes `Label` and `NlmCategory`. These are details of the MEDLINE XML format and are just used here as an example use case for external parsers.
+
+## UIMA Type Annotations
+
+Annotations are added with the `<tsType>` element. Its main children are `<tsFullClassName>` and `<tsFeature>`, defining the actual type to be instantiated and any feature values that should be added to the type. Since a UIMA type feature can itself be a type, `<tsFeature>` elements can be nested. Then, the `<xPath>` child of a `<tsFeature>` element is resolved *relative* to the `<xPath>` of the parent `<tsFeature>` element. Thus, when the parent `<tsFeature>` element does not specify an `<xPath>` element, which is perfectly legal, the given xpath is resolved from the XML document root:
+
+```xml
+<tsType>
+    <tsFullClassName>fully qualified UIMA type name</tsFullClassName>
+    <tsFeature>
+        <tsFeatureName>feature name of the type</tsFeatureName>
+        <isType>true if the feature value is a UIMA feature structure (annotation) itself</isType>
+        <tsFullClassName>
+            The value data type of the feature as it is passed to the setter for this feature in Java code.
+            This can also be an array type, e.g. org.apache.uima.jcas.cas.FSArray.
+        </tsFullClassName>
+        <tsFeature>
+            <tsFeatureName>optional if the parent tsFullClassName is an array type</tsFeatureName>
+            <isType>true</isType>
+            <xPath>
+                absolute xpath since the parent does not specify an xpath
+            </xPath>
+            <tsFullClassName>
+                fully qualified UIMA type name of this nested type
+            </tsFullClassName>
+    
+            <tsFeature>
+                <tsFeatureName>name of this feature relative to the parent tsFullClassName type</tsFeatureName>
+                <xPath>relative xpath to the parent xpath</xPath>
+                <tsFullClassName>a primitive data type (or a string) since this is not a UIMA type itself (missing isType element).</tsFullClassName>
+            </tsFeature>
+        </tsFeature>
+    </tsFeature>
+</tsType>
+```
+The above example showcases the structure of a nested annotation, i.e. a feature path. The outer type will have another type as feature value which in turn has a primitive value as the final feature value.
+
+**Important:** The `<xPath>` values are evaluated for *all occurrences* of the respective XPath in the XML document. Thus, the above annotations will be created for all XPath matches. This holds true for every level of `<xPath>` specification. This allows collecting child XML document elements into arrays. An outer xpath points to the collection document elements, and an inner xpath points to their children.
+
+The `<tsFeature>` element again accepts the child element `<externalParser>`. In this case, the external parser needs to implement the `de.julielab.jcore.reader.xmlmapper.typeParser.TypeParser` interface. It might be helpful to extend the class `de.julielab.jcore.reader.xmlmapper.typeParser.StandardTypeParser` and use its `parseSingleType` method.
+
+Finally, the `<tsFeature>` element accepts the `<offset>` child element, which can point to a part of the document text, thus creating an annotation for the respective document text part as identified by its ID:
+
+```xml
+<offset>
+	<partOfDocumentText>
+		<id>0</id>
+	</partOfDocumentText>
+</offset>
+```
 
diff --git a/jcore-xml-mapper/src/test/resources/medlineMappingFileStructuredAbstract.xml b/jcore-xml-mapper/src/test/resources/medlineMappingFileStructuredAbstract.xml
index 9badb769f..eca924537 100644
--- a/jcore-xml-mapper/src/test/resources/medlineMappingFileStructuredAbstract.xml
+++ b/jcore-xml-mapper/src/test/resources/medlineMappingFileStructuredAbstract.xml
@@ -5,7 +5,8 @@
 		</partOfDocumentText>
 		<partOfDocumentText id="1">
 			<xPath>/MedlineCitation/Article/Abstract</xPath>
-			<externalParser>de.julielab.jcore.reader.xmlmapper.mapper.StructuredAbstractParser
+			<externalParser>
+				de.julielab.jcore.reader.xmlmapper.mapper.StructuredAbstractParser
 			</externalParser>
 		</partOfDocumentText>
 		<partOfDocumentText id="2">

From 29b8b7b0c3888108fa6b4aedf4ca599bda13bd85 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Sun, 6 Jun 2021 14:14:28 +0200
Subject: [PATCH 058/269] Adapted some error messages. Removed fest reflect
 from the `StandardTypeBuilder` and the pom.xml and replaced it with default
 Java reflect.

---
 jcore-xml-mapper/README.md                    |  8 +++----
 jcore-xml-mapper/pom.xml                      | 10 --------
 .../xmlmapper/genericTypes/TypeFactory.java   | 23 ++++++++-----------
 .../typeBuilder/StandardTypeBuilder.java      |  9 +++++---
 4 files changed, 20 insertions(+), 30 deletions(-)

diff --git a/jcore-xml-mapper/README.md b/jcore-xml-mapper/README.md
index 6eacab174..e1fa47aac 100644
--- a/jcore-xml-mapper/README.md
+++ b/jcore-xml-mapper/README.md
@@ -35,7 +35,7 @@ The basic structure of the mapping file consists of the `<mappings>` root elemen
 ```
 
 ## Document Text
-The CAS document text is populated with the `<documentText>` mapping element. It defines an arbitrary number of `<documentPart>` elements of whose mapping values the document text will be comprised, in the order of the `<documentPart>` elements in the mapping file. Each document part is given a manually defined ID which can be referred to in order to create a UIMA annotation covering the respective document part text. The location of the actual character data in the mapped document XML files is specified via XPath.
+The CAS document text is populated with the `<documentText>` mapping element. It defines an arbitrary number of `<partOfDocumentText>` elements whose mapped values make up the document text, in the order of the `<partOfDocumentText>` elements in the mapping file. Each document part is given a mandatory, manually defined ID which can be referred to in order to create a UIMA annotation covering the respective document part text. The location of the actual character data in the mapped document XML files is specified via XPath.
 
 ```xml
 <documentText>
@@ -106,9 +106,9 @@ Finally, the `<tsFeature>` element accepts the `<offset>` child element which ca
 
 ```xml
 <offset>
-	<partOfDocumentText>
-		<id>0</id>
-	</partOfDocumentText>
+    <partOfDocumentText>
+        <id>0</id>
+    </partOfDocumentText>
 </offset>
 ```
 
diff --git a/jcore-xml-mapper/pom.xml b/jcore-xml-mapper/pom.xml
index ded5b9a9f..7264e7079 100644
--- a/jcore-xml-mapper/pom.xml
+++ b/jcore-xml-mapper/pom.xml
@@ -22,16 +22,6 @@
             <artifactId>jcore-types</artifactId>
             <version>${jcore-types-version}</version>
         </dependency>
-        <dependency>
-            <groupId>org.easytesting</groupId>
-            <artifactId>fest-reflect</artifactId>
-            <version>1.2</version>
-        </dependency>
-        <dependency>
-            <groupId>org.easytesting</groupId>
-            <artifactId>fest-util</artifactId>
-            <version>1.1.4</version>
-        </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>julie-xml-tools</artifactId>
diff --git a/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/genericTypes/TypeFactory.java b/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/genericTypes/TypeFactory.java
index f9408edad..0a36ccc70 100644
--- a/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/genericTypes/TypeFactory.java
+++ b/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/genericTypes/TypeFactory.java
@@ -65,11 +65,11 @@ public class TypeFactory {
 	/**
 	 * creates a new instance of the TypeFactory
 	 * 
-	 * @param mappingFile
+	 * @param mappingFileData The mapping file contents.
 	 */
 	public TypeFactory(byte[] mappingFileData) {
 		this.mappingFileData = mappingFileData;
-		types = new ArrayList<TypeTemplate>();
+		types = new ArrayList<>();
 		this.documentTextParser = new DocumentTextHandler();
 	}
 
@@ -132,7 +132,7 @@ public List<TypeTemplate> createTemplates() throws CollectionException {
 					}
 				} else {
 					if (!nodeName.equals(ROOT)) {
-						LOGGER.warn("unknown tag in mapping file: " + nodeName + "!!");
+						LOGGER.warn("unknown tag in mapping file (note that element names are case sensitive): " + nodeName);
 					}
 				}
 			}
@@ -156,7 +156,8 @@ private void fillDocumentParser(XMLEventReader reader) throws XMLStreamException
 							id = Integer.parseInt(next.getValue());
 							documentTextParser.addPartOfDocumentTextXPath(id);
 						}
-					} else {
+					}
+					else {
 						LOGGER.error("no id for " + PART_OF_DOCUMENT_TEXT);
 						throw new RuntimeException();
 					}
@@ -172,7 +173,7 @@ private void fillDocumentParser(XMLEventReader reader) throws XMLStreamException
 					if (xpath.length() > 0 && id >= 0) {
 						documentTextParser.setXPathForPartOfDocumentText(id, xpath);
 					} else {
-						LOGGER.error("Unkown data in " + DOCUMENT_TEXT + "/" + VALUE_X_PATH + " tag ");
+						LOGGER.error("Unknown data in " + DOCUMENT_TEXT + "/" + VALUE_X_PATH + " tag ");
 					}
 				} else if (nodeName.equals(EXTERNAL_PARSER)){
 					event = reader.nextEvent();
@@ -183,7 +184,7 @@ private void fillDocumentParser(XMLEventReader reader) throws XMLStreamException
 					if (externalParserClassName.length() > 0 && id >= 0) {
 						documentTextParser.setExternalParserForPartOfDocument(id, externalParserClassName);
 					} else {
-						LOGGER.error("Unkown data in " + DOCUMENT_TEXT + "/" + VALUE_X_PATH + " tag ");
+						LOGGER.error("Unknown data in " + DOCUMENT_TEXT + "/" + VALUE_X_PATH + " tag ");
 					}
 				}
 				else {
@@ -242,15 +243,11 @@ private TypeTemplate parseType(XMLEventReader reader) throws XMLStreamException,
 						type.addAdditionalData(event.asCharacters().getData().trim(), index);
 					}
 				} else {
-					LOGGER.warn("unknown tag in mapping file: " + nodeName + "!!");
+					LOGGER.warn("unknown tag in mapping file (note that element names are case sensitive): " + nodeName);
 				}
 			}
 			event = reader.nextEvent();
 		}
-		// reflection type anlegen
-		// iteration über alle features
-		// if(feature.type==null)
-		// über getter bestimmen
 		return type;
 	}
 
@@ -273,7 +270,7 @@ private void parseOffset(TypeTemplate type, XMLEventReader reader) throws XMLStr
 							}
 						}
 					} else {
-						LOGGER.error("Unknown element in mapping file: " + nodeName);
+						LOGGER.error("Unknown element in mapping file (note that element names are case sensitive): " + nodeName);
 					}
 				}
 			}
@@ -315,7 +312,7 @@ private FeatureTemplate parseFeature(XMLEventReader reader) throws XMLStreamExce
 					FeatureTemplate newFeature = parseFeature(reader);
 					feature.addFeature(newFeature);
 				} else {
-					LOGGER.warn("unknown tag in mapping file: " + nodeName + "!!");
+					LOGGER.warn("unknown tag in mapping file (note that element names are case sensitive): " + nodeName);
 				}
 			}
 			event = reader.nextEvent();
diff --git a/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/typeBuilder/StandardTypeBuilder.java b/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/typeBuilder/StandardTypeBuilder.java
index ca3bbec18..9fa1c46b2 100644
--- a/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/typeBuilder/StandardTypeBuilder.java
+++ b/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/typeBuilder/StandardTypeBuilder.java
@@ -27,7 +27,6 @@
 
 import java.util.HashMap;
 
-import static org.fest.reflect.core.Reflection.constructor;
 import static org.fest.reflect.core.Reflection.method;
 
 /**
@@ -120,7 +119,11 @@ private Annotation buildSingleInstance(ConcreteType concreteType, JCas jcas) thr
 		if (concreteType.getConcreteFeatures() != null) {
 			// Create the UIMA type corresponding to the type description in
 			// concreteType.
-			type = (Annotation) constructor().withParameterTypes(JCas.class).in(typeClass).newInstance(jcas);
+			try {
+				type = (Annotation) typeClass.getConstructor(JCas.class).newInstance(jcas);
+			} catch (Exception e){
+				throw new CollectionException(e);
+			}
 
 			// For each feature this type has, set the corret feature value.
 			for (ConcreteFeature concreteFeature : concreteType.getConcreteFeatures()) {
@@ -151,7 +154,7 @@ private Annotation buildSingleInstance(ConcreteType concreteType, JCas jcas) thr
 								.invoke(parseValueStringToValueType(concreteFeature.getValue(), concreteFeature.getFullClassName()));
 					} else if (concreteFeature.getFullClassName().equals("String") || concreteFeature.getFullClassName().equals("java.lang.String")) {
 						featureClass = Class.forName(concreteFeature.getFullClassName());
-						method(methodName).withParameterTypes(featureClass).in(type).invoke(concreteFeature.getValue());
+						typeClass.getMethod(methodName, featureClass).invoke(type, concreteFeature.getValue());
 					} else {
 						String featureClassName = concreteFeature.getFullClassName();
 						if (StringUtils.isBlank(featureClassName))

From 4e29844a7a2809cc65bb9dfc0ac16c21d320a721 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Sun, 6 Jun 2021 15:18:31 +0200
Subject: [PATCH 059/269] Continued with the test for hash comparison. Not yet
 finished.

---
 .../jcore/reader/db/DBMultiplier.java         |  2 +
 .../jcore/reader/xml/XMLDBMultiplier.java     | 29 ++++--
 .../jcore/reader/xml/XMLDBMultiplierTest.java | 97 +++++++++++++++++--
 .../src/test/resources/test-mappingfile.xml   | 17 ++++
 4 files changed, 127 insertions(+), 18 deletions(-)
 create mode 100644 jcore-xml-db-reader/src/test/resources/test-mappingfile.xml

diff --git a/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplier.java b/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplier.java
index 195e30de7..17040c15e 100644
--- a/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplier.java
+++ b/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplier.java
@@ -9,6 +9,7 @@
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.fit.descriptor.OperationalProperties;
 import org.apache.uima.fit.descriptor.ResourceMetaData;
+import org.apache.uima.fit.descriptor.TypeCapability;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.FSArray;
@@ -34,6 +35,7 @@
         "populate CASes with them. This component is a part of the Jena Document Information System, JeDIS.",
         vendor = "JULIE Lab Jena, Germany", copyright = "JULIE Lab Jena, Germany")
 @OperationalProperties(outputsNewCases = true)
+@TypeCapability(inputs = {"de.julielab.jcore.types.casmultiplier.RowBatch"})
 public abstract class DBMultiplier extends JCasMultiplier_ImplBase {
 
     protected DataBaseConnector dbc;
diff --git a/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java b/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java
index d310158cf..d6c1f7186 100644
--- a/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java
+++ b/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java
@@ -17,6 +17,7 @@
 import org.apache.uima.cas.FeatureStructure;
 import org.apache.uima.fit.descriptor.ConfigurationParameter;
 import org.apache.uima.fit.descriptor.ResourceMetaData;
+import org.apache.uima.fit.descriptor.TypeCapability;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.StringArray;
@@ -37,11 +38,13 @@
         "CAS with them via the 'RowMapping' parameter. This component is part of the Jena Document Information System, " +
         "JeDIS."
         , vendor = "JULIE Lab Jena, Germany", copyright = "JULIE Lab Jena, Germany")
+@TypeCapability(inputs = {"de.julielab.jcore.types.casmultiplier.RowBatch"}, outputs = {"de.julielab.jcore.types.casflow.ToVisit"})
 public class XMLDBMultiplier extends DBMultiplier {
     public static final String PARAM_ROW_MAPPING = Initializer.PARAM_ROW_MAPPING;
     public static final String PARAM_MAPPING_FILE = Initializer.PARAM_MAPPING_FILE;
     public static final String PARAM_ADD_SHA_HASH = "AddShaHash";
     public static final String PARAM_TABLE_DOCUMENT = "DocumentTable";
+    public static final String PARAM_TABLE_DOCUMENT_SCHEMA = "DocumentTableSchema";
     public static final String PARAM_TO_VISIT_KEYS = "ToVisitKeys";
 
     private final static Logger log = LoggerFactory.getLogger(XMLDBMultiplier.class);
@@ -59,6 +62,8 @@ public class XMLDBMultiplier extends DBMultiplier {
     @ConfigurationParameter(name = PARAM_TABLE_DOCUMENT, mandatory = false, description = "For use with AnnotationDefinedFlowController. String parameter indicating the name of the " +
             "table where the XMI data and, thus, the hash is stored. The name must be schema qualified. Note that in this component, only the ToVisit annotation is created that determines which components to apply to a CAS with matching (unchanged) hash. The logic to actually control the CAS flow is contained in the AnnotationDefinedFlowController.")
     private String xmiStorageDataTable;
+    @ConfigurationParameter(name= PARAM_TABLE_DOCUMENT_SCHEMA, mandatory = false, description = "For use with AnnotationDefinedFlowController. The name of the schema that the document table - given with the "+PARAM_TABLE_DOCUMENT+" parameter - adheres to. Only the primary key part is required for hash value retrieval.")
+    private String xmiStorageDataTableSchema;
     @ConfigurationParameter(name = PARAM_TO_VISIT_KEYS, mandatory = false, description = "For use with AnnotationDefinedFlowController. The delegate AE keys of the AEs this CAS should still applied on although the hash has not changed. Can be null or empty indicating that no component should be applied to the CAS. This is, however, the task of the AnnotationDefinedFlowController.")
     private String[] toVisitKeys;
 
@@ -75,6 +80,7 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
         mappingFileStr = (String) aContext.getConfigParameterValue(PARAM_MAPPING_FILE);
         rowMappingArray = (String[]) aContext.getConfigParameterValue(PARAM_ROW_MAPPING);
         xmiStorageDataTable = (String) aContext.getConfigParameterValue(PARAM_TABLE_DOCUMENT);
+        xmiStorageDataTableSchema = (String) aContext.getConfigParameterValue(PARAM_TABLE_DOCUMENT_SCHEMA);
         documentItemToHash = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_ADD_SHA_HASH)).orElse("document_text");
         toVisitKeys = (String[]) aContext.getConfigParameterValue(PARAM_TO_VISIT_KEYS);
         // We don't know yet which tables to read. Thus, we leave the row mapping out.
@@ -82,6 +88,12 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
         Initializer initializer = new Initializer(mappingFileStr, null, null);
         xmlMapper = initializer.getXmlMapper();
         initialized = false;
+
+        if (!(xmiStorageDataTable == null && xmiStorageDataTableSchema == null && documentItemToHash == null) && !(xmiStorageDataTable != null && xmiStorageDataTableSchema != null && documentItemToHash != null)) {
+            String errorMsg = String.format("From the parameters '%s', '%s' and '%s' some are specified and some aren't. To activate hash value comparison in order to add aggregate component keys for CAS visit, specify all those parameters. Otherwise, specify none.", PARAM_TABLE_DOCUMENT, PARAM_TABLE_DOCUMENT_SCHEMA, PARAM_ADD_SHA_HASH);
+            log.error(errorMsg);
+            throw new ResourceInitializationException(new IllegalArgumentException(errorMsg));
+        }
     }
 
     @Override
@@ -162,7 +174,6 @@ private void populateCas(JCas jCas, byte[][] documentData) throws AnalysisEngine
     }
 
     protected List<Map<String, Object>> getAllRetrievedColumns() {
-        List<Map<String, Object>> fields = new ArrayList<Map<String, Object>>();
         Pair<Integer, List<Map<String, String>>> numColumnsAndFields = dbc.getNumColumnsAndFields(tables.length > 1, schemaNames);
         return numColumnsAndFields.getRight().stream().map(HashMap<String, Object>::new).collect(Collectors.toList());
     }
@@ -189,20 +200,20 @@ private Map<String, String> fetchCurrentHashesFromDatabase(RowBatch rowBatch) th
             String sql = null;
             // Query the database for the document IDs in the current RowBatch and retrieve hashes.
             try (CoStoSysConnection conn = dbc.obtainOrReserveConnection()) {
-                FieldConfig activeTableFieldConfiguration = dbc.getActiveTableFieldConfiguration();
+                FieldConfig xmiTableSchema = dbc.getFieldConfiguration(xmiStorageDataTableSchema);
                 String idQuery = documentIds.stream()
-                        .map(key -> Arrays.stream(key).map(part -> "%s='" + part + '"').toArray(String[]::new))
-                        .map(activeTableFieldConfiguration::expandPKNames).map(expandedKeys -> String.join(" AND ", expandedKeys))
+                        .map(key -> Arrays.stream(key).map(part -> "%s='" + part + "'").toArray(String[]::new))
+                        .map(xmiTableSchema::expandPKNames).map(expandedKeys -> String.join(" AND ", expandedKeys))
                         .collect(Collectors.joining(" OR "));
-                sql = String.format("SELECT %s,%s FROM %s WHERE %s", activeTableFieldConfiguration.getPrimaryKeyString(), hashColumn, xmiStorageDataTable, idQuery);
+                sql = String.format("SELECT %s,%s FROM %s WHERE %s", xmiTableSchema.getPrimaryKeyString(), hashColumn, xmiStorageDataTable, idQuery);
                 ResultSet rs = conn.createStatement().executeQuery(sql);
                 while (rs.next()) {
                     StringBuilder pkSb = new StringBuilder();
-                    for (int i = 0; i < activeTableFieldConfiguration.getPrimaryKey().length; i++)
-                        pkSb.append(rs.getString(i)).append(',');
+                    for (int i = 0; i < xmiTableSchema.getPrimaryKey().length; i++)
+                        pkSb.append(rs.getString(i+1)).append(',');
                     // Remove training comma
-                    pkSb.deleteCharAt(pkSb.length());
-                    String hash = rs.getString(activeTableFieldConfiguration.getPrimaryKey().length);
+                    pkSb.deleteCharAt(pkSb.length()-1);
+                    String hash = rs.getString(xmiTableSchema.getPrimaryKey().length+1);
                     id2hash.put(pkSb.toString(), hash);
                 }
             } catch (SQLException e) {
diff --git a/jcore-xml-db-reader/src/test/java/de/julielab/jcore/reader/xml/XMLDBMultiplierTest.java b/jcore-xml-db-reader/src/test/java/de/julielab/jcore/reader/xml/XMLDBMultiplierTest.java
index 910c682ba..75dc1659e 100644
--- a/jcore-xml-db-reader/src/test/java/de/julielab/jcore/reader/xml/XMLDBMultiplierTest.java
+++ b/jcore-xml-db-reader/src/test/java/de/julielab/jcore/reader/xml/XMLDBMultiplierTest.java
@@ -4,10 +4,23 @@
 import de.julielab.costosys.dbconnection.CoStoSysConnection;
 import de.julielab.costosys.dbconnection.DataBaseConnector;
 import de.julielab.jcore.db.test.DBTestUtils;
+import de.julielab.jcore.types.casflow.ToVisit;
+import de.julielab.jcore.types.casmultiplier.RowBatch;
+import de.julielab.jcore.utility.JCoReTools;
 import org.apache.commons.codec.binary.Base64;
 import org.apache.commons.codec.digest.DigestUtils;
 import org.apache.commons.configuration2.ex.ConfigurationException;
 import org.apache.uima.UIMAException;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.JCasIterator;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.fit.factory.TypeSystemDescriptionFactory;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSArray;
+import org.apache.uima.jcas.cas.StringArray;
+import org.apache.uima.resource.metadata.TypeSystemDescription;
 import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
@@ -18,13 +31,14 @@
 import java.nio.file.Path;
 import java.sql.PreparedStatement;
 import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.Collection;
 import java.util.List;
 
 import static org.assertj.core.api.Assertions.assertThat;
+
 public class XMLDBMultiplierTest {
 
-    public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer("postgres:11.12");
-    private static String costosysConfig;
     private static final String SOURCE_XML_TABLE = "source_xml_table";
     private static final String TARGET_XMI_TABLE = "target_xmi_table";
     private static final String PMID_FIELD_NAME = "pmid";
@@ -34,6 +48,9 @@ public class XMLDBMultiplierTest {
     private static final String HASH_FIELD_NAME = "documentText_sha256";
     private static final String MAX_XMI_ID_FIELD_NAME = "max_xmi_id";
     private static final String SOFA_MAPPING_FIELD_NAME = "sofa_mapping";
+    private static final String SUBSET_TABLE = "test_subset";
+    public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer("postgres:11.12");
+    private static String costosysConfig;
 
     @BeforeAll
     public static void setup() throws SQLException, UIMAException, IOException, ConfigurationException {
@@ -44,22 +61,24 @@ public static void setup() throws SQLException, UIMAException, IOException, Conf
         dbc.setActiveTableSchema("medline_2016_nozip");
         costosysConfig = DBTestUtils.createTestCostosysConfig("medline_2016_nozip", 1, postgres);
         new File(costosysConfig).deleteOnExit();
-        try(CoStoSysConnection conn = dbc.obtainOrReserveConnection()) {
+        try (CoStoSysConnection conn = dbc.obtainOrReserveConnection()) {
             prepareSourceXMLTable(dbc, conn);
             prepareTargetXMITable(dbc, conn);
         }
+        dbc.defineSubset(SUBSET_TABLE, SOURCE_XML_TABLE, "Test subset");
         assertThat(dbc.getNumRows(SOURCE_XML_TABLE)).isEqualTo(10);
         assertThat(dbc.getNumRows(TARGET_XMI_TABLE)).isEqualTo(5);
+
         dbc.close();
     }
 
     private static void prepareSourceXMLTable(DataBaseConnector dbc, CoStoSysConnection conn) throws SQLException {
-        String xmlFmt = "<xml><docid>%d</docid></xml>";
+        String xmlFmt = "<xml><docid>%d</docid><text>This is document text number %d</text></xml>";
         dbc.createTable(SOURCE_XML_TABLE, "Test table for hash comparison test.");
         String sql = String.format("INSERT INTO %s (%s,%s) VALUES (?,?)", SOURCE_XML_TABLE, PMID_FIELD_NAME, XML_FIELD_NAME);
         PreparedStatement ps = conn.prepareStatement(sql);
         for (int i = 0; i < 10; i++) {
-            String xml = String.format(xmlFmt, i);
+            String xml = String.format(xmlFmt, i, i);
             ps.setString(1, String.valueOf(i));
             ps.setString(2, xml);
             ps.addBatch();
@@ -69,7 +88,7 @@ private static void prepareSourceXMLTable(DataBaseConnector dbc, CoStoSysConnect
 
     private static void prepareTargetXMITable(DataBaseConnector dbc, CoStoSysConnection conn) throws SQLException {
         // Note that the root is "xmi" and not "xml"
-        String xmlFmt = "<xmi><docid>%d</docid></xmi>";
+        String xmlFmt = "<xmi><docid>%d</docid><text>This is document text number %d</text></xmi>";
         dbc.createTable(TARGET_XMI_TABLE, "xmi_text", "Test table for hash comparison test.");
         dbc.assureColumnsExist(TARGET_XMI_TABLE, List.of(HASH_FIELD_NAME), "text");
         String sql = String.format("INSERT INTO %s (%s,%s,%s,%s,%s) VALUES (?,XMLPARSE(CONTENT ?),?,?,?)", TARGET_XMI_TABLE, DOCID_FIELD_NAME, BASE_DOCUMENT_FIELD_NAME, HASH_FIELD_NAME, MAX_XMI_ID_FIELD_NAME, SOFA_MAPPING_FIELD_NAME);
@@ -77,7 +96,7 @@ private static void prepareTargetXMITable(DataBaseConnector dbc, CoStoSysConnect
         // Note that we only add half of the documents compared to the source XML import. This way we test
         // if the code behaves right when the target document does not yet exist at all.
         for (int i = 0; i < 5; i++) {
-            String xml = String.format(xmlFmt, i);
+            String xml = String.format(xmlFmt, i, i);
             ps.setString(1, String.valueOf(i));
             ps.setString(2, xml);
             ps.setString(3, getHash(xml));
@@ -99,9 +118,69 @@ private static String getHash(String str) {
     }
 
     @Test
-    public void testHashComparison() {
-
+    public void testMultiplier() throws Exception {
+        JCas jCas = prepareCas();
+        AnalysisEngine engine = AnalysisEngineFactory.createEngine(XMLDBMultiplier.class, XMLDBMultiplier.PARAM_MAPPING_FILE, Path.of("src", "test", "resources", "test-mappingfile.xml").toString());
+        JCasIterator jCasIterator = engine.processAndOutputNewCASes(jCas);
+        List<String> documentTexts = new ArrayList<>();
+        while (jCasIterator.hasNext()) {
+            JCas newCas = jCasIterator.next();
+            documentTexts.add(newCas.getDocumentText());
+            System.out.println(newCas.getDocumentText());
+            newCas.release();
+        }
+        assertThat(documentTexts).containsExactly("This is document text number 0", "This is document text number 1", "This is document text number 2", "This is document text number 3", "This is document text number 4", "This is document text number 5", "This is document text number 6", "This is document text number 7", "This is document text number 8", "This is document text number 9");
     }
 
+    /**
+     * Creates a JCas and adds a RowBatch for all 10 documents in the source XML table as well as the data table and subset table and schema names.
+     *
+     * @return A JCas prepared for the tests in this class.
+     * @throws UIMAException If some UIMA operation fails.
+     */
+    private JCas prepareCas() throws UIMAException {
+        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-document-meta-types", "de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types", "de.julielab.jcore.types.jcore-casflow-types");
+        RowBatch rowBatch = new RowBatch(jCas);
+        StringArray dataTable = new StringArray(jCas, 1);
+        dataTable.set(0, SOURCE_XML_TABLE);
+        rowBatch.setTables(dataTable);
+        StringArray tableSchema = new StringArray(jCas, 1);
+        tableSchema.set(0, "medline_2016_nozip");
+        rowBatch.setTableSchemas(tableSchema);
+        rowBatch.setTableName(SUBSET_TABLE);
+        FSArray pks = new FSArray(jCas, 10);
+        // Read all documents
+        for (int i = 0; i < 10; i++) {
+            StringArray pk = new StringArray(jCas, 1);
+            pk.set(0, String.valueOf(i));
+            pks = JCoReTools.addToFSArray(pks, pk);
+        }
+        rowBatch.setIdentifiers(pks);
+        rowBatch.setCostosysConfiguration(costosysConfig);
+        rowBatch.addToIndexes();
+        return jCas;
+    }
 
+    @Test
+    public void testHashComparison() throws Exception {
+        JCas jCas = prepareCas();
+        TypeSystemDescription tsDesc = TypeSystemDescriptionFactory.createTypeSystemDescription("de.julielab.jcore.types.jcore-document-meta-types", "de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types","de.julielab.jcore.types.extensions.jcore-document-meta-extension-types", "de.julielab.jcore.types.jcore-casflow-types");
+        AnalysisEngine engine = AnalysisEngineFactory.createEngine(XMLDBMultiplier.class,tsDesc,
+                XMLDBMultiplier.PARAM_MAPPING_FILE, Path.of("src", "test", "resources", "test-mappingfile.xml").toString(),
+                XMLDBMultiplier.PARAM_ADD_SHA_HASH, "documentText",
+                XMLDBMultiplier.PARAM_TABLE_DOCUMENT, TARGET_XMI_TABLE,
+                XMLDBMultiplier.PARAM_TABLE_DOCUMENT_SCHEMA, "xmi_text",
+                XMLDBMultiplier.PARAM_TO_VISIT_KEYS, "ThisIsTheVisitKey"
+                );
+        JCasIterator jCasIterator = engine.processAndOutputNewCASes(jCas);
+        List<String> documentTexts = new ArrayList<>();
+        while (jCasIterator.hasNext()) {
+            JCas newCas = jCasIterator.next();
+//            System.out.println(newCas.getTypeSystem());
+            Collection<ToVisit> select = JCasUtil.select(newCas, ToVisit.class);
+            System.out.println(select);
+            newCas.release();
+            break;
+        }
+    }
 }
diff --git a/jcore-xml-db-reader/src/test/resources/test-mappingfile.xml b/jcore-xml-db-reader/src/test/resources/test-mappingfile.xml
new file mode 100644
index 000000000..22af9d7cc
--- /dev/null
+++ b/jcore-xml-db-reader/src/test/resources/test-mappingfile.xml
@@ -0,0 +1,17 @@
+<mappings>
+    <documentText>
+        <partOfDocumentText id="0">
+            <xPath>/xml/text</xPath>
+        </partOfDocumentText>
+    </documentText>
+    <tsType>
+        <tsFullClassName>de.julielab.jcore.types.Header</tsFullClassName>
+        <tsFeature>
+            <tsFeatureName>
+                docId
+            </tsFeatureName>
+            <xPath>/xml/docid</xPath>
+            <tsFullClassName>java.lang.String</tsFullClassName>
+        </tsFeature>
+    </tsType>
+</mappings>
\ No newline at end of file

From be868d16274ab2b6112e27625056a422a9e8bfb0 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 7 Jun 2021 07:16:58 +0200
Subject: [PATCH 060/269] Finished the tests for the XMLDBMultiplier testing
 whether the ToVisit annotation addition for hash comparison is working right.

---
 .../jcore/reader/xml/XMLDBMultiplier.java     |  4 +-
 .../jcore/reader/xml/XMLDBMultiplierTest.java | 56 +++++++++++++++----
 jedis-parent/pom.xml                          |  2 +-
 3 files changed, 49 insertions(+), 13 deletions(-)

diff --git a/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java b/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java
index d6c1f7186..8cd4ce9b4 100644
--- a/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java
+++ b/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java
@@ -89,8 +89,8 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
         xmlMapper = initializer.getXmlMapper();
         initialized = false;
 
-        if (!(xmiStorageDataTable == null && xmiStorageDataTableSchema == null && documentItemToHash == null) && !(xmiStorageDataTable != null && xmiStorageDataTableSchema != null && documentItemToHash != null)) {
-            String errorMsg = String.format("From the parameters '%s', '%s' and '%s' some are specified and some aren't. To activate hash value comparison in order to add aggregate component keys for CAS visit, specify all those parameters. Otherwise, specify none.", PARAM_TABLE_DOCUMENT, PARAM_TABLE_DOCUMENT_SCHEMA, PARAM_ADD_SHA_HASH);
+        if (!(xmiStorageDataTable == null && xmiStorageDataTableSchema == null) && !(xmiStorageDataTable != null && xmiStorageDataTableSchema != null && documentItemToHash != null)) {
+            String errorMsg = String.format("From the parameters '%s' and '%s' some are specified and some aren't. To activate hash value comparison in order to add aggregate component keys for CAS visit, specify all those parameters. Otherwise, specify none.", PARAM_TABLE_DOCUMENT, PARAM_TABLE_DOCUMENT_SCHEMA);
             log.error(errorMsg);
             throw new ResourceInitializationException(new IllegalArgumentException(errorMsg));
         }
diff --git a/jcore-xml-db-reader/src/test/java/de/julielab/jcore/reader/xml/XMLDBMultiplierTest.java b/jcore-xml-db-reader/src/test/java/de/julielab/jcore/reader/xml/XMLDBMultiplierTest.java
index 75dc1659e..a56950c00 100644
--- a/jcore-xml-db-reader/src/test/java/de/julielab/jcore/reader/xml/XMLDBMultiplierTest.java
+++ b/jcore-xml-db-reader/src/test/java/de/julielab/jcore/reader/xml/XMLDBMultiplierTest.java
@@ -62,6 +62,14 @@ public static void setup() throws SQLException, UIMAException, IOException, Conf
         costosysConfig = DBTestUtils.createTestCostosysConfig("medline_2016_nozip", 1, postgres);
         new File(costosysConfig).deleteOnExit();
         try (CoStoSysConnection conn = dbc.obtainOrReserveConnection()) {
+            // We create two tables. One is the XML table the multiplier reads from and maps the contents to the JCas.
+            // The other is a simulation of an XMI table used to serialize CAS instances via the jcore-xmi-db-writer.
+            // We need that target table to test the hash value comparison mechanism: If a document does not exist
+            // in the target table or has a non-matching hash on its document text, proceed as normal.
+            // But if the hash matches, we want to reserve the possibility to skip most of the subsequent pipeline.
+            // For this, we could use the AnnotationDefinedFlowController from jcore-flow-controllers. This controller
+            // looks for annotations of the ToVisit type that specify which exact components in an aggregate should
+            // be applied to the CAS carrying the ToVisit annotation.
             prepareSourceXMLTable(dbc, conn);
             prepareTargetXMITable(dbc, conn);
         }
@@ -88,7 +96,7 @@ private static void prepareSourceXMLTable(DataBaseConnector dbc, CoStoSysConnect
 
     private static void prepareTargetXMITable(DataBaseConnector dbc, CoStoSysConnection conn) throws SQLException {
         // Note that the root is "xmi" and not "xml"
-        String xmlFmt = "<xmi><docid>%d</docid><text>This is document text number %d</text></xmi>";
+        String documentTextFmt = "This is document text number %d";
         dbc.createTable(TARGET_XMI_TABLE, "xmi_text", "Test table for hash comparison test.");
         dbc.assureColumnsExist(TARGET_XMI_TABLE, List.of(HASH_FIELD_NAME), "text");
         String sql = String.format("INSERT INTO %s (%s,%s,%s,%s,%s) VALUES (?,XMLPARSE(CONTENT ?),?,?,?)", TARGET_XMI_TABLE, DOCID_FIELD_NAME, BASE_DOCUMENT_FIELD_NAME, HASH_FIELD_NAME, MAX_XMI_ID_FIELD_NAME, SOFA_MAPPING_FIELD_NAME);
@@ -96,10 +104,14 @@ private static void prepareTargetXMITable(DataBaseConnector dbc, CoStoSysConnect
         // Note that we only add half of the documents compared to the source XML import. This way we test
         // if the code behaves right when the target document does not yet exist at all.
         for (int i = 0; i < 5; i++) {
-            String xml = String.format(xmlFmt, i, i);
+            String xml = String.format(documentTextFmt, i, i);
             ps.setString(1, String.valueOf(i));
             ps.setString(2, xml);
-            ps.setString(3, getHash(xml));
+            // For one document in the "target XMI" table we put in a wrong hash. Thus, this document should not trigger
+            // the "toVisit" mechanism.
+            if (i != 3)
+                ps.setString(3, getHash(xml));
+            else ps.setString(3, "someanotherhash");
             ps.setInt(4, 0);
             ps.setString(5, "dummy");
             ps.addBatch();
@@ -164,23 +176,47 @@ private JCas prepareCas() throws UIMAException {
     @Test
     public void testHashComparison() throws Exception {
         JCas jCas = prepareCas();
-        TypeSystemDescription tsDesc = TypeSystemDescriptionFactory.createTypeSystemDescription("de.julielab.jcore.types.jcore-document-meta-types", "de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types","de.julielab.jcore.types.extensions.jcore-document-meta-extension-types", "de.julielab.jcore.types.jcore-casflow-types");
-        AnalysisEngine engine = AnalysisEngineFactory.createEngine(XMLDBMultiplier.class,tsDesc,
+        TypeSystemDescription tsDesc = TypeSystemDescriptionFactory.createTypeSystemDescription("de.julielab.jcore.types.jcore-document-meta-types", "de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types", "de.julielab.jcore.types.extensions.jcore-document-meta-extension-types", "de.julielab.jcore.types.jcore-casflow-types");
+        AnalysisEngine engine = AnalysisEngineFactory.createEngine(XMLDBMultiplier.class, tsDesc,
                 XMLDBMultiplier.PARAM_MAPPING_FILE, Path.of("src", "test", "resources", "test-mappingfile.xml").toString(),
                 XMLDBMultiplier.PARAM_ADD_SHA_HASH, "documentText",
                 XMLDBMultiplier.PARAM_TABLE_DOCUMENT, TARGET_XMI_TABLE,
                 XMLDBMultiplier.PARAM_TABLE_DOCUMENT_SCHEMA, "xmi_text",
                 XMLDBMultiplier.PARAM_TO_VISIT_KEYS, "ThisIsTheVisitKey"
-                );
+        );
         JCasIterator jCasIterator = engine.processAndOutputNewCASes(jCas);
-        List<String> documentTexts = new ArrayList<>();
+        List<String> toVisitKeys = new ArrayList<>();
+        while (jCasIterator.hasNext()) {
+            JCas newCas = jCasIterator.next();
+            Collection<ToVisit> select = JCasUtil.select(newCas, ToVisit.class);
+            select.forEach(tv -> tv.getDelegateKeys().forEach(k -> toVisitKeys.add(k)));
+            newCas.release();
+        }
+        // There are 4 documents in the target table with the correct hash so we expect the delegate key 4 times
+        assertThat(toVisitKeys).containsExactly("ThisIsTheVisitKey", "ThisIsTheVisitKey", "ThisIsTheVisitKey", "ThisIsTheVisitKey");
+    }
+
+    @Test
+    public void testHashComparison2() throws Exception {
+        JCas jCas = prepareCas();
+        TypeSystemDescription tsDesc = TypeSystemDescriptionFactory.createTypeSystemDescription("de.julielab.jcore.types.jcore-document-meta-types", "de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types", "de.julielab.jcore.types.extensions.jcore-document-meta-extension-types", "de.julielab.jcore.types.jcore-casflow-types");
+        // In this test, we do not specify the keys to visit; the whole subsequent pipeline should be skipped.
+        // To indicate that, there should be ToVisit annotations but their delegate keys should be null.
+        AnalysisEngine engine = AnalysisEngineFactory.createEngine(XMLDBMultiplier.class, tsDesc,
+                XMLDBMultiplier.PARAM_MAPPING_FILE, Path.of("src", "test", "resources", "test-mappingfile.xml").toString(),
+                XMLDBMultiplier.PARAM_ADD_SHA_HASH, "documentText",
+                XMLDBMultiplier.PARAM_TABLE_DOCUMENT, TARGET_XMI_TABLE,
+                XMLDBMultiplier.PARAM_TABLE_DOCUMENT_SCHEMA, "xmi_text"
+        );
+        JCasIterator jCasIterator = engine.processAndOutputNewCASes(jCas);
+        List<ToVisit> emptyToVisitAnnotation = new ArrayList<>();
         while (jCasIterator.hasNext()) {
             JCas newCas = jCasIterator.next();
-//            System.out.println(newCas.getTypeSystem());
             Collection<ToVisit> select = JCasUtil.select(newCas, ToVisit.class);
-            System.out.println(select);
+            select.stream().filter(tv -> tv.getDelegateKeys() == null).forEach(emptyToVisitAnnotation::add);
             newCas.release();
-            break;
         }
+        // There are 4 documents in the target table with the correct hash so we expect 4 ToVisit annotations without delegate keys
+        assertThat(emptyToVisitAnnotation).hasSize(4);
     }
 }
diff --git a/jedis-parent/pom.xml b/jedis-parent/pom.xml
index 48d783b6d..226e35c36 100644
--- a/jedis-parent/pom.xml
+++ b/jedis-parent/pom.xml
@@ -22,7 +22,7 @@
             <dependency>
                 <groupId>de.julielab</groupId>
                 <artifactId>jcore-db-test-utilities</artifactId>
-                <version>2.5.1-SNAPSHOT</version>
+                <version>2.5.1</version>
             </dependency>
             <dependency>
                 <groupId>de.julielab</groupId>

From c5e1de6ba14b9b1570fd4dc75c644864ad76a5a3 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 7 Jun 2021 10:19:34 +0200
Subject: [PATCH 061/269] Minor change.

---
 jcore-flow-controllers/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jcore-flow-controllers/pom.xml b/jcore-flow-controllers/pom.xml
index d31933489..d17ecac74 100644
--- a/jcore-flow-controllers/pom.xml
+++ b/jcore-flow-controllers/pom.xml
@@ -44,7 +44,7 @@
         <dependency>
             <groupId>org.jetbrains</groupId>
             <artifactId>annotations</artifactId>
-            <version>RELEASE</version>
+            <version>21.0.1</version>
             <scope>compile</scope>
         </dependency>
         <dependency>

From 100a4120666d862bd412c211b4456b7d67792c59 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 8 Jun 2021 07:07:32 +0200
Subject: [PATCH 062/269] Fixing a lot of tests for jUnit 5 compatibility.

---
 jcore-ace-reader/pom.xml                      |   4 +-
 .../jcore/reader/ace/AceReaderTest.java       | 163 ++++++++--------
 .../jcore/reader/ace/data/out/PLACEHOLDER     |   0
 jcore-acronym-ae/pom.xml                      |   4 +-
 .../main/AcronymAnnotatorTest.java            |   8 +-
 jcore-acronym-writer/pom.xml                  |   4 +-
 jcore-annotation-adder-ae/pom.xml             |   4 +-
 .../AnnotationAdderAnnotatorTest.java         |   2 +-
 jcore-banner-ae/pom.xml                       |   4 +-
 .../src/main/java/banner/annotation/BEAT.java |   2 +-
 .../jcore/ae/banner/BANNERAnnotatorTest.java  |   4 +-
 .../jcore/ae/banner/ModelTrainTest.java       |   4 +-
 .../dataset/JCoReEntityDatasetTest.java       |   6 +-
 jcore-bc2gmformat-writer/pom.xml              |   4 +-
 .../bc2gmformat/BC2GMFormatWriterTest.java    |   6 +-
 jcore-biolemmatizer-ae/pom.xml                |   6 +-
 .../ae/biolemmatizer/BioLemmatizerTest.java   |   6 +-
 jcore-bionlpformat-consumer/pom.xml           |   4 +-
 .../bionlpformat/main/SegmentConsumer.java    |   3 -
 .../bionlpformat/main/EventConsumerTest.java  |  14 +-
 .../utils/DocumentWriterTest.java             |   6 +-
 .../bionlpformat/utils/EntityWriterTest.java  |  10 +-
 .../utils/EventTriggerWriterTest.java         |  10 +-
 .../bionlpformat/utils/EventWriterTest.java   |   6 +-
 .../bionlpformat/utils/ProteinWriterTest.java |  10 +-
 jcore-bionlpformat-reader/pom.xml             |   4 +-
 .../utils/AnnotationFileMapper.java           |   2 +-
 .../main/CoreferenceReadingTest.java          |   6 +-
 .../bionlp09event/main/EventReaderTest.java   |  14 +-
 .../utils/AbstractFileMapperTest.java         |  10 +-
 .../utils/AnnotationFileMapperTest.java       |  10 +-
 .../utils/OntoFormatReaderTest.java           |   2 +-
 jcore-biosem-ae/pom.xml                       |   4 +-
 .../jcore/ae/biosem/BioSemEventAnnotator.java |   2 +-
 .../ae/biosem/BioSemEventAnnotatorTest.java   |   8 +-
 jcore-conll-consumer/pom.xml                  |   4 +-
 .../cas2conll/test/ConllConsumerTest.java     |   4 +-
 jcore-coordination-baseline-ae/pom.xml        |   4 +-
 .../main/ConjunctAnnotatorTest.java           |  24 +--
 .../main/CoordinationAnnotatorTest.java       |  16 +-
 .../coordbaseline/main/EEEAnnotatorTest.java  |  17 +-
 .../main/EllipsisAnnotatorTest.java           |  17 +-
 jcore-cord19-reader/pom.xml                   |   4 +-
 .../cord19/Cord19MultiplierReaderTest.java    |   2 +-
 .../jcore/reader/cord19/JsonFormatTest.java   |   2 +-
 jcore-coreference-writer/pom.xml              |   4 +-
 .../coreference/CoreferenceWriter.java        |   6 -
 jcore-cpe-db-runner/pom.xml                   |   4 +-
 jcore-ct-reader/pom.xml                       |   4 +-
 .../reader/ct/ClinicalTrialsReaderTest.java   |   4 +-
 .../reader/db/DBMultiplierReaderTest.java     |   8 +-
 .../jcore/reader/db/DBMultiplierTest.java     |   8 +-
 .../jcore/reader/db/DBReaderTest.java         |   8 +-
 jcore-descriptor-creator/pom.xml              |   4 +-
 .../jcore/misc/DescriptorCreatorTest.java     |  21 +-
 .../jcore/reader/testreader/TestReader.java   |   4 +-
 jcore-dta-reader/pom.xml                      |   6 +-
 .../jcore/reader/dta/DTAFileReaderTest.java   |   4 +-
 .../jcore/reader/dta/util/DTAUtilsTest.java   |   6 +-
 jcore-ec-code-ae/pom.xml                      |   4 +-
 .../jcore/ae/ec/ECNumberAnnotatorTest.java    |   4 +-
 jcore-elasticsearch-consumer/pom.xml          |   8 +
 .../es/AbstractFieldGeneratorTest.java        |   2 +-
 .../consumer/es/ElasticSearchConsumerIT.java  |  11 +-
 jcore-embedding-writer/pom.xml                |   8 +-
 .../jcore/consumer/ew/DecoderTest.java        |   2 +-
 .../consumer/ew/EmbeddingWriterTest.java      |   2 +-
 jcore-event-flattener-ae/pom.xml              | 102 +++++-----
 .../julielab/jules/ae/EventFlattenerTest.java | 182 +++++++++---------
 jcore-feature-value-replacement-ae/pom.xml    |   4 +-
 .../FeatureValueReplacementAnnotatorTest.java |   4 +-
 jcore-file-reader/pom.xml                     |   4 +-
 .../reader/file/main/FileReaderTest.java      |  10 +-
 .../jcore/ae/flairner/NerTaggingResponse.java |   1 -
 .../jcore/ae/flairner/PythonConnector.java    |   1 -
 .../src/test/resources/1681975.xmi            |   6 +-
 jcore-flair-token-embedding-ae/pom.xml        |   4 +-
 .../jcore/ae/fte/python/getEmbeddingScript.py |  14 +-
 .../jcore/ae/fte/EmbeddingScriptTest.java     |  22 +--
 .../fte/FlairTokenEmbeddingAnnotatorTest.java |  11 +-
 jcore-flow-controllers/pom.xml                |   1 -
 jcore-iexml-consumer/pom.xml                  |   4 +-
 jcore-iexml-reader/pom.xml                    |   4 +-
 jcore-ign-reader/pom.xml                      |   4 +-
 .../jcore/reader/ign/IGNReaderTest.java       |   4 +-
 jcore-iob-consumer/pom.xml                    |   4 +-
 .../consumer/cas2iob/utils/UIMAUtils.java     |   1 +
 .../cas2iob/main/ToIOBConsumerTest.java       |   2 +-
 jcore-jnet-ae/pom.xml                         |   4 +-
 .../ae/jnet/cli/JNETApplicationTest.java      |   4 +-
 .../jcore/ae/jnet/tagger/NETaggerTest.java    |   2 +-
 .../jcore/ae/jnet/uima/MiniTestapp.java       |   8 +-
 jcore-jpos-ae/pom.xml                         |   4 +-
 .../ae/jpos/postagger/POSAnnotatorTest.java   |   4 +-
 jcore-jsbd-ae/pom.xml                         |   9 +-
 .../jcore/ae/jsbd/Abstract2UnitPipeTest.java  |   6 +-
 .../jcore/ae/jsbd/SentenceSplitterTest.java   |   6 +-
 .../ae/jsbd/main/SentenceAnnotatorTest.java   |   6 +-
 jcore-jtbd-ae/pom.xml                         |   4 +-
 .../julielab/jcore/ae/jtbd/TokenizerTest.java |   4 +-
 .../ae/jtbd/main/TokenAnnotatorTest.java      |   2 +-
 .../pom.xml                                   |   4 +-
 .../EntityEvaluatorConsumerTest.java          |   6 +-
 jcore-likelihood-assignment-ae/pom.xml        |   4 +-
 .../LikelihoodAssignmentAnnotatorTest.java    |   4 +-
 jcore-likelihood-detection-ae/pom.xml         |   4 +-
 .../LikelihoodDetectionAnnotatorTest.java     |   4 +-
 jcore-line-multiplier/pom.xml                 |   4 +-
 .../multiplier/line/LineMultiplierTest.java   |   4 +-
 .../multiplier/line/LineMultiplierTest.class  | Bin 3011 -> 3039 bytes
 jcore-lingpipe-porterstemmer-ae/pom.xml       |   4 +-
 .../LingpipePorterstemmerAnnotatorTest.java   |   6 +-
 jcore-lingpipegazetteer-ae/pom.xml            |   5 +-
 .../StringNormalizerForChunkingTest.java      |  92 ++++-----
 .../desc/ConfigurableDescriptorTest.java      |   4 +-
 .../uima/GazetteerAnnotatorTest.java          |  64 +++---
 .../uima/OverlappingChunkTest.java            |   4 +-
 jcore-lingscope-ae/pom.xml                    |   4 +-
 .../ae/lingscope/LingscopePosAnnotator.java   |   2 +-
 .../julielab/LingscopePosAnnotatorTest.java   |   2 +-
 jcore-linnaeus-species-ae/pom.xml             |   4 +-
 .../ae/linnaeus/LinnaeusMatcherProvider.java  |   2 -
 .../LinnaeusSpeciesAnnotatorTest.java         |   7 +-
 jcore-medxn-ae/pom.xml                        |   4 +-
 .../jcore/ae/medxn/MedAttrAnnotatorTest.java  |  27 +--
 jcore-msdoc-reader/pom.xml                    |   4 +-
 .../reader/msdoc/main/MSdocReaderTest.java    |  14 +-
 jcore-mstparser-ae/pom.xml                    |   4 +-
 .../ae/mstparser/main/MSTParserTest.java      |  20 +-
 jcore-muc7-reader/pom.xml                     |   4 +-
 jcore-muc7-reader/scripts/muc7_SGML2XML.py    |   2 -
 .../jcore/reader/muc7/MUC7ReaderTest.java     |  36 ++--
 jcore-mutationfinder-ae/pom.xml               |  13 +-
 .../mutationfinder/MutationAnnotatorTest.java |  11 +-
 .../nlp/ei/mutation/MutationFinderTest.java   |  37 +++-
 .../ccp/nlp/ei/mutation/MutationTest.java     |   9 +-
 .../nlp/ei/mutation/PointMutationTest.java    |  23 ++-
 jcore-neo4j-relations-consumer/pom.xml        |   5 +
 ...Neo4jRelationsConsumerIntegrationTest.java |   2 +-
 .../Neo4jRelationsConsumerTest.java           |   2 +-
 jcore-opennlp-chunk-ae/pom.xml                |   4 +-
 .../ae/opennlp/chunk/ChunkAnnotatorTest.java  |  15 +-
 jcore-opennlp-parser-ae/pom.xml               |   4 +-
 .../main/ParseAnnotatorTest.java              |  14 +-
 jcore-opennlp-postag-ae/pom.xml               |   4 +-
 .../opennlp/postag/PosTagAnnotatorTest.java   |   6 +-
 .../opennlp/postag/PosTagDictCreatorTest.java |   6 +-
 jcore-opennlp-sentence-ae/pom.xml             |   4 +-
 .../ae/jsentsplit/SentenceAnnotatorTest.java  |  11 +-
 jcore-opennlp-token-ae/pom.xml                |   4 +-
 .../ae/opennlp/token/TokenAnnotatorTest.java  |   5 +-
 jcore-pmc-reader/pom.xml                      |   4 +-
 .../jcore/reader/pmc/NXMLURIIteratorTest.java |   4 +-
 .../jcore/reader/pmc/PMCMultiplierTest.java   |   2 +-
 .../jcore/reader/pmc/PMCReaderTest.java       |   8 +-
 .../pmc/parser/ContribGroupParserTest.java    |   6 +-
 .../reader/pmc/parser/ContribParserTest.java  |   4 +-
 .../reader/pmc/parser/FrontParserTest.java    |   4 +-
 .../pmc/parser/NxmlElementParserTest.java     |   4 +-
 .../reader/pmc/parser/SectionParserTest.java  |   4 +-
 .../reader/pmc/parser/XRefParserTest.java     |   6 +-
 jcore-ppd-writer/pom.xml                      |   4 +-
 .../jcore/consumer/ppd/PPDWriterTest.java     |   6 +-
 jcore-pubtator-reader/pom.xml                 |   4 +-
 .../reader/pubtator/PubtatorReaderTest.java   |   6 +-
 jcore-stanford-lemmatizer-ae/pom.xml          |   4 +-
 .../lemma/StanfordLemmatizerTest.java         |   7 +-
 jcore-topic-indexing-ae/pom.xml               |   4 +-
 .../julielab/jcore/ae/TopicIndexingTest.java  |   6 +-
 jcore-topics-writer/pom.xml                   |   4 +-
 .../consumer/topics/TopicsWriterTest.java     |  10 +-
 jcore-txt-consumer/pom.xml                    |   4 +-
 .../txt/SentenceTokenConsumerTest.java        |   4 +-
 jcore-utilities/pom.xml                       |   4 +-
 .../utility/JCoReAnnotationToolsTest.java     |  36 ++--
 .../JCoReCondensedDocumentTextTest.java       |   4 +-
 .../utility/JCoReFSListIteratorTest.java      |   4 +-
 .../jcore/utility/JCoReFeaturePathTest.java   |  22 +--
 .../jcore/utility/JCoReToolsTest.java         |  22 +--
 .../jcore/utility/index/ComparatorsTest.java  |   6 +-
 .../index/JCoReCoverAnnotationIndexTest.java  |   6 +-
 .../index/JCoReMapAnnotationIndexTest.java    |   4 +-
 .../JCoReOverlapAnnotationIndexTest.java      |   6 +-
 .../index/JCoReSetAnnotationIndexTest.java    |   6 +-
 .../JCoReTreeMapAnnotationIndexTest.java      |   4 +-
 .../utility/index/TermGeneratorsTest.java     |   6 +-
 .../jcore/reader/xmi/CasPopulator.java        |   1 -
 .../XmiDBMultiplierDifferentNsSchemaTest.java |   8 +-
 .../jcore/reader/xmi/XmiDBMultiplierTest.java |   8 +-
 .../xmi/XmiDBReaderBinaryFormatTest.java      |  16 +-
 .../xmi/XmiDBReaderDifferentNsSchemaTest.java |  20 +-
 .../xmi/XmiDBReaderGzippedDataTest.java       |   8 +-
 .../XmiDBReaderMonolithicDocumentsTest.java   |   8 +-
 .../jcore/reader/xmi/XmiDBReaderTest.java     |  12 +-
 jcore-xmi-db-writer/pom.xml                   |   6 +
 .../xmi/XmiDBWriterBinaryFormatTest.java      |  20 +-
 .../XmiDBWriterMonolithicDocumentTest.java    |  22 +--
 .../jcore/consumer/xmi/XmiDBWriterTest.java   |  10 +-
 jcore-xmi-reader/pom.xml                      |   4 +-
 .../reader/xmi/XmiCollectionReaderTest.java   |   4 +-
 jcore-xmi-writer/pom.xml                      |   4 +-
 .../consumer/xmi/CasToXmiConsumerTest.java    |  12 +-
 jcore-xml-db-reader/pom.xml                   |   1 -
 jcore-xml-mapper/pom.xml                      |   4 +-
 .../xmlmapper/genericTypes/TypeTemplate.java  |   8 +-
 .../xmlmapper/mapper/DocumentTextHandler.java |  12 +-
 .../typeBuilder/StandardTypeBuilder.java      |   7 +-
 .../jcore/reader/xmlmapper/EncodingTest.java  |   4 +-
 .../jcore/reader/xmlmapper/XMLMapperTest.java |   4 +-
 .../jcore/reader/xmlmapper/XMLReaderTest.java | 110 +++++------
 jcore-xml-reader/pom.xml                      |   4 +-
 .../jcore/reader/XMLMultiplierReaderTest.java |  14 +-
 .../jcore/reader/XMLMultiplierTest.java       |   6 +-
 .../julielab/jcore/reader/XMLReaderTest.java  |  82 ++++----
 214 files changed, 1144 insertions(+), 1076 deletions(-)
 create mode 100644 jcore-ace-reader/src/test/resources/de/julielab/jcore/reader/ace/data/out/PLACEHOLDER

diff --git a/jcore-ace-reader/pom.xml b/jcore-ace-reader/pom.xml
index fdf961ad1..b983bbb5a 100644
--- a/jcore-ace-reader/pom.xml
+++ b/jcore-ace-reader/pom.xml
@@ -32,8 +32,8 @@
             <version>${jcore-types-version}</version>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
diff --git a/jcore-ace-reader/src/test/java/de/julielab/jcore/reader/ace/AceReaderTest.java b/jcore-ace-reader/src/test/java/de/julielab/jcore/reader/ace/AceReaderTest.java
index 465a384f7..b6bd606e4 100644
--- a/jcore-ace-reader/src/test/java/de/julielab/jcore/reader/ace/AceReaderTest.java
+++ b/jcore-ace-reader/src/test/java/de/julielab/jcore/reader/ace/AceReaderTest.java
@@ -21,7 +21,6 @@
 import de.julielab.jcore.types.ArgumentMention;
 import de.julielab.jcore.types.EntityMention;
 import de.julielab.jcore.types.ace.*;
-import junit.framework.TestCase;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.analysis_engine.metadata.AnalysisEngineMetaData;
 import org.apache.uima.cas.CAS;
@@ -38,6 +37,8 @@
 import org.apache.uima.util.InvalidXMLException;
 import org.apache.uima.util.XMLInputSource;
 import org.apache.uima.util.XMLSerializer;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
 import org.w3c.dom.Node;
 import org.xml.sax.SAXException;
 
@@ -50,7 +51,9 @@
 import java.util.ArrayList;
 import java.util.Iterator;
 
-public class AceReaderTest extends TestCase {
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class AceReaderTest  {
 	/**
 	 * Path to the MedlineReader descriptor
 	 */
@@ -65,47 +68,46 @@ public class AceReaderTest extends TestCase {
 	/**
 	 * Object to be tested
 	 */
-	private CollectionReader aceReader;
+	private static CollectionReader aceReader;
 
 	/**
 	 * Auxiliary collection reader
 	 */
-	private CollectionReader testReader;
+	private static CollectionReader testReader;
 
 	/**
 	 * CAS array list with CAS objects that where processed by the aceReader
 	 */
-	private ArrayList<CAS> casArrayList = new ArrayList<CAS>();
+	private static ArrayList<CAS> casArrayList = new ArrayList<CAS>();
 
 	/**
 	 * Auxiliary CAS objects
 	 */
-	private CAS aceReaderCas;
+	private static CAS aceReaderCas;
 
-	private CAS testReaderCas;
+	private static CAS testReaderCas;
 
-	private JCas aceReaderJCas;
+	private static JCas aceReaderJCas;
 
-	private JCas testReaderJCas;
+	private static JCas testReaderJCas;
 
-	LOC entity1_1;
+	static LOC entity1_1;
 
-	LOC entity1_2;
+	static LOC entity1_2;
 
-	GPE entity2_1;
+	static GPE entity2_1;
 
-	GPE entity2_2;
+	static GPE entity2_2;
 
-	GPE entity2_3;
+	static GPE entity2_3;
 
-	GPE entity2_4;
+	static GPE entity2_4;
 
 	/*----------------------------------------------------------------------------------------------*/
-	@Override
-	protected void setUp() throws Exception {
+	@BeforeAll
+	protected static void setUp() throws Exception {
 		aceReader = getCollectionReader(ACE_READER_DESCRIPTOR);
 		processAllCases();
-		super.setUp();
 
 		System.out.println("ALL CASes were processed");
 	} // of setUp
@@ -118,7 +120,7 @@ protected void setUp() throws Exception {
 	 * @throws SAXException
 	 * @throws ParserConfigurationException
 	 */
-	private void processAllCases() throws CASException, SAXException, ParserConfigurationException {
+	private static void processAllCases() throws CASException, SAXException, ParserConfigurationException {
 
 		try {
 			while (aceReader.hasNext()) {
@@ -157,13 +159,13 @@ private void processAllCases() throws CASException, SAXException, ParserConfigur
 	} // of processAllCases
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void compareCASes() {
-		assertTrue("Invalid source file attributes!", checkSourceFile());
-		assertTrue("Invalid generated Jules Components!", checkGeneratedJulesComponents());
+	private static void compareCASes() {
+		assertTrue(checkSourceFile(), "Invalid source file attributes!");
+		assertTrue(checkGeneratedJulesComponents(), "Invalid generated Jules Components!");
 	} // compareCASes
 
 	/*----------------------------------------------------------------------------------------------*/
-	private boolean checkGeneratedJulesComponents() {
+	private static boolean checkGeneratedJulesComponents() {
 		System.out.println("CALL checkGeneratedJulesComponents()");
 		boolean julesComponentsEqual = true;
 
@@ -185,7 +187,7 @@ private boolean checkGeneratedJulesComponents() {
 	} // checkGeneratedJulesComponents
 
 	/*----------------------------------------------------------------------------------------------*/
-	private boolean checkJulesEntities() {
+	private static boolean checkJulesEntities() {
 		System.out.println("CALL checkJulesEntities()");
 		boolean julesEntityEqual = true;
 
@@ -237,7 +239,7 @@ private boolean checkJulesEntities() {
 	} // of checkJulesEntities
 
 	/*----------------------------------------------------------------------------------------------*/
-	private boolean checkJulesRelations() {
+	private static boolean checkJulesRelations() {
 		System.out.println("CALL checkJulesRelations()");
 		boolean juleRelationEqual = true;
 
@@ -286,8 +288,8 @@ private boolean checkJulesRelations() {
 	} // of checkJulesRelations
 
 	/*----------------------------------------------------------------------------------------------*/
-	private boolean checkJulesRelationArguments(de.julielab.jcore.types.RelationMention aceReaderRelation,
-			de.julielab.jcore.types.RelationMention testReaderRelation) {
+	private static boolean checkJulesRelationArguments(de.julielab.jcore.types.RelationMention aceReaderRelation,
+													   de.julielab.jcore.types.RelationMention testReaderRelation) {
 		System.out.println("CALL checkJulesRelationArguments()");
 		boolean julesRelationArgumentEqual = true;
 
@@ -449,7 +451,7 @@ private boolean checkJulesEventArguments(de.julielab.jcore.types.EventMention ac
 	} // of checkJulesEventArguments
 
 	/*----------------------------------------------------------------------------------------------*/
-	private boolean checkSourceFile() {
+	private static boolean checkSourceFile() {
 		boolean sourceFileEqual = true;
 
 		Iterator aceReaderIterator = getTypeIterator(aceReaderCas, de.julielab.jcore.types.ace.SourceFile.type);
@@ -499,7 +501,7 @@ private boolean checkSourceFile() {
 	} // checkSourceFile
 
 	/*----------------------------------------------------------------------------------------------*/
-	private boolean checkDocument() {
+	private static boolean checkDocument() {
 		boolean documentEqual = true;
 
 		Iterator aceReaderIterator = getTypeIterator(aceReaderCas, de.julielab.jcore.types.ace.Document.type);
@@ -568,7 +570,7 @@ private boolean checkDocument() {
 	} // of checkDocument
 
 	/*----------------------------------------------------------------------------------------------*/
-	private boolean checkEvents(Document aceReaderDocument, Document testReaderDocument) {
+	private static boolean checkEvents(Document aceReaderDocument, Document testReaderDocument) {
 		System.out.println("CALL checkEvents()");
 		boolean eventEqual = true;
 
@@ -641,7 +643,7 @@ private boolean checkEvents(Document aceReaderDocument, Document testReaderDocum
 	} // of checkEvents
 
 	/*----------------------------------------------------------------------------------------------*/
-	private boolean checkEventMentions(Event aceReaderEvent, Event testReaderEvent) {
+	private static boolean checkEventMentions(Event aceReaderEvent, Event testReaderEvent) {
 		boolean eventMentionEqual = true;
 
 		FSArray aceReaderEventMentionFSArray = aceReaderEvent.getMentions();
@@ -703,7 +705,7 @@ private boolean checkEventMentions(Event aceReaderEvent, Event testReaderEvent)
 	} // checkEventMentions
 
 	/*----------------------------------------------------------------------------------------------*/
-	private boolean checkEventMentionArguments(EventMention aceReaderEventMention, EventMention testReaderEventMention) {
+	private static boolean checkEventMentionArguments(EventMention aceReaderEventMention, EventMention testReaderEventMention) {
 		boolean eventMentionArgumentEqual = true;
 
 		FSArray aceReaderEventMentionArgumentFSArray = aceReaderEventMention.getArguments();
@@ -740,7 +742,7 @@ private boolean checkEventMentionArguments(EventMention aceReaderEventMention, E
 	} // of checkEventMentionArguments
 
 	/*----------------------------------------------------------------------------------------------*/
-	private boolean checkEventArguments(Event aceReaderEvent, Event testReaderEvent) {
+	private static boolean checkEventArguments(Event aceReaderEvent, Event testReaderEvent) {
 		boolean eventArgumentEqual = true;
 
 		FSArray aceReaderEventArgumentFSArray = aceReaderEvent.getArguments();
@@ -767,7 +769,7 @@ private boolean checkEventArguments(Event aceReaderEvent, Event testReaderEvent)
 	} // of checkEventArguments
 
 	/*----------------------------------------------------------------------------------------------*/
-	private boolean checkRelations(Document aceReaderDocument, Document testReaderDocument) {
+	private static boolean checkRelations(Document aceReaderDocument, Document testReaderDocument) {
 		boolean relationEqual = true;
 
 		FSArray aceReaderRelationFSArray = aceReaderDocument.getRelations();
@@ -830,7 +832,7 @@ private boolean checkRelations(Document aceReaderDocument, Document testReaderDo
 	} // of checkRelations
 
 	/*----------------------------------------------------------------------------------------------*/
-	private boolean checkRelationMentions(Relation aceReaderRelation, Relation testReaderRelation) {
+	private static boolean checkRelationMentions(Relation aceReaderRelation, Relation testReaderRelation) {
 		boolean relationMentionEqual = true;
 
 		FSArray aceReaderRelationMentionFSArray = aceReaderRelation.getMentions();
@@ -885,8 +887,8 @@ private boolean checkRelationMentions(Relation aceReaderRelation, Relation testR
 	} // checkRelationMentions
 
 	/*----------------------------------------------------------------------------------------------*/
-	private boolean checkRelationMentionArguments(RelationMention aceReaderRelationMention,
-			RelationMention testReaderRelationMention) {
+	private static boolean checkRelationMentionArguments(RelationMention aceReaderRelationMention,
+														 RelationMention testReaderRelationMention) {
 		boolean relationMentionArgumentEqual = true;
 
 		FSArray aceReaderRelationMentionArgumentFSArray = aceReaderRelationMention.getArguments();
@@ -925,7 +927,7 @@ private boolean checkRelationMentionArguments(RelationMention aceReaderRelationM
 	}
 
 	/*----------------------------------------------------------------------------------------------*/
-	private boolean checkRelationArguments(Relation aceReaderRelation, Relation testReaderRelation) {
+	private static boolean checkRelationArguments(Relation aceReaderRelation, Relation testReaderRelation) {
 		boolean relationArgumentEqual = true;
 
 		FSArray aceReaderRelationArgumentFSArray = aceReaderRelation.getArguments();
@@ -952,7 +954,7 @@ private boolean checkRelationArguments(Relation aceReaderRelation, Relation test
 	} // checkRelationArguments
 
 	/*----------------------------------------------------------------------------------------------*/
-	private boolean checkTimex2(Document aceReaderDocument, Document testReaderDocument) {
+	private static boolean checkTimex2(Document aceReaderDocument, Document testReaderDocument) {
 		boolean timex2Equal = true;
 
 		FSArray aceReaderTimex2FSArray = aceReaderDocument.getTimex2();
@@ -985,7 +987,7 @@ private boolean checkTimex2(Document aceReaderDocument, Document testReaderDocum
 	} // checkTimex2
 
 	/*----------------------------------------------------------------------------------------------*/
-	private boolean checkTimex2Mentions(Timex2 aceReaderTimex2, Timex2 testReaderTimex2) {
+	private static boolean checkTimex2Mentions(Timex2 aceReaderTimex2, Timex2 testReaderTimex2) {
 		boolean timex2MentionEqual = true;
 
 		FSArray aceReaderTimex2MentionFSArray = aceReaderTimex2.getMentions();
@@ -1017,7 +1019,7 @@ private boolean checkTimex2Mentions(Timex2 aceReaderTimex2, Timex2 testReaderTim
 	} // of checkTimex2Mentions
 
 	/*----------------------------------------------------------------------------------------------*/
-	private boolean checkValues(Document aceReaderDocument, Document testReaderDocument) {
+	private static boolean checkValues(Document aceReaderDocument, Document testReaderDocument) {
 		boolean valueEqual = true;
 
 		FSArray aceReaderValueFSArray = aceReaderDocument.getValues();
@@ -1060,7 +1062,7 @@ private boolean checkValues(Document aceReaderDocument, Document testReaderDocum
 	} // of checkValues
 
 	/*----------------------------------------------------------------------------------------------*/
-	private boolean checkValueMentions(Value aceReaderValue, Value testReaderValue) {
+	private static boolean checkValueMentions(Value aceReaderValue, Value testReaderValue) {
 		boolean valueMentionEqual = true;
 
 		FSArray aceReaderValueMentionFSArray = aceReaderValue.getMentions();
@@ -1093,7 +1095,7 @@ private boolean checkValueMentions(Value aceReaderValue, Value testReaderValue)
 	} // of checkValueMentions
 
 	/*----------------------------------------------------------------------------------------------*/
-	private boolean checkEntities() {
+	private static boolean checkEntities() {
 		boolean entityEqual = true;
 
 		Iterator aceReaderIterator = getTypeIterator(aceReaderCas, de.julielab.jcore.types.ace.Entity.type);
@@ -1176,7 +1178,7 @@ private boolean checkEntities() {
 	} // checkEntities
 
 	/*----------------------------------------------------------------------------------------------*/
-	private boolean checkEntityAttributes(Entity aceReaderEntity, Entity testReaderEntity) {
+	private static boolean checkEntityAttributes(Entity aceReaderEntity, Entity testReaderEntity) {
 		boolean entityAttributeEqual = true;
 		FSArray aceReaderEntityAttributeFSArray = aceReaderEntity.getEntity_attributes();
 		FSArray testReaderEntityAttributeFSArray = testReaderEntity.getEntity_attributes();
@@ -1208,8 +1210,8 @@ private boolean checkEntityAttributes(Entity aceReaderEntity, Entity testReaderE
 	} // of checkEntityAttributes
 
 	/*----------------------------------------------------------------------------------------------*/
-	private boolean checkEntityAttributesNames(EntityAttribute aceReaderEntityAttribute,
-			EntityAttribute testReaderEntityAttribute) {
+	private static boolean checkEntityAttributesNames(EntityAttribute aceReaderEntityAttribute,
+													  EntityAttribute testReaderEntityAttribute) {
 		boolean entityAttributesNamesEqual = true;
 		FSArray aceReaderEntityAttributesNamesFSArray = aceReaderEntityAttribute.getNames();
 		FSArray testReaderEntityAttributesNamesFSArray = testReaderEntityAttribute.getNames();
@@ -1241,7 +1243,7 @@ private boolean checkEntityAttributesNames(EntityAttribute aceReaderEntityAttrib
 	} // checkEntityAttributesNames
 
 	/*----------------------------------------------------------------------------------------------*/
-	private boolean checkEntityMentions(Entity aceReaderEntity, Entity testReaderEntity) {
+	private static boolean checkEntityMentions(Entity aceReaderEntity, Entity testReaderEntity) {
 		boolean entityMentionEqual = true;
 		FSArray aceReaderEntityMentionFSArray = aceReaderEntity.getEntity_mentions();
 		FSArray testReaderEntityMentionFSArray = testReaderEntity.getEntity_mentions();
@@ -1309,7 +1311,7 @@ private boolean checkEntityMentions(Entity aceReaderEntity, Entity testReaderEnt
 	} // of checkEntityMentions
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void buildSourceFile(JCas jcas) throws SAXException, IOException, ParserConfigurationException {
+	private static void buildSourceFile(JCas jcas) throws SAXException, IOException, ParserConfigurationException {
 		de.julielab.jcore.types.ace.SourceFile sourceFile = new de.julielab.jcore.types.ace.SourceFile(jcas);
 
 		sourceFile.setUri("XIN_ENG_20030624.0085.sgm");
@@ -1329,14 +1331,14 @@ private void buildSourceFile(JCas jcas) throws SAXException, IOException, Parser
 	} // buildSourceFile
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void setDocumentText(CAS testReaderCas2, org.w3c.dom.Document sgmDomDocument) {
+	private static void setDocumentText(CAS testReaderCas2, org.w3c.dom.Document sgmDomDocument) {
 		Node documentNode = sgmDomDocument.getDocumentElement();
 		String documentText = documentNode.getTextContent();
 		testReaderCas2.setDocumentText(documentText);
 	} // of setDocumentText
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void buildDocument(JCas jcas, SourceFile sourceFile) {
+	private static void buildDocument(JCas jcas, SourceFile sourceFile) {
 		de.julielab.jcore.types.ace.Document document = new de.julielab.jcore.types.ace.Document(jcas);
 		document.setDocid("XIN_ENG_20030624.0085");
 		buildEntities(jcas, document);
@@ -1401,7 +1403,7 @@ private void buildJulesEventArgs(JCas jcas, Transaction event1) {
 	} // buildJulesEventArgs
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void buildJulesRelations(JCas jcas, Document document) {
+	private static void buildJulesRelations(JCas jcas, Document document) {
 		System.out.println("CALL buildJulesRelations()");
 		PART_WHOLE relation1_1 = new PART_WHOLE(jcas);
 		relation1_1.setBegin(543);
@@ -1490,7 +1492,7 @@ private void buildJulesRelations(JCas jcas, Document document) {
 	} // of buildJulesRelations
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void buildJulesEntities(JCas jcas, Document document) {
+	private static void buildJulesEntities(JCas jcas, Document document) {
 		System.out.println("CALL buildJulesEntities()");
 
 		entity1_1 = new LOC(jcas);
@@ -1562,7 +1564,7 @@ private void buildJulesEntities(JCas jcas, Document document) {
 	} // of buildJulesEntities
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void buildEvents(JCas jcas, Document document) {
+	private static void buildEvents(JCas jcas, Document document) {
 		de.julielab.jcore.types.ace.Event event = new de.julielab.jcore.types.ace.Event(jcas);
 
 		event.setGenericity("Specific");
@@ -1583,7 +1585,7 @@ private void buildEvents(JCas jcas, Document document) {
 	} // of buildEvents
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void buildEventMentions(JCas jcas, Event event) {
+	private static void buildEventMentions(JCas jcas, Event event) {
 		de.julielab.jcore.types.ace.EventMention eventMention = new de.julielab.jcore.types.ace.EventMention(jcas);
 		eventMention.setId("XIN_ENG_20030405.0080-EV2-1");
 		eventMention.setBegin(625);
@@ -1612,7 +1614,7 @@ private void buildEventMentions(JCas jcas, Event event) {
 	} // of buildEventMentions
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void buildEventMentionArguments(JCas jcas, EventMention eventMention) {
+	private static void buildEventMentionArguments(JCas jcas, EventMention eventMention) {
 		de.julielab.jcore.types.ace.EventMentionArgument eventMentionArgument1 = new de.julielab.jcore.types.ace.EventMentionArgument(
 				jcas);
 		eventMentionArgument1.setAce_role("Recipient");
@@ -1637,7 +1639,7 @@ private void buildEventMentionArguments(JCas jcas, EventMention eventMention) {
 	} // of buildEventMentionArguments
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void buildEventArguments(JCas jcas, Event event) {
+	private static void buildEventArguments(JCas jcas, Event event) {
 		de.julielab.jcore.types.ace.EventArgument eventArgument1 = new de.julielab.jcore.types.ace.EventArgument(jcas);
 		eventArgument1.setAce_role("Recipient");
 		eventArgument1.setRefid("XIN_ENG_20030405.0080-E1");
@@ -1656,7 +1658,7 @@ private void buildEventArguments(JCas jcas, Event event) {
 	} // of buildEventArguments
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void buildRelations(JCas jcas, Document document) {
+	private static void buildRelations(JCas jcas, Document document) {
 		de.julielab.jcore.types.ace.Relation relation1 = new de.julielab.jcore.types.ace.Relation(jcas);
 		relation1.setModality("Asserted");
 		relation1.setTense("Unspecified");
@@ -1685,7 +1687,7 @@ private void buildRelations(JCas jcas, Document document) {
 	} // of buildRelations
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void buildRelationMentions2(JCas jcas, Relation relation2) {
+	private static void buildRelationMentions2(JCas jcas, Relation relation2) {
 		de.julielab.jcore.types.ace.RelationMention relationMention2_1 = new de.julielab.jcore.types.ace.RelationMention(
 				jcas);
 		relationMention2_1.setLexical_condition("Preposition");
@@ -1714,7 +1716,7 @@ private void buildRelationMentions2(JCas jcas, Relation relation2) {
 	} // of buildRelationMentions2
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void buildRelationMentionArgument2_2(JCas jcas, RelationMention relationMention2_2) {
+	private static void buildRelationMentionArgument2_2(JCas jcas, RelationMention relationMention2_2) {
 		de.julielab.jcore.types.ace.RelationMentionArgument argument1 = new de.julielab.jcore.types.ace.RelationMentionArgument(
 				jcas);
 		argument1.setAce_role("Arg-2");
@@ -1739,7 +1741,7 @@ private void buildRelationMentionArgument2_2(JCas jcas, RelationMention relation
 	} // of buildRelationMentionArgument2_2
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void buildRelationMentionArguments2_1(JCas jcas, RelationMention relationMention1) {
+	private static void buildRelationMentionArguments2_1(JCas jcas, RelationMention relationMention1) {
 		de.julielab.jcore.types.ace.RelationMentionArgument argument1 = new de.julielab.jcore.types.ace.RelationMentionArgument(
 				jcas);
 		argument1.setAce_role("Arg-2");
@@ -1764,7 +1766,7 @@ private void buildRelationMentionArguments2_1(JCas jcas, RelationMention relatio
 	} // of buildRelationMentionArguments2_1
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void buildRelationArguments2(JCas jcas, Relation relation2) {
+	private static void buildRelationArguments2(JCas jcas, Relation relation2) {
 		de.julielab.jcore.types.ace.RelationArgument argument1 = new de.julielab.jcore.types.ace.RelationArgument(jcas);
 		argument1.setAce_role("Arg-2");
 		argument1.setRefid("XIN_ENG_20030624.0085-E1");
@@ -1782,7 +1784,7 @@ private void buildRelationArguments2(JCas jcas, Relation relation2) {
 	} // of buildRelationArguments2
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void buildRelationMentions1(JCas jcas, Relation relation) {
+	private static void buildRelationMentions1(JCas jcas, Relation relation) {
 		de.julielab.jcore.types.ace.RelationMention relationMention1 = new de.julielab.jcore.types.ace.RelationMention(
 				jcas);
 		relationMention1.setLexical_condition("Preposition");
@@ -1811,7 +1813,7 @@ private void buildRelationMentions1(JCas jcas, Relation relation) {
 	} // buildRelationMentions
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void buildRelationMentionArguments1_2(JCas jcas, RelationMention relationMention2) {
+	private static void buildRelationMentionArguments1_2(JCas jcas, RelationMention relationMention2) {
 		de.julielab.jcore.types.ace.RelationMentionArgument argument1 = new de.julielab.jcore.types.ace.RelationMentionArgument(
 				jcas);
 		argument1.setAce_role("Arg-1");
@@ -1836,7 +1838,7 @@ private void buildRelationMentionArguments1_2(JCas jcas, RelationMention relatio
 	} // buildRelationMentionArguments2
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void buildRelationMentionArguments1_1(JCas jcas, RelationMention relationMention1) {
+	private static void buildRelationMentionArguments1_1(JCas jcas, RelationMention relationMention1) {
 		de.julielab.jcore.types.ace.RelationMentionArgument argument1 = new de.julielab.jcore.types.ace.RelationMentionArgument(
 				jcas);
 		argument1.setAce_role("Arg-1");
@@ -1861,7 +1863,7 @@ private void buildRelationMentionArguments1_1(JCas jcas, RelationMention relatio
 	} // buildRelationMentionArguments1
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void buildRelationAgruments1(JCas jcas, Relation relation) {
+	private static void buildRelationAgruments1(JCas jcas, Relation relation) {
 		de.julielab.jcore.types.ace.RelationArgument argument1 = new de.julielab.jcore.types.ace.RelationArgument(jcas);
 		argument1.setAce_role("Arg-1");
 		argument1.setRefid("XIN_ENG_20030624.0085-E1");
@@ -1880,7 +1882,7 @@ private void buildRelationAgruments1(JCas jcas, Relation relation) {
 	} // buildRelationAgruments
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void buildTimex2(JCas jcas, Document document) {
+	private static void buildTimex2(JCas jcas, Document document) {
 		de.julielab.jcore.types.ace.Timex2 timex2_1 = new de.julielab.jcore.types.ace.Timex2(jcas);
 		timex2_1.setId("XIN_ENG_20030624.0085-T4");
 		buildTimex2Mentions1(jcas, timex2_1);
@@ -1897,7 +1899,7 @@ private void buildTimex2(JCas jcas, Document document) {
 	} // buildTimex2
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void buildTimex2Mentions2(JCas jcas, Timex2 timex2_2) {
+	private static void buildTimex2Mentions2(JCas jcas, Timex2 timex2_2) {
 		de.julielab.jcore.types.ace.Timex2Mention timex2Mention = new de.julielab.jcore.types.ace.Timex2Mention(jcas);
 		timex2Mention.setId("XIN_ENG_20030624.0085-T8-1");
 		timex2Mention.setBegin(1327);
@@ -1911,7 +1913,7 @@ private void buildTimex2Mentions2(JCas jcas, Timex2 timex2_2) {
 	} // buildTimex2Mentions2
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void buildTimex2Mentions1(JCas jcas, Timex2 timex2_1) {
+	private static void buildTimex2Mentions1(JCas jcas, Timex2 timex2_1) {
 		de.julielab.jcore.types.ace.Timex2Mention timex2Mention = new de.julielab.jcore.types.ace.Timex2Mention(jcas);
 		timex2Mention.setId("XIN_ENG_20030624.0085-T4-1");
 		timex2Mention.setBegin(327);
@@ -1925,7 +1927,7 @@ private void buildTimex2Mentions1(JCas jcas, Timex2 timex2_1) {
 	} // buildTimex2Mentions1
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void buildValues(JCas jcas, Document document) {
+	private static void buildValues(JCas jcas, Document document) {
 		de.julielab.jcore.types.ace.Value value1 = new de.julielab.jcore.types.ace.Value(jcas);
 		value1.setAce_type("Numeric");
 		value1.setAce_subtype("Money");
@@ -1948,7 +1950,7 @@ private void buildValues(JCas jcas, Document document) {
 	} // buildValues
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void buildValueMentuions2(JCas jcas, Value value2) {
+	private static void buildValueMentuions2(JCas jcas, Value value2) {
 		de.julielab.jcore.types.ace.ValueMention valueMention = new de.julielab.jcore.types.ace.ValueMention(jcas);
 		valueMention.setId("XIN_ENG_20030624.0085-V3-1");
 		valueMention.setBegin(1079);
@@ -1962,7 +1964,7 @@ private void buildValueMentuions2(JCas jcas, Value value2) {
 	} // buildValueMentuions2
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void buildValueMentions1(JCas jcas, Value value1) {
+	private static void buildValueMentions1(JCas jcas, Value value1) {
 		de.julielab.jcore.types.ace.ValueMention valueMention = new de.julielab.jcore.types.ace.ValueMention(jcas);
 		valueMention.setId("XIN_ENG_20030624.0085-V2-1");
 		valueMention.setBegin(826);
@@ -1976,7 +1978,7 @@ private void buildValueMentions1(JCas jcas, Value value1) {
 	} // buildValueMentions1
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void buildEntities(JCas jcas, de.julielab.jcore.types.ace.Document document) {
+	private static void buildEntities(JCas jcas, de.julielab.jcore.types.ace.Document document) {
 		Entity entity1 = new Entity(jcas);
 		entity1.setAce_class("USP");
 		entity1.setAce_type("LOC");
@@ -2003,14 +2005,14 @@ private void buildEntities(JCas jcas, de.julielab.jcore.types.ace.Document docum
 	} // of buildEntities
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void buildEntityAttributes1(JCas jcas, Entity entity1) {
+	private static void buildEntityAttributes1(JCas jcas, Entity entity1) {
 		FSArray entityAttributeFSArray = new FSArray(jcas, 0);
 		entityAttributeFSArray.addToIndexes();
 		entity1.setEntity_attributes(entityAttributeFSArray);
 	} // buildEntityAttributes1
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void buildEntityAttributes2(JCas jcas, Entity entity2) {
+	private static void buildEntityAttributes2(JCas jcas, Entity entity2) {
 		de.julielab.jcore.types.ace.EntityAttribute entityAttribute = new de.julielab.jcore.types.ace.EntityAttribute(
 				jcas);
 
@@ -2024,7 +2026,7 @@ private void buildEntityAttributes2(JCas jcas, Entity entity2) {
 	} // ofbuildEntityAttributes2
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void buildEntityAttributeNames(JCas jcas, de.julielab.jcore.types.ace.EntityAttribute entityAttribute) {
+	private static void buildEntityAttributeNames(JCas jcas, de.julielab.jcore.types.ace.EntityAttribute entityAttribute) {
 		FSArray nameFSArray = new FSArray(jcas, 4);
 
 		de.julielab.jcore.types.ace.Name entityAttributeName1 = new de.julielab.jcore.types.ace.Name(jcas);
@@ -2060,7 +2062,7 @@ private void buildEntityAttributeNames(JCas jcas, de.julielab.jcore.types.ace.En
 	} // buildEntityAttributeNames
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void buildEntityMentions1(JCas jcas, Entity entity) {
+	private static void buildEntityMentions1(JCas jcas, Entity entity) {
 		de.julielab.jcore.types.ace.EntityMention entityMention1 = new de.julielab.jcore.types.ace.EntityMention(jcas);
 		entityMention1.setMention_ldctype("PTV");
 		entityMention1.setMention_type("PRO");
@@ -2101,7 +2103,7 @@ private void buildEntityMentions1(JCas jcas, Entity entity) {
 	} // of buildEntityMentions
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void buildEntityMentions2(JCas jcas, Entity entity2) {
+	private static void buildEntityMentions2(JCas jcas, Entity entity2) {
 		de.julielab.jcore.types.ace.EntityMention entityMention1 = new de.julielab.jcore.types.ace.EntityMention(jcas);
 		entityMention1.setLdcatr("FALSE");
 		entityMention1.setAce_role("LOC");
@@ -2180,6 +2182,7 @@ private void buildEntityMentions2(JCas jcas, Entity entity2) {
 	/**
 	 * Test if method getNextCas() has done its job
 	 */
+	@Test
 	public void testGetNextCas() {
 		System.out.println("CALL testGetNextCas");
 		checkDocumentText();
@@ -2195,7 +2198,7 @@ public void checkDocumentText() {
 
 		for (int i = 0; i < casArrayList.size(); i++) {
 			String text = casArrayList.get(i).getDocumentText();
-			assertTrue(((text == null) ? "null" : text), (text != null) && (!text.equals("")));
+			assertTrue((text != null) && (!text.equals("")), ((text == null) ? "null" : text));
 		} // of for
 	} // of checkDocumentText
 
@@ -2209,7 +2212,7 @@ public void checkDocumentText() {
 	 *            the type
 	 * @return the iterator
 	 */
-	private Iterator getTypeIterator(CAS cas, int type) {
+	private static Iterator getTypeIterator(CAS cas, int type) {
 
 		Iterator iterator = null;
 		try {
@@ -2221,7 +2224,7 @@ private Iterator getTypeIterator(CAS cas, int type) {
 	} // getTypeIterator
 
 	/*----------------------------------------------------------------------------------------------*/
-	private void writeCasToXMI(CAS cas, int docs) throws CASException, IOException, SAXException {
+	private static void writeCasToXMI(CAS cas, int docs) throws CASException, IOException, SAXException {
 
 		JFSIndexRepository indexes = cas.getJCas().getJFSIndexRepository();
 		Iterator documentIter = indexes.getAnnotationIndex(Document.type).iterator();
diff --git a/jcore-ace-reader/src/test/resources/de/julielab/jcore/reader/ace/data/out/PLACEHOLDER b/jcore-ace-reader/src/test/resources/de/julielab/jcore/reader/ace/data/out/PLACEHOLDER
new file mode 100644
index 000000000..e69de29bb
diff --git a/jcore-acronym-ae/pom.xml b/jcore-acronym-ae/pom.xml
index dfd4fce45..b4fc4640d 100644
--- a/jcore-acronym-ae/pom.xml
+++ b/jcore-acronym-ae/pom.xml
@@ -38,8 +38,8 @@
             <version>${jcore-utilities-version}</version>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <organization>
diff --git a/jcore-acronym-ae/src/test/java/de/julielab/jcore/ae/acronymtagger/main/AcronymAnnotatorTest.java b/jcore-acronym-ae/src/test/java/de/julielab/jcore/ae/acronymtagger/main/AcronymAnnotatorTest.java
index c2c74ba6e..3721ee562 100644
--- a/jcore-acronym-ae/src/test/java/de/julielab/jcore/ae/acronymtagger/main/AcronymAnnotatorTest.java
+++ b/jcore-acronym-ae/src/test/java/de/julielab/jcore/ae/acronymtagger/main/AcronymAnnotatorTest.java
@@ -18,7 +18,6 @@
 import de.julielab.jcore.types.Abbreviation;
 import de.julielab.jcore.types.AbbreviationLongform;
 import de.julielab.jcore.types.Sentence;
-import junit.framework.TestCase;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.cas.CAS;
@@ -35,7 +34,7 @@
 import org.apache.uima.util.CasCreationUtils;
 import org.apache.uima.util.InvalidXMLException;
 import org.apache.uima.util.XMLInputSource;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -45,12 +44,14 @@
 import java.util.ArrayList;
 import java.util.Collection;
 
+import static org.junit.jupiter.api.Assertions.*;
+
 /**
  * The AcronymAnnotatorTest class
  * 
  * @author jwermter
  */
-public class AcronymAnnotatorTest extends TestCase {
+public class AcronymAnnotatorTest  {
 
 	private static final String DOCUMENT_TEXT = "[TAZ]Die Firma Kohl-kopf (FK-K) hat für die Straßenverkehrsordnung (StVO) "
 			+ "in der Bundesrepublik Deutschland(BRD)  einen hochintelligenten Manager für die Chefetage "
@@ -73,6 +74,7 @@ public class AcronymAnnotatorTest extends TestCase {
 
 	private static final String ALL_TYPES_NAME = "de.julielab.jcore.types.jcore-all-types";
 
+	@Test
 	public void testProcess() throws ResourceInitializationException, InvalidXMLException, IOException, CASException {
 
 		CAS cas = CasCreationUtils.createCas(
diff --git a/jcore-acronym-writer/pom.xml b/jcore-acronym-writer/pom.xml
index 035774709..7924aaae0 100644
--- a/jcore-acronym-writer/pom.xml
+++ b/jcore-acronym-writer/pom.xml
@@ -41,8 +41,8 @@
             <version>${jcore-version}</version>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <name>JCoRe Acronym Writer</name>
diff --git a/jcore-annotation-adder-ae/pom.xml b/jcore-annotation-adder-ae/pom.xml
index a8f6ce3bd..dc4379f04 100644
--- a/jcore-annotation-adder-ae/pom.xml
+++ b/jcore-annotation-adder-ae/pom.xml
@@ -41,8 +41,8 @@
             <version>${jcore-types-version}</version>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
diff --git a/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotatorTest.java b/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotatorTest.java
index 65c0de306..a7f76f786 100644
--- a/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotatorTest.java
+++ b/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotatorTest.java
@@ -13,7 +13,7 @@
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.resource.ExternalResourceDescription;
 import org.assertj.core.data.Offset;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.File;
 import java.util.ArrayList;
diff --git a/jcore-banner-ae/pom.xml b/jcore-banner-ae/pom.xml
index 9e47d8857..d50f90b07 100644
--- a/jcore-banner-ae/pom.xml
+++ b/jcore-banner-ae/pom.xml
@@ -59,8 +59,8 @@
             <artifactId>julielab-java-utilities</artifactId>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <parent>
diff --git a/jcore-banner-ae/src/main/java/banner/annotation/BEAT.java b/jcore-banner-ae/src/main/java/banner/annotation/BEAT.java
index 113f6139f..38281692f 100644
--- a/jcore-banner-ae/src/main/java/banner/annotation/BEAT.java
+++ b/jcore-banner-ae/src/main/java/banner/annotation/BEAT.java
@@ -18,8 +18,8 @@
 import java.awt.*;
 import java.awt.event.*;
 import java.io.IOException;
-import java.util.*;
 import java.util.List;
+import java.util.*;
 
 public class BEAT extends JFrame implements ActionListener, CaretListener
 {
diff --git a/jcore-banner-ae/src/test/java/de/julielab/jcore/ae/banner/BANNERAnnotatorTest.java b/jcore-banner-ae/src/test/java/de/julielab/jcore/ae/banner/BANNERAnnotatorTest.java
index 12e9e2776..489ecd37d 100644
--- a/jcore-banner-ae/src/test/java/de/julielab/jcore/ae/banner/BANNERAnnotatorTest.java
+++ b/jcore-banner-ae/src/test/java/de/julielab/jcore/ae/banner/BANNERAnnotatorTest.java
@@ -18,14 +18,14 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.util.ArrayList;
 import java.util.List;
 
-import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 
 public class BANNERAnnotatorTest {
 	private final static Logger log = LoggerFactory.getLogger(BANNERAnnotatorTest.class);
diff --git a/jcore-banner-ae/src/test/java/de/julielab/jcore/ae/banner/ModelTrainTest.java b/jcore-banner-ae/src/test/java/de/julielab/jcore/ae/banner/ModelTrainTest.java
index 7604ae62f..9d5d4958c 100644
--- a/jcore-banner-ae/src/test/java/de/julielab/jcore/ae/banner/ModelTrainTest.java
+++ b/jcore-banner-ae/src/test/java/de/julielab/jcore/ae/banner/ModelTrainTest.java
@@ -12,11 +12,11 @@
 
 import banner.eval.BANNER;
 import org.apache.commons.configuration.XMLConfiguration;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.File;
 
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class ModelTrainTest {
 	@Test
diff --git a/jcore-banner-ae/src/test/java/de/julielab/jcore/banner/dataset/JCoReEntityDatasetTest.java b/jcore-banner-ae/src/test/java/de/julielab/jcore/banner/dataset/JCoReEntityDatasetTest.java
index 35925ad84..843106130 100644
--- a/jcore-banner-ae/src/test/java/de/julielab/jcore/banner/dataset/JCoReEntityDatasetTest.java
+++ b/jcore-banner-ae/src/test/java/de/julielab/jcore/banner/dataset/JCoReEntityDatasetTest.java
@@ -11,14 +11,14 @@
 package de.julielab.jcore.banner.dataset;
 
 import banner.tokenization.SimpleTokenizer;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.File;
 import java.util.Set;
 import java.util.stream.Collectors;
 
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class JCoReEntityDatasetTest {
 	@Test
diff --git a/jcore-bc2gmformat-writer/pom.xml b/jcore-bc2gmformat-writer/pom.xml
index 8092a37ee..37c5a1de0 100644
--- a/jcore-bc2gmformat-writer/pom.xml
+++ b/jcore-bc2gmformat-writer/pom.xml
@@ -28,8 +28,8 @@
             <version>${jcore-types-version}</version>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
diff --git a/jcore-bc2gmformat-writer/src/test/java/de/julielab/jcore/consumer/bc2gmformat/BC2GMFormatWriterTest.java b/jcore-bc2gmformat-writer/src/test/java/de/julielab/jcore/consumer/bc2gmformat/BC2GMFormatWriterTest.java
index 41faec637..3752d67b5 100644
--- a/jcore-bc2gmformat-writer/src/test/java/de/julielab/jcore/consumer/bc2gmformat/BC2GMFormatWriterTest.java
+++ b/jcore-bc2gmformat-writer/src/test/java/de/julielab/jcore/consumer/bc2gmformat/BC2GMFormatWriterTest.java
@@ -2,13 +2,13 @@
 package de.julielab.jcore.consumer.bc2gmformat;
 
 import org.apache.uima.fit.factory.UimaContextFactory;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.lang.reflect.Method;
 import java.util.TreeMap;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
 
 
 /**
diff --git a/jcore-biolemmatizer-ae/pom.xml b/jcore-biolemmatizer-ae/pom.xml
index 241617304..b5e089a8f 100644
--- a/jcore-biolemmatizer-ae/pom.xml
+++ b/jcore-biolemmatizer-ae/pom.xml
@@ -31,7 +31,11 @@
 			<artifactId>biolemmatizer-core</artifactId>
 			<version>1.2</version>
 		</dependency>
-	<dependency><groupId>junit</groupId><artifactId>junit</artifactId></dependency></dependencies>
+	<dependency>
+		<groupId>org.junit.jupiter</groupId>
+		<artifactId>junit-jupiter-engine</artifactId>
+	</dependency>
+	</dependencies>
 	<name>JCoRe BioLemmatizer</name>
 	<organization>
 		<name>JULIE Lab Jena, Germany</name>
diff --git a/jcore-biolemmatizer-ae/src/test/java/de/julielab/jcore/ae/biolemmatizer/BioLemmatizerTest.java b/jcore-biolemmatizer-ae/src/test/java/de/julielab/jcore/ae/biolemmatizer/BioLemmatizerTest.java
index ada58be07..241aadaee 100644
--- a/jcore-biolemmatizer-ae/src/test/java/de/julielab/jcore/ae/biolemmatizer/BioLemmatizerTest.java
+++ b/jcore-biolemmatizer-ae/src/test/java/de/julielab/jcore/ae/biolemmatizer/BioLemmatizerTest.java
@@ -10,10 +10,10 @@
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.FSArray;
 import org.apache.uima.jcas.tcas.Annotation;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
 /**
  * Unit tests for jcore-de.julielab.jcore.ae.biolemmatizer-ae.
  * @author 
diff --git a/jcore-bionlpformat-consumer/pom.xml b/jcore-bionlpformat-consumer/pom.xml
index d868129aa..a2cdae928 100644
--- a/jcore-bionlpformat-consumer/pom.xml
+++ b/jcore-bionlpformat-consumer/pom.xml
@@ -29,8 +29,8 @@
             <scope>test</scope>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <name>JCoRe BioNLP Format Consumer</name>
diff --git a/jcore-bionlpformat-consumer/src/main/java/de/julielab/jcore/consumer/bionlpformat/main/SegmentConsumer.java b/jcore-bionlpformat-consumer/src/main/java/de/julielab/jcore/consumer/bionlpformat/main/SegmentConsumer.java
index 287a79921..f09c3a48a 100644
--- a/jcore-bionlpformat-consumer/src/main/java/de/julielab/jcore/consumer/bionlpformat/main/SegmentConsumer.java
+++ b/jcore-bionlpformat-consumer/src/main/java/de/julielab/jcore/consumer/bionlpformat/main/SegmentConsumer.java
@@ -22,9 +22,6 @@
 import org.apache.uima.fit.descriptor.ConfigurationParameter;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.resource.ResourceInitializationException;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.springframework.context.annotation.Configuration;
 
 import java.io.*;
 import java.util.Iterator;
diff --git a/jcore-bionlpformat-consumer/src/test/java/de/julielab/jcore/consumer/bionlpformat/main/EventConsumerTest.java b/jcore-bionlpformat-consumer/src/test/java/de/julielab/jcore/consumer/bionlpformat/main/EventConsumerTest.java
index 6668a969d..8a6659cfb 100644
--- a/jcore-bionlpformat-consumer/src/test/java/de/julielab/jcore/consumer/bionlpformat/main/EventConsumerTest.java
+++ b/jcore-bionlpformat-consumer/src/test/java/de/julielab/jcore/consumer/bionlpformat/main/EventConsumerTest.java
@@ -13,16 +13,16 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.FSArray;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 import java.io.*;
 import java.util.ArrayList;
 import java.util.List;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class EventConsumerTest {
 	private static final String EVENT_E8 = "E8	Phosphorylation:T14 Theme:T17 Site:T13";
@@ -44,7 +44,7 @@ public class EventConsumerTest {
 	private AnalysisEngine consumer;
 	private FilenameFilter filter;
 
-	@Before
+	@BeforeEach
 	public void setUp() throws Exception {
 		cas = JCasFactory.createJCas("src/test/resources/types/jcore-all-types");
 		consumer = AnalysisEngineFactory.createEngine(BioEventConsumer.class,
@@ -113,7 +113,7 @@ public boolean accept(File file, String name) {
 		};
 	}
 
-	@After
+	@AfterEach
 	public void tearDown() {
 
 		File dataDirectory = new File(TARGET_DIRECTORY);
diff --git a/jcore-bionlpformat-consumer/src/test/java/de/julielab/jcore/consumer/bionlpformat/utils/DocumentWriterTest.java b/jcore-bionlpformat-consumer/src/test/java/de/julielab/jcore/consumer/bionlpformat/utils/DocumentWriterTest.java
index bdd89cc38..12e2baa53 100644
--- a/jcore-bionlpformat-consumer/src/test/java/de/julielab/jcore/consumer/bionlpformat/utils/DocumentWriterTest.java
+++ b/jcore-bionlpformat-consumer/src/test/java/de/julielab/jcore/consumer/bionlpformat/utils/DocumentWriterTest.java
@@ -10,8 +10,8 @@
 import de.julielab.jcore.types.Title;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 import java.io.IOException;
 import java.io.Writer;
@@ -28,7 +28,7 @@ public class DocumentWriterTest {
 	private DocumentWriter documentWriter;
 	private Writer writer;
 
-	@Before
+	@BeforeEach
 	public void setUp() throws Exception{
 		cas = JCasFactory.createJCas("src/test/resources/types/jcore-all-types");
 		cas.setDocumentText(DOCUMENT_TITLE + "\n" + DOCUMENT_ABSTRACT);
diff --git a/jcore-bionlpformat-consumer/src/test/java/de/julielab/jcore/consumer/bionlpformat/utils/EntityWriterTest.java b/jcore-bionlpformat-consumer/src/test/java/de/julielab/jcore/consumer/bionlpformat/utils/EntityWriterTest.java
index d98cb4722..29cd9e064 100644
--- a/jcore-bionlpformat-consumer/src/test/java/de/julielab/jcore/consumer/bionlpformat/utils/EntityWriterTest.java
+++ b/jcore-bionlpformat-consumer/src/test/java/de/julielab/jcore/consumer/bionlpformat/utils/EntityWriterTest.java
@@ -9,15 +9,15 @@
 import de.julielab.jcore.types.EntityMention;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 import java.io.IOException;
 import java.io.Writer;
 
 import static org.easymock.classextension.EasyMock.*;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class EntityWriterTest {
 	private static final String ENTITY_T13 = "T13	Entity 322 330	tyrosine\n";
@@ -30,7 +30,7 @@ public class EntityWriterTest {
 	private Writer writer;
 	private EntityMention entityT13;
 
-	@Before
+	@BeforeEach
 	public void setUp() throws Exception{
 		cas = JCasFactory.createJCas("src/test/resources/types/jcore-semantics-biology-types");
 		
diff --git a/jcore-bionlpformat-consumer/src/test/java/de/julielab/jcore/consumer/bionlpformat/utils/EventTriggerWriterTest.java b/jcore-bionlpformat-consumer/src/test/java/de/julielab/jcore/consumer/bionlpformat/utils/EventTriggerWriterTest.java
index 317dd0cef..2a04a48f1 100644
--- a/jcore-bionlpformat-consumer/src/test/java/de/julielab/jcore/consumer/bionlpformat/utils/EventTriggerWriterTest.java
+++ b/jcore-bionlpformat-consumer/src/test/java/de/julielab/jcore/consumer/bionlpformat/utils/EventTriggerWriterTest.java
@@ -9,15 +9,15 @@
 import de.julielab.jcore.types.EventTrigger;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 import java.io.IOException;
 import java.io.Writer;
 
 import static org.easymock.classextension.EasyMock.*;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class EventTriggerWriterTest {
 	private static final String TRIGGER_T1 = "T1	Negative_regulation 12 19	inhibit\n";
@@ -28,7 +28,7 @@ public class EventTriggerWriterTest {
 	private Writer writer;
 	private EventTrigger triggerT1;
 
-	@Before
+	@BeforeEach
 	public void setUp() throws Exception{
 		cas = JCasFactory.createJCas("src/test/resources/types/jcore-semantics-biology-types");
 		
diff --git a/jcore-bionlpformat-consumer/src/test/java/de/julielab/jcore/consumer/bionlpformat/utils/EventWriterTest.java b/jcore-bionlpformat-consumer/src/test/java/de/julielab/jcore/consumer/bionlpformat/utils/EventWriterTest.java
index 5d8b717cf..58052dc0b 100644
--- a/jcore-bionlpformat-consumer/src/test/java/de/julielab/jcore/consumer/bionlpformat/utils/EventWriterTest.java
+++ b/jcore-bionlpformat-consumer/src/test/java/de/julielab/jcore/consumer/bionlpformat/utils/EventWriterTest.java
@@ -10,8 +10,8 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.FSArray;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 import java.io.FileWriter;
 import java.io.IOException;
@@ -36,7 +36,7 @@ public class EventWriterTest {
 	private Gene proteinT17;
 	private EntityMention entityT13;
 	
-	@Before
+	@BeforeEach
 	public void setUp() throws Exception{
 		cas = JCasFactory.createJCas("src/test/resources/types/jcore-semantics-biology-types");
 		
diff --git a/jcore-bionlpformat-consumer/src/test/java/de/julielab/jcore/consumer/bionlpformat/utils/ProteinWriterTest.java b/jcore-bionlpformat-consumer/src/test/java/de/julielab/jcore/consumer/bionlpformat/utils/ProteinWriterTest.java
index 3871f07ff..2cdc5be50 100644
--- a/jcore-bionlpformat-consumer/src/test/java/de/julielab/jcore/consumer/bionlpformat/utils/ProteinWriterTest.java
+++ b/jcore-bionlpformat-consumer/src/test/java/de/julielab/jcore/consumer/bionlpformat/utils/ProteinWriterTest.java
@@ -9,16 +9,16 @@
 import de.julielab.jcore.types.Gene;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 import java.io.FileWriter;
 import java.io.IOException;
 import java.io.Writer;
 
 import static org.easymock.classextension.EasyMock.*;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class ProteinWriterTest {
 
@@ -35,7 +35,7 @@ public class ProteinWriterTest {
 	private static final String DOCUMENT_TEXT = "Interferons inhibit activation of STAT6 by interleukin 4 in human monocytes by inducing SOCS-1 gene expression.\n" + 
 												"Interferons (IFNs) inhibit induction by IL-4 of multiple genes in human monocytes. However, the mechanism by which IFNs mediate this inhibition has not been defined. IL-4 activates gene expression by inducing tyrosine phosphorylation, homodimerization, and nuclear translocation of the latent transcription factor, STAT6 (signal transducer and activator of transcription-6). STAT6-responsive elements are characteristically present in the promoters of IL-4-inducible genes. Because STAT6 activation is essential for IL-4-induced gene expression, we examined the ability of type I and type II IFNs to regulate activation of STAT6 by IL-4 in primary human monocytes. Pretreatment of monocytes with IFN-beta or IFN-gamma, but not IL-1, IL-2, macrophage colony-stimulating factor, granulocyte/macrophage colony-stimulating factor, IL-6, or transforming growth factor beta suppressed activation of STAT6 by IL-4. This inhibition was associated with decreased tyrosine phosphorylation and nuclear translocation of STAT6 and was not evident unless the cells were preincubated with IFN for at least 1 hr before IL-4 stimulation. Furthermore, inhibition by IFN could be blocked by cotreatment with actinomycin D and correlated temporally with induction of the JAK/STAT inhibitory gene, SOCS-1. Forced expression of SOCS-1 in a macrophage cell line, RAW264, markedly suppressed trans-activation of an IL-4-inducible reporter as well as IL-6- and IFN-gamma-induced reporter gene activity. These findings demonstrate that IFNs inhibit IL-4-induced activation of STAT6 and STAT6-dependent gene expression, at least in part, by inducing expression of SOCS-1.";
 
-	@Before
+	@BeforeEach
 	public void setUp() throws Exception{
 		cas = JCasFactory.createJCas("src/test/resources/types/jcore-semantics-biology-types");
 		
diff --git a/jcore-bionlpformat-reader/pom.xml b/jcore-bionlpformat-reader/pom.xml
index 65fcefb66..94aa1584f 100644
--- a/jcore-bionlpformat-reader/pom.xml
+++ b/jcore-bionlpformat-reader/pom.xml
@@ -41,8 +41,8 @@
             <version>${jcore-utilities-version}</version>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <organization>
diff --git a/jcore-bionlpformat-reader/src/main/java/de/julielab/jcore/reader/bionlpformat/utils/AnnotationFileMapper.java b/jcore-bionlpformat-reader/src/main/java/de/julielab/jcore/reader/bionlpformat/utils/AnnotationFileMapper.java
index 70efe8571..5a265d736 100644
--- a/jcore-bionlpformat-reader/src/main/java/de/julielab/jcore/reader/bionlpformat/utils/AnnotationFileMapper.java
+++ b/jcore-bionlpformat-reader/src/main/java/de/julielab/jcore/reader/bionlpformat/utils/AnnotationFileMapper.java
@@ -17,8 +17,8 @@
 
  import java.io.BufferedReader;
  import java.io.IOException;
- import java.util.*;
  import java.util.List;
+ import java.util.*;
  import java.util.regex.Matcher;
  import java.util.regex.Pattern;
 
diff --git a/jcore-bionlpformat-reader/src/test/java/de/julielab/jcore/reader/bionlp09event/main/CoreferenceReadingTest.java b/jcore-bionlpformat-reader/src/test/java/de/julielab/jcore/reader/bionlp09event/main/CoreferenceReadingTest.java
index ce2926f00..1b2a68ac9 100644
--- a/jcore-bionlpformat-reader/src/test/java/de/julielab/jcore/reader/bionlp09event/main/CoreferenceReadingTest.java
+++ b/jcore-bionlpformat-reader/src/test/java/de/julielab/jcore/reader/bionlp09event/main/CoreferenceReadingTest.java
@@ -16,8 +16,8 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
-import org.junit.Ignore;
-import org.junit.Test;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
 import org.xml.sax.SAXException;
 
 import java.io.FileOutputStream;
@@ -25,7 +25,7 @@
 import java.io.OutputStream;
 
 // Ignore because the data path does generally not exist; a fix should only contain some test data, not the whole dataset
-@Ignore
+@Disabled
 public class CoreferenceReadingTest {
 	@Test
 	public void testCoreferenceReading() throws UIMAException, IOException,
diff --git a/jcore-bionlpformat-reader/src/test/java/de/julielab/jcore/reader/bionlp09event/main/EventReaderTest.java b/jcore-bionlpformat-reader/src/test/java/de/julielab/jcore/reader/bionlp09event/main/EventReaderTest.java
index 9c7aea226..68c64fc94 100644
--- a/jcore-bionlpformat-reader/src/test/java/de/julielab/jcore/reader/bionlp09event/main/EventReaderTest.java
+++ b/jcore-bionlpformat-reader/src/test/java/de/julielab/jcore/reader/bionlp09event/main/EventReaderTest.java
@@ -17,23 +17,23 @@
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.util.CasCreationUtils;
 import org.apache.uima.util.XMLInputSource;
-import org.junit.Before;
-import org.junit.Ignore;
-import org.junit.Test;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
 
 import java.util.Set;
 
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 // This test's EventReaderTest.xml descriptor points to local directories of Ekaterina Buyko and as such, the test doesn't work this way. However it might, if the data is made available as proper test data.
-@Ignore
+@Disabled
 public class EventReaderTest {
 
 	private static final String DESCRIPTOR_FILE = "src/test/resources/de/julielab/jcore/reader/bionlpformat/desc/EventReaderTest.xml";
 	private CollectionReader collectionReader;
 
-	@Before
+	@BeforeEach
 	public void setUp() throws Exception {
 		CollectionReaderDescription readerDescription = (CollectionReaderDescription) UIMAFramework
 				.getXMLParser().parseCollectionReaderDescription(
diff --git a/jcore-bionlpformat-reader/src/test/java/de/julielab/jcore/reader/bionlp09event/utils/AbstractFileMapperTest.java b/jcore-bionlpformat-reader/src/test/java/de/julielab/jcore/reader/bionlp09event/utils/AbstractFileMapperTest.java
index 25685ec01..c95a9d148 100644
--- a/jcore-bionlpformat-reader/src/test/java/de/julielab/jcore/reader/bionlp09event/utils/AbstractFileMapperTest.java
+++ b/jcore-bionlpformat-reader/src/test/java/de/julielab/jcore/reader/bionlp09event/utils/AbstractFileMapperTest.java
@@ -18,15 +18,15 @@
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.util.CasCreationUtils;
 import org.apache.uima.util.XMLInputSource;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 import java.io.BufferedReader;
 
 import static org.easymock.EasyMock.expect;
 import static org.easymock.classextension.EasyMock.*;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
 
 public class AbstractFileMapperTest {
 
@@ -34,7 +34,7 @@ public class AbstractFileMapperTest {
     private JCas cas;
     private TextFileMapper abstractFileMapper;
 
-    @Before
+    @BeforeEach
     public void setUp() throws Exception {
         CollectionReaderDescription readerDescription = UIMAFramework.getXMLParser()
                 .parseCollectionReaderDescription(new XMLInputSource(DESCRIPTOR_FILE));
diff --git a/jcore-bionlpformat-reader/src/test/java/de/julielab/jcore/reader/bionlp09event/utils/AnnotationFileMapperTest.java b/jcore-bionlpformat-reader/src/test/java/de/julielab/jcore/reader/bionlp09event/utils/AnnotationFileMapperTest.java
index 46bf09ee6..85b582ed0 100644
--- a/jcore-bionlpformat-reader/src/test/java/de/julielab/jcore/reader/bionlp09event/utils/AnnotationFileMapperTest.java
+++ b/jcore-bionlpformat-reader/src/test/java/de/julielab/jcore/reader/bionlp09event/utils/AnnotationFileMapperTest.java
@@ -28,8 +28,8 @@
 import org.apache.uima.jcas.cas.FSArray;
 import org.apache.uima.util.CasCreationUtils;
 import org.apache.uima.util.XMLInputSource;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 import java.io.BufferedReader;
 import java.util.HashMap;
@@ -37,8 +37,8 @@
 
 import static org.easymock.EasyMock.expect;
 import static org.easymock.classextension.EasyMock.*;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
 
 
@@ -68,7 +68,7 @@ public class AnnotationFileMapperTest {
 	private Gene t3;
 	private Map<String, Annotation> mappedProteins;
 	
-	@Before
+	@BeforeEach
 	public void setUp() throws Exception {
 		CollectionReaderDescription readerDescription = (CollectionReaderDescription) UIMAFramework.getXMLParser().parseCollectionReaderDescription(new XMLInputSource(DESCRIPTOR_FILE));
 		CollectionReader collectionReader = UIMAFramework.produceCollectionReader(readerDescription);
diff --git a/jcore-bionlpformat-reader/src/test/java/de/julielab/jcore/reader/bionlp09event/utils/OntoFormatReaderTest.java b/jcore-bionlpformat-reader/src/test/java/de/julielab/jcore/reader/bionlp09event/utils/OntoFormatReaderTest.java
index 2abfcc03d..24a3d7805 100644
--- a/jcore-bionlpformat-reader/src/test/java/de/julielab/jcore/reader/bionlp09event/utils/OntoFormatReaderTest.java
+++ b/jcore-bionlpformat-reader/src/test/java/de/julielab/jcore/reader/bionlp09event/utils/OntoFormatReaderTest.java
@@ -11,7 +11,7 @@
 package de.julielab.jcore.reader.bionlp09event.utils;
 
 import de.julielab.jcore.reader.bionlpformat.utils.OntoFormatReader;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.File;
 import java.io.IOException;
diff --git a/jcore-biosem-ae/pom.xml b/jcore-biosem-ae/pom.xml
index eec6bc55f..42cfd7194 100644
--- a/jcore-biosem-ae/pom.xml
+++ b/jcore-biosem-ae/pom.xml
@@ -52,8 +52,8 @@
             <scope>test</scope>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <organization>
diff --git a/jcore-biosem-ae/src/main/java/de/julielab/jcore/ae/biosem/BioSemEventAnnotator.java b/jcore-biosem-ae/src/main/java/de/julielab/jcore/ae/biosem/BioSemEventAnnotator.java
index 9a9f16a35..2b4011ff0 100644
--- a/jcore-biosem-ae/src/main/java/de/julielab/jcore/ae/biosem/BioSemEventAnnotator.java
+++ b/jcore-biosem-ae/src/main/java/de/julielab/jcore/ae/biosem/BioSemEventAnnotator.java
@@ -31,8 +31,8 @@
 import utils.BioSemException;
 import utils.DBUtils;
 
-import java.util.*;
 import java.util.List;
+import java.util.*;
 import java.util.Map.Entry;
 
 public class BioSemEventAnnotator extends JCasAnnotator_ImplBase {
diff --git a/jcore-biosem-ae/src/test/java/de/julielab/jcore/ae/biosem/BioSemEventAnnotatorTest.java b/jcore-biosem-ae/src/test/java/de/julielab/jcore/ae/biosem/BioSemEventAnnotatorTest.java
index ae49970cd..da7a683de 100644
--- a/jcore-biosem-ae/src/test/java/de/julielab/jcore/ae/biosem/BioSemEventAnnotatorTest.java
+++ b/jcore-biosem-ae/src/test/java/de/julielab/jcore/ae/biosem/BioSemEventAnnotatorTest.java
@@ -22,15 +22,15 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.resource.ExternalResourceDescription;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.File;
 import java.io.FileInputStream;
 import java.util.Collections;
 import java.util.List;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class BioSemEventAnnotatorTest {
 	@Test
@@ -53,7 +53,7 @@ public void testProcess() throws Exception {
 		if (testOutputFile.exists())
 			testOutputFile.delete();
 
-		assertTrue("Test document was not found by the BioNLP ST reader.", bioNlpSTReader.hasNext());
+		assertTrue(bioNlpSTReader.hasNext(), "Test document was not found by the BioNLP ST reader.");
 		bioNlpSTReader.getNext(jCas.getCas());
 		engine.process(jCas);
 		bioNlpSTWriter.process(jCas);
diff --git a/jcore-conll-consumer/pom.xml b/jcore-conll-consumer/pom.xml
index 4ba6ef20c..bbab62b95 100644
--- a/jcore-conll-consumer/pom.xml
+++ b/jcore-conll-consumer/pom.xml
@@ -24,8 +24,8 @@
             <artifactId>logback-classic</artifactId>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <licenses>
diff --git a/jcore-conll-consumer/src/test/java/de/julielab/jcore/consumer/cas2conll/test/ConllConsumerTest.java b/jcore-conll-consumer/src/test/java/de/julielab/jcore/consumer/cas2conll/test/ConllConsumerTest.java
index cb66ca825..ad46ef663 100644
--- a/jcore-conll-consumer/src/test/java/de/julielab/jcore/consumer/cas2conll/test/ConllConsumerTest.java
+++ b/jcore-conll-consumer/src/test/java/de/julielab/jcore/consumer/cas2conll/test/ConllConsumerTest.java
@@ -21,7 +21,7 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.FSArray;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.BufferedReader;
 import java.io.File;
@@ -30,7 +30,7 @@
 import java.util.ArrayList;
 import java.util.List;
 
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class ConllConsumerTest {
 
diff --git a/jcore-coordination-baseline-ae/pom.xml b/jcore-coordination-baseline-ae/pom.xml
index ea88c0b43..0b54fac37 100644
--- a/jcore-coordination-baseline-ae/pom.xml
+++ b/jcore-coordination-baseline-ae/pom.xml
@@ -37,8 +37,8 @@
             <scope>test</scope>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <organization>
diff --git a/jcore-coordination-baseline-ae/src/test/java/de/julielab/jcore/ae/coordbaseline/main/ConjunctAnnotatorTest.java b/jcore-coordination-baseline-ae/src/test/java/de/julielab/jcore/ae/coordbaseline/main/ConjunctAnnotatorTest.java
index fdca4b78e..6eb0c2ee6 100644
--- a/jcore-coordination-baseline-ae/src/test/java/de/julielab/jcore/ae/coordbaseline/main/ConjunctAnnotatorTest.java
+++ b/jcore-coordination-baseline-ae/src/test/java/de/julielab/jcore/ae/coordbaseline/main/ConjunctAnnotatorTest.java
@@ -7,7 +7,6 @@
 package de.julielab.jcore.ae.coordbaseline.main;
 
 import de.julielab.jcore.types.*;
-import junit.framework.TestCase;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.cas.FSIterator;
@@ -17,13 +16,16 @@
 import org.apache.uima.resource.ResourceInitializationException;
 import org.apache.uima.resource.ResourceSpecifier;
 import org.apache.uima.util.XMLInputSource;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.util.ArrayList;
 
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
-public class ConjunctAnnotatorTest extends TestCase
+
+public class ConjunctAnnotatorTest
 {
 	private static final Logger LOGGER = LoggerFactory.getLogger(ConjunctAnnotatorTest.class);
 	private static final String LOGGER_PROPERTIES = "src/test/java/log4j.properties";
@@ -32,19 +34,8 @@ public class ConjunctAnnotatorTest extends TestCase
 	private static final String coordinationLabels2 = "antecedent,conjunct,conjunction,conjunct,antecedent,antecedent";
 	private static final String TEST_DESC = "src/test/resources/desc/ConjunctAnnotatorTest.xml";
 	
-	
-	
-	
-	
-	
-	
-/*--------------------------------------------------------------------------------*/
-	protected void setUp() throws Exception 
-	{
-		super.setUp();
-	} // of setUp	
-/*--------------------------------------------------------------------------------*/
-	public void initCas(JCas jcas) 
+
+	public void initCas(JCas jcas)
 	{
 		jcas.reset();
 		
@@ -558,6 +549,7 @@ public void initCas(JCas jcas)
 		
 	} // of initCas
 /*--------------------------------------------------------------------------------*/
+	@Test
 	public void testProcess() 
 	{
 		XMLInputSource descriptor = null; 
@@ -595,7 +587,7 @@ public void testProcess()
 		try
 		{
 			ae.process(jcas, null);			
-			assertTrue("Invalid JCas!", checkJCas(jcas));
+			assertTrue(checkJCas(jcas), "Invalid JCas!");
 		} // of try 
 		catch (Exception e)
 		{
diff --git a/jcore-coordination-baseline-ae/src/test/java/de/julielab/jcore/ae/coordbaseline/main/CoordinationAnnotatorTest.java b/jcore-coordination-baseline-ae/src/test/java/de/julielab/jcore/ae/coordbaseline/main/CoordinationAnnotatorTest.java
index dd5416a7a..32662f928 100644
--- a/jcore-coordination-baseline-ae/src/test/java/de/julielab/jcore/ae/coordbaseline/main/CoordinationAnnotatorTest.java
+++ b/jcore-coordination-baseline-ae/src/test/java/de/julielab/jcore/ae/coordbaseline/main/CoordinationAnnotatorTest.java
@@ -22,7 +22,6 @@
 package de.julielab.jcore.ae.coordbaseline.main;
 
 import de.julielab.jcore.types.*;
-import junit.framework.TestCase;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.cas.FSIterator;
@@ -32,13 +31,16 @@
 import org.apache.uima.resource.ResourceInitializationException;
 import org.apache.uima.resource.ResourceSpecifier;
 import org.apache.uima.util.XMLInputSource;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.util.ArrayList;
 
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
-public class CoordinationAnnotatorTest extends TestCase 
+
+public class CoordinationAnnotatorTest
 {
 
 	private static final Logger LOGGER = LoggerFactory.getLogger(CoordinationAnnotatorTest.class);
@@ -55,11 +57,7 @@ public class CoordinationAnnotatorTest extends TestCase
 	private static final String TEST_DESC = "src/test/resources/desc/CoordinationAnnotatorTest.xml";
 	
 	
-	protected void setUp() throws Exception 
-	{
-		super.setUp();
-	} // of setUp
-      
+
 	public void initCas(JCas jcas) 
 	{
 		jcas.reset();
@@ -562,7 +560,7 @@ public void initCas(JCas jcas)
 	} // of initCas
 	
 	
-	
+	@Test
 	public void testProcess() 
 	{
 		XMLInputSource descriptor = null; 
@@ -598,7 +596,7 @@ public void testProcess()
 		try
 		{
 			ae.process(jcas, null);			
-			assertTrue("Invalid JCas!", checkJCas(jcas));
+			assertTrue(checkJCas(jcas), "Invalid JCas!");
 			
 		} // of try 
 		catch (Exception e)
diff --git a/jcore-coordination-baseline-ae/src/test/java/de/julielab/jcore/ae/coordbaseline/main/EEEAnnotatorTest.java b/jcore-coordination-baseline-ae/src/test/java/de/julielab/jcore/ae/coordbaseline/main/EEEAnnotatorTest.java
index a010c3178..4203cdc16 100644
--- a/jcore-coordination-baseline-ae/src/test/java/de/julielab/jcore/ae/coordbaseline/main/EEEAnnotatorTest.java
+++ b/jcore-coordination-baseline-ae/src/test/java/de/julielab/jcore/ae/coordbaseline/main/EEEAnnotatorTest.java
@@ -7,7 +7,6 @@
 package de.julielab.jcore.ae.coordbaseline.main;
 
 import de.julielab.jcore.types.*;
-import junit.framework.TestCase;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.cas.FSIterator;
@@ -17,13 +16,16 @@
 import org.apache.uima.resource.ResourceInitializationException;
 import org.apache.uima.resource.ResourceSpecifier;
 import org.apache.uima.util.XMLInputSource;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.util.ArrayList;
 
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
-public class EEEAnnotatorTest extends TestCase
+
+public class EEEAnnotatorTest
 {
 	private static final Logger LOGGER = LoggerFactory.getLogger(EEEAnnotatorTest.class);
 	private static final String LOGGER_PROPERTIES = "src/test/java/log4j.properties";
@@ -33,12 +35,7 @@ public class EEEAnnotatorTest extends TestCase
 	private static final String EEE2 = "simple upstream and downstream sequence elements";
 	private static final String TEST_DESC = "src/test/resources/desc/EEEAnnotatorTest.xml";
 	
-/*--------------------------------------------------------------------------------*/
-	protected void setUp() throws Exception 
-	{
-		super.setUp();
-	} // of setUp	
-/*--------------------------------------------------------------------------------*/
+
 	public void initCas(JCas jcas) 
 	{
 		jcas.reset();
@@ -538,6 +535,8 @@ public void initCas(JCas jcas)
 		entity3.addToIndexes();
 	} // of initCas
 /*--------------------------------------------------------------------------------*/
+
+	@Test
 	public void testProcess() 
 	{
 		XMLInputSource descriptor = null; 
@@ -575,7 +574,7 @@ public void testProcess()
 		try
 		{
 			ae.process(jcas, null);			
-			assertTrue("Invalid JCas!", checkJCas(jcas));
+			assertTrue(checkJCas(jcas), "Invalid JCas!");
 			
 			
 		} // of try 
diff --git a/jcore-coordination-baseline-ae/src/test/java/de/julielab/jcore/ae/coordbaseline/main/EllipsisAnnotatorTest.java b/jcore-coordination-baseline-ae/src/test/java/de/julielab/jcore/ae/coordbaseline/main/EllipsisAnnotatorTest.java
index 749371a51..94d697619 100644
--- a/jcore-coordination-baseline-ae/src/test/java/de/julielab/jcore/ae/coordbaseline/main/EllipsisAnnotatorTest.java
+++ b/jcore-coordination-baseline-ae/src/test/java/de/julielab/jcore/ae/coordbaseline/main/EllipsisAnnotatorTest.java
@@ -7,7 +7,6 @@
 package de.julielab.jcore.ae.coordbaseline.main;
 
 import de.julielab.jcore.types.*;
-import junit.framework.TestCase;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.cas.FSIterator;
@@ -17,12 +16,15 @@
 import org.apache.uima.resource.ResourceInitializationException;
 import org.apache.uima.resource.ResourceSpecifier;
 import org.apache.uima.util.XMLInputSource;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.util.ArrayList;
 
-public class EllipsisAnnotatorTest extends TestCase
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class EllipsisAnnotatorTest
 {
 	private static final Logger LOGGER = LoggerFactory.getLogger(EllipsisAnnotatorTest.class);
 	private static final String LOGGER_PROPERTIES = "src/test/java/log4j.properties";
@@ -30,12 +32,7 @@ public class EllipsisAnnotatorTest extends TestCase
 	private static final String ellipsis1 = "X cells, Y cells, and Z cells";
 	private static final String ellipsis2 = "simple upstream sequence elements and simple downstream sequence elements";
 	private static final String TEST_DESC = "src/test/resources/desc/EllipsisAnnotatorTest.xml";
-/*--------------------------------------------------------------------------------*/
-	protected void setUp() throws Exception 
-	{
-		super.setUp();
-	} // of setUp	
-/*--------------------------------------------------------------------------------*/
+
 	public void initCas(JCas jcas) 
 	{
 		jcas.reset();
@@ -697,6 +694,8 @@ public void initCas(JCas jcas)
 		c26.addToIndexes();
 	} // of initCas	
 /*---------------------------------------------------------------------------*/
+
+	@Test
 	public void testProcess() 
 	{
 		XMLInputSource descriptor = null; 
@@ -734,7 +733,7 @@ public void testProcess()
 		try
 		{
 			ae.process(jcas, null);			
-			assertTrue("Invalid JCas!", checkJCas(jcas));
+			assertTrue(checkJCas(jcas), "Invalid JCas!");
 		} // of try 
 		catch (Exception e)
 		{
diff --git a/jcore-cord19-reader/pom.xml b/jcore-cord19-reader/pom.xml
index b77f93e91..d9f7736d7 100644
--- a/jcore-cord19-reader/pom.xml
+++ b/jcore-cord19-reader/pom.xml
@@ -48,8 +48,8 @@
             <version>${jcore-utilities-version}</version>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
         <dependency>
             <groupId>org.assertj</groupId>
diff --git a/jcore-cord19-reader/src/test/java/de/julielab/jcore/reader/cord19/Cord19MultiplierReaderTest.java b/jcore-cord19-reader/src/test/java/de/julielab/jcore/reader/cord19/Cord19MultiplierReaderTest.java
index f7a8e8fcf..0453a1cde 100644
--- a/jcore-cord19-reader/src/test/java/de/julielab/jcore/reader/cord19/Cord19MultiplierReaderTest.java
+++ b/jcore-cord19-reader/src/test/java/de/julielab/jcore/reader/cord19/Cord19MultiplierReaderTest.java
@@ -15,7 +15,7 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.nio.file.Path;
 import java.util.Collection;
diff --git a/jcore-cord19-reader/src/test/java/de/julielab/jcore/reader/cord19/JsonFormatTest.java b/jcore-cord19-reader/src/test/java/de/julielab/jcore/reader/cord19/JsonFormatTest.java
index dba932cac..5e39b79d0 100644
--- a/jcore-cord19-reader/src/test/java/de/julielab/jcore/reader/cord19/JsonFormatTest.java
+++ b/jcore-cord19-reader/src/test/java/de/julielab/jcore/reader/cord19/JsonFormatTest.java
@@ -4,7 +4,7 @@
 import de.julielab.jcore.reader.cord19.jsonformat.Affiliation;
 import de.julielab.jcore.reader.cord19.jsonformat.Author;
 import de.julielab.jcore.reader.cord19.jsonformat.Cord19Document;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.IOException;
 import java.nio.file.Path;
diff --git a/jcore-coreference-writer/pom.xml b/jcore-coreference-writer/pom.xml
index ee4c26044..1bafb6e13 100644
--- a/jcore-coreference-writer/pom.xml
+++ b/jcore-coreference-writer/pom.xml
@@ -41,8 +41,8 @@
             <version>${jcore-version}</version>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <name>JCoRe Coreference Writer</name>
diff --git a/jcore-coreference-writer/src/main/java/de/julielab/jcore/consumer/coreference/CoreferenceWriter.java b/jcore-coreference-writer/src/main/java/de/julielab/jcore/consumer/coreference/CoreferenceWriter.java
index 32613e57d..c85dcfa82 100644
--- a/jcore-coreference-writer/src/main/java/de/julielab/jcore/consumer/coreference/CoreferenceWriter.java
+++ b/jcore-coreference-writer/src/main/java/de/julielab/jcore/consumer/coreference/CoreferenceWriter.java
@@ -1,7 +1,6 @@
 package de.julielab.jcore.consumer.coreference;
 
 import de.julielab.java.utilities.FileUtilities;
-import de.julielab.jcore.types.Abbreviation;
 import de.julielab.jcore.types.CorefExpression;
 import de.julielab.jcore.types.CorefRelation;
 import de.julielab.jcore.utility.JCoReTools;
@@ -15,17 +14,12 @@
 import org.apache.uima.fit.descriptor.ConfigurationParameter;
 import org.apache.uima.fit.descriptor.ResourceMetaData;
 import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.cas.FSArray;
-import org.apache.uima.jcas.tcas.Annotation;
 import org.apache.uima.resource.ResourceInitializationException;
 
 import java.io.File;
 import java.io.IOException;
 import java.io.OutputStream;
-import java.util.HashMap;
 import java.util.Iterator;
-import java.util.Map;
-import java.util.Spliterators;
 
 @ResourceMetaData(name = "JCoRe Coreference Writer", description = "Writes co-reference annotation to a text file.")
 public class CoreferenceWriter extends JCasAnnotator_ImplBase {
diff --git a/jcore-cpe-db-runner/pom.xml b/jcore-cpe-db-runner/pom.xml
index d84ab5a84..62e879169 100644
--- a/jcore-cpe-db-runner/pom.xml
+++ b/jcore-cpe-db-runner/pom.xml
@@ -71,8 +71,8 @@
             <version>${project.parent.version}</version>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <url>https://github.com/JULIELab/jcore-base/tree/master/jcore-cpe-db-runner</url>
diff --git a/jcore-ct-reader/pom.xml b/jcore-ct-reader/pom.xml
index ac50c8cdb..0630444f6 100644
--- a/jcore-ct-reader/pom.xml
+++ b/jcore-ct-reader/pom.xml
@@ -41,8 +41,8 @@
             <version>${jcore-utilities-version}</version>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <name>JCoRe Clinical Trials Reader</name>
diff --git a/jcore-ct-reader/src/test/java/de/julielab/jcore/reader/ct/ClinicalTrialsReaderTest.java b/jcore-ct-reader/src/test/java/de/julielab/jcore/reader/ct/ClinicalTrialsReaderTest.java
index b1aa75967..140b19874 100644
--- a/jcore-ct-reader/src/test/java/de/julielab/jcore/reader/ct/ClinicalTrialsReaderTest.java
+++ b/jcore-ct-reader/src/test/java/de/julielab/jcore/reader/ct/ClinicalTrialsReaderTest.java
@@ -11,13 +11,13 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.IOException;
 import java.util.Collection;
 
 import static org.assertj.core.api.Assertions.assertThat;
-import static org.junit.Assert.*;
+import static org.junit.jupiter.api.Assertions.*;
 
 /**
  * Unit tests for jcore-ct-reader.
diff --git a/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBMultiplierReaderTest.java b/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBMultiplierReaderTest.java
index 579613897..11aa0d9ab 100644
--- a/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBMultiplierReaderTest.java
+++ b/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBMultiplierReaderTest.java
@@ -12,22 +12,22 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
-import org.junit.BeforeClass;
 import org.junit.ClassRule;
-import org.junit.Test;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
 import org.testcontainers.containers.PostgreSQLContainer;
 
 import java.io.IOException;
 import java.sql.SQLException;
 
 import static de.julielab.jcore.reader.db.TableReaderConstants.*;
-import static org.junit.Assert.*;
+import static org.junit.jupiter.api.Assertions.*;
 
 public class DBMultiplierReaderTest {
     @ClassRule
     public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer();
 
-    @BeforeClass
+    @BeforeAll
     public static void setup() throws SQLException {
         DataBaseConnector dbc = DBTestUtils.getDataBaseConnector(postgres);
         try (final CoStoSysConnection ignore = dbc.obtainOrReserveConnection()) {
diff --git a/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBMultiplierTest.java b/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBMultiplierTest.java
index 2816f9535..fa378c49e 100644
--- a/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBMultiplierTest.java
+++ b/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBMultiplierTest.java
@@ -19,9 +19,9 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
-import org.junit.BeforeClass;
 import org.junit.ClassRule;
-import org.junit.Test;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.testcontainers.containers.PostgreSQLContainer;
@@ -32,14 +32,14 @@
 import java.sql.SQLException;
 
 import static de.julielab.jcore.reader.db.TableReaderConstants.*;
-import static org.junit.Assert.*;
+import static org.junit.jupiter.api.Assertions.*;
 
 public class DBMultiplierTest {
     private final static Logger log = LoggerFactory.getLogger(DBMultiplierTest.class);
     @ClassRule
     public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer();
 
-    @BeforeClass
+    @BeforeAll
     public static void setup() throws SQLException, IOException {
         DataBaseConnector dbc = DBTestUtils.getDataBaseConnector(postgres);
         dbc.reserveConnection();
diff --git a/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBReaderTest.java b/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBReaderTest.java
index 8ed7c86bf..015d3e3f5 100644
--- a/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBReaderTest.java
+++ b/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBReaderTest.java
@@ -12,9 +12,9 @@
 import org.apache.uima.fit.factory.CollectionReaderFactory;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
-import org.junit.BeforeClass;
 import org.junit.ClassRule;
-import org.junit.Test;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.testcontainers.containers.PostgreSQLContainer;
@@ -25,14 +25,14 @@
 import java.sql.SQLException;
 
 import static de.julielab.jcore.reader.db.TableReaderConstants.*;
-import static org.junit.Assert.*;
+import static org.junit.jupiter.api.Assertions.*;
 
 
 public class DBReaderTest {
     @ClassRule
     public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer();
 
-    @BeforeClass
+    @BeforeAll
     public static void setup() throws SQLException {
         DataBaseConnector dbc = DBTestUtils.getDataBaseConnector(postgres);
         dbc.reserveConnection();
diff --git a/jcore-descriptor-creator/pom.xml b/jcore-descriptor-creator/pom.xml
index aae843561..0c410747d 100644
--- a/jcore-descriptor-creator/pom.xml
+++ b/jcore-descriptor-creator/pom.xml
@@ -46,8 +46,8 @@
             <version>${jcore-types-version}</version>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <licenses>
diff --git a/jcore-descriptor-creator/src/test/java/de/julielab/jcore/misc/DescriptorCreatorTest.java b/jcore-descriptor-creator/src/test/java/de/julielab/jcore/misc/DescriptorCreatorTest.java
index 534fadda9..41a146892 100644
--- a/jcore-descriptor-creator/src/test/java/de/julielab/jcore/misc/DescriptorCreatorTest.java
+++ b/jcore-descriptor-creator/src/test/java/de/julielab/jcore/misc/DescriptorCreatorTest.java
@@ -1,27 +1,24 @@
 package de.julielab.jcore.misc;
 
-import static java.util.stream.Collectors.joining;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
+import de.julielab.java.utilities.IOStreamUtilities;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
 
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.nio.file.Path;
-import java.util.Arrays;
-import java.util.Optional;
 import java.util.stream.Stream;
 
-import de.julielab.java.utilities.IOStreamUtilities;
-import org.apache.commons.io.FileUtils;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Test;
+import static java.util.stream.Collectors.joining;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class DescriptorCreatorTest {
 	
-	@BeforeClass
-	@AfterClass
+	@BeforeAll
+	@AfterAll
 	public static void shutdown() throws IOException {
 		//FileUtils.deleteDirectory(new File(Arrays.asList("src", "test", "resources", "de").stream().collect(joining(File.separator))));
 	}
diff --git a/jcore-descriptor-creator/src/test/java/de/julielab/jcore/reader/testreader/TestReader.java b/jcore-descriptor-creator/src/test/java/de/julielab/jcore/reader/testreader/TestReader.java
index 937c00e4d..36e70a5cc 100644
--- a/jcore-descriptor-creator/src/test/java/de/julielab/jcore/reader/testreader/TestReader.java
+++ b/jcore-descriptor-creator/src/test/java/de/julielab/jcore/reader/testreader/TestReader.java
@@ -1,12 +1,12 @@
 package de.julielab.jcore.reader.testreader;
 
-import java.io.IOException;
-
 import org.apache.uima.cas.CAS;
 import org.apache.uima.collection.CollectionException;
 import org.apache.uima.collection.CollectionReader_ImplBase;
 import org.apache.uima.util.Progress;
 
+import java.io.IOException;
+
 public class TestReader extends CollectionReader_ImplBase {
 
 	@Override
diff --git a/jcore-dta-reader/pom.xml b/jcore-dta-reader/pom.xml
index b47f53e66..7f5b51af2 100644
--- a/jcore-dta-reader/pom.xml
+++ b/jcore-dta-reader/pom.xml
@@ -70,7 +70,11 @@
 			<groupId>org.slf4j</groupId>
 			<artifactId>slf4j-api</artifactId>
 		</dependency>
-	<dependency><groupId>junit</groupId><artifactId>junit</artifactId></dependency></dependencies>
+	<dependency>
+		<groupId>org.junit.jupiter</groupId>
+		<artifactId>junit-jupiter-engine</artifactId>
+	</dependency>
+	</dependencies>
 	<description>Reader for DTA files (German digital humanities corpus)</description>
 	<organization>
 		<url>http://www.julielab.de</url>
diff --git a/jcore-dta-reader/src/test/java/de/julielab/jcore/reader/dta/DTAFileReaderTest.java b/jcore-dta-reader/src/test/java/de/julielab/jcore/reader/dta/DTAFileReaderTest.java
index eadb4101b..0e2b0f995 100644
--- a/jcore-dta-reader/src/test/java/de/julielab/jcore/reader/dta/DTAFileReaderTest.java
+++ b/jcore-dta-reader/src/test/java/de/julielab/jcore/reader/dta/DTAFileReaderTest.java
@@ -24,13 +24,13 @@
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.FSArray;
 import org.apache.uima.jcas.tcas.Annotation;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.File;
 import java.io.FileInputStream;
 import java.util.*;
 
-import static org.junit.Assert.*;
+import static org.junit.jupiter.api.Assertions.*;
 
 public class DTAFileReaderTest {
 
diff --git a/jcore-dta-reader/src/test/java/de/julielab/jcore/reader/dta/util/DTAUtilsTest.java b/jcore-dta-reader/src/test/java/de/julielab/jcore/reader/dta/util/DTAUtilsTest.java
index 3f9780106..b971e6cfc 100644
--- a/jcore-dta-reader/src/test/java/de/julielab/jcore/reader/dta/util/DTAUtilsTest.java
+++ b/jcore-dta-reader/src/test/java/de/julielab/jcore/reader/dta/util/DTAUtilsTest.java
@@ -14,14 +14,14 @@
 import de.julielab.jcore.reader.dta.DTAFileReaderTest.Version;
 import de.julielab.jcore.types.extensions.dta.DTABelletristik;
 import org.apache.uima.jcas.JCas;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class DTAUtilsTest {
 
diff --git a/jcore-ec-code-ae/pom.xml b/jcore-ec-code-ae/pom.xml
index 05cc496a5..6f0c55f60 100644
--- a/jcore-ec-code-ae/pom.xml
+++ b/jcore-ec-code-ae/pom.xml
@@ -28,8 +28,8 @@
             <scope>test</scope>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <organization>
diff --git a/jcore-ec-code-ae/src/test/java/de/julielab/jcore/ae/ec/ECNumberAnnotatorTest.java b/jcore-ec-code-ae/src/test/java/de/julielab/jcore/ae/ec/ECNumberAnnotatorTest.java
index 8408f9d56..3960c59a9 100644
--- a/jcore-ec-code-ae/src/test/java/de/julielab/jcore/ae/ec/ECNumberAnnotatorTest.java
+++ b/jcore-ec-code-ae/src/test/java/de/julielab/jcore/ae/ec/ECNumberAnnotatorTest.java
@@ -18,9 +18,9 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
-import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 
 public class ECNumberAnnotatorTest {
 	@Test
diff --git a/jcore-elasticsearch-consumer/pom.xml b/jcore-elasticsearch-consumer/pom.xml
index 540e2f7d1..a4fed0dc9 100644
--- a/jcore-elasticsearch-consumer/pom.xml
+++ b/jcore-elasticsearch-consumer/pom.xml
@@ -68,6 +68,10 @@
             <artifactId>icu4j</artifactId>
             <version>55.1</version>
         </dependency>
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-lang3</artifactId>
+        </dependency>
         <dependency>
             <groupId>org.mapdb</groupId>
             <artifactId>mapdb</artifactId>
@@ -89,6 +93,10 @@
             <artifactId>logback-classic</artifactId>
             <scope>test</scope>
         </dependency>
+        <dependency>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter</artifactId>
+        </dependency>
     </dependencies>
     <organization>
         <name>JULIE Lab Jena, Germany</name>
diff --git a/jcore-elasticsearch-consumer/src/test/java/de/julielab/jcore/consumer/es/AbstractFieldGeneratorTest.java b/jcore-elasticsearch-consumer/src/test/java/de/julielab/jcore/consumer/es/AbstractFieldGeneratorTest.java
index 68292673e..588489b8c 100644
--- a/jcore-elasticsearch-consumer/src/test/java/de/julielab/jcore/consumer/es/AbstractFieldGeneratorTest.java
+++ b/jcore-elasticsearch-consumer/src/test/java/de/julielab/jcore/consumer/es/AbstractFieldGeneratorTest.java
@@ -367,6 +367,6 @@ public void testAddArray() {
         builder.registerTypeAdapter(PreanalyzedFieldValue.class,
                 new PreanalyzedFieldValue.PreanalyzedFieldValueGsonAdapter());
         Gson gson = builder.create();
-        assertEquals("{\"field\":[\"eins\",\"zwei\"]}", gson.toJson(d));
+        assertEquals("{\"field\":[\"eins\",\"zwei\"]}",  gson.toJson(d));
 	}
 }
diff --git a/jcore-elasticsearch-consumer/src/test/java/de/julielab/jcore/consumer/es/ElasticSearchConsumerIT.java b/jcore-elasticsearch-consumer/src/test/java/de/julielab/jcore/consumer/es/ElasticSearchConsumerIT.java
index 849c005df..36a71fbe0 100644
--- a/jcore-elasticsearch-consumer/src/test/java/de/julielab/jcore/consumer/es/ElasticSearchConsumerIT.java
+++ b/jcore-elasticsearch-consumer/src/test/java/de/julielab/jcore/consumer/es/ElasticSearchConsumerIT.java
@@ -1,17 +1,15 @@
 package de.julielab.jcore.consumer.es;
 
-import de.julielab.java.utilities.IOStreamUtilities;
 import de.julielab.jcore.consumer.es.preanalyzed.Document;
 import de.julielab.jcore.consumer.es.preanalyzed.RawToken;
 import de.julielab.jcore.types.Header;
 import org.apache.uima.analysis_engine.AnalysisEngine;
-import org.apache.uima.cas.CASException;
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
-import org.junit.BeforeClass;
 import org.junit.ClassRule;
-import org.junit.Test;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.testcontainers.containers.GenericContainer;
@@ -19,12 +17,11 @@
 import org.testcontainers.containers.output.Slf4jLogConsumer;
 import org.testcontainers.shaded.com.fasterxml.jackson.databind.ObjectMapper;
 
-import java.net.HttpURLConnection;
 import java.net.URL;
 import java.time.Duration;
 import java.util.Map;
 
-import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 
 public class ElasticSearchConsumerIT {
     public static final String TEST_INDEX = "testindex";
@@ -39,7 +36,7 @@ public class ElasticSearchConsumerIT {
             .withStartupTimeout(Duration.ofMinutes(2))
             .withEnv("cluster.name", TEST_CLUSTER);
 
-    @BeforeClass
+    @BeforeAll
     public static void setup() {
         Slf4jLogConsumer toStringConsumer = new Slf4jLogConsumer(log);
         es.followOutput(toStringConsumer, OutputFrame.OutputType.STDOUT);
diff --git a/jcore-embedding-writer/pom.xml b/jcore-embedding-writer/pom.xml
index d5d5304a6..d294419fd 100644
--- a/jcore-embedding-writer/pom.xml
+++ b/jcore-embedding-writer/pom.xml
@@ -28,14 +28,18 @@
             <version>${jcore-types-version}</version>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-utilities</artifactId>
             <version>${jcore-utilities-version}</version>
         </dependency>
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-lang3</artifactId>
+        </dependency>
         <dependency>
             <groupId>org.assertj</groupId>
             <artifactId>assertj-core</artifactId>
diff --git a/jcore-embedding-writer/src/test/java/de/julielab/jcore/consumer/ew/DecoderTest.java b/jcore-embedding-writer/src/test/java/de/julielab/jcore/consumer/ew/DecoderTest.java
index 85ed94597..d34cdd780 100644
--- a/jcore-embedding-writer/src/test/java/de/julielab/jcore/consumer/ew/DecoderTest.java
+++ b/jcore-embedding-writer/src/test/java/de/julielab/jcore/consumer/ew/DecoderTest.java
@@ -2,7 +2,7 @@
 
 import org.apache.commons.lang3.tuple.Pair;
 import org.assertj.core.data.Offset;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.*;
 import java.nio.ByteBuffer;
diff --git a/jcore-embedding-writer/src/test/java/de/julielab/jcore/consumer/ew/EmbeddingWriterTest.java b/jcore-embedding-writer/src/test/java/de/julielab/jcore/consumer/ew/EmbeddingWriterTest.java
index 6a80fbcbe..8d19cf1ce 100644
--- a/jcore-embedding-writer/src/test/java/de/julielab/jcore/consumer/ew/EmbeddingWriterTest.java
+++ b/jcore-embedding-writer/src/test/java/de/julielab/jcore/consumer/ew/EmbeddingWriterTest.java
@@ -10,7 +10,7 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.DoubleArray;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.File;
 import java.io.FileInputStream;
diff --git a/jcore-event-flattener-ae/pom.xml b/jcore-event-flattener-ae/pom.xml
index 423a141b9..91788a532 100644
--- a/jcore-event-flattener-ae/pom.xml
+++ b/jcore-event-flattener-ae/pom.xml
@@ -1,48 +1,56 @@
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-  <modelVersion>4.0.0</modelVersion>
-  <parent>
-    <groupId>de.julielab</groupId>
-    <artifactId>jcore-base</artifactId>
-    <version>2.6.0-SNAPSHOT</version>
-  </parent>
-  <artifactId>jcore-event-flattener-ae</artifactId>
-  <name>JCoRe Event Flattener AE</name>
-  <description>This component reads de.julielab.jcore.types.EventMention annotations and converts event structures into de.julielab.jcore.types.ext.FlattenedRelation annotation. The purpose of FlattenedRelations is to represent complex event structures in a more simple manner. This can be helpful for visualization or further processing.</description>
-  <dependencies>
-  	<dependency>
-  		<groupId>org.slf4j</groupId>
-  		<artifactId>slf4j-api</artifactId>
-  	</dependency>
-  	<dependency>
-  		<groupId>com.google.guava</groupId>
-  		<artifactId>guava</artifactId>
-  		<version>18.0</version>
-  		<scope>test</scope>
-  	</dependency>
-  	<dependency>
-  		<groupId>de.julielab</groupId>
-  		<artifactId>jcore-types</artifactId>
-  		<version>${jcore-types-version}</version>
-  	</dependency>
-  	<dependency>
-  		<groupId>ch.qos.logback</groupId>
-  		<artifactId>logback-classic</artifactId>
-  		<scope>test</scope>
-  	</dependency>
-  	<dependency>
-  		<groupId>de.julielab</groupId>
-  		<artifactId>jcore-descriptor-creator</artifactId>
-  	</dependency>
-  <dependency><groupId>junit</groupId><artifactId>junit</artifactId></dependency></dependencies>
-  <organization>
-  	<name>JULIE Lab Jena, Germany</name>
-  	<url>http://www.julielab.de</url>
-  </organization>
-  <url>https://github.com/JULIELab/jcore-base/tree/master/jcore-event-flattener-ae</url>
-	<licenses>
-		<license>
-			<name>BSD-2-Clause</name>
-			<url>https://opensource.org/licenses/BSD-2-Clause</url>
-		</license>
-	</licenses>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>de.julielab</groupId>
+        <artifactId>jcore-base</artifactId>
+        <version>2.6.0-SNAPSHOT</version>
+    </parent>
+    <artifactId>jcore-event-flattener-ae</artifactId>
+    <name>JCoRe Event Flattener AE</name>
+    <description>This component reads de.julielab.jcore.types.EventMention annotations and converts event structures
+        into de.julielab.jcore.types.ext.FlattenedRelation annotations. The purpose of FlattenedRelations is to represent
+        complex event structures in a simpler manner. This can be helpful for visualization or further processing.
+    </description>
+    <dependencies>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>com.google.guava</groupId>
+            <artifactId>guava</artifactId>
+            <version>18.0</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-types</artifactId>
+            <version>${jcore-types-version}</version>
+        </dependency>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-descriptor-creator</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
+        </dependency>
+    </dependencies>
+    <organization>
+        <name>JULIE Lab Jena, Germany</name>
+        <url>http://www.julielab.de</url>
+    </organization>
+    <url>https://github.com/JULIELab/jcore-base/tree/master/jcore-event-flattener-ae</url>
+    <licenses>
+        <license>
+            <name>BSD-2-Clause</name>
+            <url>https://opensource.org/licenses/BSD-2-Clause</url>
+        </license>
+    </licenses>
 </project>
diff --git a/jcore-event-flattener-ae/src/test/java/de/julielab/jules/ae/EventFlattenerTest.java b/jcore-event-flattener-ae/src/test/java/de/julielab/jules/ae/EventFlattenerTest.java
index ea1c0d4c3..8af8ce297 100644
--- a/jcore-event-flattener-ae/src/test/java/de/julielab/jules/ae/EventFlattenerTest.java
+++ b/jcore-event-flattener-ae/src/test/java/de/julielab/jules/ae/EventFlattenerTest.java
@@ -13,108 +13,108 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.FileInputStream;
 import java.util.Set;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class EventFlattenerTest {
 
-	@SuppressWarnings("unused")
-	private final static Logger log = LoggerFactory
-			.getLogger(EventFlattenerTest.class);
+    @SuppressWarnings("unused")
+    private final static Logger log = LoggerFactory
+            .getLogger(EventFlattenerTest.class);
 
-	@Test
-	public void testProcess() throws Exception, SecurityException {
-		JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-all-types");
-		XmiCasDeserializer.deserialize(new FileInputStream(
-				"src/test/resources/21499307.xmi"), jCas
-				.getCas());
+    @Test
+    public void testProcess() throws Exception, SecurityException {
+        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-all-types");
+        XmiCasDeserializer.deserialize(new FileInputStream(
+                "src/test/resources/21499307.xmi"), jCas
+                .getCas());
 
-		AnalysisEngine flattener = AnalysisEngineFactory
-				.createEngine(EventFlattener.class);
-		flattener.process(jCas);
+        AnalysisEngine flattener = AnalysisEngineFactory
+                .createEngine(EventFlattener.class);
+        flattener.process(jCas);
 
-		FSIterator<Annotation> sentit = jCas.getAnnotationIndex(Sentence.type)
-				.iterator();
-		int sentenceCounter = 1;
-		// we are interested in the 8th sentence because there is the only complex event structure there
-		Sentence interestingSent = null;
-		while (sentit.hasNext()) {
-			Sentence s = (Sentence) sentit.next();
-			switch (sentenceCounter) {
-			case 3:
-				assertEquals("Wrong number of flattened events in sentence "
-						+ s.getCoveredText(), 2, countEventsInSentence(s));
-				break;
-			case 5:
-				assertEquals("Wrong number of flattened events in sentence "
-						+ s.getCoveredText(), 1, countEventsInSentence(s));
-				break;
-			case 6:
-				assertEquals("Wrong number of flattened events in sentence "
-						+ s.getCoveredText(), 2, countEventsInSentence(s));
-				break;
-			case 7:
-				assertEquals("Wrong number of flattened events in sentence "
-						+ s.getCoveredText(), 4, countEventsInSentence(s));
-				break;
-			case 8:
-				assertEquals("Wrong number of flattened events in sentence "
-						+ s.getCoveredText(), 6, countEventsInSentence(s));
-				interestingSent = s;
-				break;
-			case 9:
-				assertEquals("Wrong number of flattened events in sentence "
-						+ s.getCoveredText(), 1, countEventsInSentence(s));
-				break;
-			default:
-				assertEquals("Wrong number of flattened events in sentence "
-						+ s.getCoveredText(), 0, countEventsInSentence(s));
-			}
-			sentenceCounter++;
-		}
-		FSIterator<Annotation> flateventit = jCas
-				.getAnnotationIndex(FlattenedRelation.type).subiterator(interestingSent);
-		while (flateventit.hasNext()) {
-			FlattenedRelation fr = (FlattenedRelation) flateventit.next();
-			if (fr.getId().equals("FE" + 13)) {
-				// All arguments there?
-				Set<String> expectedArguments = Sets.newHashSet("anti-apoptotic Bcl-2", "CSN5");
-				for (int i = 0; i < fr.getArguments().size(); ++i)
-					assertTrue("Unexpected argument: " + fr.getArguments(i).getCoveredText(), expectedArguments.remove(fr.getArguments(i).getCoveredText()));
-				assertTrue("Expected arguments not found in relation: " + expectedArguments, expectedArguments.isEmpty());
-				// Arguments correctly divided into agents and patients?
-				assertEquals(1, fr.getAgents().size());
-				assertEquals(1, fr.getPatients().size());
-				assertEquals("CSN5", fr.getAgents(0).getCoveredText());
-				assertEquals("anti-apoptotic Bcl-2", fr.getPatients(0).getCoveredText());
-				// All participating (sub-)events there?
-				assertEquals(3, fr.getRelations().size());
-				Set<String> expectedRelations = Sets.newHashSet("depletion", "caused", "expression");
-				for (int i = 0; i < fr.getRelations().size(); ++i)
-					assertTrue("Unexpected relation: " + fr.getRelations(i).getCoveredText(), expectedRelations.remove(fr.getRelations(i).getCoveredText()));
-				assertTrue(expectedRelations.isEmpty());
-			}
-		}
-		
-	}
+        FSIterator<Annotation> sentit = jCas.getAnnotationIndex(Sentence.type)
+                .iterator();
+        int sentenceCounter = 1;
+        // we are interested in the 8th sentence because it contains the only complex event structure
+        Sentence interestingSent = null;
+        while (sentit.hasNext()) {
+            Sentence s = (Sentence) sentit.next();
+            switch (sentenceCounter) {
+                case 3:
+                    assertEquals(2, countEventsInSentence(s), "Wrong number of flattened events in sentence "
+                            + s.getCoveredText());
+                    break;
+                case 5:
+                    assertEquals(1, countEventsInSentence(s), "Wrong number of flattened events in sentence "
+                            + s.getCoveredText());
+                    break;
+                case 6:
+                    assertEquals(2, countEventsInSentence(s), "Wrong number of flattened events in sentence "
+                            + s.getCoveredText());
+                    break;
+                case 7:
+                    assertEquals(4, countEventsInSentence(s), "Wrong number of flattened events in sentence "
+                            + s.getCoveredText());
+                    break;
+                case 8:
+                    assertEquals(6, countEventsInSentence(s), "Wrong number of flattened events in sentence "
+                            + s.getCoveredText());
+                    interestingSent = s;
+                    break;
+                case 9:
+                    assertEquals(1, countEventsInSentence(s), "Wrong number of flattened events in sentence "
+                            + s.getCoveredText());
+                    break;
+                default:
+                    assertEquals(0, countEventsInSentence(s), "Wrong number of flattened events in sentence "
+                            + s.getCoveredText());
+            }
+            sentenceCounter++;
+        }
+        FSIterator<Annotation> flateventit = jCas
+                .getAnnotationIndex(FlattenedRelation.type).subiterator(interestingSent);
+        while (flateventit.hasNext()) {
+            FlattenedRelation fr = (FlattenedRelation) flateventit.next();
+            if (fr.getId().equals("FE" + 13)) {
+                // All arguments there?
+                Set<String> expectedArguments = Sets.newHashSet("anti-apoptotic Bcl-2", "CSN5");
+                for (int i = 0; i < fr.getArguments().size(); ++i)
+                    assertTrue(expectedArguments.remove(fr.getArguments(i).getCoveredText()), "Unexpected argument: " + fr.getArguments(i).getCoveredText());
+                assertTrue(expectedArguments.isEmpty(), "Expected arguments not found in relation: " + expectedArguments);
+                // Arguments correctly divided into agents and patients?
+                assertEquals(1, fr.getAgents().size());
+                assertEquals(1, fr.getPatients().size());
+                assertEquals("CSN5", fr.getAgents(0).getCoveredText());
+                assertEquals("anti-apoptotic Bcl-2", fr.getPatients(0).getCoveredText());
+                // All participating (sub-)events there?
+                assertEquals(3, fr.getRelations().size());
+                Set<String> expectedRelations = Sets.newHashSet("depletion", "caused", "expression");
+                for (int i = 0; i < fr.getRelations().size(); ++i)
+                    assertTrue(expectedRelations.remove(fr.getRelations(i).getCoveredText()), "Unexpected relation: " + fr.getRelations(i).getCoveredText());
+                assertTrue(expectedRelations.isEmpty());
+            }
+        }
 
-	private int countEventsInSentence(Sentence s) throws CASRuntimeException,
-			CASException {
-		FSIterator<Annotation> flateventit = s.getCAS().getJCas()
-				.getAnnotationIndex(FlattenedRelation.type).subiterator(s);
-		int count = 0;
-		while (flateventit.hasNext()) {
-			@SuppressWarnings("unused")
-			Annotation annotation = (Annotation) flateventit.next();
-			count++;
-		}
-		return count;
-	}
+    }
+
+    private int countEventsInSentence(Sentence s) throws CASRuntimeException,
+            CASException {
+        FSIterator<Annotation> flateventit = s.getCAS().getJCas()
+                .getAnnotationIndex(FlattenedRelation.type).subiterator(s);
+        int count = 0;
+        while (flateventit.hasNext()) {
+            @SuppressWarnings("unused")
+            Annotation annotation = (Annotation) flateventit.next();
+            count++;
+        }
+        return count;
+    }
 }
diff --git a/jcore-feature-value-replacement-ae/pom.xml b/jcore-feature-value-replacement-ae/pom.xml
index f3e120d76..8395dfefd 100644
--- a/jcore-feature-value-replacement-ae/pom.xml
+++ b/jcore-feature-value-replacement-ae/pom.xml
@@ -34,8 +34,8 @@
             <artifactId>jcore-descriptor-creator</artifactId>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <organization>
diff --git a/jcore-feature-value-replacement-ae/src/test/java/de/julielab/jcore/ae/fvr/FeatureValueReplacementAnnotatorTest.java b/jcore-feature-value-replacement-ae/src/test/java/de/julielab/jcore/ae/fvr/FeatureValueReplacementAnnotatorTest.java
index efb4df831..81958daf1 100644
--- a/jcore-feature-value-replacement-ae/src/test/java/de/julielab/jcore/ae/fvr/FeatureValueReplacementAnnotatorTest.java
+++ b/jcore-feature-value-replacement-ae/src/test/java/de/julielab/jcore/ae/fvr/FeatureValueReplacementAnnotatorTest.java
@@ -10,11 +10,11 @@
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.FSArray;
 import org.apache.uima.resource.ExternalResourceDescription;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.File;
 
-import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 
 public class FeatureValueReplacementAnnotatorTest {
 	@Test
diff --git a/jcore-file-reader/pom.xml b/jcore-file-reader/pom.xml
index 0de264d3b..179cc5647 100644
--- a/jcore-file-reader/pom.xml
+++ b/jcore-file-reader/pom.xml
@@ -26,8 +26,8 @@
             <artifactId>julielab-java-utilities</artifactId>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <licenses>
diff --git a/jcore-file-reader/src/test/java/de/julielab/jcore/reader/file/main/FileReaderTest.java b/jcore-file-reader/src/test/java/de/julielab/jcore/reader/file/main/FileReaderTest.java
index f5f2f9cd7..f1e440d04 100644
--- a/jcore-file-reader/src/test/java/de/julielab/jcore/reader/file/main/FileReaderTest.java
+++ b/jcore-file-reader/src/test/java/de/julielab/jcore/reader/file/main/FileReaderTest.java
@@ -29,16 +29,16 @@
 import org.apache.uima.util.CasCreationUtils;
 import org.apache.uima.util.InvalidXMLException;
 import org.apache.uima.util.XMLInputSource;
-import org.junit.BeforeClass;
-import org.junit.Test;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
 
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.IOException;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class FileReaderTest {
 
@@ -107,7 +107,7 @@ public class FileReaderTest {
 	 private static final String FILE_ARTIFACT_4 = "data/onlyToken/8563171.txt";
 
 
-	@BeforeClass
+	@BeforeAll
 
 	public static void setUp() throws Exception {
 		writeArtifact(ARTIFACT_1, FILE_ARTIFACT_1);
diff --git a/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/NerTaggingResponse.java b/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/NerTaggingResponse.java
index f28e7bd22..b876a0731 100644
--- a/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/NerTaggingResponse.java
+++ b/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/NerTaggingResponse.java
@@ -1,7 +1,6 @@
 package de.julielab.jcore.ae.flairner;
 
 import java.util.List;
-import java.util.stream.Stream;
 
 /**
  * <p>A class to assemble the response from FLAIR for a tagging request. The found entities are returned as
diff --git a/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/PythonConnector.java b/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/PythonConnector.java
index 2ba03c82c..f7a09ba7b 100644
--- a/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/PythonConnector.java
+++ b/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/PythonConnector.java
@@ -1,7 +1,6 @@
 package de.julielab.jcore.ae.flairner;
 
 import de.julielab.jcore.types.Sentence;
-import org.apache.commons.lang3.tuple.Pair;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 
 import java.io.IOException;
diff --git a/jcore-flair-ner-ae/src/test/resources/1681975.xmi b/jcore-flair-ner-ae/src/test/resources/1681975.xmi
index 467d07936..04b9b74fa 100644
--- a/jcore-flair-ner-ae/src/test/resources/1681975.xmi
+++ b/jcore-flair-ner-ae/src/test/resources/1681975.xmi
@@ -1 +1,5 @@
-<?xml version="1.0" encoding="UTF-8"?><xmi:XMI xmlns:xmi="http://www.omg.org/XMI" xmlns:muc7="http:///de/julielab/jcore/types/muc7.ecore" xmlns:stemnet="http:///de/julielab/jcore/types/stemnet.ecore" xmlns:tcas="http:///uima/tcas.ecore" xmlns:cas="http:///uima/cas.ecore" xmlns:ct="http:///de/julielab/jcore/types/ct.ecore" xmlns:medical="http:///de/julielab/jcore/types/medical.ecore" xmlns:pubmed="http:///de/julielab/jcore/types/pubmed.ecore" xmlns:ext="http:///de/julielab/jcore/types/ext.ecore" xmlns:types="http:///de/julielab/jcore/types.ecore" xmlns:bootstrep="http:///de/julielab/jcore/types/bootstrep.ecore" xmlns:wikipedia="http:///de/julielab/jcore/types/wikipedia.ecore" xmlns:noNamespace="http:///uima/noNamespace.ecore" xmlns:ace="http:///de/julielab/jcore/types/ace.ecore" xmlns:dta="http:///de/julielab/jcore/types/extensions/dta.ecore" xmlns:mantra="http:///de/julielab/jcore/types/mantra.ecore" xmlns:mmax="http:///de/julielab/jcore/types/mmax.ecore" xmi:version="2.0"><cas:NULL xmi:id="0"/><types:AbstractText xmi:id="1" sofa="431" begin="125" end="1859"/><types:Chemical xmi:id="9" sofa="431" begin="0" end="0" registryNumber="0" nameOfSubstance="Purinones"/><types:Chemical xmi:id="18" sofa="431" begin="0" end="0" registryNumber="0" nameOfSubstance="Vasodilator Agents"/><types:Chemical xmi:id="86" sofa="431" begin="0" end="0" registryNumber="76898-47-0" nameOfSubstance="15-Hydroxy-11 alpha,9 alpha-(epoxymethano)prosta-5,13-dienoic Acid"/><types:Chemical xmi:id="225" sofa="431" begin="0" end="0" registryNumber="EC 4.6.1.2" nameOfSubstance="Guanylate Cyclase"/><types:Chemical xmi:id="297" sofa="431" begin="0" end="0" registryNumber="G59M7S0WS3" nameOfSubstance="Nitroglycerin"/><types:Chemical xmi:id="306" sofa="431" begin="0" end="0" registryNumber="T42P99266K" nameOfSubstance="Methylene Blue"/><types:Chemical xmi:id="315" sofa="431" begin="0" end="0" registryNumber="0" nameOfSubstance="Nitro Compounds"/><types:Chemical xmi:id="324" sofa="431" begin="0" end="0" 
registryNumber="0" nameOfSubstance="Prostaglandin Endoperoxides, Synthetic"/><types:Chemical xmi:id="333" sofa="431" begin="0" end="0" registryNumber="92454-60-9" nameOfSubstance="FK 409"/><types:Chemical xmi:id="440" sofa="431" begin="0" end="0" registryNumber="EC 3.1.4.35" nameOfSubstance="3',5'-Cyclic-GMP Phosphodiesterases"/><types:Chemical xmi:id="489" sofa="431" begin="0" end="0" registryNumber="GXT25D5DS0" nameOfSubstance="zaprinast"/><types:MeshHeading xmi:id="27" sofa="431" begin="0" end="0" descriptorName="Nitroglycerin" qualifierName="pharmacology" descriptorNameMajorTopic="false" qualifierNameMajorTopic="true"/><types:MeshHeading xmi:id="38" sofa="431" begin="0" end="0" descriptorName="In Vitro Techniques" descriptorNameMajorTopic="false" qualifierNameMajorTopic="false"/><types:MeshHeading xmi:id="49" sofa="431" begin="0" end="0" descriptorName="Methylene Blue" qualifierName="pharmacology" descriptorNameMajorTopic="false" qualifierNameMajorTopic="false"/><types:MeshHeading xmi:id="60" sofa="431" begin="0" end="0" descriptorName="3',5'-Cyclic-GMP Phosphodiesterases" qualifierName="antagonists &amp; inhibitors" descriptorNameMajorTopic="false" qualifierNameMajorTopic="false"/><types:MeshHeading xmi:id="95" sofa="431" begin="0" end="0" descriptorName="Coronary Vessels" qualifierName="drug effects" descriptorNameMajorTopic="false" qualifierNameMajorTopic="true"/><types:MeshHeading xmi:id="106" sofa="431" begin="0" end="0" descriptorName="Purinones" qualifierName="pharmacology" descriptorNameMajorTopic="false" qualifierNameMajorTopic="false"/><types:MeshHeading xmi:id="117" sofa="431" begin="0" end="0" descriptorName="Prostaglandin Endoperoxides, Synthetic" qualifierName="pharmacology" descriptorNameMajorTopic="false" qualifierNameMajorTopic="false"/><types:MeshHeading xmi:id="128" sofa="431" begin="0" end="0" descriptorName="Nitro Compounds" qualifierName="pharmacology" descriptorNameMajorTopic="false" qualifierNameMajorTopic="true"/><types:MeshHeading 
xmi:id="179" sofa="431" begin="0" end="0" descriptorName="Vasodilator Agents" qualifierName="pharmacology" descriptorNameMajorTopic="false" qualifierNameMajorTopic="true"/><types:MeshHeading xmi:id="258" sofa="431" begin="0" end="0" descriptorName="Dogs" descriptorNameMajorTopic="false" qualifierNameMajorTopic="false"/><types:MeshHeading xmi:id="269" sofa="431" begin="0" end="0" descriptorName="Muscle Relaxation" qualifierName="drug effects" descriptorNameMajorTopic="false" qualifierNameMajorTopic="false"/><types:MeshHeading xmi:id="367" sofa="431" begin="0" end="0" descriptorName="Male" descriptorNameMajorTopic="false" qualifierNameMajorTopic="false"/><types:MeshHeading xmi:id="398" sofa="431" begin="0" end="0" descriptorName="15-Hydroxy-11 alpha,9 alpha-(epoxymethano)prosta-5,13-dienoic Acid" descriptorNameMajorTopic="false" qualifierNameMajorTopic="false"/><types:MeshHeading xmi:id="409" sofa="431" begin="0" end="0" descriptorName="Guanylate Cyclase" qualifierName="antagonists &amp; inhibitors" descriptorNameMajorTopic="false" qualifierNameMajorTopic="false"/><types:MeshHeading xmi:id="420" sofa="431" begin="0" end="0" descriptorName="Female" descriptorNameMajorTopic="false" qualifierNameMajorTopic="false"/><types:MeshHeading xmi:id="449" sofa="431" begin="0" end="0" descriptorName="Animals" descriptorNameMajorTopic="false" qualifierNameMajorTopic="false"/><pubmed:ManualDescriptor xmi:id="71" sofa="431" begin="0" end="0" meSHList="380" chemicalList="342" dBInfoList="460" keywordList="438" geneSymbolList=""/><types:AuthorInfo xmi:id="139" sofa="431" begin="0" end="0" foreName="N" lastName="Taira" initials="N"/><types:AuthorInfo xmi:id="151" sofa="431" begin="0" end="0" foreName="H" affiliation="Department of Pharmacology, Tohoku University School of Medicine, Sendai, Japan." 
lastName="Yamada" initials="H"/><types:AuthorInfo xmi:id="355" sofa="431" begin="0" end="0" foreName="F" lastName="Yoneyama" initials="F"/><types:AuthorInfo xmi:id="498" sofa="431" begin="0" end="0" foreName="K" lastName="Satoh" initials="K"/><types:Date xmi:id="163" sofa="431" begin="0" end="0" day="0" month="7" year="1991"/><types:Date xmi:id="215" sofa="431" begin="0" end="0" day="0" month="7" year="1991"/><types:Date xmi:id="462" sofa="431" begin="0" end="0" day="0" month="7" year="1991"/><types:Title xmi:id="190" sofa="431" begin="0" end="124" titleType="document"/><types:Journal xmi:id="198" sofa="431" begin="0" end="0" name="Comparative Study" pubDate="163" ISSN="0007-1188" volume="103" title="British journal of pharmacology" shortTitle="Br J Pharmacol" issue="3" pages="1713-8" nlmId="7502536"/><types:Journal xmi:id="280" sofa="431" begin="0" end="0" name="Journal Article" pubDate="462" ISSN="0007-1188" volume="103" title="British journal of pharmacology" shortTitle="Br J Pharmacol" issue="3" pages="1713-8" nlmId="7502536"/><types:Journal xmi:id="472" sofa="431" begin="0" end="0" name="Research Support, Non-U.S. 
Gov't" pubDate="215" ISSN="0007-1188" volume="103" title="British journal of pharmacology" shortTitle="Br J Pharmacol" issue="3" pages="1713-8" nlmId="7502536"/><pubmed:Header xmi:id="234" sofa="431" begin="0" end="0" source="1681975" docId="1681975" truncated="false" authors="173" pubTypeList="198 280 472" language="eng" citationStatus="MEDLINE" otherIDs="378"/><types:Sentence xmi:id="526" sofa="431" begin="0" end="124" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="582" sofa="431" begin="128" end="345" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="534" sofa="431" begin="349" end="461" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="542" sofa="431" begin="462" end="556" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="590" sofa="431" begin="560" end="745" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="558" sofa="431" begin="746" end="819" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="510" sofa="431" begin="823" end="1106" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="574" sofa="431" begin="1110" end="1330" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="550" sofa="431" begin="1331" end="1500" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="566" sofa="431" begin="1504" end="1696" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="518" sofa="431" begin="1697" end="1859" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:PennBioIEPOSTag xmi:id="6294" sofa="431" begin="0" end="10" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="4545" sofa="431" begin="11" end="13" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="5223" sofa="431" begin="14" end="17" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="6162" sofa="431" begin="18" end="25" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NNS"/><types:PennBioIEPOSTag xmi:id="12006" sofa="431" begin="26" end="28" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="5334" sofa="431" begin="29" end="32" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="9615" sofa="431" begin="33" end="38" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="1864" sofa="431" begin="39" end="50" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="1010" sofa="431" begin="51" end="56" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="10564" sofa="431" begin="57" end="61" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="11943" sofa="431" begin="62" end="67" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="4105" sofa="431" begin="68" end="70" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="11890" sofa="431" begin="71" end="84" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="12431" sofa="431" begin="85" end="87" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="2275" sofa="431" begin="88" end="96" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" 
value="VBN"/><types:PennBioIEPOSTag xmi:id="11005" sofa="431" begin="97" end="105" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="3353" sofa="431" begin="106" end="112" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="8121" sofa="431" begin="113" end="115" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="12015" sofa="431" begin="116" end="119" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="3095" sofa="431" begin="120" end="123" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="3569" sofa="431" begin="123" end="124" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="."/><types:PennBioIEPOSTag xmi:id="7951" sofa="431" begin="128" end="131" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="2899" sofa="431" begin="132" end="144" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="6783" sofa="431" begin="145" end="152" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NNS"/><types:PennBioIEPOSTag xmi:id="5299" sofa="431" begin="153" end="155" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="3703" sofa="431" begin="156" end="161" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="11495" sofa="431" begin="161" end="162" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value=","/><types:PennBioIEPOSTag xmi:id="11114" sofa="431" begin="163" end="164" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="974" sofa="431" begin="165" 
end="168" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="5895" sofa="431" begin="169" end="185" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="7670" sofa="431" begin="186" end="197" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBN"/><types:PennBioIEPOSTag xmi:id="9942" sofa="431" begin="198" end="202" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="8505" sofa="431" begin="203" end="204" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="855" sofa="431" begin="205" end="214" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="3740" sofa="431" begin="215" end="222" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="4426" sofa="431" begin="222" end="223" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value=","/><types:PennBioIEPOSTag xmi:id="2012" sofa="431" begin="224" end="228" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBD"/><types:PennBioIEPOSTag xmi:id="8759" sofa="431" begin="229" end="237" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBN"/><types:PennBioIEPOSTag xmi:id="3520" sofa="431" begin="238" end="242" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="10822" sofa="431" begin="243" end="248" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="10455" sofa="431" begin="249" end="251" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="2625" sofa="431" begin="252" end="265" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="10047" sofa="431" begin="266" end="268" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="7573" sofa="431" begin="269" end="277" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBN"/><types:PennBioIEPOSTag xmi:id="8274" sofa="431" begin="278" end="286" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="6411" sofa="431" begin="287" end="293" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="3877" sofa="431" begin="294" end="299" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NNS"/><types:PennBioIEPOSTag xmi:id="12065" sofa="431" begin="300" end="302" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="12234" sofa="431" begin="303" end="306" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="11391" sofa="431" begin="307" end="310" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="7746" sofa="431" begin="311" end="321" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBD"/><types:PennBioIEPOSTag xmi:id="9597" sofa="431" begin="322" end="326" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="11928" sofa="431" begin="327" end="333" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="6673" sofa="431" begin="334" end="335" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="5059" sofa="431" begin="335" end="341" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="6303" sofa="431" begin="342" end="343" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="10671" sofa="431" begin="343" end="344" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="11803" sofa="431" begin="344" end="345" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="."/><types:PennBioIEPOSTag xmi:id="5582" sofa="431" begin="349" end="354" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="5792" sofa="431" begin="355" end="356" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="5904" sofa="431" begin="356" end="362" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="6907" sofa="431" begin="362" end="363" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="11794" sofa="431" begin="363" end="364" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="HYPH"/><types:PennBioIEPOSTag xmi:id="2390" sofa="431" begin="364" end="370" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="10956" sofa="431" begin="371" end="372" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="11332" sofa="431" begin="372" end="373" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="12243" sofa="431" begin="374" end="377" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CC"/><types:PennBioIEPOSTag xmi:id="2186" sofa="431" begin="378" end="391" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="7220" sofa="431" begin="392" end="393" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="3943" sofa="431" begin="393" end="398" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="5317" sofa="431" begin="398" end="399" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="11358" sofa="431" begin="399" end="400" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="HYPH"/><types:PennBioIEPOSTag xmi:id="5308" sofa="431" begin="400" end="406" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="9523" sofa="431" begin="407" end="408" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="598" sofa="431" begin="408" end="409" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="3781" sofa="431" begin="410" end="414" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="11349" sofa="431" begin="415" end="423" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBD"/><types:PennBioIEPOSTag xmi:id="9874" sofa="431" begin="424" end="425" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="867" sofa="431" begin="426" end="439" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="1267" sofa="431" begin="439" end="440" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="HYPH"/><types:PennBioIEPOSTag xmi:id="2951" sofa="431" begin="440" end="449" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="4411" sofa="431" begin="450" end="460" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="4962" sofa="431" begin="460" end="461" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="."/><types:PennBioIEPOSTag xmi:id="11093" sofa="431" begin="462" end="472" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="8884" sofa="431" begin="473" end="475" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="8650" sofa="431" begin="476" end="480" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="3814" sofa="431" begin="481" end="487" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NNS"/><types:PennBioIEPOSTag xmi:id="6603" sofa="431" begin="488" end="494" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBD"/><types:PennBioIEPOSTag xmi:id="7679" sofa="431" begin="495" end="499" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="876" sofa="431" begin="500" end="505" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="1228" sofa="431" begin="506" end="509" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBD"/><types:PennBioIEPOSTag xmi:id="6620" sofa="431" begin="510" end="515" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="RB"/><types:PennBioIEPOSTag xmi:id="7138" sofa="431" begin="516" end="518" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="9810" sofa="431" begin="519" end="524" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" 
value="NNS"/><types:PennBioIEPOSTag xmi:id="6484" sofa="431" begin="525" end="529" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="RBR"/><types:PennBioIEPOSTag xmi:id="10089" sofa="431" begin="530" end="536" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="4350" sofa="431" begin="537" end="541" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="10127" sofa="431" begin="542" end="555" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="10874" sofa="431" begin="555" end="556" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="."/><types:PennBioIEPOSTag xmi:id="7058" sofa="431" begin="560" end="570" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="4655" sofa="431" begin="571" end="585" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NNS"/><types:PennBioIEPOSTag xmi:id="2592" sofa="431" begin="586" end="588" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="3216" sofa="431" begin="589" end="602" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="7854" sofa="431" begin="603" end="604" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="10242" sofa="431" begin="604" end="610" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="3986" sofa="431" begin="611" end="612" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="1334" sofa="431" begin="612" end="613" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="10225" sofa="431" 
begin="614" end="617" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CC"/><types:PennBioIEPOSTag xmi:id="5154" sofa="431" begin="618" end="623" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="5521" sofa="431" begin="624" end="625" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="7885" sofa="431" begin="625" end="626" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="10398" sofa="431" begin="627" end="628" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="SYM"/><types:PennBioIEPOSTag xmi:id="11599" sofa="431" begin="629" end="635" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="10101" sofa="431" begin="636" end="637" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="3757" sofa="431" begin="637" end="638" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="3270" sofa="431" begin="639" end="647" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBD"/><types:PennBioIEPOSTag xmi:id="6448" sofa="431" begin="648" end="657" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="3077" sofa="431" begin="658" end="670" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="11057" sofa="431" begin="671" end="684" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="10729" sofa="431" begin="685" end="686" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="7990" sofa="431" begin="686" end="692" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="11634" sofa="431" begin="693" end="696" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="11367" sofa="431" begin="696" end="697" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="9006" sofa="431" begin="698" end="704" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NNS"/><types:PennBioIEPOSTag xmi:id="4359" sofa="431" begin="704" end="705" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value=","/><types:PennBioIEPOSTag xmi:id="4276" sofa="431" begin="706" end="713" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NNS"/><types:PennBioIEPOSTag xmi:id="1610" sofa="431" begin="714" end="724" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBN"/><types:PennBioIEPOSTag xmi:id="2689" sofa="431" begin="725" end="729" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="4684" sofa="431" begin="730" end="744" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="2399" sofa="431" begin="744" end="745" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="."/><types:PennBioIEPOSTag xmi:id="8893" sofa="431" begin="746" end="755" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="5434" sofa="431" begin="756" end="768" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="2874" sofa="431" begin="769" end="782" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="11646" sofa="431" begin="783" end="784" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" 
value="-LRB-"/><types:PennBioIEPOSTag xmi:id="2556" sofa="431" begin="784" end="790" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="2300" sofa="431" begin="791" end="794" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="9253" sofa="431" begin="794" end="795" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="3621" sofa="431" begin="796" end="802" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NNS"/><types:PennBioIEPOSTag xmi:id="7380" sofa="431" begin="803" end="807" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBD"/><types:PennBioIEPOSTag xmi:id="10587" sofa="431" begin="808" end="818" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="910" sofa="431" begin="818" end="819" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="."/><types:PennBioIEPOSTag xmi:id="3446" sofa="431" begin="823" end="826" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="9568" sofa="431" begin="827" end="840" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="9898" sofa="431" begin="840" end="841" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="HYPH"/><types:PennBioIEPOSTag xmi:id="2021" sofa="431" begin="841" end="851" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="4693" sofa="431" begin="852" end="858" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NNS"/><types:PennBioIEPOSTag xmi:id="5849" sofa="431" begin="859" end="862" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="10538" sofa="431" 
begin="863" end="876" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="3769" sofa="431" begin="877" end="880" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CC"/><types:PennBioIEPOSTag xmi:id="11105" sofa="431" begin="881" end="886" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="10547" sofa="431" begin="887" end="891" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBD"/><types:PennBioIEPOSTag xmi:id="12139" sofa="431" begin="892" end="899" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBN"/><types:PennBioIEPOSTag xmi:id="3413" sofa="431" begin="900" end="902" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="TO"/><types:PennBioIEPOSTag xmi:id="6833" sofa="431" begin="903" end="906" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="7328" sofa="431" begin="907" end="912" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="9726" sofa="431" begin="913" end="915" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="1782" sofa="431" begin="916" end="925" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="9674" sofa="431" begin="926" end="930" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="9606" sofa="431" begin="931" end="932" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="9465" sofa="431" begin="932" end="933" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="5452" sofa="431" begin="934" end="935" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="SYM"/><types:PennBioIEPOSTag xmi:id="2616" sofa="431" begin="936" end="942" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="1497" sofa="431" begin="943" end="944" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="HYPH"/><types:PennBioIEPOSTag xmi:id="2460" sofa="431" begin="945" end="948" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="3995" sofa="431" begin="949" end="955" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="643" sofa="431" begin="956" end="957" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="9118" sofa="431" begin="957" end="958" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="2774" sofa="431" begin="958" end="959" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value=","/><types:PennBioIEPOSTag xmi:id="3191" sofa="431" begin="960" end="962" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="6551" sofa="431" begin="963" end="972" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="7780" sofa="431" begin="973" end="975" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="5998" sofa="431" begin="976" end="983" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="3086" sofa="431" begin="984" end="993" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="738" sofa="431" begin="994" end="1001" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" 
value="NN"/><types:PennBioIEPOSTag xmi:id="9627" sofa="431" begin="1001" end="1002" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value=","/><types:PennBioIEPOSTag xmi:id="4376" sofa="431" begin="1003" end="1006" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CC"/><types:PennBioIEPOSTag xmi:id="824" sofa="431" begin="1007" end="1009" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="TO"/><types:PennBioIEPOSTag xmi:id="2730" sofa="431" begin="1010" end="1013" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="1672" sofa="431" begin="1014" end="1018" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="11293" sofa="431" begin="1019" end="1021" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="11777" sofa="431" begin="1022" end="1031" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="2488" sofa="431" begin="1032" end="1033" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="9999" sofa="431" begin="1033" end="1034" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="5038" sofa="431" begin="1035" end="1036" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="SYM"/><types:PennBioIEPOSTag xmi:id="1362" sofa="431" begin="1037" end="1043" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="2356" sofa="431" begin="1044" end="1045" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="HYPH"/><types:PennBioIEPOSTag xmi:id="1506" sofa="431" begin="1046" end="1049" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag 
xmi:id="6355" sofa="431" begin="1050" end="1056" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="4096" sofa="431" begin="1057" end="1058" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="5425" sofa="431" begin="1058" end="1059" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="1371" sofa="431" begin="1059" end="1060" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value=","/><types:PennBioIEPOSTag xmi:id="5713" sofa="431" begin="1061" end="1063" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="1722" sofa="431" begin="1064" end="1073" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="6795" sofa="431" begin="1074" end="1076" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="6898" sofa="431" begin="1077" end="1083" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="6972" sofa="431" begin="1084" end="1087" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="6263" sofa="431" begin="1088" end="1105" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="6656" sofa="431" begin="1105" end="1106" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="."/><types:PennBioIEPOSTag xmi:id="9046" sofa="431" begin="1110" end="1115" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="9220" sofa="431" begin="1116" end="1124" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="989" sofa="431" begin="1125" end="1127" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="9559" sofa="431" begin="1128" end="1136" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="5886" sofa="431" begin="1137" end="1145" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NNS"/><types:PennBioIEPOSTag xmi:id="3902" sofa="431" begin="1146" end="1148" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="TO"/><types:PennBioIEPOSTag xmi:id="2101" sofa="431" begin="1149" end="1152" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="8867" sofa="431" begin="1153" end="1162" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="RB"/><types:PennBioIEPOSTag xmi:id="8224" sofa="431" begin="1162" end="1163" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="HYPH"/><types:PennBioIEPOSTag xmi:id="10680" sofa="431" begin="1163" end="1172" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="4646" sofa="431" begin="1173" end="1186" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="8784" sofa="431" begin="1187" end="1189" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="8032" sofa="431" begin="1190" end="1203" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="885" sofa="431" begin="1204" end="1205" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="7771" sofa="431" begin="1205" end="1211" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="5606" sofa="431" begin="1212" end="1213" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="6201" sofa="431" begin="1213" end="1214" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="8750" sofa="431" begin="1214" end="1215" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value=","/><types:PennBioIEPOSTag xmi:id="4087" sofa="431" begin="1216" end="1219" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="10118" sofa="431" begin="1220" end="1224" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="4504" sofa="431" begin="1225" end="1229" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="12024" sofa="431" begin="1230" end="1235" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="6254" sofa="431" begin="1236" end="1238" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="8041" sofa="431" begin="1239" end="1244" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="8423" sofa="431" begin="1245" end="1248" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBD"/><types:PennBioIEPOSTag xmi:id="1019" sofa="431" begin="1249" end="1252" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="RB"/><types:PennBioIEPOSTag xmi:id="10933" sofa="431" begin="1253" end="1259" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VB"/><types:PennBioIEPOSTag xmi:id="7630" sofa="431" begin="1260" end="1273" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="RB"/><types:PennBioIEPOSTag xmi:id="11183" sofa="431" begin="1273" end="1274" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value=","/><types:PennBioIEPOSTag xmi:id="10905" sofa="431" begin="1275" end="1283" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="9098" sofa="431" begin="1284" end="1288" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="11622" sofa="431" begin="1289" end="1291" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="5405" sofa="431" begin="1292" end="1305" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="4675" sofa="431" begin="1306" end="1315" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBD"/><types:PennBioIEPOSTag xmi:id="5655" sofa="431" begin="1316" end="1321" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="RB"/><types:PennBioIEPOSTag xmi:id="6812" sofa="431" begin="1322" end="1324" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="9577" sofa="431" begin="1325" end="1329" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="RB"/><types:PennBioIEPOSTag xmi:id="4979" sofa="431" begin="1329" end="1330" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="."/><types:PennBioIEPOSTag xmi:id="5858" sofa="431" begin="1331" end="1336" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="6758" sofa="431" begin="1337" end="1345" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="4457" sofa="431" begin="1346" end="1348" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="TO"/><types:PennBioIEPOSTag xmi:id="6749" sofa="431" begin="1349" end="1352" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="1402" sofa="431" begin="1353" end="1362" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="RB"/><types:PennBioIEPOSTag xmi:id="1069" sofa="431" begin="1362" end="1363" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="HYPH"/><types:PennBioIEPOSTag xmi:id="7067" sofa="431" begin="1363" end="1372" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="5948" sofa="431" begin="1373" end="1386" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="3104" sofa="431" begin="1387" end="1389" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="3336" sofa="431" begin="1390" end="1395" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="11382" sofa="431" begin="1396" end="1397" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="9498" sofa="431" begin="1397" end="1403" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="2817" sofa="431" begin="1404" end="1405" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="9392" sofa="431" begin="1405" end="1406" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="2653" sofa="431" begin="1406" end="1407" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value=","/><types:PennBioIEPOSTag xmi:id="11171" sofa="431" begin="1408" end="1411" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="7492" sofa="431" begin="1412" end="1416" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="11997" sofa="431" begin="1417" end="1421" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="5198" sofa="431" begin="1422" end="1427" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="10339" sofa="431" begin="1428" end="1430" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="8403" sofa="431" begin="1431" end="1436" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="12201" sofa="431" begin="1437" end="1446" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBD"/><types:PennBioIEPOSTag xmi:id="10136" sofa="431" begin="1447" end="1452" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="6386" sofa="431" begin="1453" end="1454" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="9127" sofa="431" begin="1455" end="1459" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="12279" sofa="431" begin="1460" end="1463" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CC"/><types:PennBioIEPOSTag xmi:id="5591" sofa="431" begin="1464" end="1468" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="12192" sofa="431" begin="1469" end="1471" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="10798" sofa="431" begin="1472" end="1485" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="10480" sofa="431" begin="1486" end="1491" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="RB"/><types:PennBioIEPOSTag xmi:id="7715" sofa="431" begin="1492" end="1494" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="8823" sofa="431" begin="1495" end="1499" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="RB"/><types:PennBioIEPOSTag xmi:id="4385" sofa="431" begin="1499" end="1500" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="."/><types:PennBioIEPOSTag xmi:id="729" sofa="431" begin="1504" end="1509" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="1001" sofa="431" begin="1510" end="1517" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NNS"/><types:PennBioIEPOSTag xmi:id="5696" sofa="431" begin="1518" end="1525" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBP"/><types:PennBioIEPOSTag xmi:id="6436" sofa="431" begin="1526" end="1530" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="7024" sofa="431" begin="1531" end="1534" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="2799" sofa="431" begin="1535" end="1547" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="12316" sofa="431" begin="1548" end="1554" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="3731" sofa="431" begin="1555" end="1557" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="7340" sofa="431" begin="1558" end="1563" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="10810" sofa="431" begin="1563" end="1564" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value=","/><types:PennBioIEPOSTag xmi:id="5050" sofa="431" begin="1565" end="1569" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="3245" sofa="431" begin="1570" end="1574" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="1900" sofa="431" begin="1575" end="1577" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="4756" sofa="431" begin="1578" end="1591" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="8563" sofa="431" begin="1591" end="1592" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value=","/><types:PennBioIEPOSTag xmi:id="4988" sofa="431" begin="1593" end="1595" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBZ"/><types:PennBioIEPOSTag xmi:id="11966" sofa="431" begin="1596" end="1599" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="11916" sofa="431" begin="1600" end="1602" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="TO"/><types:PennBioIEPOSTag xmi:id="2808" sofa="431" begin="1603" end="1613" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="949" sofa="431" begin="1614" end="1616" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="2381" sofa="431" begin="1617" end="1624" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="10178" sofa="431" begin="1625" end="1634" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="7349" sofa="431" begin="1635" end="1642" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="12291" sofa="431" begin="1643" end="1646" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CC"/><types:PennBioIEPOSTag xmi:id="4162" sofa="431" begin="1647" end="1648" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="7653" sofa="431" begin="1649" end="1658" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="3236" sofa="431" begin="1659" end="1667" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="777" sofa="431" begin="1668" end="1670" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="8432" sofa="431" begin="1671" end="1684" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="4997" sofa="431" begin="1685" end="1691" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="11679" sofa="431" begin="1692" end="1695" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="1619" sofa="431" begin="1695" end="1696" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="."/><types:PennBioIEPOSTag xmi:id="11547" sofa="431" begin="1697" end="1704" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="RB"/><types:PennBioIEPOSTag xmi:id="11203" sofa="431" begin="1704" end="1705" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value=","/><types:PennBioIEPOSTag xmi:id="2092" sofa="431" begin="1706" end="1714" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBN"/><types:PennBioIEPOSTag xmi:id="1585" sofa="431" begin="1715" end="1719" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="9883" sofa="431" begin="1720" end="1733" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="4341" sofa="431" begin="1733" end="1734" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value=","/><types:PennBioIEPOSTag xmi:id="615" sofa="431" begin="1735" end="1740" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="EX"/><types:PennBioIEPOSTag xmi:id="6824" sofa="431" begin="1741" end="1744" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBD"/><types:PennBioIEPOSTag xmi:id="10038" sofa="431" begin="1745" end="1749" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJR"/><types:PennBioIEPOSTag xmi:id="10275" sofa="431" begin="1750" end="1754" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="AFX"/><types:PennBioIEPOSTag xmi:id="1957" sofa="431" begin="1754" end="1755" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="HYPH"/><types:PennBioIEPOSTag xmi:id="11416" sofa="431" begin="1755" end="1764" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="3971" sofa="431" begin="1765" end="1767" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="TO"/><types:PennBioIEPOSTag xmi:id="5461" sofa="431" begin="1768" end="1771" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="5573" sofa="431" begin="1772" end="1780" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="10738" sofa="431" begin="1781" end="1788" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NNS"/><types:PennBioIEPOSTag xmi:id="2601" sofa="431" begin="1789" end="1791" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="682" sofa="431" begin="1792" end="1797" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="7999" sofa="431" begin="1798" end="1801" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CC"/><types:PennBioIEPOSTag xmi:id="9021" sofa="431" begin="1802" end="1812" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="RB"/><types:PennBioIEPOSTag xmi:id="6515" sofa="431" begin="1813" end="1819" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="10351" sofa="431" begin="1820" end="1825" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="AFX"/><types:PennBioIEPOSTag xmi:id="9841" sofa="431" begin="1825" end="1826" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="HYPH"/><types:PennBioIEPOSTag xmi:id="10161" sofa="431" begin="1826" end="1835" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="11907" sofa="431" begin="1836" end="1843" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="3033" sofa="431" begin="1844" end="1847" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="8112" sofa="431" begin="1848" end="1851" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="1282" sofa="431" begin="1852" end="1858" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NNS"/><types:PennBioIEPOSTag xmi:id="10327" sofa="431" begin="1858" end="1859" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="."/><types:Lemma xmi:id="6476" sofa="431" begin="0" end="10" value="comparison"/><types:Lemma xmi:id="3749" sofa="431" 
begin="11" end="13" value="of"/><types:Lemma xmi:id="11958" sofa="431" begin="14" end="17" value="the"/><types:Lemma xmi:id="10615" sofa="431" begin="18" end="25" value="effect"/><types:Lemma xmi:id="3069" sofa="431" begin="26" end="28" value="of"/><types:Lemma xmi:id="9532" sofa="431" begin="29" end="32" value="the"/><types:Lemma xmi:id="11704" sofa="431" begin="33" end="38" value="novel"/><types:Lemma xmi:id="6138" sofa="431" begin="39" end="50" value="vasodilator"/><types:Lemma xmi:id="938" sofa="431" begin="51" end="56" value="FK409"/><types:Lemma xmi:id="9357" sofa="431" begin="57" end="61" value="with"/><types:Lemma xmi:id="11786" sofa="431" begin="62" end="67" value="those"/><types:Lemma xmi:id="2076" sofa="431" begin="68" end="70" value="of"/><types:Lemma xmi:id="1346" sofa="431" begin="71" end="84" value="nitroglycerin"/><types:Lemma xmi:id="6171" sofa="431" begin="85" end="87" value="in"/><types:Lemma xmi:id="6916" sofa="431" begin="88" end="96" value="isolate"/><types:Lemma xmi:id="7590" sofa="431" begin="97" end="105" value="coronary"/><types:Lemma xmi:id="10847" sofa="431" begin="106" end="112" value="artery"/><types:Lemma xmi:id="10170" sofa="431" begin="113" end="115" value="of"/><types:Lemma xmi:id="8768" sofa="431" begin="116" end="119" value="the"/><types:Lemma xmi:id="4235" sofa="431" begin="120" end="123" value="dog"/><types:Lemma xmi:id="6286" sofa="431" begin="123" end="124" value="."/><types:Lemma xmi:id="7372" sofa="431" begin="128" end="131" value="the"/><types:Lemma xmi:id="11341" sofa="431" begin="132" end="144" value="vasorelaxant"/><types:Lemma xmi:id="2284" sofa="431" begin="145" end="152" value="effect"/><types:Lemma xmi:id="3581" sofa="431" begin="153" end="155" value="of"/><types:Lemma xmi:id="8024" sofa="431" begin="156" end="161" value="FK409"/><types:Lemma xmi:id="11049" sofa="431" begin="161" end="162" value=","/><types:Lemma xmi:id="8050" sofa="431" begin="163" end="164" value="a"/><types:Lemma xmi:id="9490" sofa="431" 
begin="165" end="168" value="new"/><types:Lemma xmi:id="7662" sofa="431" begin="169" end="185" value="nitrovasodilator"/><types:Lemma xmi:id="3823" sofa="431" begin="186" end="197" value="synthesize"/><types:Lemma xmi:id="11899" sofa="431" begin="198" end="202" value="from"/><types:Lemma xmi:id="2747" sofa="431" begin="203" end="204" value="a"/><types:Lemma xmi:id="2167" sofa="431" begin="205" end="214" value="microbial"/><types:Lemma xmi:id="4862" sofa="431" begin="215" end="222" value="product"/><types:Lemma xmi:id="2175" sofa="431" begin="222" end="223" value=","/><types:Lemma xmi:id="9457" sofa="431" begin="224" end="228" value="be"/><types:Lemma xmi:id="5326" sofa="431" begin="229" end="237" value="compare"/><types:Lemma xmi:id="8465" sofa="431" begin="238" end="242" value="with"/><types:Lemma xmi:id="8599" sofa="431" begin="243" end="248" value="those"/><types:Lemma xmi:id="11528" sofa="431" begin="249" end="251" value="of"/><types:Lemma xmi:id="2662" sofa="431" begin="252" end="265" value="nitroglycerin"/><types:Lemma xmi:id="11671" sofa="431" begin="266" end="268" value="in"/><types:Lemma xmi:id="4838" sofa="431" begin="269" end="277" value="isolate"/><types:Lemma xmi:id="5022" sofa="431" begin="278" end="286" value="coronary"/><types:Lemma xmi:id="10284" sofa="431" begin="287" end="293" value="artery"/><types:Lemma xmi:id="5275" sofa="431" begin="294" end="299" value="ring"/><types:Lemma xmi:id="4397" sofa="431" begin="300" end="302" value="of"/><types:Lemma xmi:id="7582" sofa="431" begin="303" end="306" value="the"/><types:Lemma xmi:id="3850" sofa="431" begin="307" end="310" value="dog"/><types:Lemma xmi:id="2540" sofa="431" begin="311" end="321" value="contract"/><types:Lemma xmi:id="10008" sofa="431" begin="322" end="326" value="with"/><types:Lemma xmi:id="6725" sofa="431" begin="327" end="333" value="U46619"/><types:Lemma xmi:id="8812" sofa="431" begin="334" end="335" value="("/><types:Lemma xmi:id="10495" sofa="431" begin="335" end="341" 
value="10(-7)"/><types:Lemma xmi:id="691" sofa="431" begin="342" end="343" value="m"/><types:Lemma xmi:id="10110" sofa="431" begin="343" end="344" value=")"/><types:Lemma xmi:id="7446" sofa="431" begin="344" end="345" value="."/><types:Lemma xmi:id="11747" sofa="431" begin="349" end="354" value="FK409"/><types:Lemma xmi:id="8832" sofa="431" begin="355" end="356" value="("/><types:Lemma xmi:id="6347" sofa="431" begin="356" end="362" value="10(-11"/><types:Lemma xmi:id="4603" sofa="431" begin="362" end="363" value=")"/><types:Lemma xmi:id="6981" sofa="431" begin="363" end="364" value="-"/><types:Lemma xmi:id="12090" sofa="431" begin="364" end="370" value="10(-5)"/><types:Lemma xmi:id="11739" sofa="431" begin="371" end="372" value="m"/><types:Lemma xmi:id="6701" sofa="431" begin="372" end="373" value=")"/><types:Lemma xmi:id="6034" sofa="431" begin="374" end="377" value="and"/><types:Lemma xmi:id="3806" sofa="431" begin="378" end="391" value="nitroglycerin"/><types:Lemma xmi:id="6042" sofa="431" begin="392" end="393" value="("/><types:Lemma xmi:id="10447" sofa="431" begin="393" end="398" value="10(-9"/><types:Lemma xmi:id="5940" sofa="431" begin="398" end="399" value=")"/><types:Lemma xmi:id="10639" sofa="431" begin="399" end="400" value="-"/><types:Lemma xmi:id="9775" sofa="431" begin="400" end="406" value="10(-4)"/><types:Lemma xmi:id="9449" sofa="431" begin="407" end="408" value="m"/><types:Lemma xmi:id="11536" sofa="431" begin="408" end="409" value=")"/><types:Lemma xmi:id="9718" sofa="431" begin="410" end="414" value="each"/><types:Lemma xmi:id="10965" sofa="431" begin="415" end="423" value="produce"/><types:Lemma xmi:id="2584" sofa="431" begin="424" end="425" value="a"/><types:Lemma xmi:id="8165" sofa="431" begin="426" end="439" value="concentration"/><types:Lemma xmi:id="11556" sofa="431" begin="439" end="440" value="-"/><types:Lemma xmi:id="7269" sofa="431" begin="440" end="449" value="dependent"/><types:Lemma xmi:id="9751" sofa="431" begin="450" end="460" 
value="relaxation"/><types:Lemma xmi:id="5103" sofa="431" begin="460" end="461" value="."/><types:Lemma xmi:id="1310" sofa="431" begin="462" end="472" value="comparison"/><types:Lemma xmi:id="8776" sofa="431" begin="473" end="475" value="of"/><types:Lemma xmi:id="1028" sofa="431" begin="476" end="480" value="EC50"/><types:Lemma xmi:id="9850" sofa="431" begin="481" end="487" value="value"/><types:Lemma xmi:id="3295" sofa="431" begin="488" end="494" value="show"/><types:Lemma xmi:id="5990" sofa="431" begin="495" end="499" value="that"/><types:Lemma xmi:id="11212" sofa="431" begin="500" end="505" value="FK409"/><types:Lemma xmi:id="607" sofa="431" begin="506" end="509" value="be"/><types:Lemma xmi:id="6082" sofa="431" begin="510" end="515" value="about"/><types:Lemma xmi:id="1925" sofa="431" begin="516" end="518" value="25"/><types:Lemma xmi:id="7960" sofa="431" begin="519" end="524" value="time"/><types:Lemma xmi:id="4554" sofa="431" begin="525" end="529" value="much"/><types:Lemma xmi:id="9586" sofa="431" begin="530" end="536" value="potent"/><types:Lemma xmi:id="1791" sofa="431" begin="537" end="541" value="than"/><types:Lemma xmi:id="2417" sofa="431" begin="542" end="555" value="nitroglycerin"/><types:Lemma xmi:id="8536" sofa="431" begin="555" end="556" value="."/><types:Lemma xmi:id="9384" sofa="431" begin="560" end="570" value="submaximum"/><types:Lemma xmi:id="6665" sofa="431" begin="571" end="585" value="concentration"/><types:Lemma xmi:id="3438" sofa="431" begin="586" end="588" value="of"/><types:Lemma xmi:id="10556" sofa="431" begin="589" end="602" value="nitroglycerin"/><types:Lemma xmi:id="7196" sofa="431" begin="603" end="604" value="("/><types:Lemma xmi:id="10251" sofa="431" begin="604" end="610" value="10(-6)"/><types:Lemma xmi:id="7688" sofa="431" begin="611" end="612" value="m"/><types:Lemma xmi:id="919" sofa="431" begin="612" end="613" value=")"/><types:Lemma xmi:id="6612" sofa="431" begin="614" end="617" value="and"/><types:Lemma xmi:id="10831" 
sofa="431" begin="618" end="623" value="FK409"/><types:Lemma xmi:id="10989" sofa="431" begin="624" end="625" value="("/><types:Lemma xmi:id="7127" sofa="431" begin="625" end="626" value="3"/><types:Lemma xmi:id="9212" sofa="431" begin="627" end="628" value="x"/><types:Lemma xmi:id="10779" sofa="431" begin="629" end="635" value="10(-8)"/><types:Lemma xmi:id="1708" sofa="431" begin="636" end="637" value="m"/><types:Lemma xmi:id="4932" sofa="431" begin="637" end="638" value=")"/><types:Lemma xmi:id="8008" sofa="431" begin="639" end="647" value="elevate"/><types:Lemma xmi:id="11882" sofa="431" begin="648" end="657" value="guanosine"/><types:Lemma xmi:id="11260" sofa="431" begin="658" end="670" value="3':5'-cyclic"/><types:Lemma xmi:id="8856" sofa="431" begin="671" end="684" value="monophosphate"/><types:Lemma xmi:id="3225" sofa="431" begin="685" end="686" value="("/><types:Lemma xmi:id="1949" sofa="431" begin="686" end="692" value="cyclic"/><types:Lemma xmi:id="2642" sofa="431" begin="693" end="696" value="gmp"/><types:Lemma xmi:id="2681" sofa="431" begin="696" end="697" value=")"/><types:Lemma xmi:id="11305" sofa="431" begin="698" end="704" value="level"/><types:Lemma xmi:id="11014" sofa="431" begin="704" end="705" value=","/><types:Lemma xmi:id="11236" sofa="431" begin="706" end="713" value="effect"/><types:Lemma xmi:id="10839" sofa="431" begin="714" end="724" value="associate"/><types:Lemma xmi:id="4592" sofa="431" begin="725" end="729" value="with"/><types:Lemma xmi:id="11614" sofa="431" begin="730" end="744" value="vasorelaxation"/><types:Lemma xmi:id="2739" sofa="431" begin="744" end="745" value="."/><types:Lemma xmi:id="4846" sofa="431" begin="746" end="755" value="adenosine"/><types:Lemma xmi:id="12210" sofa="431" begin="756" end="768" value="3':5'-cyclic"/><types:Lemma xmi:id="8395" sofa="431" begin="769" end="782" value="monophosphate"/><types:Lemma xmi:id="10948" sofa="431" begin="783" end="784" value="("/><types:Lemma xmi:id="11066" sofa="431" begin="784" 
end="790" value="cyclic"/><types:Lemma xmi:id="10423" sofa="431" begin="791" end="794" value="amp"/><types:Lemma xmi:id="8591" sofa="431" begin="794" end="795" value=")"/><types:Lemma xmi:id="1700" sofa="431" begin="796" end="802" value="level"/><types:Lemma xmi:id="9330" sofa="431" begin="803" end="807" value="be"/><types:Lemma xmi:id="750" sofa="431" begin="808" end="818" value="unaffected"/><types:Lemma xmi:id="9759" sofa="431" begin="818" end="819" value="."/><types:Lemma xmi:id="5470" sofa="431" begin="823" end="826" value="the"/><types:Lemma xmi:id="11975" sofa="431" begin="827" end="840" value="concentration"/><types:Lemma xmi:id="12440" sofa="431" begin="840" end="841" value="-"/><types:Lemma xmi:id="7050" sofa="431" begin="841" end="851" value="relaxation"/><types:Lemma xmi:id="10997" sofa="431" begin="852" end="858" value="curve"/><types:Lemma xmi:id="9087" sofa="431" begin="859" end="862" value="for"/><types:Lemma xmi:id="5557" sofa="431" begin="863" end="876" value="nitroglycerin"/><types:Lemma xmi:id="1689" sofa="431" begin="877" end="880" value="and"/><types:Lemma xmi:id="5705" sofa="431" begin="881" end="886" value="FK409"/><types:Lemma xmi:id="2113" sofa="431" begin="887" end="891" value="be"/><types:Lemma xmi:id="9284" sofa="431" begin="892" end="899" value="shift"/><types:Lemma xmi:id="5111" sofa="431" begin="900" end="902" value="to"/><types:Lemma xmi:id="1036" sofa="431" begin="903" end="906" value="the"/><types:Lemma xmi:id="2038" sofa="431" begin="907" end="912" value="right"/><types:Lemma xmi:id="7185" sofa="431" begin="913" end="915" value="by"/><types:Lemma xmi:id="9802" sofa="431" begin="916" end="925" value="methylene"/><types:Lemma xmi:id="652" sofa="431" begin="926" end="930" value="blue"/><types:Lemma xmi:id="7438" sofa="431" begin="931" end="932" value="("/><types:Lemma xmi:id="6804" sofa="431" begin="932" end="933" value="3"/><types:Lemma xmi:id="6948" sofa="431" begin="934" end="935" value="x"/><types:Lemma xmi:id="7824" sofa="431" 
begin="936" end="942" value="10(-6)"/><types:Lemma xmi:id="1972" sofa="431" begin="943" end="944" value="-"/><types:Lemma xmi:id="2826" sofa="431" begin="945" end="948" value="3 x"/><types:Lemma xmi:id="1259" sofa="431" begin="949" end="955" value="10(-5)"/><types:Lemma xmi:id="11874" sofa="431" begin="956" end="957" value="m"/><types:Lemma xmi:id="11989" sofa="431" begin="957" end="958" value=")"/><types:Lemma xmi:id="2634" sofa="431" begin="958" end="959" value=","/><types:Lemma xmi:id="8958" sofa="431" begin="960" end="962" value="a"/><types:Lemma xmi:id="2548" sofa="431" begin="963" end="972" value="inhibitor"/><types:Lemma xmi:id="12173" sofa="431" begin="973" end="975" value="of"/><types:Lemma xmi:id="6007" sofa="431" begin="976" end="983" value="soluble"/><types:Lemma xmi:id="1430" sofa="431" begin="984" end="993" value="guanylate"/><types:Lemma xmi:id="4079" sofa="431" begin="994" end="1001" value="cyclase"/><types:Lemma xmi:id="1326" sofa="431" begin="1001" end="1002" value=","/><types:Lemma xmi:id="10022" sofa="431" begin="1003" end="1006" value="and"/><types:Lemma xmi:id="3345" sofa="431" begin="1007" end="1009" value="to"/><types:Lemma xmi:id="5478" sofa="431" begin="1010" end="1013" value="the"/><types:Lemma xmi:id="1873" sofa="431" begin="1014" end="1018" value="left"/><types:Lemma xmi:id="2030" sofa="431" begin="1019" end="1021" value="by"/><types:Lemma xmi:id="7642" sofa="431" begin="1022" end="1031" value="M&amp;B22,948"/><types:Lemma xmi:id="8368" sofa="431" begin="1032" end="1033" value="("/><types:Lemma xmi:id="3545" sofa="431" begin="1033" end="1034" value="3"/><types:Lemma xmi:id="6595" sofa="431" begin="1035" end="1036" value="x"/><types:Lemma xmi:id="7042" sofa="431" begin="1037" end="1043" value="10(-6)"/><types:Lemma xmi:id="5841" sofa="431" begin="1044" end="1045" value="-"/><types:Lemma xmi:id="3061" sofa="431" begin="1046" end="1049" value="3 x"/><types:Lemma xmi:id="5030" sofa="431" begin="1050" end="1056" value="10(-5)"/><types:Lemma 
xmi:id="5647" sofa="431" begin="1057" end="1058" value="m"/><types:Lemma xmi:id="4466" sofa="431" begin="1058" end="1059" value=")"/><types:Lemma xmi:id="2084" sofa="431" begin="1059" end="1060" value=","/><types:Lemma xmi:id="9858" sofa="431" begin="1061" end="1063" value="a"/><types:Lemma xmi:id="1251" sofa="431" begin="1064" end="1073" value="inhibitor"/><types:Lemma xmi:id="5163" sofa="431" begin="1074" end="1076" value="of"/><types:Lemma xmi:id="7541" sofa="431" begin="1077" end="1083" value="cyclic"/><types:Lemma xmi:id="12448" sofa="431" begin="1084" end="1087" value="gmp"/><types:Lemma xmi:id="3790" sofa="431" begin="1088" end="1105" value="phosphodiesterase"/><types:Lemma xmi:id="4243" sofa="431" begin="1105" end="1106" value="."/><types:Lemma xmi:id="10689" sofa="431" begin="1110" end="1115" value="after"/><types:Lemma xmi:id="12181" sofa="431" begin="1116" end="1124" value="exposure"/><types:Lemma xmi:id="4971" sofa="431" begin="1125" end="1127" value="of"/><types:Lemma xmi:id="7813" sofa="431" begin="1128" end="1136" value="coronary"/><types:Lemma xmi:id="5833" sofa="431" begin="1137" end="1145" value="artery"/><types:Lemma xmi:id="4584" sofa="431" begin="1146" end="1148" value="to"/><types:Lemma xmi:id="9907" sofa="431" begin="1149" end="1152" value="the"/><types:Lemma xmi:id="11425" sofa="431" begin="1153" end="1162" value="maximal"/><types:Lemma xmi:id="5549" sofa="431" begin="1162" end="1163" value="-"/><types:Lemma xmi:id="12356" sofa="431" begin="1163" end="1172" value="effective"/><types:Lemma xmi:id="5976" sofa="431" begin="1173" end="1186" value="concentration"/><types:Lemma xmi:id="6629" sofa="431" begin="1187" end="1189" value="of"/><types:Lemma xmi:id="9959" sofa="431" begin="1190" end="1203" value="nitroglycerin"/><types:Lemma xmi:id="7549" sofa="431" begin="1204" end="1205" value="("/><types:Lemma xmi:id="10771" sofa="431" begin="1205" end="1211" value="10(-4)"/><types:Lemma xmi:id="2068" sofa="431" begin="1212" end="1213" 
value="m"/><types:Lemma xmi:id="3487" sofa="431" begin="1213" end="1214" value=")"/><types:Lemma xmi:id="2513" sofa="431" begin="1214" end="1215" value=","/><types:Lemma xmi:id="1731" sofa="431" begin="1216" end="1219" value="the"/><types:Lemma xmi:id="1489" sofa="431" begin="1220" end="1224" value="mean"/><types:Lemma xmi:id="9767" sofa="431" begin="1225" end="1229" value="EC50"/><types:Lemma xmi:id="8328" sofa="431" begin="1230" end="1235" value="value"/><types:Lemma xmi:id="3200" sofa="431" begin="1236" end="1238" value="of"/><types:Lemma xmi:id="6874" sofa="431" begin="1239" end="1244" value="FK409"/><types:Lemma xmi:id="3798" sofa="431" begin="1245" end="1248" value="do"/><types:Lemma xmi:id="10747" sofa="431" begin="1249" end="1252" value="not"/><types:Lemma xmi:id="9951" sofa="431" begin="1253" end="1259" value="change"/><types:Lemma xmi:id="9507" sofa="431" begin="1260" end="1273" value="significant"/><types:Lemma xmi:id="8181" sofa="431" begin="1273" end="1274" value=","/><types:Lemma xmi:id="6493" sofa="431" begin="1275" end="1283" value="although"/><types:Lemma xmi:id="3638" sofa="431" begin="1284" end="1288" value="that"/><types:Lemma xmi:id="7900" sofa="431" begin="1289" end="1291" value="of"/><types:Lemma xmi:id="3455" sofa="431" begin="1292" end="1305" value="nitroglycerin"/><types:Lemma xmi:id="1354" sofa="431" begin="1306" end="1315" value="increase"/><types:Lemma xmi:id="4071" sofa="431" begin="1316" end="1321" value="about"/><types:Lemma xmi:id="4138" sofa="431" begin="1322" end="1324" value="60"/><types:Lemma xmi:id="2292" sofa="431" begin="1325" end="1329" value="fold"/><types:Lemma xmi:id="5375" sofa="431" begin="1329" end="1330" value="."/><types:Lemma xmi:id="8876" sofa="431" begin="1331" end="1336" value="after"/><types:Lemma xmi:id="2057" sofa="431" begin="1337" end="1345" value="exposure"/><types:Lemma xmi:id="8918" sofa="431" begin="1346" end="1348" value="to"/><types:Lemma xmi:id="4227" sofa="431" begin="1349" end="1352" 
value="the"/><types:Lemma xmi:id="9866" sofa="431" begin="1353" end="1362" value="maximal"/><types:Lemma xmi:id="8173" sofa="431" begin="1362" end="1363" value="-"/><types:Lemma xmi:id="4490" sofa="431" begin="1363" end="1372" value="effective"/><types:Lemma xmi:id="4664" sofa="431" begin="1373" end="1386" value="concentration"/><types:Lemma xmi:id="632" sofa="431" begin="1387" end="1389" value="of"/><types:Lemma xmi:id="10030" sofa="431" begin="1390" end="1395" value="FK409"/><types:Lemma xmi:id="966" sofa="431" begin="1396" end="1397" value="("/><types:Lemma xmi:id="3676" sofa="431" begin="1397" end="1403" value="10(-5)"/><types:Lemma xmi:id="5513" sofa="431" begin="1404" end="1405" value="m"/><types:Lemma xmi:id="10065" sofa="431" begin="1405" end="1406" value=")"/><types:Lemma xmi:id="4854" sofa="431" begin="1406" end="1407" value=","/><types:Lemma xmi:id="8457" sofa="431" begin="1408" end="1411" value="the"/><types:Lemma xmi:id="10787" sofa="431" begin="1412" end="1416" value="mean"/><types:Lemma xmi:id="9915" sofa="431" begin="1417" end="1421" value="EC50"/><types:Lemma xmi:id="1631" sofa="431" begin="1422" end="1427" value="value"/><types:Lemma xmi:id="3630" sofa="431" begin="1428" end="1430" value="of"/><types:Lemma xmi:id="10573" sofa="431" begin="1431" end="1436" value="FK409"/><types:Lemma xmi:id="1291" sofa="431" begin="1437" end="1446" value="increase"/><types:Lemma xmi:id="11712" sofa="431" begin="1447" end="1452" value="about"/><types:Lemma xmi:id="2211" sofa="431" begin="1453" end="1454" value="6"/><types:Lemma xmi:id="3132" sofa="431" begin="1455" end="1459" value="fold"/><types:Lemma xmi:id="1856" sofa="431" begin="1460" end="1463" value="and"/><types:Lemma xmi:id="8412" sofa="431" begin="1464" end="1468" value="that"/><types:Lemma xmi:id="5913" sofa="431" begin="1469" end="1471" value="of"/><types:Lemma xmi:id="6507" sofa="431" begin="1472" end="1485" value="nitroglycerin"/><types:Lemma xmi:id="1681" sofa="431" begin="1486" end="1491" 
value="about"/><types:Lemma xmi:id="4114" sofa="431" begin="1492" end="1494" value="11"/><types:Lemma xmi:id="8723" sofa="431" begin="1495" end="1499" value="fold"/><types:Lemma xmi:id="2943" sofa="431" begin="1499" end="1500" value="."/><types:Lemma xmi:id="1380" sofa="431" begin="1504" end="1509" value="these"/><types:Lemma xmi:id="8016" sofa="431" begin="1510" end="1517" value="result"/><types:Lemma xmi:id="1658" sofa="431" begin="1518" end="1525" value="suggest"/><types:Lemma xmi:id="3167" sofa="431" begin="1526" end="1530" value="that"/><types:Lemma xmi:id="8292" sofa="431" begin="1531" end="1534" value="the"/><types:Lemma xmi:id="1758" sofa="431" begin="1535" end="1547" value="vasorelaxant"/><types:Lemma xmi:id="5190" sofa="431" begin="1548" end="1554" value="effect"/><types:Lemma xmi:id="6709" sofa="431" begin="1555" end="1557" value="of"/><types:Lemma xmi:id="5565" sofa="431" begin="1558" end="1563" value="FK409"/><types:Lemma xmi:id="4333" sofa="431" begin="1563" end="1564" value=","/><types:Lemma xmi:id="4368" sofa="431" begin="1565" end="1569" value="like"/><types:Lemma xmi:id="9245" sofa="431" begin="1570" end="1574" value="that"/><types:Lemma xmi:id="5631" sofa="431" begin="1575" end="1577" value="of"/><types:Lemma xmi:id="9030" sofa="431" begin="1578" end="1591" value="nitroglycerin"/><types:Lemma xmi:id="7501" sofa="431" begin="1591" end="1592" value=","/><types:Lemma xmi:id="3479" sofa="431" begin="1593" end="1595" value="be"/><types:Lemma xmi:id="2046" sofa="431" begin="1596" end="1599" value="due"/><types:Lemma xmi:id="6540" sofa="431" begin="1600" end="1602" value="to"/><types:Lemma xmi:id="3140" sofa="431" begin="1603" end="1613" value="activation"/><types:Lemma xmi:id="2251" sofa="431" begin="1614" end="1616" value="of"/><types:Lemma xmi:id="12268" sofa="431" begin="1617" end="1624" value="soluble"/><types:Lemma xmi:id="930" sofa="431" begin="1625" end="1634" value="guanylate"/><types:Lemma xmi:id="3208" sofa="431" begin="1635" end="1642" 
value="cyclase"/><types:Lemma xmi:id="3306" sofa="431" begin="1643" end="1646" value="and"/><types:Lemma xmi:id="1243" sofa="431" begin="1647" end="1648" value="a"/><types:Lemma xmi:id="5417" sofa="431" begin="1649" end="1658" value="resultant"/><types:Lemma xmi:id="11812" sofa="431" begin="1659" end="1667" value="increase"/><types:Lemma xmi:id="10663" sofa="431" begin="1668" end="1670" value="in"/><types:Lemma xmi:id="7229" sofa="431" begin="1671" end="1684" value="intracellular"/><types:Lemma xmi:id="1318" sofa="431" begin="1685" end="1691" value="cyclic"/><types:Lemma xmi:id="11192" sofa="431" begin="1692" end="1695" value="gmp"/><types:Lemma xmi:id="2673" sofa="431" begin="1695" end="1696" value="."/><types:Lemma xmi:id="1388" sofa="431" begin="1697" end="1704" value="however"/><types:Lemma xmi:id="6275" sofa="431" begin="1704" end="1705" value=","/><types:Lemma xmi:id="9515" sofa="431" begin="1706" end="1714" value="compare"/><types:Lemma xmi:id="11022" sofa="431" begin="1715" end="1719" value="with"/><types:Lemma xmi:id="5502" sofa="431" begin="1720" end="1733" value="nitroglycerin"/><types:Lemma xmi:id="7789" sofa="431" begin="1733" end="1734" value=","/><types:Lemma xmi:id="624" sofa="431" begin="1735" end="1740" value="there"/><types:Lemma xmi:id="2850" sofa="431" begin="1741" end="1744" value="be"/><types:Lemma xmi:id="6924" sofa="431" begin="1745" end="1749" value="little"/><types:Lemma xmi:id="2127" sofa="431" begin="1750" end="1754" value="self"/><types:Lemma xmi:id="9110" sofa="431" begin="1754" end="1755" value="-"/><types:Lemma xmi:id="7927" sofa="431" begin="1755" end="1764" value="tolerance"/><types:Lemma xmi:id="958" sofa="431" begin="1765" end="1767" value="to"/><types:Lemma xmi:id="8610" sofa="431" begin="1768" end="1771" value="the"/><types:Lemma xmi:id="5235" sofa="431" begin="1772" end="1780" value="relaxant"/><types:Lemma xmi:id="4905" sofa="431" begin="1781" end="1788" value="effect"/><types:Lemma xmi:id="1299" sofa="431" begin="1789" 
end="1791" value="of"/><types:Lemma xmi:id="3646" sofa="431" begin="1792" end="1797" value="FK409"/><types:Lemma xmi:id="11468" sofa="431" begin="1798" end="1801" value="and"/><types:Lemma xmi:id="4171" sofa="431" begin="1802" end="1812" value="relative"/><types:Lemma xmi:id="7364" sofa="431" begin="1813" end="1819" value="little"/><types:Lemma xmi:id="5639" sofa="431" begin="1820" end="1825" value="cross"/><types:Lemma xmi:id="6717" sofa="431" begin="1825" end="1826" value="-"/><types:Lemma xmi:id="7427" sofa="431" begin="1826" end="1835" value="tolerance"/><types:Lemma xmi:id="699" sofa="431" begin="1836" end="1843" value="between"/><types:Lemma xmi:id="10234" sofa="431" begin="1844" end="1847" value="the"/><types:Lemma xmi:id="8982" sofa="431" begin="1848" end="1851" value="two"/><types:Lemma xmi:id="9038" sofa="431" begin="1852" end="1858" value="agent"/><types:Lemma xmi:id="11504" sofa="431" begin="1858" end="1859" value="."/><types:Token xmi:id="10292" sofa="431" begin="0" end="10" componentId="JULIE Token Boundary Detector" id="1" lemma="6476" posTag="3847"/><types:Token xmi:id="3911" sofa="431" begin="11" end="13" componentId="JULIE Token Boundary Detector" id="2" lemma="3749" posTag="4956"/><types:Token xmi:id="5801" sofa="431" begin="14" end="17" componentId="JULIE Token Boundary Detector" id="3" lemma="11958" posTag="12189"/><types:Token xmi:id="4146" sofa="431" begin="18" end="25" componentId="JULIE Token Boundary Detector" id="4" lemma="10615" posTag="11986"/><types:Token xmi:id="5359" sofa="431" begin="26" end="28" componentId="JULIE Token Boundary Detector" id="5" lemma="3069" posTag="8864"/><types:Token xmi:id="8189" sofa="431" begin="29" end="32" componentId="JULIE Token Boundary Detector" id="6" lemma="9532" posTag="3333"/><types:Token xmi:id="9735" sofa="431" begin="33" end="38" componentId="JULIE Token Boundary Detector" id="7" lemma="11704" posTag="1669"/><types:Token xmi:id="3529" sofa="431" begin="39" end="50" componentId="JULIE Token 
Boundary Detector" id="8" lemma="6138" posTag="1966"/><types:Token xmi:id="7509" sofa="431" begin="51" end="56" componentId="JULIE Token Boundary Detector" id="9" lemma="938" posTag="7358"/><types:Token xmi:id="6576" sofa="431" begin="57" end="61" componentId="JULIE Token Boundary Detector" id="10" lemma="9357" posTag="6592"/><types:Token xmi:id="8208" sofa="431" begin="62" end="67" componentId="JULIE Token Boundary Detector" id="11" lemma="11786" posTag="3517"/><types:Token xmi:id="7755" sofa="431" begin="68" end="70" componentId="JULIE Token Boundary Detector" id="12" lemma="2076" posTag="11631"/><types:Token xmi:id="2444" sofa="431" begin="71" end="84" componentId="JULIE Token Boundary Detector" id="13" lemma="1346" posTag="9822"/><types:Token xmi:id="2755" sofa="431" begin="85" end="87" componentId="JULIE Token Boundary Detector" id="14" lemma="6171" posTag="3024"/><types:Token xmi:id="3148" sofa="431" begin="88" end="96" componentId="JULIE Token Boundary Detector" id="15" lemma="6916" posTag="6195"/><types:Token xmi:id="7935" sofa="431" begin="97" end="105" componentId="JULIE Token Boundary Detector" id="16" lemma="7590" posTag="998"/><types:Token xmi:id="3861" sofa="431" begin="106" end="112" componentId="JULIE Token Boundary Detector" id="17" lemma="10847" posTag="8793"/><types:Token xmi:id="7525" sofa="431" begin="113" end="115" componentId="JULIE Token Boundary Detector" id="18" lemma="10170" posTag="6229"/><types:Token xmi:id="2259" sofa="431" begin="116" end="119" componentId="JULIE Token Boundary Detector" id="19" lemma="8768" posTag="1531"/><types:Token xmi:id="1569" sofa="431" begin="120" end="123" componentId="JULIE Token Boundary Detector" id="20" lemma="4235" posTag="786"/><types:Token xmi:id="4301" sofa="431" begin="123" end="124" componentId="JULIE Token Boundary Detector" id="21" lemma="6286" posTag="7473"/><types:Token xmi:id="10431" sofa="431" begin="128" end="131" componentId="JULIE Token Boundary Detector" id="22" lemma="7372" 
posTag="1655"/><types:Token xmi:id="12074" sofa="431" begin="132" end="144" componentId="JULIE Token Boundary Detector" id="23" lemma="11341" posTag="8109"/><types:Token xmi:id="12415" sofa="431" begin="145" end="152" componentId="JULIE Token Boundary Detector" id="24" lemma="2284" posTag="7832"/><types:Token xmi:id="9474" sofa="431" begin="153" end="155" componentId="JULIE Token Boundary Detector" id="25" lemma="3581" posTag="3113"/><types:Token xmi:id="9658" sofa="431" begin="156" end="161" componentId="JULIE Token Boundary Detector" id="26" lemma="8024" posTag="6364"/><types:Token xmi:id="10360" sofa="431" begin="161" end="162" componentId="JULIE Token Boundary Detector" id="27" lemma="11049" posTag="7325"/><types:Token xmi:id="9983" sofa="431" begin="163" end="164" componentId="JULIE Token Boundary Detector" id="28" lemma="8050" posTag="6548"/><types:Token xmi:id="4211" sofa="431" begin="165" end="168" componentId="JULIE Token Boundary Detector" id="29" lemma="9490" posTag="9206"/><types:Token xmi:id="4179" sofa="431" begin="169" end="185" componentId="JULIE Token Boundary Detector" id="30" lemma="7662" posTag="11090"/><types:Token xmi:id="4806" sofa="431" begin="186" end="197" componentId="JULIE Token Boundary Detector" id="31" lemma="3823" posTag="1177"/><types:Token xmi:id="5957" sofa="431" begin="198" end="202" componentId="JULIE Token Boundary Detector" id="32" lemma="11899" posTag="4562"/><types:Token xmi:id="3008" sofa="431" begin="203" end="204" componentId="JULIE Token Boundary Detector" id="33" lemma="2747" posTag="7984"/><types:Token xmi:id="663" sofa="431" begin="205" end="214" componentId="JULIE Token Boundary Detector" id="34" lemma="2167" posTag="10807"/><types:Token xmi:id="2521" sofa="431" begin="215" end="222" componentId="JULIE Token Boundary Detector" id="35" lemma="4862" posTag="10062"/><types:Token xmi:id="10259" sofa="431" begin="222" end="223" componentId="JULIE Token Boundary Detector" id="36" lemma="2175" posTag="10883"/><types:Token 
xmi:id="3831" sofa="431" begin="224" end="228" componentId="JULIE Token Boundary Detector" id="37" lemma="9457" posTag="2610"/><types:Token xmi:id="4940" sofa="431" begin="229" end="237" componentId="JULIE Token Boundary Detector" id="38" lemma="5326" posTag="2411"/><types:Token xmi:id="2960" sofa="431" begin="238" end="242" componentId="JULIE Token Boundary Detector" id="39" lemma="8465" posTag="9168"/><types:Token xmi:id="9967" sofa="431" begin="243" end="248" componentId="JULIE Token Boundary Detector" id="40" lemma="8599" posTag="2065"/><types:Token xmi:id="2151" sofa="431" begin="249" end="251" componentId="JULIE Token Boundary Detector" id="41" lemma="11528" posTag="707"/><types:Token xmi:id="5171" sofa="431" begin="252" end="265" componentId="JULIE Token Boundary Detector" id="42" lemma="2662" posTag="3654"/><types:Token xmi:id="10073" sofa="431" begin="266" end="268" componentId="JULIE Token Boundary Detector" id="43" lemma="11671" posTag="10056"/><types:Token xmi:id="8077" sofa="431" begin="269" end="277" componentId="JULIE Token Boundary Detector" id="44" lemma="4838" posTag="11937"/><types:Token xmi:id="5119" sofa="431" begin="278" end="286" componentId="JULIE Token Boundary Detector" id="45" lemma="5022" posTag="1969"/><types:Token xmi:id="8441" sofa="431" begin="287" end="293" componentId="JULIE Token Boundary Detector" id="46" lemma="10284" posTag="4734"/><types:Token xmi:id="8093" sofa="431" begin="294" end="299" componentId="JULIE Token Boundary Detector" id="47" lemma="5275" posTag="4672"/><types:Token xmi:id="5722" sofa="431" begin="300" end="302" componentId="JULIE Token Boundary Detector" id="48" lemma="4397" posTag="6198"/><types:Token xmi:id="8058" sofa="431" begin="303" end="306" componentId="JULIE Token Boundary Detector" id="49" lemma="7582" posTag="4454"/><types:Token xmi:id="2309" sofa="431" begin="307" end="310" componentId="JULIE Token Boundary Detector" id="50" lemma="3850" posTag="4420"/><types:Token xmi:id="1212" sofa="431" 
begin="311" end="321" componentId="JULIE Token Boundary Detector" id="51" lemma="2540" posTag="8205"/><types:Token xmi:id="9229" sofa="431" begin="322" end="326" componentId="JULIE Token Boundary Detector" id="52" lemma="10008" posTag="6328"/><types:Token xmi:id="1158" sofa="431" begin="327" end="333" componentId="JULIE Token Boundary Detector" id="53" lemma="6725" posTag="3042"/><types:Token xmi:id="3254" sofa="431" begin="334" end="335" componentId="JULIE Token Boundary Detector" id="54" lemma="8812" posTag="833"/><types:Token xmi:id="5664" sofa="431" begin="335" end="341" componentId="JULIE Token Boundary Detector" id="55" lemma="10495" posTag="8514"/><types:Token xmi:id="1909" sofa="431" begin="342" end="343" componentId="JULIE Token Boundary Detector" id="56" lemma="691" posTag="7740"/><types:Token xmi:id="2783" sofa="431" begin="343" end="344" componentId="JULIE Token Boundary Detector" id="57" lemma="10110" posTag="10819"/><types:Token xmi:id="10376" sofa="431" begin="344" end="345" componentId="JULIE Token Boundary Detector" id="58" lemma="7446" posTag="5084"/><types:Token xmi:id="2219" sofa="431" begin="349" end="354" componentId="JULIE Token Boundary Detector" id="59" lemma="11747" posTag="11030"/><types:Token xmi:id="3495" sofa="431" begin="355" end="356" componentId="JULIE Token Boundary Detector" id="60" lemma="8832" posTag="11774"/><types:Token xmi:id="6420" sofa="431" begin="356" end="362" componentId="JULIE Token Boundary Detector" id="61" lemma="6347" posTag="12456"/><types:Token xmi:id="1414" sofa="431" begin="362" end="363" componentId="JULIE Token Boundary Detector" id="62" lemma="4603" posTag="11952"/><types:Token xmi:id="10623" sofa="431" begin="363" end="364" componentId="JULIE Token Boundary Detector" id="63" lemma="6981" posTag="12475"/><types:Token xmi:id="7095" sofa="431" begin="364" end="370" componentId="JULIE Token Boundary Detector" id="64" lemma="12090" posTag="8322"/><types:Token xmi:id="8352" sofa="431" begin="371" end="372" 
componentId="JULIE Token Boundary Detector" id="65" lemma="11739" posTag="7033"/><types:Token xmi:id="839" sofa="431" begin="372" end="373" componentId="JULIE Token Boundary Detector" id="66" lemma="6701" posTag="10098"/><types:Token xmi:id="2568" sofa="431" begin="374" end="377" componentId="JULIE Token Boundary Detector" id="67" lemma="6034" posTag="4600"/><types:Token xmi:id="9055" sofa="431" begin="378" end="391" componentId="JULIE Token Boundary Detector" id="68" lemma="3806" posTag="1396"/><types:Token xmi:id="4737" sofa="431" begin="392" end="393" componentId="JULIE Token Boundary Detector" id="69" lemma="6042" posTag="1240"/><types:Token xmi:id="10599" sofa="431" begin="393" end="398" componentId="JULIE Token Boundary Detector" id="70" lemma="10447" posTag="6637"/><types:Token xmi:id="8258" sofa="431" begin="398" end="399" componentId="JULIE Token Boundary Detector" id="71" lemma="5940" posTag="8420"/><types:Token xmi:id="1594" sofa="431" begin="399" end="400" componentId="JULIE Token Boundary Detector" id="72" lemma="10639" posTag="2613"/><types:Token xmi:id="11139" sofa="431" begin="400" end="406" componentId="JULIE Token Boundary Detector" id="73" lemma="9775" posTag="3983"/><types:Token xmi:id="9699" sofa="431" begin="407" end="408" componentId="JULIE Token Boundary Detector" id="74" lemma="9449" posTag="4498"/><types:Token xmi:id="6066" sofa="431" begin="408" end="409" componentId="JULIE Token Boundary Detector" id="75" lemma="11536" posTag="10395"/><types:Token xmi:id="1553" sofa="431" begin="410" end="414" componentId="JULIE Token Boundary Detector" id="76" lemma="9718" posTag="12347"/><types:Token xmi:id="3715" sofa="431" begin="415" end="423" componentId="JULIE Token Boundary Detector" id="77" lemma="10965" posTag="4501"/><types:Token xmi:id="5680" sofa="431" begin="424" end="425" componentId="JULIE Token Boundary Detector" id="78" lemma="2584" posTag="2414"/><types:Token xmi:id="4822" sofa="431" begin="426" end="439" componentId="JULIE Token 
Boundary Detector" id="79" lemma="8165" posTag="4273"/><types:Token xmi:id="7237" sofa="431" begin="439" end="440" componentId="JULIE Token Boundary Detector" id="80" lemma="11556" posTag="5937"/><types:Token xmi:id="4438" sofa="431" begin="440" end="449" componentId="JULIE Token Boundary Detector" id="81" lemma="7269" posTag="7879"/><types:Token xmi:id="7557" sofa="431" begin="450" end="460" componentId="JULIE Token Boundary Detector" id="82" lemma="9751" posTag="8252"/><types:Token xmi:id="2334" sofa="431" begin="460" end="461" componentId="JULIE Token Boundary Detector" id="83" lemma="5103" posTag="11284"/><types:Token xmi:id="6235" sofa="431" begin="462" end="472" componentId="JULIE Token Boundary Detector" id="84" lemma="1310" posTag="660"/><types:Token xmi:id="4317" sofa="431" begin="473" end="475" componentId="JULIE Token Boundary Detector" id="85" lemma="8776" posTag="12276"/><types:Token xmi:id="2858" sofa="431" begin="476" end="480" componentId="JULIE Token Boundary Detector" id="86" lemma="1028" posTag="5399"/><types:Token xmi:id="1196" sofa="431" begin="481" end="487" componentId="JULIE Token Boundary Detector" id="87" lemma="9850" posTag="10535"/><types:Token xmi:id="4718" sofa="431" begin="488" end="494" componentId="JULIE Token Boundary Detector" id="88" lemma="3295" posTag="10059"/><types:Token xmi:id="9923" sofa="431" begin="495" end="499" componentId="JULIE Token Boundary Detector" id="89" lemma="5990" posTag="1470"/><types:Token xmi:id="1473" sofa="431" begin="500" end="505" componentId="JULIE Token Boundary Detector" id="90" lemma="11212" posTag="2183"/><types:Token xmi:id="7863" sofa="431" begin="506" end="509" componentId="JULIE Token Boundary Detector" id="91" lemma="607" posTag="5446"/><types:Token xmi:id="5135" sofa="431" begin="510" end="515" componentId="JULIE Token Boundary Detector" id="92" lemma="6082" posTag="6989"/><types:Token xmi:id="9071" sofa="431" begin="516" end="518" componentId="JULIE Token Boundary Detector" id="93" 
lemma="1925" posTag="640"/><types:Token xmi:id="3660" sofa="431" begin="519" end="524" componentId="JULIE Token Boundary Detector" id="94" lemma="7960" posTag="6698"/><types:Token xmi:id="4702" sofa="431" begin="525" end="529" componentId="JULIE Token Boundary Detector" id="95" lemma="4554" posTag="6251"/><types:Token xmi:id="3422" sofa="431" begin="530" end="536" componentId="JULIE Token Boundary Detector" id="96" lemma="9586" posTag="8392"/><types:Token xmi:id="11564" sofa="431" begin="537" end="541" componentId="JULIE Token Boundary Detector" id="97" lemma="1791" posTag="12353"/><types:Token xmi:id="3175" sofa="431" begin="542" end="555" componentId="JULIE Token Boundary Detector" id="98" lemma="2417" posTag="1063"/><types:Token xmi:id="9368" sofa="431" begin="555" end="556" componentId="JULIE Token Boundary Detector" id="99" lemma="8536" posTag="7036"/><types:Token xmi:id="6312" sofa="431" begin="560" end="570" componentId="JULIE Token Boundary Detector" id="100" lemma="9384" posTag="4254"/><types:Token xmi:id="4614" sofa="431" begin="571" end="585" componentId="JULIE Token Boundary Detector" id="101" lemma="6665" posTag="2650"/><types:Token xmi:id="3116" sofa="431" begin="586" end="588" componentId="JULIE Token Boundary Detector" id="102" lemma="3438" posTag="10596"/><types:Token xmi:id="6682" sofa="431" begin="589" end="602" componentId="JULIE Token Boundary Detector" id="103" lemma="10556" posTag="9354"/><types:Token xmi:id="6090" sofa="431" begin="603" end="604" componentId="JULIE Token Boundary Detector" id="104" lemma="7196" posTag="5449"/><types:Token xmi:id="4787" sofa="431" begin="604" end="610" componentId="JULIE Token Boundary Detector" id="105" lemma="10251" posTag="8560"/><types:Token xmi:id="9401" sofa="431" begin="611" end="612" componentId="JULIE Token Boundary Detector" id="106" lemma="7688" posTag="8130"/><types:Token xmi:id="1044" sofa="431" begin="612" end="613" componentId="JULIE Token Boundary Detector" id="107" lemma="919" 
posTag="4753"/><types:Token xmi:id="6106" sofa="431" begin="614" end="617" componentId="JULIE Token Boundary Detector" id="108" lemma="6612" posTag="7424"/><types:Token xmi:id="8796" sofa="431" begin="618" end="623" componentId="JULIE Token Boundary Detector" id="109" lemma="10831" posTag="2469"/><types:Token xmi:id="9338" sofa="431" begin="624" end="625" componentId="JULIE Token Boundary Detector" id="110" lemma="10989" posTag="9015"/><types:Token xmi:id="7150" sofa="431" begin="625" end="626" componentId="JULIE Token Boundary Detector" id="111" lemma="7127" posTag="7193"/><types:Token xmi:id="10407" sofa="431" begin="627" end="628" componentId="JULIE Token Boundary Detector" id="112" lemma="9212" posTag="8533"/><types:Token xmi:id="8233" sofa="431" begin="629" end="635" componentId="JULIE Token Boundary Detector" id="113" lemma="10779" posTag="12412"/><types:Token xmi:id="7968" sofa="431" begin="636" end="637" componentId="JULIE Token Boundary Detector" id="114" lemma="1708" posTag="9209"/><types:Token xmi:id="12300" sofa="431" begin="637" end="638" componentId="JULIE Token Boundary Detector" id="115" lemma="4932" posTag="8289"/><types:Token xmi:id="4513" sofa="431" begin="639" end="647" componentId="JULIE Token Boundary Detector" id="116" lemma="8008" posTag="6210"/><types:Token xmi:id="7253" sofa="431" begin="648" end="657" componentId="JULIE Token Boundary Detector" id="117" lemma="11882" posTag="10324"/><types:Token xmi:id="2834" sofa="431" begin="658" end="670" componentId="JULIE Token Boundary Detector" id="118" lemma="11260" posTag="7039"/><types:Token xmi:id="7476" sofa="431" begin="671" end="684" componentId="JULIE Token Boundary Detector" id="119" lemma="8856" posTag="1853"/><types:Token xmi:id="9292" sofa="431" begin="685" end="686" componentId="JULIE Token Boundary Detector" id="120" lemma="3225" posTag="3233"/><types:Token xmi:id="4039" sofa="431" begin="686" end="692" componentId="JULIE Token Boundary Detector" id="121" lemma="1949" 
posTag="3657"/><types:Token xmi:id="2195" sofa="431" begin="693" end="696" componentId="JULIE Token Boundary Detector" id="122" lemma="2642" posTag="10489"/><types:Token xmi:id="4873" sofa="431" begin="696" end="697" componentId="JULIE Token Boundary Detector" id="123" lemma="2681" posTag="1739"/><types:Token xmi:id="1180" sofa="431" begin="698" end="704" componentId="JULIE Token Boundary Detector" id="124" lemma="11305" posTag="11868"/><types:Token xmi:id="1933" sofa="431" begin="704" end="705" componentId="JULIE Token Boundary Detector" id="125" lemma="11014" posTag="12136"/><types:Token xmi:id="5283" sofa="431" begin="706" end="713" componentId="JULIE Token Boundary Detector" id="126" lemma="11236" posTag="11476"/><types:Token xmi:id="8376" sofa="431" begin="714" end="724" componentId="JULIE Token Boundary Detector" id="127" lemma="10839" posTag="6504"/><types:Token xmi:id="3553" sofa="431" begin="725" end="729" componentId="JULIE Token Boundary Detector" id="128" lemma="4592" posTag="710"/><types:Token xmi:id="6370" sofa="431" begin="730" end="744" componentId="JULIE Token Boundary Detector" id="129" lemma="11614" posTag="5984"/><types:Token xmi:id="4004" sofa="431" begin="744" end="745" componentId="JULIE Token Boundary Detector" id="130" lemma="2739" posTag="7743"/><types:Token xmi:id="11433" sofa="431" begin="746" end="755" componentId="JULIE Token Boundary Detector" id="131" lemma="4846" posTag="946"/><types:Token xmi:id="2927" sofa="431" begin="756" end="768" componentId="JULIE Token Boundary Detector" id="132" lemma="12210" posTag="2124"/><types:Token xmi:id="4529" sofa="431" begin="769" end="782" componentId="JULIE Token Boundary Detector" id="133" lemma="8395" posTag="4251"/><types:Token xmi:id="7204" sofa="431" begin="783" end="784" componentId="JULIE Token Boundary Detector" id="134" lemma="10948" posTag="3511"/><types:Token xmi:id="7169" sofa="431" begin="784" end="790" componentId="JULIE Token Boundary Detector" id="135" lemma="11066" 
posTag="2441"/><types:Token xmi:id="8691" sofa="431" begin="791" end="794" componentId="JULIE Token Boundary Detector" id="136" lemma="10423" posTag="4784"/><types:Token xmi:id="3886" sofa="431" begin="794" end="795" componentId="JULIE Token Boundary Detector" id="137" lemma="8591" posTag="1818"/><types:Token xmi:id="2135" sofa="431" begin="796" end="802" componentId="JULIE Token Boundary Detector" id="138" lemma="1700" posTag="983"/><types:Token xmi:id="2976" sofa="431" begin="803" end="807" componentId="JULIE Token Boundary Detector" id="139" lemma="9330" posTag="4929"/><types:Token xmi:id="6018" sofa="431" begin="808" end="818" componentId="JULIE Token Boundary Detector" id="140" lemma="750" posTag="9308"/><types:Token xmi:id="10973" sofa="431" begin="818" end="819" componentId="JULIE Token Boundary Detector" id="141" lemma="9759" posTag="6457"/><types:Token xmi:id="11220" sofa="431" begin="823" end="826" componentId="JULIE Token Boundary Detector" id="142" lemma="5470" posTag="6232"/><types:Token xmi:id="10858" sofa="431" begin="827" end="840" componentId="JULIE Token Boundary Detector" id="143" lemma="11975" posTag="3314"/><types:Token xmi:id="3279" sofa="431" begin="840" end="841" componentId="JULIE Token Boundary Detector" id="144" lemma="12440" posTag="4870"/><types:Token xmi:id="1078" sofa="431" begin="841" end="851" componentId="JULIE Token Boundary Detector" id="145" lemma="7050" posTag="8572"/><types:Token xmi:id="4474" sofa="431" begin="852" end="858" componentId="JULIE Token Boundary Detector" id="146" lemma="10997" posTag="7696"/><types:Token xmi:id="7405" sofa="431" begin="859" end="862" componentId="JULIE Token Boundary Detector" id="147" lemma="9087" posTag="7147"/><types:Token xmi:id="11655" sofa="431" begin="863" end="876" componentId="JULIE Token Boundary Detector" id="148" lemma="5557" posTag="9262"/><types:Token xmi:id="10697" sofa="431" begin="877" end="880" componentId="JULIE Token Boundary Detector" id="149" lemma="1689" 
posTag="747"/><types:Token xmi:id="12459" sofa="431" begin="881" end="886" componentId="JULIE Token Boundary Detector" id="150" lemma="5705" posTag="3303"/><types:Token xmi:id="4765" sofa="431" begin="887" end="891" componentId="JULIE Token Boundary Detector" id="151" lemma="2113" posTag="1276"/><types:Token xmi:id="7835" sofa="431" begin="892" end="899" componentId="JULIE Token Boundary Detector" id="152" lemma="9284" posTag="1399"/><types:Token xmi:id="1980" sofa="431" begin="900" end="902" componentId="JULIE Token Boundary Detector" id="153" lemma="5111" posTag="2353"/><types:Token xmi:id="12151" sofa="431" begin="903" end="906" componentId="JULIE Token Boundary Detector" id="154" lemma="1036" posTag="2670"/><types:Token xmi:id="7277" sofa="431" begin="907" end="912" componentId="JULIE Token Boundary Detector" id="155" lemma="2038" posTag="5546"/><types:Token xmi:id="8675" sofa="431" begin="913" end="915" componentId="JULIE Token Boundary Detector" id="156" lemma="7185" posTag="12148"/><types:Token xmi:id="1884" sofa="431" begin="916" end="925" componentId="JULIE Token Boundary Detector" id="157" lemma="9802" posTag="4611"/><types:Token xmi:id="8926" sofa="431" begin="926" end="930" componentId="JULIE Token Boundary Detector" id="158" lemma="652" posTag="5973"/><types:Token xmi:id="2425" sofa="431" begin="931" end="932" componentId="JULIE Token Boundary Detector" id="159" lemma="7438" posTag="11544"/><types:Token xmi:id="11033" sofa="431" begin="932" end="933" componentId="JULIE Token Boundary Detector" id="160" lemma="6804" posTag="9107"/><types:Token xmi:id="10519" sofa="431" begin="934" end="935" componentId="JULIE Token Boundary Detector" id="161" lemma="6948" posTag="11596"/><types:Token xmi:id="4568" sofa="431" begin="936" end="942" componentId="JULIE Token Boundary Detector" id="162" lemma="7824" posTag="11287"/><types:Token xmi:id="3687" sofa="431" begin="943" end="944" componentId="JULIE Token Boundary Detector" id="163" lemma="1972" 
posTag="9819"/><types:Token xmi:id="9683" sofa="431" begin="945" end="948" componentId="JULIE Token Boundary Detector" id="164" lemma="2826" posTag="6445"/><types:Token xmi:id="12364" sofa="431" begin="949" end="955" componentId="JULIE Token Boundary Detector" id="165" lemma="1259" posTag="9311"/><types:Token xmi:id="9190" sofa="431" begin="956" end="957" componentId="JULIE Token Boundary Detector" id="166" lemma="11874" posTag="12350"/><types:Token xmi:id="9825" sofa="431" begin="957" end="958" componentId="JULIE Token Boundary Detector" id="167" lemma="11989" posTag="11465"/><types:Token xmi:id="11836" sofa="431" begin="958" end="959" componentId="JULIE Token Boundary Detector" id="168" lemma="2634" posTag="1343"/><types:Token xmi:id="2714" sofa="431" begin="960" end="962" componentId="JULIE Token Boundary Detector" id="169" lemma="8958" posTag="9939"/><types:Token xmi:id="7699" sofa="431" begin="963" end="972" componentId="JULIE Token Boundary Detector" id="170" lemma="2548" posTag="11871"/><types:Token xmi:id="9268" sofa="431" begin="973" end="975" componentId="JULIE Token Boundary Detector" id="171" lemma="12173" posTag="11940"/><types:Token xmi:id="11123" sofa="431" begin="976" end="983" componentId="JULIE Token Boundary Detector" id="172" lemma="6007" posTag="12167"/><types:Token xmi:id="5259" sofa="431" begin="984" end="993" componentId="JULIE Token Boundary Detector" id="173" lemma="1430" posTag="9655"/><types:Token xmi:id="2698" sofa="431" begin="994" end="1001" componentId="JULIE Token Boundary Detector" id="174" lemma="4079" posTag="1799"/><types:Token xmi:id="1537" sofa="431" begin="1001" end="1002" componentId="JULIE Token Boundary Detector" id="175" lemma="1326" posTag="8607"/><types:Token xmi:id="2235" sofa="431" begin="1003" end="1006" componentId="JULIE Token Boundary Detector" id="176" lemma="10022" posTag="4405"/><types:Token xmi:id="713" sofa="431" begin="1007" end="1009" componentId="JULIE Token Boundary Detector" id="177" lemma="3345" 
posTag="11983"/><types:Token xmi:id="808" sofa="431" begin="1010" end="1013" componentId="JULIE Token Boundary Detector" id="178" lemma="5478" posTag="1666"/><types:Token xmi:id="7008" sofa="431" begin="1014" end="1018" componentId="JULIE Token Boundary Detector" id="179" lemma="1873" posTag="7435"/><types:Token xmi:id="6640" sofa="431" begin="1019" end="1021" componentId="JULIE Token Boundary Detector" id="180" lemma="2030" posTag="9636"/><types:Token xmi:id="5068" sofa="431" begin="1022" end="1031" componentId="JULIE Token Boundary Detector" id="181" lemma="7642" posTag="11102"/><types:Token xmi:id="11852" sofa="431" begin="1032" end="1033" componentId="JULIE Token Boundary Detector" id="182" lemma="8368" posTag="12170"/><types:Token xmi:id="6213" sofa="431" begin="1033" end="1034" componentId="JULIE Token Boundary Detector" id="183" lemma="3545" posTag="8319"/><types:Token xmi:id="8840" sofa="431" begin="1035" end="1036" componentId="JULIE Token Boundary Detector" id="184" lemma="6595" posTag="9892"/><types:Token xmi:id="12380" sofa="431" begin="1037" end="1043" componentId="JULIE Token Boundary Detector" id="185" lemma="7042" posTag="7337"/><types:Token xmi:id="8473" sofa="431" begin="1044" end="1045" componentId="JULIE Token Boundary Detector" id="186" lemma="5841" posTag="9624"/><types:Token xmi:id="11155" sofa="431" begin="1046" end="1049" componentId="JULIE Token Boundary Detector" id="187" lemma="3061" posTag="11313"/><types:Token xmi:id="5383" sofa="431" begin="1050" end="1056" componentId="JULIE Token Boundary Detector" id="188" lemma="5030" posTag="4781"/><types:Token xmi:id="11244" sofa="431" begin="1057" end="1058" componentId="JULIE Token Boundary Detector" id="189" lemma="5647" posTag="5047"/><types:Token xmi:id="8707" sofa="431" begin="1058" end="1059" componentId="JULIE Token Boundary Detector" id="190" lemma="4466" posTag="9715"/><types:Token xmi:id="7389" sofa="431" begin="1059" end="1060" componentId="JULIE Token Boundary Detector" id="191" 
lemma="2084" posTag="11379"/><types:Token xmi:id="7614" sofa="431" begin="1061" end="1063" componentId="JULIE Token Boundary Detector" id="192" lemma="9858" posTag="836"/><types:Token xmi:id="1094" sofa="431" begin="1064" end="1073" componentId="JULIE Token Boundary Detector" id="193" lemma="1251" posTag="7135"/><types:Token xmi:id="7309" sofa="431" begin="1074" end="1076" componentId="JULIE Token Boundary Detector" id="194" lemma="5163" posTag="5510"/><types:Token xmi:id="11720" sofa="431" begin="1077" end="1083" componentId="JULIE Token Boundary Detector" id="195" lemma="7541" posTag="4959"/><types:Token xmi:id="6992" sofa="431" begin="1084" end="1087" componentId="JULIE Token Boundary Detector" id="196" lemma="12448" posTag="10945"/><types:Token xmi:id="11580" sofa="431" begin="1088" end="1105" componentId="JULIE Token Boundary Detector" id="197" lemma="3790" posTag="2565"/><types:Token xmi:id="5615" sofa="431" begin="1105" end="1106" componentId="JULIE Token Boundary Detector" id="198" lemma="4243" posTag="10886"/><types:Token xmi:id="9314" sofa="431" begin="1110" end="1115" componentId="JULIE Token Boundary Detector" id="199" lemma="10689" posTag="5773"/><types:Token xmi:id="9786" sofa="431" begin="1116" end="1124" componentId="JULIE Token Boundary Detector" id="200" lemma="12181" posTag="5414"/><types:Token xmi:id="4913" sofa="431" begin="1125" end="1127" componentId="JULIE Token Boundary Detector" id="201" lemma="4971" posTag="3858"/><types:Token xmi:id="4122" sofa="431" begin="1128" end="1136" componentId="JULIE Token Boundary Detector" id="202" lemma="7813" posTag="8820"/><types:Token xmi:id="4020" sofa="431" begin="1137" end="1145" componentId="JULIE Token Boundary Detector" id="203" lemma="5833" posTag="10930"/><types:Token xmi:id="6050" sofa="431" begin="1146" end="1148" componentId="JULIE Token Boundary Detector" id="204" lemma="4584" posTag="4435"/><types:Token xmi:id="10308" sofa="431" begin="1149" end="1152" componentId="JULIE Token Boundary 
Detector" id="205" lemma="9907" posTag="3712"/><types:Token xmi:id="6179" sofa="431" begin="1153" end="1162" componentId="JULIE Token Boundary Detector" id="206" lemma="11425" posTag="12288"/><types:Token xmi:id="5207" sofa="431" begin="1162" end="1163" componentId="JULIE Token Boundary Detector" id="207" lemma="5549" posTag="1066"/><types:Token xmi:id="8489" sofa="431" begin="1163" end="1172" componentId="JULIE Token Boundary Detector" id="208" lemma="12356" posTag="6367"/><types:Token xmi:id="5530" sofa="431" begin="1173" end="1186" componentId="JULIE Token Boundary Detector" id="209" lemma="5976" posTag="11643"/><types:Token xmi:id="1742" sofa="431" begin="1187" end="1189" componentId="JULIE Token Boundary Detector" id="210" lemma="6629" posTag="5603"/><types:Token xmi:id="6956" sofa="431" begin="1190" end="1203" componentId="JULIE Token Boundary Detector" id="211" lemma="9959" posTag="10795"/><types:Token xmi:id="6122" sofa="431" begin="1204" end="1205" componentId="JULIE Token Boundary Detector" id="212" lemma="7549" posTag="2110"/><types:Token xmi:id="12396" sofa="431" begin="1205" end="1211" componentId="JULIE Token Boundary Detector" id="213" lemma="10771" posTag="1534"/><types:Token xmi:id="2472" sofa="431" begin="1212" end="1213" componentId="JULIE Token Boundary Detector" id="214" lemma="2068" posTag="11755"/><types:Token xmi:id="12098" sofa="431" begin="1213" end="1214" componentId="JULIE Token Boundary Detector" id="215" lemma="3487" posTag="1060"/><types:Token xmi:id="894" sofa="431" begin="1214" end="1215" componentId="JULIE Token Boundary Detector" id="216" lemma="2513" posTag="789"/><types:Token xmi:id="5776" sofa="431" begin="1216" end="1219" componentId="JULIE Token Boundary Detector" id="217" lemma="1731" posTag="5738"/><types:Token xmi:id="3362" sofa="431" begin="1220" end="1224" componentId="JULIE Token Boundary Detector" id="218" lemma="1489" posTag="11736"/><types:Token xmi:id="10713" sofa="431" begin="1225" end="1229" componentId="JULIE 
Token Boundary Detector" id="219" lemma="9767" posTag="7361"/><types:Token xmi:id="6560" sofa="431" begin="1230" end="1235" componentId="JULIE Token Boundary Detector" id="220" lemma="8328" posTag="1716"/><types:Token xmi:id="10647" sofa="431" begin="1236" end="1238" componentId="JULIE Token Boundary Detector" id="221" lemma="3200" posTag="1279"/><types:Token xmi:id="11074" sofa="431" begin="1239" end="1244" componentId="JULIE Token Boundary Detector" id="222" lemma="6874" posTag="11302"/><types:Token xmi:id="2497" sofa="431" begin="1245" end="1248" componentId="JULIE Token Boundary Detector" id="223" lemma="3798" posTag="4803"/><types:Token xmi:id="8517" sofa="431" begin="1249" end="1252" componentId="JULIE Token Boundary Detector" id="224" lemma="10747" posTag="7421"/><types:Token xmi:id="1515" sofa="431" begin="1253" end="1259" componentId="JULIE Token Boundary Detector" id="225" lemma="9951" posTag="8283"/><types:Token xmi:id="10755" sofa="431" begin="1260" end="1273" componentId="JULIE Token Boundary Detector" id="226" lemma="9507" posTag="5443"/><types:Token xmi:id="8544" sofa="431" begin="1273" end="1274" componentId="JULIE Token Boundary Detector" id="227" lemma="8181" posTag="2771"/><types:Token xmi:id="8942" sofa="431" begin="1275" end="1283" componentId="JULIE Token Boundary Detector" id="228" lemma="6493" posTag="2328"/><types:Token xmi:id="8133" sofa="431" begin="1284" end="1288" componentId="JULIE Token Boundary Detector" id="229" lemma="3638" posTag="758"/><types:Token xmi:id="10889" sofa="431" begin="1289" end="1291" componentId="JULIE Token Boundary Detector" id="230" lemma="7900" posTag="9556"/><types:Token xmi:id="11758" sofa="431" begin="1292" end="1305" componentId="JULIE Token Boundary Detector" id="231" lemma="3455" posTag="7882"/><types:Token xmi:id="2992" sofa="431" begin="1306" end="1315" componentId="JULIE Token Boundary Detector" id="232" lemma="1354" posTag="2908"/><types:Token xmi:id="6460" sofa="431" begin="1316" end="1321" 
componentId="JULIE Token Boundary Detector" id="233" lemma="4071" posTag="10855"/><types:Token xmi:id="12325" sofa="431" begin="1322" end="1324" componentId="JULIE Token Boundary Detector" id="234" lemma="4138" posTag="3578"/><types:Token xmi:id="5486" sofa="431" begin="1325" end="1329" componentId="JULIE Token Boundary Detector" id="235" lemma="2292" posTag="10336"/><types:Token xmi:id="6842" sofa="431" begin="1329" end="1330" componentId="JULIE Token Boundary Detector" id="236" lemma="5375" posTag="12341"/><types:Token xmi:id="8734" sofa="431" begin="1331" end="1336" componentId="JULIE Token Boundary Detector" id="237" lemma="8876" posTag="10222"/><types:Token xmi:id="12033" sofa="431" begin="1337" end="1345" componentId="JULIE Token Boundary Detector" id="238" lemma="2057" posTag="3514"/><types:Token xmi:id="10464" sofa="431" begin="1346" end="1348" componentId="JULIE Token Boundary Detector" id="239" lemma="8918" posTag="2350"/><types:Token xmi:id="1821" sofa="431" begin="1349" end="1352" componentId="JULIE Token Boundary Detector" id="240" lemma="4227" posTag="9171"/><types:Token xmi:id="1110" sofa="431" begin="1353" end="1362" componentId="JULIE Token Boundary Detector" id="241" lemma="9866" posTag="12117"/><types:Token xmi:id="12218" sofa="431" begin="1362" end="1363" componentId="JULIE Token Boundary Detector" id="242" lemma="8173" posTag="8074"/><types:Token xmi:id="9152" sofa="431" begin="1363" end="1372" componentId="JULIE Token Boundary Detector" id="243" lemma="4490" posTag="1697"/><types:Token xmi:id="12049" sofa="431" begin="1373" end="1386" componentId="JULIE Token Boundary Detector" id="244" lemma="4664" posTag="12344"/><types:Token xmi:id="1142" sofa="431" begin="1387" end="1389" componentId="JULIE Token Boundary Detector" id="245" lemma="632" posTag="1307"/><types:Token xmi:id="12120" sofa="431" begin="1390" end="1395" componentId="JULIE Token Boundary Detector" id="246" lemma="10030" posTag="3394"/><types:Token xmi:id="3045" sofa="431" 
begin="1396" end="1397" componentId="JULIE Token Boundary Detector" id="247" lemma="966" posTag="927"/><types:Token xmi:id="3955" sofa="431" begin="1397" end="1403" componentId="JULIE Token Boundary Detector" id="248" lemma="3676" posTag="9365"/><types:Token xmi:id="9136" sofa="431" begin="1404" end="1405" componentId="JULIE Token Boundary Detector" id="249" lemma="5513" posTag="10016"/><types:Token xmi:id="4257" sofa="431" begin="1405" end="1406" componentId="JULIE Token Boundary Detector" id="250" lemma="10065" posTag="7851"/><types:Token xmi:id="9540" sofa="431" begin="1406" end="1407" componentId="JULIE Token Boundary Detector" id="251" lemma="4854" posTag="6272"/><types:Token xmi:id="11688" sofa="431" begin="1408" end="1411" componentId="JULIE Token Boundary Detector" id="252" lemma="8457" posTag="679"/><types:Token xmi:id="1996" sofa="431" begin="1412" end="1416" componentId="JULIE Token Boundary Detector" id="253" lemma="10787" posTag="11608"/><types:Token xmi:id="6524" sofa="431" begin="1417" end="1421" componentId="JULIE Token Boundary Detector" id="254" lemma="9915" posTag="5867"/><types:Token xmi:id="5006" sofa="431" begin="1422" end="1427" componentId="JULIE Token Boundary Detector" id="255" lemma="1631" posTag="11955"/><types:Token xmi:id="1802" sofa="431" begin="1428" end="1430" componentId="JULIE Token Boundary Detector" id="256" lemma="3630" posTag="4423"/><types:Token xmi:id="6331" sofa="431" begin="1431" end="1436" componentId="JULIE Token Boundary Detector" id="257" lemma="10573" posTag="8249"/><types:Token xmi:id="5741" sofa="431" begin="1437" end="1446" componentId="JULIE Token Boundary Detector" id="258" lemma="1291" posTag="4408"/><types:Token xmi:id="7457" sofa="431" begin="1447" end="1452" componentId="JULIE Token Boundary Detector" id="259" lemma="11712" posTag="864"/><types:Token xmi:id="4889" sofa="431" begin="1453" end="1454" componentId="JULIE Token Boundary Detector" id="260" lemma="2211" posTag="10581"/><types:Token xmi:id="3397" 
sofa="431" begin="1455" end="1459" componentId="JULIE Token Boundary Detector" id="261" lemma="3132" posTag="10203"/><types:Token xmi:id="2883" sofa="431" begin="1460" end="1463" componentId="JULIE Token Boundary Detector" id="262" lemma="1856" posTag="1237"/><types:Token xmi:id="6858" sofa="431" begin="1464" end="1468" componentId="JULIE Token Boundary Detector" id="263" lemma="8412" posTag="3684"/><types:Token xmi:id="8300" sofa="431" begin="1469" end="1471" componentId="JULIE Token Boundary Detector" id="264" lemma="5913" posTag="9095"/><types:Token xmi:id="10145" sofa="431" begin="1472" end="1485" componentId="JULIE Token Boundary Detector" id="265" lemma="6507" posTag="10584"/><types:Token xmi:id="8990" sofa="431" begin="1486" end="1491" componentId="JULIE Token Boundary Detector" id="266" lemma="1681" posTag="11290"/><types:Token xmi:id="9174" sofa="431" begin="1492" end="1494" componentId="JULIE Token Boundary Detector" id="267" lemma="4114" posTag="2537"/><types:Token xmi:id="2365" sofa="431" begin="1495" end="1499" componentId="JULIE Token Boundary Detector" id="268" lemma="8723" posTag="5600"/><types:Token xmi:id="10503" sofa="431" begin="1499" end="1500" componentId="JULIE Token Boundary Detector" id="269" lemma="2943" posTag="7987"/><types:Token xmi:id="11316" sofa="431" begin="1504" end="1509" componentId="JULIE Token Boundary Detector" id="270" lemma="1380" posTag="7454"/><types:Token xmi:id="1438" sofa="431" begin="1510" end="1517" componentId="JULIE Token Boundary Detector" id="271" lemma="8016" posTag="7650"/><types:Token xmi:id="8575" sofa="431" begin="1518" end="1525" componentId="JULIE Token Boundary Detector" id="272" lemma="1658" posTag="7924"/><types:Token xmi:id="8618" sofa="431" begin="1526" end="1530" componentId="JULIE Token Boundary Detector" id="273" lemma="3167" posTag="3027"/><types:Token xmi:id="9639" sofa="431" begin="1531" end="1534" componentId="JULIE Token Boundary Detector" id="274" lemma="8292" posTag="7897"/><types:Token 
xmi:id="5757" sofa="431" begin="1535" end="1547" componentId="JULIE Token Boundary Detector" id="275" lemma="1758" posTag="12114"/><types:Token xmi:id="7076" sofa="431" begin="1548" end="1554" componentId="JULIE Token Boundary Detector" id="276" lemma="5190" posTag="5402"/><types:Token xmi:id="1454" sofa="431" begin="1555" end="1557" componentId="JULIE Token Boundary Detector" id="277" lemma="6709" posTag="7092"/><types:Token xmi:id="2911" sofa="431" begin="1558" end="1563" componentId="JULIE Token Boundary Detector" id="278" lemma="5565" posTag="1411"/><types:Token xmi:id="1766" sofa="431" begin="1563" end="1564" componentId="JULIE Token Boundary Detector" id="279" lemma="4333" posTag="1628"/><types:Token xmi:id="7111" sofa="431" begin="1565" end="1569" componentId="JULIE Token Boundary Detector" id="280" lemma="4368" posTag="8255"/><types:Token xmi:id="1639" sofa="431" begin="1570" end="1574" componentId="JULIE Token Boundary Detector" id="281" lemma="9245" posTag="10942"/><types:Token xmi:id="7724" sofa="431" begin="1575" end="1577" componentId="JULIE Token Boundary Detector" id="282" lemma="5631" posTag="2054"/><types:Token xmi:id="7598" sofa="431" begin="1578" end="1591" componentId="JULIE Token Boundary Detector" id="283" lemma="9030" posTag="8286"/><types:Token xmi:id="1837" sofa="431" begin="1591" end="1592" componentId="JULIE Token Boundary Detector" id="284" lemma="7501" posTag="2331"/><types:Token xmi:id="8634" sofa="431" begin="1593" end="1595" componentId="JULIE Token Boundary Detector" id="285" lemma="3479" posTag="986"/><types:Token xmi:id="8149" sofa="431" begin="1596" end="1599" componentId="JULIE Token Boundary Detector" id="286" lemma="2046" posTag="6821"/><types:Token xmi:id="5343" sofa="431" begin="1600" end="1602" componentId="JULIE Token Boundary Detector" id="287" lemma="6540" posTag="10492"/><types:Token xmi:id="6146" sofa="431" begin="1603" end="1613" componentId="JULIE Token Boundary Detector" id="288" lemma="3140" 
posTag="4036"/><types:Token xmi:id="4195" sofa="431" begin="1614" end="1616" componentId="JULIE Token Boundary Detector" id="289" lemma="2251" posTag="4394"/><types:Token xmi:id="3589" sofa="431" begin="1617" end="1624" componentId="JULIE Token Boundary Detector" id="290" lemma="12268" posTag="3952"/><types:Token xmi:id="6767" sofa="431" begin="1625" end="1634" componentId="JULIE Token Boundary Detector" id="291" lemma="930" posTag="11200"/><types:Token xmi:id="11268" sofa="431" begin="1635" end="1642" componentId="JULIE Token Boundary Detector" id="292" lemma="3208" posTag="10348"/><types:Token xmi:id="761" sofa="431" begin="1643" end="1646" componentId="JULIE Token Boundary Detector" id="293" lemma="3306" posTag="7639"/><types:Token xmi:id="10206" sofa="431" begin="1647" end="1648" componentId="JULIE Token Boundary Detector" id="294" lemma="1243" posTag="5232"/><types:Token xmi:id="11449" sofa="431" begin="1649" end="1658" componentId="JULIE Token Boundary Detector" id="295" lemma="5417" posTag="9265"/><types:Token xmi:id="4285" sofa="431" begin="1659" end="1667" componentId="JULIE Token Boundary Detector" id="296" lemma="11812" posTag="1719"/><types:Token xmi:id="10187" sofa="431" begin="1668" end="1670" componentId="JULIE Token Boundary Detector" id="297" lemma="10663" posTag="2325"/><types:Token xmi:id="11820" sofa="431" begin="1671" end="1684" componentId="JULIE Token Boundary Detector" id="298" lemma="7229" posTag="8325"/><types:Token xmi:id="11479" sofa="431" begin="1685" end="1691" componentId="JULIE Token Boundary Detector" id="299" lemma="1318" posTag="6501"/><types:Token xmi:id="3378" sofa="431" begin="1692" end="1695" componentId="JULIE Token Boundary Detector" id="300" lemma="11192" posTag="3164"/><types:Token xmi:id="3927" sofa="431" begin="1695" end="1696" componentId="JULIE Token Boundary Detector" id="301" lemma="2673" posTag="5987"/><types:Token xmi:id="5921" sofa="431" begin="1697" end="1704" componentId="JULIE Token Boundary Detector" id="302" 
lemma="1388" posTag="3778"/><types:Token xmi:id="5870" sofa="431" begin="1704" end="1705" componentId="JULIE Token Boundary Detector" id="303" lemma="6275" posTag="11925"/><types:Token xmi:id="7908" sofa="431" begin="1706" end="1714" componentId="JULIE Token Boundary Detector" id="304" lemma="9515" posTag="11611"/><types:Token xmi:id="6395" sofa="431" begin="1715" end="1719" componentId="JULIE Token Boundary Detector" id="305" lemma="11022" posTag="11376"/><types:Token xmi:id="8902" sofa="431" begin="1720" end="1733" componentId="JULIE Token Boundary Detector" id="306" lemma="5502" posTag="9018"/><types:Token xmi:id="5087" sofa="431" begin="1733" end="1734" componentId="JULIE Token Boundary Detector" id="307" lemma="7789" posTag="6792"/><types:Token xmi:id="6733" sofa="431" begin="1735" end="1740" componentId="JULIE Token Boundary Detector" id="308" lemma="624" posTag="8316"/><types:Token xmi:id="4055" sofa="431" begin="1741" end="1744" componentId="JULIE Token Boundary Detector" id="309" lemma="2850" posTag="9594"/><types:Token xmi:id="3317" sofa="431" begin="1745" end="1749" componentId="JULIE Token Boundary Detector" id="310" lemma="6924" posTag="5151"/><types:Token xmi:id="3463" sofa="431" begin="1750" end="1754" componentId="JULIE Token Boundary Detector" id="311" lemma="2127" posTag="8731"/><types:Token xmi:id="5243" sofa="431" begin="1754" end="1755" componentId="JULIE Token Boundary Detector" id="312" lemma="9110" posTag="11180"/><types:Token xmi:id="12252" sofa="431" begin="1755" end="1764" componentId="JULIE Token Boundary Detector" id="313" lemma="7927" posTag="3030"/><types:Token xmi:id="8966" sofa="431" begin="1765" end="1767" componentId="JULIE Token Boundary Detector" id="314" lemma="958" posTag="3766"/><types:Token xmi:id="9433" sofa="431" begin="1768" end="1771" componentId="JULIE Token Boundary Detector" id="315" lemma="8610" posTag="7894"/><types:Token xmi:id="7293" sofa="431" begin="1772" end="1780" componentId="JULIE Token Boundary Detector" 
id="316" lemma="5235" posTag="7166"/><types:Token xmi:id="11512" sofa="431" begin="1781" end="1788" componentId="JULIE Token Boundary Detector" id="317" lemma="4905" posTag="2121"/><types:Token xmi:id="10914" sofa="431" begin="1789" end="1791" componentId="JULIE Token Boundary Detector" id="318" lemma="1299" posTag="9783"/><types:Token xmi:id="7797" sofa="431" begin="1792" end="1797" componentId="JULIE Token Boundary Detector" id="319" lemma="3646" posTag="7821"/><types:Token xmi:id="11400" sofa="431" begin="1798" end="1801" componentId="JULIE Token Boundary Detector" id="320" lemma="11468" posTag="2408"/><types:Token xmi:id="8659" sofa="431" begin="1802" end="1812" componentId="JULIE Token Boundary Detector" id="321" lemma="4171" posTag="4565"/><types:Token xmi:id="4630" sofa="431" begin="1813" end="1819" componentId="JULIE Token Boundary Detector" id="322" lemma="7364" posTag="6015"/><types:Token xmi:id="792" sofa="431" begin="1820" end="1825" componentId="JULIE Token Boundary Detector" id="323" lemma="5639" posTag="5187"/><types:Token xmi:id="3605" sofa="431" begin="1825" end="1826" componentId="JULIE Token Boundary Detector" id="324" lemma="6717" posTag="6283"/><types:Token xmi:id="9417" sofa="431" begin="1826" end="1835" componentId="JULIE Token Boundary Detector" id="325" lemma="7427" posTag="1174"/><types:Token xmi:id="6932" sofa="431" begin="1836" end="1843" componentId="JULIE Token Boundary Detector" id="326" lemma="699" posTag="10019"/><types:Token xmi:id="6882" sofa="431" begin="1844" end="1847" componentId="JULIE Token Boundary Detector" id="327" lemma="10234" posTag="3980"/><types:Token xmi:id="1126" sofa="431" begin="1848" end="1851" componentId="JULIE Token Boundary Detector" id="328" lemma="8982" posTag="10392"/><types:Token xmi:id="8336" sofa="431" begin="1852" end="1858" componentId="JULIE Token Boundary Detector" id="329" lemma="9038" posTag="1881"/><types:Token xmi:id="5817" sofa="431" begin="1858" end="1859" componentId="JULIE Token Boundary 
Detector" id="330" lemma="11504" posTag="9895"/><tcas:DocumentAnnotation xmi:id="12478" sofa="431" begin="0" end="1859" language="x-unspecified"/><ext:DBProcessingMetaData xmi:id="12483" sofa="431" begin="0" end="0"><primaryKey>1681975</primaryKey></ext:DBProcessingMetaData><cas:Sofa xmi:id="431" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="Comparison of the effects of the novel vasodilator FK409 with those of nitroglycerin in isolated coronary artery of the dog.&#10;1. The vasorelaxant effects of FK409, a new nitrovasodilator synthesized from a microbial product, were compared with those of nitroglycerin in isolated coronary artery rings of the dog contracted with U46619 (10(-7) M). 2. FK409 (10(-11)-10(-5) M) and nitroglycerin (10(-9)-10(-4) M) each produced a concentration-dependent relaxation. Comparison of EC50 values showed that FK409 was about 25 times more potent than nitroglycerin. 3. Submaximum concentrations of nitroglycerin (10(-6) M) and FK409 (3 x 10(-8) M) elevated guanosine 3':5'-cyclic monophosphate (cyclic GMP) levels, effects associated with vasorelaxation. Adenosine 3':5'-cyclic monophosphate (cyclic AMP) levels were unaffected. 4. The concentration-relaxation curves for nitroglycerin and FK409 were shifted to the right by methylene blue (3 x 10(-6) - 3 x 10(-5) M), an inhibitor of soluble guanylate cyclase, and to the left by M&amp;B22,948 (3 x 10(-6) - 3 x 10(-5) M), an inhibitor of cyclic GMP phosphodiesterase. 5. After exposure of coronary arteries to the maximally-effective concentration of nitroglycerin (10(-4) M), the mean EC50 value of FK409 did not change significantly, although that of nitroglycerin increased about 60 fold. After exposure to the maximally-effective concentration of FK409 (10(-5) M), the mean EC50 value of FK409 increased about 6 fold and that of nitroglycerin about 11 fold. 6. 
These results suggest that the vasorelaxant effect of FK409, like that of nitroglycerin, is due to activation of soluble guanylate cyclase and a resultant increase in intracellular cyclic GMP. However, compared with nitroglycerin, there was less self-tolerance to the relaxant effects of FK409 and relatively little cross-tolerance between the two agents."/><cas:FSArray xmi:id="380" elements="398 60 449 95 258 420 409 38 367 49 269 128 27 117 106 179"/><cas:FSArray xmi:id="342" elements="315 324 9 18 86 333 440 225 297 489 306"/><cas:FSArray xmi:id="460" elements=""/><cas:FSArray xmi:id="438" elements=""/><cas:FSArray xmi:id="173" elements="151 355 498 139"/><cas:FSArray xmi:id="378" elements=""/><cas:FSArray xmi:id="3847" elements="6294"/><cas:FSArray xmi:id="4956" elements="4545"/><cas:FSArray xmi:id="12189" elements="5223"/><cas:FSArray xmi:id="11986" elements="6162"/><cas:FSArray xmi:id="8864" elements="12006"/><cas:FSArray xmi:id="3333" elements="5334"/><cas:FSArray xmi:id="1669" elements="9615"/><cas:FSArray xmi:id="1966" elements="1864"/><cas:FSArray xmi:id="7358" elements="1010"/><cas:FSArray xmi:id="6592" elements="10564"/><cas:FSArray xmi:id="3517" elements="11943"/><cas:FSArray xmi:id="11631" elements="4105"/><cas:FSArray xmi:id="9822" elements="11890"/><cas:FSArray xmi:id="3024" elements="12431"/><cas:FSArray xmi:id="6195" elements="2275"/><cas:FSArray xmi:id="998" elements="11005"/><cas:FSArray xmi:id="8793" elements="3353"/><cas:FSArray xmi:id="6229" elements="8121"/><cas:FSArray xmi:id="1531" elements="12015"/><cas:FSArray xmi:id="786" elements="3095"/><cas:FSArray xmi:id="7473" elements="3569"/><cas:FSArray xmi:id="1655" elements="7951"/><cas:FSArray xmi:id="8109" elements="2899"/><cas:FSArray xmi:id="7832" elements="6783"/><cas:FSArray xmi:id="3113" elements="5299"/><cas:FSArray xmi:id="6364" elements="3703"/><cas:FSArray xmi:id="7325" elements="11495"/><cas:FSArray xmi:id="6548" elements="11114"/><cas:FSArray xmi:id="9206" 
elements="974"/><cas:FSArray xmi:id="11090" elements="5895"/><cas:FSArray xmi:id="1177" elements="7670"/><cas:FSArray xmi:id="4562" elements="9942"/><cas:FSArray xmi:id="7984" elements="8505"/><cas:FSArray xmi:id="10807" elements="855"/><cas:FSArray xmi:id="10062" elements="3740"/><cas:FSArray xmi:id="10883" elements="4426"/><cas:FSArray xmi:id="2610" elements="2012"/><cas:FSArray xmi:id="2411" elements="8759"/><cas:FSArray xmi:id="9168" elements="3520"/><cas:FSArray xmi:id="2065" elements="10822"/><cas:FSArray xmi:id="707" elements="10455"/><cas:FSArray xmi:id="3654" elements="2625"/><cas:FSArray xmi:id="10056" elements="10047"/><cas:FSArray xmi:id="11937" elements="7573"/><cas:FSArray xmi:id="1969" elements="8274"/><cas:FSArray xmi:id="4734" elements="6411"/><cas:FSArray xmi:id="4672" elements="3877"/><cas:FSArray xmi:id="6198" elements="12065"/><cas:FSArray xmi:id="4454" elements="12234"/><cas:FSArray xmi:id="4420" elements="11391"/><cas:FSArray xmi:id="8205" elements="7746"/><cas:FSArray xmi:id="6328" elements="9597"/><cas:FSArray xmi:id="3042" elements="11928"/><cas:FSArray xmi:id="833" elements="6673"/><cas:FSArray xmi:id="8514" elements="5059"/><cas:FSArray xmi:id="7740" elements="6303"/><cas:FSArray xmi:id="10819" elements="10671"/><cas:FSArray xmi:id="5084" elements="11803"/><cas:FSArray xmi:id="11030" elements="5582"/><cas:FSArray xmi:id="11774" elements="5792"/><cas:FSArray xmi:id="12456" elements="5904"/><cas:FSArray xmi:id="11952" elements="6907"/><cas:FSArray xmi:id="12475" elements="11794"/><cas:FSArray xmi:id="8322" elements="2390"/><cas:FSArray xmi:id="7033" elements="10956"/><cas:FSArray xmi:id="10098" elements="11332"/><cas:FSArray xmi:id="4600" elements="12243"/><cas:FSArray xmi:id="1396" elements="2186"/><cas:FSArray xmi:id="1240" elements="7220"/><cas:FSArray xmi:id="6637" elements="3943"/><cas:FSArray xmi:id="8420" elements="5317"/><cas:FSArray xmi:id="2613" elements="11358"/><cas:FSArray xmi:id="3983" elements="5308"/><cas:FSArray 
xmi:id="4498" elements="9523"/><cas:FSArray xmi:id="10395" elements="598"/><cas:FSArray xmi:id="12347" elements="3781"/><cas:FSArray xmi:id="4501" elements="11349"/><cas:FSArray xmi:id="2414" elements="9874"/><cas:FSArray xmi:id="4273" elements="867"/><cas:FSArray xmi:id="5937" elements="1267"/><cas:FSArray xmi:id="7879" elements="2951"/><cas:FSArray xmi:id="8252" elements="4411"/><cas:FSArray xmi:id="11284" elements="4962"/><cas:FSArray xmi:id="660" elements="11093"/><cas:FSArray xmi:id="12276" elements="8884"/><cas:FSArray xmi:id="5399" elements="8650"/><cas:FSArray xmi:id="10535" elements="3814"/><cas:FSArray xmi:id="10059" elements="6603"/><cas:FSArray xmi:id="1470" elements="7679"/><cas:FSArray xmi:id="2183" elements="876"/><cas:FSArray xmi:id="5446" elements="1228"/><cas:FSArray xmi:id="6989" elements="6620"/><cas:FSArray xmi:id="640" elements="7138"/><cas:FSArray xmi:id="6698" elements="9810"/><cas:FSArray xmi:id="6251" elements="6484"/><cas:FSArray xmi:id="8392" elements="10089"/><cas:FSArray xmi:id="12353" elements="4350"/><cas:FSArray xmi:id="1063" elements="10127"/><cas:FSArray xmi:id="7036" elements="10874"/><cas:FSArray xmi:id="4254" elements="7058"/><cas:FSArray xmi:id="2650" elements="4655"/><cas:FSArray xmi:id="10596" elements="2592"/><cas:FSArray xmi:id="9354" elements="3216"/><cas:FSArray xmi:id="5449" elements="7854"/><cas:FSArray xmi:id="8560" elements="10242"/><cas:FSArray xmi:id="8130" elements="3986"/><cas:FSArray xmi:id="4753" elements="1334"/><cas:FSArray xmi:id="7424" elements="10225"/><cas:FSArray xmi:id="2469" elements="5154"/><cas:FSArray xmi:id="9015" elements="5521"/><cas:FSArray xmi:id="7193" elements="7885"/><cas:FSArray xmi:id="8533" elements="10398"/><cas:FSArray xmi:id="12412" elements="11599"/><cas:FSArray xmi:id="9209" elements="10101"/><cas:FSArray xmi:id="8289" elements="3757"/><cas:FSArray xmi:id="6210" elements="3270"/><cas:FSArray xmi:id="10324" elements="6448"/><cas:FSArray xmi:id="7039" elements="3077"/><cas:FSArray 
xmi:id="1853" elements="11057"/><cas:FSArray xmi:id="3233" elements="10729"/><cas:FSArray xmi:id="3657" elements="7990"/><cas:FSArray xmi:id="10489" elements="11634"/><cas:FSArray xmi:id="1739" elements="11367"/><cas:FSArray xmi:id="11868" elements="9006"/><cas:FSArray xmi:id="12136" elements="4359"/><cas:FSArray xmi:id="11476" elements="4276"/><cas:FSArray xmi:id="6504" elements="1610"/><cas:FSArray xmi:id="710" elements="2689"/><cas:FSArray xmi:id="5984" elements="4684"/><cas:FSArray xmi:id="7743" elements="2399"/><cas:FSArray xmi:id="946" elements="8893"/><cas:FSArray xmi:id="2124" elements="5434"/><cas:FSArray xmi:id="4251" elements="2874"/><cas:FSArray xmi:id="3511" elements="11646"/><cas:FSArray xmi:id="2441" elements="2556"/><cas:FSArray xmi:id="4784" elements="2300"/><cas:FSArray xmi:id="1818" elements="9253"/><cas:FSArray xmi:id="983" elements="3621"/><cas:FSArray xmi:id="4929" elements="7380"/><cas:FSArray xmi:id="9308" elements="10587"/><cas:FSArray xmi:id="6457" elements="910"/><cas:FSArray xmi:id="6232" elements="3446"/><cas:FSArray xmi:id="3314" elements="9568"/><cas:FSArray xmi:id="4870" elements="9898"/><cas:FSArray xmi:id="8572" elements="2021"/><cas:FSArray xmi:id="7696" elements="4693"/><cas:FSArray xmi:id="7147" elements="5849"/><cas:FSArray xmi:id="9262" elements="10538"/><cas:FSArray xmi:id="747" elements="3769"/><cas:FSArray xmi:id="3303" elements="11105"/><cas:FSArray xmi:id="1276" elements="10547"/><cas:FSArray xmi:id="1399" elements="12139"/><cas:FSArray xmi:id="2353" elements="3413"/><cas:FSArray xmi:id="2670" elements="6833"/><cas:FSArray xmi:id="5546" elements="7328"/><cas:FSArray xmi:id="12148" elements="9726"/><cas:FSArray xmi:id="4611" elements="1782"/><cas:FSArray xmi:id="5973" elements="9674"/><cas:FSArray xmi:id="11544" elements="9606"/><cas:FSArray xmi:id="9107" elements="9465"/><cas:FSArray xmi:id="11596" elements="5452"/><cas:FSArray xmi:id="11287" elements="2616"/><cas:FSArray xmi:id="9819" elements="1497"/><cas:FSArray 
xmi:id="6445" elements="2460"/><cas:FSArray xmi:id="9311" elements="3995"/><cas:FSArray xmi:id="12350" elements="643"/><cas:FSArray xmi:id="11465" elements="9118"/><cas:FSArray xmi:id="1343" elements="2774"/><cas:FSArray xmi:id="9939" elements="3191"/><cas:FSArray xmi:id="11871" elements="6551"/><cas:FSArray xmi:id="11940" elements="7780"/><cas:FSArray xmi:id="12167" elements="5998"/><cas:FSArray xmi:id="9655" elements="3086"/><cas:FSArray xmi:id="1799" elements="738"/><cas:FSArray xmi:id="8607" elements="9627"/><cas:FSArray xmi:id="4405" elements="4376"/><cas:FSArray xmi:id="11983" elements="824"/><cas:FSArray xmi:id="1666" elements="2730"/><cas:FSArray xmi:id="7435" elements="1672"/><cas:FSArray xmi:id="9636" elements="11293"/><cas:FSArray xmi:id="11102" elements="11777"/><cas:FSArray xmi:id="12170" elements="2488"/><cas:FSArray xmi:id="8319" elements="9999"/><cas:FSArray xmi:id="9892" elements="5038"/><cas:FSArray xmi:id="7337" elements="1362"/><cas:FSArray xmi:id="9624" elements="2356"/><cas:FSArray xmi:id="11313" elements="1506"/><cas:FSArray xmi:id="4781" elements="6355"/><cas:FSArray xmi:id="5047" elements="4096"/><cas:FSArray xmi:id="9715" elements="5425"/><cas:FSArray xmi:id="11379" elements="1371"/><cas:FSArray xmi:id="836" elements="5713"/><cas:FSArray xmi:id="7135" elements="1722"/><cas:FSArray xmi:id="5510" elements="6795"/><cas:FSArray xmi:id="4959" elements="6898"/><cas:FSArray xmi:id="10945" elements="6972"/><cas:FSArray xmi:id="2565" elements="6263"/><cas:FSArray xmi:id="10886" elements="6656"/><cas:FSArray xmi:id="5773" elements="9046"/><cas:FSArray xmi:id="5414" elements="9220"/><cas:FSArray xmi:id="3858" elements="989"/><cas:FSArray xmi:id="8820" elements="9559"/><cas:FSArray xmi:id="10930" elements="5886"/><cas:FSArray xmi:id="4435" elements="3902"/><cas:FSArray xmi:id="3712" elements="2101"/><cas:FSArray xmi:id="12288" elements="8867"/><cas:FSArray xmi:id="1066" elements="8224"/><cas:FSArray xmi:id="6367" elements="10680"/><cas:FSArray 
xmi:id="11643" elements="4646"/><cas:FSArray xmi:id="5603" elements="8784"/><cas:FSArray xmi:id="10795" elements="8032"/><cas:FSArray xmi:id="2110" elements="885"/><cas:FSArray xmi:id="1534" elements="7771"/><cas:FSArray xmi:id="11755" elements="5606"/><cas:FSArray xmi:id="1060" elements="6201"/><cas:FSArray xmi:id="789" elements="8750"/><cas:FSArray xmi:id="5738" elements="4087"/><cas:FSArray xmi:id="11736" elements="10118"/><cas:FSArray xmi:id="7361" elements="4504"/><cas:FSArray xmi:id="1716" elements="12024"/><cas:FSArray xmi:id="1279" elements="6254"/><cas:FSArray xmi:id="11302" elements="8041"/><cas:FSArray xmi:id="4803" elements="8423"/><cas:FSArray xmi:id="7421" elements="1019"/><cas:FSArray xmi:id="8283" elements="10933"/><cas:FSArray xmi:id="5443" elements="7630"/><cas:FSArray xmi:id="2771" elements="11183"/><cas:FSArray xmi:id="2328" elements="10905"/><cas:FSArray xmi:id="758" elements="9098"/><cas:FSArray xmi:id="9556" elements="11622"/><cas:FSArray xmi:id="7882" elements="5405"/><cas:FSArray xmi:id="2908" elements="4675"/><cas:FSArray xmi:id="10855" elements="5655"/><cas:FSArray xmi:id="3578" elements="6812"/><cas:FSArray xmi:id="10336" elements="9577"/><cas:FSArray xmi:id="12341" elements="4979"/><cas:FSArray xmi:id="10222" elements="5858"/><cas:FSArray xmi:id="3514" elements="6758"/><cas:FSArray xmi:id="2350" elements="4457"/><cas:FSArray xmi:id="9171" elements="6749"/><cas:FSArray xmi:id="12117" elements="1402"/><cas:FSArray xmi:id="8074" elements="1069"/><cas:FSArray xmi:id="1697" elements="7067"/><cas:FSArray xmi:id="12344" elements="5948"/><cas:FSArray xmi:id="1307" elements="3104"/><cas:FSArray xmi:id="3394" elements="3336"/><cas:FSArray xmi:id="927" elements="11382"/><cas:FSArray xmi:id="9365" elements="9498"/><cas:FSArray xmi:id="10016" elements="2817"/><cas:FSArray xmi:id="7851" elements="9392"/><cas:FSArray xmi:id="6272" elements="2653"/><cas:FSArray xmi:id="679" elements="11171"/><cas:FSArray xmi:id="11608" elements="7492"/><cas:FSArray 
xmi:id="5867" elements="11997"/><cas:FSArray xmi:id="11955" elements="5198"/><cas:FSArray xmi:id="4423" elements="10339"/><cas:FSArray xmi:id="8249" elements="8403"/><cas:FSArray xmi:id="4408" elements="12201"/><cas:FSArray xmi:id="864" elements="10136"/><cas:FSArray xmi:id="10581" elements="6386"/><cas:FSArray xmi:id="10203" elements="9127"/><cas:FSArray xmi:id="1237" elements="12279"/><cas:FSArray xmi:id="3684" elements="5591"/><cas:FSArray xmi:id="9095" elements="12192"/><cas:FSArray xmi:id="10584" elements="10798"/><cas:FSArray xmi:id="11290" elements="10480"/><cas:FSArray xmi:id="2537" elements="7715"/><cas:FSArray xmi:id="5600" elements="8823"/><cas:FSArray xmi:id="7987" elements="4385"/><cas:FSArray xmi:id="7454" elements="729"/><cas:FSArray xmi:id="7650" elements="1001"/><cas:FSArray xmi:id="7924" elements="5696"/><cas:FSArray xmi:id="3027" elements="6436"/><cas:FSArray xmi:id="7897" elements="7024"/><cas:FSArray xmi:id="12114" elements="2799"/><cas:FSArray xmi:id="5402" elements="12316"/><cas:FSArray xmi:id="7092" elements="3731"/><cas:FSArray xmi:id="1411" elements="7340"/><cas:FSArray xmi:id="1628" elements="10810"/><cas:FSArray xmi:id="8255" elements="5050"/><cas:FSArray xmi:id="10942" elements="3245"/><cas:FSArray xmi:id="2054" elements="1900"/><cas:FSArray xmi:id="8286" elements="4756"/><cas:FSArray xmi:id="2331" elements="8563"/><cas:FSArray xmi:id="986" elements="4988"/><cas:FSArray xmi:id="6821" elements="11966"/><cas:FSArray xmi:id="10492" elements="11916"/><cas:FSArray xmi:id="4036" elements="2808"/><cas:FSArray xmi:id="4394" elements="949"/><cas:FSArray xmi:id="3952" elements="2381"/><cas:FSArray xmi:id="11200" elements="10178"/><cas:FSArray xmi:id="10348" elements="7349"/><cas:FSArray xmi:id="7639" elements="12291"/><cas:FSArray xmi:id="5232" elements="4162"/><cas:FSArray xmi:id="9265" elements="7653"/><cas:FSArray xmi:id="1719" elements="3236"/><cas:FSArray xmi:id="2325" elements="777"/><cas:FSArray xmi:id="8325" elements="8432"/><cas:FSArray 
xmi:id="6501" elements="4997"/><cas:FSArray xmi:id="3164" elements="11679"/><cas:FSArray xmi:id="5987" elements="1619"/><cas:FSArray xmi:id="3778" elements="11547"/><cas:FSArray xmi:id="11925" elements="11203"/><cas:FSArray xmi:id="11611" elements="2092"/><cas:FSArray xmi:id="11376" elements="1585"/><cas:FSArray xmi:id="9018" elements="9883"/><cas:FSArray xmi:id="6792" elements="4341"/><cas:FSArray xmi:id="8316" elements="615"/><cas:FSArray xmi:id="9594" elements="6824"/><cas:FSArray xmi:id="5151" elements="10038"/><cas:FSArray xmi:id="8731" elements="10275"/><cas:FSArray xmi:id="11180" elements="1957"/><cas:FSArray xmi:id="3030" elements="11416"/><cas:FSArray xmi:id="3766" elements="3971"/><cas:FSArray xmi:id="7894" elements="5461"/><cas:FSArray xmi:id="7166" elements="5573"/><cas:FSArray xmi:id="2121" elements="10738"/><cas:FSArray xmi:id="9783" elements="2601"/><cas:FSArray xmi:id="7821" elements="682"/><cas:FSArray xmi:id="2408" elements="7999"/><cas:FSArray xmi:id="4565" elements="9021"/><cas:FSArray xmi:id="6015" elements="6515"/><cas:FSArray xmi:id="5187" elements="10351"/><cas:FSArray xmi:id="6283" elements="9841"/><cas:FSArray xmi:id="1174" elements="10161"/><cas:FSArray xmi:id="10019" elements="11907"/><cas:FSArray xmi:id="3980" elements="3033"/><cas:FSArray xmi:id="10392" elements="8112"/><cas:FSArray xmi:id="1881" elements="1282"/><cas:FSArray xmi:id="9895" elements="10327"/><cas:View sofa="431" members="1 9 18 86 225 297 306 315 324 333 440 489 27 38 49 60 95 106 117 128 179 258 269 367 398 409 420 449 71 139 151 355 498 163 215 462 190 198 280 472 234 526 582 534 542 590 558 510 574 550 566 518 6294 4545 5223 6162 12006 5334 9615 1864 1010 10564 11943 4105 11890 12431 2275 11005 3353 8121 12015 3095 3569 7951 2899 6783 5299 3703 11495 11114 974 5895 7670 9942 8505 855 3740 4426 2012 8759 3520 10822 10455 2625 10047 7573 8274 6411 3877 12065 12234 11391 7746 9597 11928 6673 5059 6303 10671 11803 5582 5792 5904 6907 11794 2390 10956 11332 12243 2186 
7220 3943 5317 11358 5308 9523 598 3781 11349 9874 867 1267 2951 4411 4962 11093 8884 8650 3814 6603 7679 876 1228 6620 7138 9810 6484 10089 4350 10127 10874 7058 4655 2592 3216 7854 10242 3986 1334 10225 5154 5521 7885 10398 11599 10101 3757 3270 6448 3077 11057 10729 7990 11634 11367 9006 4359 4276 1610 2689 4684 2399 8893 5434 2874 11646 2556 2300 9253 3621 7380 10587 910 3446 9568 9898 2021 4693 5849 10538 3769 11105 10547 12139 3413 6833 7328 9726 1782 9674 9606 9465 5452 2616 1497 2460 3995 643 9118 2774 3191 6551 7780 5998 3086 738 9627 4376 824 2730 1672 11293 11777 2488 9999 5038 1362 2356 1506 6355 4096 5425 1371 5713 1722 6795 6898 6972 6263 6656 9046 9220 989 9559 5886 3902 2101 8867 8224 10680 4646 8784 8032 885 7771 5606 6201 8750 4087 10118 4504 12024 6254 8041 8423 1019 10933 7630 11183 10905 9098 11622 5405 4675 5655 6812 9577 4979 5858 6758 4457 6749 1402 1069 7067 5948 3104 3336 11382 9498 2817 9392 2653 11171 7492 11997 5198 10339 8403 12201 10136 6386 9127 12279 5591 12192 10798 10480 7715 8823 4385 729 1001 5696 6436 7024 2799 12316 3731 7340 10810 5050 3245 1900 4756 8563 4988 11966 11916 2808 949 2381 10178 7349 12291 4162 7653 3236 777 8432 4997 11679 1619 11547 11203 2092 1585 9883 4341 615 6824 10038 10275 1957 11416 3971 5461 5573 10738 2601 682 7999 9021 6515 10351 9841 10161 11907 3033 8112 1282 10327 6476 3749 11958 10615 3069 9532 11704 6138 938 9357 11786 2076 1346 6171 6916 7590 10847 10170 8768 4235 6286 7372 11341 2284 3581 8024 11049 8050 9490 7662 3823 11899 2747 2167 4862 2175 9457 5326 8465 8599 11528 2662 11671 4838 5022 10284 5275 4397 7582 3850 2540 10008 6725 8812 10495 691 10110 7446 11747 8832 6347 4603 6981 12090 11739 6701 6034 3806 6042 10447 5940 10639 9775 9449 11536 9718 10965 2584 8165 11556 7269 9751 5103 1310 8776 1028 9850 3295 5990 11212 607 6082 1925 7960 4554 9586 1791 2417 8536 9384 6665 3438 10556 7196 10251 7688 919 6612 10831 10989 7127 9212 10779 1708 4932 8008 11882 11260 8856 3225 1949 2642 2681 
11305 11014 11236 10839 4592 11614 2739 4846 12210 8395 10948 11066 10423 8591 1700 9330 750 9759 5470 11975 12440 7050 10997 9087 5557 1689 5705 2113 9284 5111 1036 2038 7185 9802 652 7438 6804 6948 7824 1972 2826 1259 11874 11989 2634 8958 2548 12173 6007 1430 4079 1326 10022 3345 5478 1873 2030 7642 8368 3545 6595 7042 5841 3061 5030 5647 4466 2084 9858 1251 5163 7541 12448 3790 4243 10689 12181 4971 7813 5833 4584 9907 11425 5549 12356 5976 6629 9959 7549 10771 2068 3487 2513 1731 1489 9767 8328 3200 6874 3798 10747 9951 9507 8181 6493 3638 7900 3455 1354 4071 4138 2292 5375 8876 2057 8918 4227 9866 8173 4490 4664 632 10030 966 3676 5513 10065 4854 8457 10787 9915 1631 3630 10573 1291 11712 2211 3132 1856 8412 5913 6507 1681 4114 8723 2943 1380 8016 1658 3167 8292 1758 5190 6709 5565 4333 4368 9245 5631 9030 7501 3479 2046 6540 3140 2251 12268 930 3208 3306 1243 5417 11812 10663 7229 1318 11192 2673 1388 6275 9515 11022 5502 7789 624 2850 6924 2127 9110 7927 958 8610 5235 4905 1299 3646 11468 4171 7364 5639 6717 7427 699 10234 8982 9038 11504 10292 3911 5801 4146 5359 8189 9735 3529 7509 6576 8208 7755 2444 2755 3148 7935 3861 7525 2259 1569 4301 10431 12074 12415 9474 9658 10360 9983 4211 4179 4806 5957 3008 663 2521 10259 3831 4940 2960 9967 2151 5171 10073 8077 5119 8441 8093 5722 8058 2309 1212 9229 1158 3254 5664 1909 2783 10376 2219 3495 6420 1414 10623 7095 8352 839 2568 9055 4737 10599 8258 1594 11139 9699 6066 1553 3715 5680 4822 7237 4438 7557 2334 6235 4317 2858 1196 4718 9923 1473 7863 5135 9071 3660 4702 3422 11564 3175 9368 6312 4614 3116 6682 6090 4787 9401 1044 6106 8796 9338 7150 10407 8233 7968 12300 4513 7253 2834 7476 9292 4039 2195 4873 1180 1933 5283 8376 3553 6370 4004 11433 2927 4529 7204 7169 8691 3886 2135 2976 6018 10973 11220 10858 3279 1078 4474 7405 11655 10697 12459 4765 7835 1980 12151 7277 8675 1884 8926 2425 11033 10519 4568 3687 9683 12364 9190 9825 11836 2714 7699 9268 11123 5259 2698 1537 2235 713 808 7008 6640 5068 11852 
6213 8840 12380 8473 11155 5383 11244 8707 7389 7614 1094 7309 11720 6992 11580 5615 9314 9786 4913 4122 4020 6050 10308 6179 5207 8489 5530 1742 6956 6122 12396 2472 12098 894 5776 3362 10713 6560 10647 11074 2497 8517 1515 10755 8544 8942 8133 10889 11758 2992 6460 12325 5486 6842 8734 12033 10464 1821 1110 12218 9152 12049 1142 12120 3045 3955 9136 4257 9540 11688 1996 6524 5006 1802 6331 5741 7457 4889 3397 2883 6858 8300 10145 8990 9174 2365 10503 11316 1438 8575 8618 9639 5757 7076 1454 2911 1766 7111 1639 7724 7598 1837 8634 8149 5343 6146 4195 3589 6767 11268 761 10206 11449 4285 10187 11820 11479 3378 3927 5921 5870 7908 6395 8902 5087 6733 4055 3317 3463 5243 12252 8966 9433 7293 11512 10914 7797 11400 8659 4630 792 3605 9417 6932 6882 1126 8336 5817 12478 12483"/></xmi:XMI>
\ No newline at end of file
+<?xml version="1.0" encoding="UTF-8"?>
+<xmi:XMI xmlns:xmi="http://www.omg.org/XMI" xmlns:tcas="http:///uima/tcas.ecore" xmlns:cas="http:///uima/cas.ecore"
+         xmlns:pubmed="http:///de/julielab/jcore/types/pubmed.ecore"
+         xmlns:ext="http:///de/julielab/jcore/types/ext.ecore" xmlns:types="http:///de/julielab/jcore/types.ecore"
+         xmi:version="2.0"><cas:NULL xmi:id="0"/><types:AbstractText xmi:id="1" sofa="431" begin="125" end="1859"/><types:Chemical xmi:id="9" sofa="431" begin="0" end="0" registryNumber="0" nameOfSubstance="Purinones"/><types:Chemical xmi:id="18" sofa="431" begin="0" end="0" registryNumber="0" nameOfSubstance="Vasodilator Agents"/><types:Chemical xmi:id="86" sofa="431" begin="0" end="0" registryNumber="76898-47-0" nameOfSubstance="15-Hydroxy-11 alpha,9 alpha-(epoxymethano)prosta-5,13-dienoic Acid"/><types:Chemical xmi:id="225" sofa="431" begin="0" end="0" registryNumber="EC 4.6.1.2" nameOfSubstance="Guanylate Cyclase"/><types:Chemical xmi:id="297" sofa="431" begin="0" end="0" registryNumber="G59M7S0WS3" nameOfSubstance="Nitroglycerin"/><types:Chemical xmi:id="306" sofa="431" begin="0" end="0" registryNumber="T42P99266K" nameOfSubstance="Methylene Blue"/><types:Chemical xmi:id="315" sofa="431" begin="0" end="0" registryNumber="0" nameOfSubstance="Nitro Compounds"/><types:Chemical xmi:id="324" sofa="431" begin="0" end="0" registryNumber="0" nameOfSubstance="Prostaglandin Endoperoxides, Synthetic"/><types:Chemical xmi:id="333" sofa="431" begin="0" end="0" registryNumber="92454-60-9" nameOfSubstance="FK 409"/><types:Chemical xmi:id="440" sofa="431" begin="0" end="0" registryNumber="EC 3.1.4.35" nameOfSubstance="3',5'-Cyclic-GMP Phosphodiesterases"/><types:Chemical xmi:id="489" sofa="431" begin="0" end="0" registryNumber="GXT25D5DS0" nameOfSubstance="zaprinast"/><types:MeshHeading xmi:id="27" sofa="431" begin="0" end="0" descriptorName="Nitroglycerin" qualifierName="pharmacology" descriptorNameMajorTopic="false" qualifierNameMajorTopic="true"/><types:MeshHeading xmi:id="38" sofa="431" begin="0" end="0" descriptorName="In Vitro Techniques" descriptorNameMajorTopic="false" qualifierNameMajorTopic="false"/><types:MeshHeading xmi:id="49" sofa="431" begin="0" end="0" descriptorName="Methylene Blue" qualifierName="pharmacology" descriptorNameMajorTopic="false" 
qualifierNameMajorTopic="false"/><types:MeshHeading xmi:id="60" sofa="431" begin="0" end="0" descriptorName="3',5'-Cyclic-GMP Phosphodiesterases" qualifierName="antagonists &amp; inhibitors" descriptorNameMajorTopic="false" qualifierNameMajorTopic="false"/><types:MeshHeading xmi:id="95" sofa="431" begin="0" end="0" descriptorName="Coronary Vessels" qualifierName="drug effects" descriptorNameMajorTopic="false" qualifierNameMajorTopic="true"/><types:MeshHeading xmi:id="106" sofa="431" begin="0" end="0" descriptorName="Purinones" qualifierName="pharmacology" descriptorNameMajorTopic="false" qualifierNameMajorTopic="false"/><types:MeshHeading xmi:id="117" sofa="431" begin="0" end="0" descriptorName="Prostaglandin Endoperoxides, Synthetic" qualifierName="pharmacology" descriptorNameMajorTopic="false" qualifierNameMajorTopic="false"/><types:MeshHeading xmi:id="128" sofa="431" begin="0" end="0" descriptorName="Nitro Compounds" qualifierName="pharmacology" descriptorNameMajorTopic="false" qualifierNameMajorTopic="true"/><types:MeshHeading xmi:id="179" sofa="431" begin="0" end="0" descriptorName="Vasodilator Agents" qualifierName="pharmacology" descriptorNameMajorTopic="false" qualifierNameMajorTopic="true"/><types:MeshHeading xmi:id="258" sofa="431" begin="0" end="0" descriptorName="Dogs" descriptorNameMajorTopic="false" qualifierNameMajorTopic="false"/><types:MeshHeading xmi:id="269" sofa="431" begin="0" end="0" descriptorName="Muscle Relaxation" qualifierName="drug effects" descriptorNameMajorTopic="false" qualifierNameMajorTopic="false"/><types:MeshHeading xmi:id="367" sofa="431" begin="0" end="0" descriptorName="Male" descriptorNameMajorTopic="false" qualifierNameMajorTopic="false"/><types:MeshHeading xmi:id="398" sofa="431" begin="0" end="0" descriptorName="15-Hydroxy-11 alpha,9 alpha-(epoxymethano)prosta-5,13-dienoic Acid" descriptorNameMajorTopic="false" qualifierNameMajorTopic="false"/><types:MeshHeading xmi:id="409" sofa="431" begin="0" end="0" 
descriptorName="Guanylate Cyclase" qualifierName="antagonists &amp; inhibitors" descriptorNameMajorTopic="false" qualifierNameMajorTopic="false"/><types:MeshHeading xmi:id="420" sofa="431" begin="0" end="0" descriptorName="Female" descriptorNameMajorTopic="false" qualifierNameMajorTopic="false"/><types:MeshHeading xmi:id="449" sofa="431" begin="0" end="0" descriptorName="Animals" descriptorNameMajorTopic="false" qualifierNameMajorTopic="false"/><pubmed:ManualDescriptor xmi:id="71" sofa="431" begin="0" end="0" meSHList="380" chemicalList="342" dBInfoList="460" keywordList="438" geneSymbolList=""/><types:AuthorInfo xmi:id="139" sofa="431" begin="0" end="0" foreName="N" lastName="Taira" initials="N"/><types:AuthorInfo xmi:id="151" sofa="431" begin="0" end="0" foreName="H" affiliation="Department of Pharmacology, Tohoku University School of Medicine, Sendai, Japan." lastName="Yamada" initials="H"/><types:AuthorInfo xmi:id="355" sofa="431" begin="0" end="0" foreName="F" lastName="Yoneyama" initials="F"/><types:AuthorInfo xmi:id="498" sofa="431" begin="0" end="0" foreName="K" lastName="Satoh" initials="K"/><types:Date xmi:id="163" sofa="431" begin="0" end="0" day="0" month="7" year="1991"/><types:Date xmi:id="215" sofa="431" begin="0" end="0" day="0" month="7" year="1991"/><types:Date xmi:id="462" sofa="431" begin="0" end="0" day="0" month="7" year="1991"/><types:Title xmi:id="190" sofa="431" begin="0" end="124" titleType="document"/><types:Journal xmi:id="198" sofa="431" begin="0" end="0" name="Comparative Study" pubDate="163" ISSN="0007-1188" volume="103" title="British journal of pharmacology" shortTitle="Br J Pharmacol" issue="3" pages="1713-8" nlmId="7502536"/><types:Journal xmi:id="280" sofa="431" begin="0" end="0" name="Journal Article" pubDate="462" ISSN="0007-1188" volume="103" title="British journal of pharmacology" shortTitle="Br J Pharmacol" issue="3" pages="1713-8" nlmId="7502536"/><types:Journal xmi:id="472" sofa="431" begin="0" end="0" name="Research 
Support, Non-U.S. Gov't" pubDate="215" ISSN="0007-1188" volume="103" title="British journal of pharmacology" shortTitle="Br J Pharmacol" issue="3" pages="1713-8" nlmId="7502536"/><pubmed:Header xmi:id="234" sofa="431" begin="0" end="0" source="1681975" docId="1681975" truncated="false" authors="173" pubTypeList="198 280 472" language="eng" citationStatus="MEDLINE" otherIDs="378"/><types:Sentence xmi:id="526" sofa="431" begin="0" end="124" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="582" sofa="431" begin="128" end="345" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="534" sofa="431" begin="349" end="461" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="542" sofa="431" begin="462" end="556" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="590" sofa="431" begin="560" end="745" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="558" sofa="431" begin="746" end="819" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="510" sofa="431" begin="823" end="1106" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="574" sofa="431" begin="1110" end="1330" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="550" sofa="431" begin="1331" end="1500" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="566" sofa="431" begin="1504" end="1696" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="518" sofa="431" begin="1697" end="1859" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:PennBioIEPOSTag xmi:id="6294" sofa="431" begin="0" end="10" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="4545" sofa="431" begin="11" end="13" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="5223" sofa="431" begin="14" end="17" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="6162" sofa="431" begin="18" end="25" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NNS"/><types:PennBioIEPOSTag xmi:id="12006" sofa="431" begin="26" end="28" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="5334" sofa="431" begin="29" end="32" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="9615" sofa="431" begin="33" end="38" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="1864" sofa="431" begin="39" end="50" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="1010" sofa="431" begin="51" end="56" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="10564" sofa="431" begin="57" end="61" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="11943" sofa="431" begin="62" end="67" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="4105" sofa="431" begin="68" end="70" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="11890" sofa="431" begin="71" end="84" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="12431" sofa="431" begin="85" end="87" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="2275" sofa="431" begin="88" end="96" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" 
value="VBN"/><types:PennBioIEPOSTag xmi:id="11005" sofa="431" begin="97" end="105" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="3353" sofa="431" begin="106" end="112" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="8121" sofa="431" begin="113" end="115" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="12015" sofa="431" begin="116" end="119" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="3095" sofa="431" begin="120" end="123" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="3569" sofa="431" begin="123" end="124" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="."/><types:PennBioIEPOSTag xmi:id="7951" sofa="431" begin="128" end="131" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="2899" sofa="431" begin="132" end="144" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="6783" sofa="431" begin="145" end="152" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NNS"/><types:PennBioIEPOSTag xmi:id="5299" sofa="431" begin="153" end="155" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="3703" sofa="431" begin="156" end="161" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="11495" sofa="431" begin="161" end="162" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value=","/><types:PennBioIEPOSTag xmi:id="11114" sofa="431" begin="163" end="164" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="974" sofa="431" begin="165" 
end="168" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="5895" sofa="431" begin="169" end="185" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="7670" sofa="431" begin="186" end="197" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBN"/><types:PennBioIEPOSTag xmi:id="9942" sofa="431" begin="198" end="202" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="8505" sofa="431" begin="203" end="204" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="855" sofa="431" begin="205" end="214" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="3740" sofa="431" begin="215" end="222" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="4426" sofa="431" begin="222" end="223" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value=","/><types:PennBioIEPOSTag xmi:id="2012" sofa="431" begin="224" end="228" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBD"/><types:PennBioIEPOSTag xmi:id="8759" sofa="431" begin="229" end="237" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBN"/><types:PennBioIEPOSTag xmi:id="3520" sofa="431" begin="238" end="242" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="10822" sofa="431" begin="243" end="248" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="10455" sofa="431" begin="249" end="251" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="2625" sofa="431" begin="252" end="265" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="10047" sofa="431" begin="266" end="268" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="7573" sofa="431" begin="269" end="277" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBN"/><types:PennBioIEPOSTag xmi:id="8274" sofa="431" begin="278" end="286" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="6411" sofa="431" begin="287" end="293" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="3877" sofa="431" begin="294" end="299" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NNS"/><types:PennBioIEPOSTag xmi:id="12065" sofa="431" begin="300" end="302" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="12234" sofa="431" begin="303" end="306" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="11391" sofa="431" begin="307" end="310" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="7746" sofa="431" begin="311" end="321" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBD"/><types:PennBioIEPOSTag xmi:id="9597" sofa="431" begin="322" end="326" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="11928" sofa="431" begin="327" end="333" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="6673" sofa="431" begin="334" end="335" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="5059" sofa="431" begin="335" end="341" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="6303" sofa="431" begin="342" end="343" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="10671" sofa="431" begin="343" end="344" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="11803" sofa="431" begin="344" end="345" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="."/><types:PennBioIEPOSTag xmi:id="5582" sofa="431" begin="349" end="354" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="5792" sofa="431" begin="355" end="356" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="5904" sofa="431" begin="356" end="362" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="6907" sofa="431" begin="362" end="363" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="11794" sofa="431" begin="363" end="364" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="HYPH"/><types:PennBioIEPOSTag xmi:id="2390" sofa="431" begin="364" end="370" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="10956" sofa="431" begin="371" end="372" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="11332" sofa="431" begin="372" end="373" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="12243" sofa="431" begin="374" end="377" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CC"/><types:PennBioIEPOSTag xmi:id="2186" sofa="431" begin="378" end="391" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="7220" sofa="431" begin="392" end="393" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="3943" sofa="431" begin="393" end="398" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="5317" sofa="431" begin="398" end="399" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="11358" sofa="431" begin="399" end="400" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="HYPH"/><types:PennBioIEPOSTag xmi:id="5308" sofa="431" begin="400" end="406" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="9523" sofa="431" begin="407" end="408" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="598" sofa="431" begin="408" end="409" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="3781" sofa="431" begin="410" end="414" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="11349" sofa="431" begin="415" end="423" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBD"/><types:PennBioIEPOSTag xmi:id="9874" sofa="431" begin="424" end="425" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="867" sofa="431" begin="426" end="439" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="1267" sofa="431" begin="439" end="440" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="HYPH"/><types:PennBioIEPOSTag xmi:id="2951" sofa="431" begin="440" end="449" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="4411" sofa="431" begin="450" end="460" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="4962" sofa="431" begin="460" end="461" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="."/><types:PennBioIEPOSTag xmi:id="11093" sofa="431" begin="462" end="472" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="8884" sofa="431" begin="473" end="475" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="8650" sofa="431" begin="476" end="480" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="3814" sofa="431" begin="481" end="487" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NNS"/><types:PennBioIEPOSTag xmi:id="6603" sofa="431" begin="488" end="494" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBD"/><types:PennBioIEPOSTag xmi:id="7679" sofa="431" begin="495" end="499" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="876" sofa="431" begin="500" end="505" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="1228" sofa="431" begin="506" end="509" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBD"/><types:PennBioIEPOSTag xmi:id="6620" sofa="431" begin="510" end="515" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="RB"/><types:PennBioIEPOSTag xmi:id="7138" sofa="431" begin="516" end="518" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="9810" sofa="431" begin="519" end="524" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" 
value="NNS"/><types:PennBioIEPOSTag xmi:id="6484" sofa="431" begin="525" end="529" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="RBR"/><types:PennBioIEPOSTag xmi:id="10089" sofa="431" begin="530" end="536" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="4350" sofa="431" begin="537" end="541" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="10127" sofa="431" begin="542" end="555" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="10874" sofa="431" begin="555" end="556" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="."/><types:PennBioIEPOSTag xmi:id="7058" sofa="431" begin="560" end="570" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="4655" sofa="431" begin="571" end="585" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NNS"/><types:PennBioIEPOSTag xmi:id="2592" sofa="431" begin="586" end="588" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="3216" sofa="431" begin="589" end="602" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="7854" sofa="431" begin="603" end="604" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="10242" sofa="431" begin="604" end="610" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="3986" sofa="431" begin="611" end="612" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="1334" sofa="431" begin="612" end="613" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="10225" sofa="431" 
begin="614" end="617" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CC"/><types:PennBioIEPOSTag xmi:id="5154" sofa="431" begin="618" end="623" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="5521" sofa="431" begin="624" end="625" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="7885" sofa="431" begin="625" end="626" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="10398" sofa="431" begin="627" end="628" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="SYM"/><types:PennBioIEPOSTag xmi:id="11599" sofa="431" begin="629" end="635" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="10101" sofa="431" begin="636" end="637" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="3757" sofa="431" begin="637" end="638" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="3270" sofa="431" begin="639" end="647" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBD"/><types:PennBioIEPOSTag xmi:id="6448" sofa="431" begin="648" end="657" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="3077" sofa="431" begin="658" end="670" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="11057" sofa="431" begin="671" end="684" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="10729" sofa="431" begin="685" end="686" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="7990" sofa="431" begin="686" end="692" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="11634" sofa="431" begin="693" end="696" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="11367" sofa="431" begin="696" end="697" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="9006" sofa="431" begin="698" end="704" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NNS"/><types:PennBioIEPOSTag xmi:id="4359" sofa="431" begin="704" end="705" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value=","/><types:PennBioIEPOSTag xmi:id="4276" sofa="431" begin="706" end="713" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NNS"/><types:PennBioIEPOSTag xmi:id="1610" sofa="431" begin="714" end="724" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBN"/><types:PennBioIEPOSTag xmi:id="2689" sofa="431" begin="725" end="729" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="4684" sofa="431" begin="730" end="744" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="2399" sofa="431" begin="744" end="745" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="."/><types:PennBioIEPOSTag xmi:id="8893" sofa="431" begin="746" end="755" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="5434" sofa="431" begin="756" end="768" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="2874" sofa="431" begin="769" end="782" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="11646" sofa="431" begin="783" end="784" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" 
value="-LRB-"/><types:PennBioIEPOSTag xmi:id="2556" sofa="431" begin="784" end="790" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="2300" sofa="431" begin="791" end="794" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="9253" sofa="431" begin="794" end="795" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="3621" sofa="431" begin="796" end="802" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NNS"/><types:PennBioIEPOSTag xmi:id="7380" sofa="431" begin="803" end="807" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBD"/><types:PennBioIEPOSTag xmi:id="10587" sofa="431" begin="808" end="818" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="910" sofa="431" begin="818" end="819" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="."/><types:PennBioIEPOSTag xmi:id="3446" sofa="431" begin="823" end="826" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="9568" sofa="431" begin="827" end="840" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="9898" sofa="431" begin="840" end="841" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="HYPH"/><types:PennBioIEPOSTag xmi:id="2021" sofa="431" begin="841" end="851" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="4693" sofa="431" begin="852" end="858" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NNS"/><types:PennBioIEPOSTag xmi:id="5849" sofa="431" begin="859" end="862" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="10538" sofa="431" 
begin="863" end="876" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="3769" sofa="431" begin="877" end="880" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CC"/><types:PennBioIEPOSTag xmi:id="11105" sofa="431" begin="881" end="886" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="10547" sofa="431" begin="887" end="891" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBD"/><types:PennBioIEPOSTag xmi:id="12139" sofa="431" begin="892" end="899" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBN"/><types:PennBioIEPOSTag xmi:id="3413" sofa="431" begin="900" end="902" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="TO"/><types:PennBioIEPOSTag xmi:id="6833" sofa="431" begin="903" end="906" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="7328" sofa="431" begin="907" end="912" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="9726" sofa="431" begin="913" end="915" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="1782" sofa="431" begin="916" end="925" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="9674" sofa="431" begin="926" end="930" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="9606" sofa="431" begin="931" end="932" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="9465" sofa="431" begin="932" end="933" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="5452" sofa="431" begin="934" end="935" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="SYM"/><types:PennBioIEPOSTag xmi:id="2616" sofa="431" begin="936" end="942" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="1497" sofa="431" begin="943" end="944" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="HYPH"/><types:PennBioIEPOSTag xmi:id="2460" sofa="431" begin="945" end="948" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="3995" sofa="431" begin="949" end="955" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="643" sofa="431" begin="956" end="957" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="9118" sofa="431" begin="957" end="958" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="2774" sofa="431" begin="958" end="959" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value=","/><types:PennBioIEPOSTag xmi:id="3191" sofa="431" begin="960" end="962" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="6551" sofa="431" begin="963" end="972" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="7780" sofa="431" begin="973" end="975" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="5998" sofa="431" begin="976" end="983" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="3086" sofa="431" begin="984" end="993" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="738" sofa="431" begin="994" end="1001" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" 
value="NN"/><types:PennBioIEPOSTag xmi:id="9627" sofa="431" begin="1001" end="1002" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value=","/><types:PennBioIEPOSTag xmi:id="4376" sofa="431" begin="1003" end="1006" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CC"/><types:PennBioIEPOSTag xmi:id="824" sofa="431" begin="1007" end="1009" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="TO"/><types:PennBioIEPOSTag xmi:id="2730" sofa="431" begin="1010" end="1013" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="1672" sofa="431" begin="1014" end="1018" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="11293" sofa="431" begin="1019" end="1021" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="11777" sofa="431" begin="1022" end="1031" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="2488" sofa="431" begin="1032" end="1033" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="9999" sofa="431" begin="1033" end="1034" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="5038" sofa="431" begin="1035" end="1036" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="SYM"/><types:PennBioIEPOSTag xmi:id="1362" sofa="431" begin="1037" end="1043" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="2356" sofa="431" begin="1044" end="1045" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="HYPH"/><types:PennBioIEPOSTag xmi:id="1506" sofa="431" begin="1046" end="1049" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag 
xmi:id="6355" sofa="431" begin="1050" end="1056" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="4096" sofa="431" begin="1057" end="1058" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="5425" sofa="431" begin="1058" end="1059" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="1371" sofa="431" begin="1059" end="1060" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value=","/><types:PennBioIEPOSTag xmi:id="5713" sofa="431" begin="1061" end="1063" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="1722" sofa="431" begin="1064" end="1073" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="6795" sofa="431" begin="1074" end="1076" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="6898" sofa="431" begin="1077" end="1083" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="6972" sofa="431" begin="1084" end="1087" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="6263" sofa="431" begin="1088" end="1105" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="6656" sofa="431" begin="1105" end="1106" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="."/><types:PennBioIEPOSTag xmi:id="9046" sofa="431" begin="1110" end="1115" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="9220" sofa="431" begin="1116" end="1124" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="989" sofa="431" begin="1125" end="1127" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="9559" sofa="431" begin="1128" end="1136" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="5886" sofa="431" begin="1137" end="1145" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NNS"/><types:PennBioIEPOSTag xmi:id="3902" sofa="431" begin="1146" end="1148" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="TO"/><types:PennBioIEPOSTag xmi:id="2101" sofa="431" begin="1149" end="1152" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="8867" sofa="431" begin="1153" end="1162" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="RB"/><types:PennBioIEPOSTag xmi:id="8224" sofa="431" begin="1162" end="1163" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="HYPH"/><types:PennBioIEPOSTag xmi:id="10680" sofa="431" begin="1163" end="1172" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="4646" sofa="431" begin="1173" end="1186" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="8784" sofa="431" begin="1187" end="1189" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="8032" sofa="431" begin="1190" end="1203" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="885" sofa="431" begin="1204" end="1205" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="7771" sofa="431" begin="1205" end="1211" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="5606" sofa="431" begin="1212" end="1213" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="6201" sofa="431" begin="1213" end="1214" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="8750" sofa="431" begin="1214" end="1215" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value=","/><types:PennBioIEPOSTag xmi:id="4087" sofa="431" begin="1216" end="1219" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="10118" sofa="431" begin="1220" end="1224" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="4504" sofa="431" begin="1225" end="1229" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="12024" sofa="431" begin="1230" end="1235" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="6254" sofa="431" begin="1236" end="1238" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="8041" sofa="431" begin="1239" end="1244" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="8423" sofa="431" begin="1245" end="1248" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBD"/><types:PennBioIEPOSTag xmi:id="1019" sofa="431" begin="1249" end="1252" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="RB"/><types:PennBioIEPOSTag xmi:id="10933" sofa="431" begin="1253" end="1259" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VB"/><types:PennBioIEPOSTag xmi:id="7630" sofa="431" begin="1260" end="1273" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="RB"/><types:PennBioIEPOSTag xmi:id="11183" sofa="431" begin="1273" end="1274" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value=","/><types:PennBioIEPOSTag xmi:id="10905" sofa="431" begin="1275" end="1283" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="9098" sofa="431" begin="1284" end="1288" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="11622" sofa="431" begin="1289" end="1291" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="5405" sofa="431" begin="1292" end="1305" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="4675" sofa="431" begin="1306" end="1315" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBD"/><types:PennBioIEPOSTag xmi:id="5655" sofa="431" begin="1316" end="1321" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="RB"/><types:PennBioIEPOSTag xmi:id="6812" sofa="431" begin="1322" end="1324" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="9577" sofa="431" begin="1325" end="1329" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="RB"/><types:PennBioIEPOSTag xmi:id="4979" sofa="431" begin="1329" end="1330" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="."/><types:PennBioIEPOSTag xmi:id="5858" sofa="431" begin="1331" end="1336" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="6758" sofa="431" begin="1337" end="1345" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="4457" sofa="431" begin="1346" end="1348" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="TO"/><types:PennBioIEPOSTag xmi:id="6749" sofa="431" begin="1349" end="1352" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="1402" sofa="431" begin="1353" end="1362" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="RB"/><types:PennBioIEPOSTag xmi:id="1069" sofa="431" begin="1362" end="1363" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="HYPH"/><types:PennBioIEPOSTag xmi:id="7067" sofa="431" begin="1363" end="1372" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="5948" sofa="431" begin="1373" end="1386" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="3104" sofa="431" begin="1387" end="1389" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="3336" sofa="431" begin="1390" end="1395" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="11382" sofa="431" begin="1396" end="1397" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="9498" sofa="431" begin="1397" end="1403" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="2817" sofa="431" begin="1404" end="1405" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="9392" sofa="431" begin="1405" end="1406" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="2653" sofa="431" begin="1406" end="1407" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value=","/><types:PennBioIEPOSTag xmi:id="11171" sofa="431" begin="1408" end="1411" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="7492" sofa="431" begin="1412" end="1416" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="11997" sofa="431" begin="1417" end="1421" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="5198" sofa="431" begin="1422" end="1427" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="10339" sofa="431" begin="1428" end="1430" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="8403" sofa="431" begin="1431" end="1436" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="12201" sofa="431" begin="1437" end="1446" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBD"/><types:PennBioIEPOSTag xmi:id="10136" sofa="431" begin="1447" end="1452" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="6386" sofa="431" begin="1453" end="1454" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="9127" sofa="431" begin="1455" end="1459" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="12279" sofa="431" begin="1460" end="1463" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CC"/><types:PennBioIEPOSTag xmi:id="5591" sofa="431" begin="1464" end="1468" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="12192" sofa="431" begin="1469" end="1471" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="10798" sofa="431" begin="1472" end="1485" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="10480" sofa="431" begin="1486" end="1491" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="RB"/><types:PennBioIEPOSTag xmi:id="7715" sofa="431" begin="1492" end="1494" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="8823" sofa="431" begin="1495" end="1499" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="RB"/><types:PennBioIEPOSTag xmi:id="4385" sofa="431" begin="1499" end="1500" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="."/><types:PennBioIEPOSTag xmi:id="729" sofa="431" begin="1504" end="1509" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="1001" sofa="431" begin="1510" end="1517" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NNS"/><types:PennBioIEPOSTag xmi:id="5696" sofa="431" begin="1518" end="1525" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBP"/><types:PennBioIEPOSTag xmi:id="6436" sofa="431" begin="1526" end="1530" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="7024" sofa="431" begin="1531" end="1534" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="2799" sofa="431" begin="1535" end="1547" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="12316" sofa="431" begin="1548" end="1554" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="3731" sofa="431" begin="1555" end="1557" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="7340" sofa="431" begin="1558" end="1563" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="10810" sofa="431" begin="1563" end="1564" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value=","/><types:PennBioIEPOSTag xmi:id="5050" sofa="431" begin="1565" end="1569" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="3245" sofa="431" begin="1570" end="1574" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="1900" sofa="431" begin="1575" end="1577" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="4756" sofa="431" begin="1578" end="1591" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="8563" sofa="431" begin="1591" end="1592" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value=","/><types:PennBioIEPOSTag xmi:id="4988" sofa="431" begin="1593" end="1595" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBZ"/><types:PennBioIEPOSTag xmi:id="11966" sofa="431" begin="1596" end="1599" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="11916" sofa="431" begin="1600" end="1602" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="TO"/><types:PennBioIEPOSTag xmi:id="2808" sofa="431" begin="1603" end="1613" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="949" sofa="431" begin="1614" end="1616" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="2381" sofa="431" begin="1617" end="1624" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="10178" sofa="431" begin="1625" end="1634" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="7349" sofa="431" begin="1635" end="1642" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="12291" sofa="431" begin="1643" end="1646" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CC"/><types:PennBioIEPOSTag xmi:id="4162" sofa="431" begin="1647" end="1648" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="7653" sofa="431" begin="1649" end="1658" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="3236" sofa="431" begin="1659" end="1667" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="777" sofa="431" begin="1668" end="1670" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="8432" sofa="431" begin="1671" end="1684" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="4997" sofa="431" begin="1685" end="1691" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="11679" sofa="431" begin="1692" end="1695" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="1619" sofa="431" begin="1695" end="1696" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="."/><types:PennBioIEPOSTag xmi:id="11547" sofa="431" begin="1697" end="1704" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="RB"/><types:PennBioIEPOSTag xmi:id="11203" sofa="431" begin="1704" end="1705" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value=","/><types:PennBioIEPOSTag xmi:id="2092" sofa="431" begin="1706" end="1714" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBN"/><types:PennBioIEPOSTag xmi:id="1585" sofa="431" begin="1715" end="1719" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="9883" sofa="431" begin="1720" end="1733" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="4341" sofa="431" begin="1733" end="1734" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value=","/><types:PennBioIEPOSTag xmi:id="615" sofa="431" begin="1735" end="1740" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="EX"/><types:PennBioIEPOSTag xmi:id="6824" sofa="431" begin="1741" end="1744" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="VBD"/><types:PennBioIEPOSTag xmi:id="10038" sofa="431" begin="1745" end="1749" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJR"/><types:PennBioIEPOSTag xmi:id="10275" sofa="431" begin="1750" end="1754" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="AFX"/><types:PennBioIEPOSTag xmi:id="1957" sofa="431" begin="1754" end="1755" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="HYPH"/><types:PennBioIEPOSTag xmi:id="11416" sofa="431" begin="1755" end="1764" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="3971" sofa="431" begin="1765" end="1767" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="TO"/><types:PennBioIEPOSTag xmi:id="5461" sofa="431" begin="1768" end="1771" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="5573" sofa="431" begin="1772" end="1780" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="10738" sofa="431" begin="1781" end="1788" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NNS"/><types:PennBioIEPOSTag xmi:id="2601" sofa="431" begin="1789" end="1791" 
componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="682" sofa="431" begin="1792" end="1797" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="7999" sofa="431" begin="1798" end="1801" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CC"/><types:PennBioIEPOSTag xmi:id="9021" sofa="431" begin="1802" end="1812" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="RB"/><types:PennBioIEPOSTag xmi:id="6515" sofa="431" begin="1813" end="1819" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="JJ"/><types:PennBioIEPOSTag xmi:id="10351" sofa="431" begin="1820" end="1825" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="AFX"/><types:PennBioIEPOSTag xmi:id="9841" sofa="431" begin="1825" end="1826" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="HYPH"/><types:PennBioIEPOSTag xmi:id="10161" sofa="431" begin="1826" end="1835" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NN"/><types:PennBioIEPOSTag xmi:id="11907" sofa="431" begin="1836" end="1843" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="IN"/><types:PennBioIEPOSTag xmi:id="3033" sofa="431" begin="1844" end="1847" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="DT"/><types:PennBioIEPOSTag xmi:id="8112" sofa="431" begin="1848" end="1851" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="CD"/><types:PennBioIEPOSTag xmi:id="1282" sofa="431" begin="1852" end="1858" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="NNS"/><types:PennBioIEPOSTag xmi:id="10327" sofa="431" begin="1858" end="1859" componentId="de.julielab.jcore.ae.opennlp.postag.PosTagAnnotator" value="."/><types:Lemma xmi:id="6476" sofa="431" begin="0" end="10" value="comparison"/><types:Lemma xmi:id="3749" sofa="431" 
begin="11" end="13" value="of"/><types:Lemma xmi:id="11958" sofa="431" begin="14" end="17" value="the"/><types:Lemma xmi:id="10615" sofa="431" begin="18" end="25" value="effect"/><types:Lemma xmi:id="3069" sofa="431" begin="26" end="28" value="of"/><types:Lemma xmi:id="9532" sofa="431" begin="29" end="32" value="the"/><types:Lemma xmi:id="11704" sofa="431" begin="33" end="38" value="novel"/><types:Lemma xmi:id="6138" sofa="431" begin="39" end="50" value="vasodilator"/><types:Lemma xmi:id="938" sofa="431" begin="51" end="56" value="FK409"/><types:Lemma xmi:id="9357" sofa="431" begin="57" end="61" value="with"/><types:Lemma xmi:id="11786" sofa="431" begin="62" end="67" value="those"/><types:Lemma xmi:id="2076" sofa="431" begin="68" end="70" value="of"/><types:Lemma xmi:id="1346" sofa="431" begin="71" end="84" value="nitroglycerin"/><types:Lemma xmi:id="6171" sofa="431" begin="85" end="87" value="in"/><types:Lemma xmi:id="6916" sofa="431" begin="88" end="96" value="isolate"/><types:Lemma xmi:id="7590" sofa="431" begin="97" end="105" value="coronary"/><types:Lemma xmi:id="10847" sofa="431" begin="106" end="112" value="artery"/><types:Lemma xmi:id="10170" sofa="431" begin="113" end="115" value="of"/><types:Lemma xmi:id="8768" sofa="431" begin="116" end="119" value="the"/><types:Lemma xmi:id="4235" sofa="431" begin="120" end="123" value="dog"/><types:Lemma xmi:id="6286" sofa="431" begin="123" end="124" value="."/><types:Lemma xmi:id="7372" sofa="431" begin="128" end="131" value="the"/><types:Lemma xmi:id="11341" sofa="431" begin="132" end="144" value="vasorelaxant"/><types:Lemma xmi:id="2284" sofa="431" begin="145" end="152" value="effect"/><types:Lemma xmi:id="3581" sofa="431" begin="153" end="155" value="of"/><types:Lemma xmi:id="8024" sofa="431" begin="156" end="161" value="FK409"/><types:Lemma xmi:id="11049" sofa="431" begin="161" end="162" value=","/><types:Lemma xmi:id="8050" sofa="431" begin="163" end="164" value="a"/><types:Lemma xmi:id="9490" sofa="431" 
begin="165" end="168" value="new"/><types:Lemma xmi:id="7662" sofa="431" begin="169" end="185" value="nitrovasodilator"/><types:Lemma xmi:id="3823" sofa="431" begin="186" end="197" value="synthesize"/><types:Lemma xmi:id="11899" sofa="431" begin="198" end="202" value="from"/><types:Lemma xmi:id="2747" sofa="431" begin="203" end="204" value="a"/><types:Lemma xmi:id="2167" sofa="431" begin="205" end="214" value="microbial"/><types:Lemma xmi:id="4862" sofa="431" begin="215" end="222" value="product"/><types:Lemma xmi:id="2175" sofa="431" begin="222" end="223" value=","/><types:Lemma xmi:id="9457" sofa="431" begin="224" end="228" value="be"/><types:Lemma xmi:id="5326" sofa="431" begin="229" end="237" value="compare"/><types:Lemma xmi:id="8465" sofa="431" begin="238" end="242" value="with"/><types:Lemma xmi:id="8599" sofa="431" begin="243" end="248" value="those"/><types:Lemma xmi:id="11528" sofa="431" begin="249" end="251" value="of"/><types:Lemma xmi:id="2662" sofa="431" begin="252" end="265" value="nitroglycerin"/><types:Lemma xmi:id="11671" sofa="431" begin="266" end="268" value="in"/><types:Lemma xmi:id="4838" sofa="431" begin="269" end="277" value="isolate"/><types:Lemma xmi:id="5022" sofa="431" begin="278" end="286" value="coronary"/><types:Lemma xmi:id="10284" sofa="431" begin="287" end="293" value="artery"/><types:Lemma xmi:id="5275" sofa="431" begin="294" end="299" value="ring"/><types:Lemma xmi:id="4397" sofa="431" begin="300" end="302" value="of"/><types:Lemma xmi:id="7582" sofa="431" begin="303" end="306" value="the"/><types:Lemma xmi:id="3850" sofa="431" begin="307" end="310" value="dog"/><types:Lemma xmi:id="2540" sofa="431" begin="311" end="321" value="contract"/><types:Lemma xmi:id="10008" sofa="431" begin="322" end="326" value="with"/><types:Lemma xmi:id="6725" sofa="431" begin="327" end="333" value="U46619"/><types:Lemma xmi:id="8812" sofa="431" begin="334" end="335" value="("/><types:Lemma xmi:id="10495" sofa="431" begin="335" end="341" 
value="10(-7)"/><types:Lemma xmi:id="691" sofa="431" begin="342" end="343" value="m"/><types:Lemma xmi:id="10110" sofa="431" begin="343" end="344" value=")"/><types:Lemma xmi:id="7446" sofa="431" begin="344" end="345" value="."/><types:Lemma xmi:id="11747" sofa="431" begin="349" end="354" value="FK409"/><types:Lemma xmi:id="8832" sofa="431" begin="355" end="356" value="("/><types:Lemma xmi:id="6347" sofa="431" begin="356" end="362" value="10(-11"/><types:Lemma xmi:id="4603" sofa="431" begin="362" end="363" value=")"/><types:Lemma xmi:id="6981" sofa="431" begin="363" end="364" value="-"/><types:Lemma xmi:id="12090" sofa="431" begin="364" end="370" value="10(-5)"/><types:Lemma xmi:id="11739" sofa="431" begin="371" end="372" value="m"/><types:Lemma xmi:id="6701" sofa="431" begin="372" end="373" value=")"/><types:Lemma xmi:id="6034" sofa="431" begin="374" end="377" value="and"/><types:Lemma xmi:id="3806" sofa="431" begin="378" end="391" value="nitroglycerin"/><types:Lemma xmi:id="6042" sofa="431" begin="392" end="393" value="("/><types:Lemma xmi:id="10447" sofa="431" begin="393" end="398" value="10(-9"/><types:Lemma xmi:id="5940" sofa="431" begin="398" end="399" value=")"/><types:Lemma xmi:id="10639" sofa="431" begin="399" end="400" value="-"/><types:Lemma xmi:id="9775" sofa="431" begin="400" end="406" value="10(-4)"/><types:Lemma xmi:id="9449" sofa="431" begin="407" end="408" value="m"/><types:Lemma xmi:id="11536" sofa="431" begin="408" end="409" value=")"/><types:Lemma xmi:id="9718" sofa="431" begin="410" end="414" value="each"/><types:Lemma xmi:id="10965" sofa="431" begin="415" end="423" value="produce"/><types:Lemma xmi:id="2584" sofa="431" begin="424" end="425" value="a"/><types:Lemma xmi:id="8165" sofa="431" begin="426" end="439" value="concentration"/><types:Lemma xmi:id="11556" sofa="431" begin="439" end="440" value="-"/><types:Lemma xmi:id="7269" sofa="431" begin="440" end="449" value="dependent"/><types:Lemma xmi:id="9751" sofa="431" begin="450" end="460" 
value="relaxation"/><types:Lemma xmi:id="5103" sofa="431" begin="460" end="461" value="."/><types:Lemma xmi:id="1310" sofa="431" begin="462" end="472" value="comparison"/><types:Lemma xmi:id="8776" sofa="431" begin="473" end="475" value="of"/><types:Lemma xmi:id="1028" sofa="431" begin="476" end="480" value="EC50"/><types:Lemma xmi:id="9850" sofa="431" begin="481" end="487" value="value"/><types:Lemma xmi:id="3295" sofa="431" begin="488" end="494" value="show"/><types:Lemma xmi:id="5990" sofa="431" begin="495" end="499" value="that"/><types:Lemma xmi:id="11212" sofa="431" begin="500" end="505" value="FK409"/><types:Lemma xmi:id="607" sofa="431" begin="506" end="509" value="be"/><types:Lemma xmi:id="6082" sofa="431" begin="510" end="515" value="about"/><types:Lemma xmi:id="1925" sofa="431" begin="516" end="518" value="25"/><types:Lemma xmi:id="7960" sofa="431" begin="519" end="524" value="time"/><types:Lemma xmi:id="4554" sofa="431" begin="525" end="529" value="much"/><types:Lemma xmi:id="9586" sofa="431" begin="530" end="536" value="potent"/><types:Lemma xmi:id="1791" sofa="431" begin="537" end="541" value="than"/><types:Lemma xmi:id="2417" sofa="431" begin="542" end="555" value="nitroglycerin"/><types:Lemma xmi:id="8536" sofa="431" begin="555" end="556" value="."/><types:Lemma xmi:id="9384" sofa="431" begin="560" end="570" value="submaximum"/><types:Lemma xmi:id="6665" sofa="431" begin="571" end="585" value="concentration"/><types:Lemma xmi:id="3438" sofa="431" begin="586" end="588" value="of"/><types:Lemma xmi:id="10556" sofa="431" begin="589" end="602" value="nitroglycerin"/><types:Lemma xmi:id="7196" sofa="431" begin="603" end="604" value="("/><types:Lemma xmi:id="10251" sofa="431" begin="604" end="610" value="10(-6)"/><types:Lemma xmi:id="7688" sofa="431" begin="611" end="612" value="m"/><types:Lemma xmi:id="919" sofa="431" begin="612" end="613" value=")"/><types:Lemma xmi:id="6612" sofa="431" begin="614" end="617" value="and"/><types:Lemma xmi:id="10831" 
sofa="431" begin="618" end="623" value="FK409"/><types:Lemma xmi:id="10989" sofa="431" begin="624" end="625" value="("/><types:Lemma xmi:id="7127" sofa="431" begin="625" end="626" value="3"/><types:Lemma xmi:id="9212" sofa="431" begin="627" end="628" value="x"/><types:Lemma xmi:id="10779" sofa="431" begin="629" end="635" value="10(-8)"/><types:Lemma xmi:id="1708" sofa="431" begin="636" end="637" value="m"/><types:Lemma xmi:id="4932" sofa="431" begin="637" end="638" value=")"/><types:Lemma xmi:id="8008" sofa="431" begin="639" end="647" value="elevate"/><types:Lemma xmi:id="11882" sofa="431" begin="648" end="657" value="guanosine"/><types:Lemma xmi:id="11260" sofa="431" begin="658" end="670" value="3':5'-cyclic"/><types:Lemma xmi:id="8856" sofa="431" begin="671" end="684" value="monophosphate"/><types:Lemma xmi:id="3225" sofa="431" begin="685" end="686" value="("/><types:Lemma xmi:id="1949" sofa="431" begin="686" end="692" value="cyclic"/><types:Lemma xmi:id="2642" sofa="431" begin="693" end="696" value="gmp"/><types:Lemma xmi:id="2681" sofa="431" begin="696" end="697" value=")"/><types:Lemma xmi:id="11305" sofa="431" begin="698" end="704" value="level"/><types:Lemma xmi:id="11014" sofa="431" begin="704" end="705" value=","/><types:Lemma xmi:id="11236" sofa="431" begin="706" end="713" value="effect"/><types:Lemma xmi:id="10839" sofa="431" begin="714" end="724" value="associate"/><types:Lemma xmi:id="4592" sofa="431" begin="725" end="729" value="with"/><types:Lemma xmi:id="11614" sofa="431" begin="730" end="744" value="vasorelaxation"/><types:Lemma xmi:id="2739" sofa="431" begin="744" end="745" value="."/><types:Lemma xmi:id="4846" sofa="431" begin="746" end="755" value="adenosine"/><types:Lemma xmi:id="12210" sofa="431" begin="756" end="768" value="3':5'-cyclic"/><types:Lemma xmi:id="8395" sofa="431" begin="769" end="782" value="monophosphate"/><types:Lemma xmi:id="10948" sofa="431" begin="783" end="784" value="("/><types:Lemma xmi:id="11066" sofa="431" begin="784" 
end="790" value="cyclic"/><types:Lemma xmi:id="10423" sofa="431" begin="791" end="794" value="amp"/><types:Lemma xmi:id="8591" sofa="431" begin="794" end="795" value=")"/><types:Lemma xmi:id="1700" sofa="431" begin="796" end="802" value="level"/><types:Lemma xmi:id="9330" sofa="431" begin="803" end="807" value="be"/><types:Lemma xmi:id="750" sofa="431" begin="808" end="818" value="unaffected"/><types:Lemma xmi:id="9759" sofa="431" begin="818" end="819" value="."/><types:Lemma xmi:id="5470" sofa="431" begin="823" end="826" value="the"/><types:Lemma xmi:id="11975" sofa="431" begin="827" end="840" value="concentration"/><types:Lemma xmi:id="12440" sofa="431" begin="840" end="841" value="-"/><types:Lemma xmi:id="7050" sofa="431" begin="841" end="851" value="relaxation"/><types:Lemma xmi:id="10997" sofa="431" begin="852" end="858" value="curve"/><types:Lemma xmi:id="9087" sofa="431" begin="859" end="862" value="for"/><types:Lemma xmi:id="5557" sofa="431" begin="863" end="876" value="nitroglycerin"/><types:Lemma xmi:id="1689" sofa="431" begin="877" end="880" value="and"/><types:Lemma xmi:id="5705" sofa="431" begin="881" end="886" value="FK409"/><types:Lemma xmi:id="2113" sofa="431" begin="887" end="891" value="be"/><types:Lemma xmi:id="9284" sofa="431" begin="892" end="899" value="shift"/><types:Lemma xmi:id="5111" sofa="431" begin="900" end="902" value="to"/><types:Lemma xmi:id="1036" sofa="431" begin="903" end="906" value="the"/><types:Lemma xmi:id="2038" sofa="431" begin="907" end="912" value="right"/><types:Lemma xmi:id="7185" sofa="431" begin="913" end="915" value="by"/><types:Lemma xmi:id="9802" sofa="431" begin="916" end="925" value="methylene"/><types:Lemma xmi:id="652" sofa="431" begin="926" end="930" value="blue"/><types:Lemma xmi:id="7438" sofa="431" begin="931" end="932" value="("/><types:Lemma xmi:id="6804" sofa="431" begin="932" end="933" value="3"/><types:Lemma xmi:id="6948" sofa="431" begin="934" end="935" value="x"/><types:Lemma xmi:id="7824" sofa="431" 
begin="936" end="942" value="10(-6)"/><types:Lemma xmi:id="1972" sofa="431" begin="943" end="944" value="-"/><types:Lemma xmi:id="2826" sofa="431" begin="945" end="948" value="3 x"/><types:Lemma xmi:id="1259" sofa="431" begin="949" end="955" value="10(-5)"/><types:Lemma xmi:id="11874" sofa="431" begin="956" end="957" value="m"/><types:Lemma xmi:id="11989" sofa="431" begin="957" end="958" value=")"/><types:Lemma xmi:id="2634" sofa="431" begin="958" end="959" value=","/><types:Lemma xmi:id="8958" sofa="431" begin="960" end="962" value="a"/><types:Lemma xmi:id="2548" sofa="431" begin="963" end="972" value="inhibitor"/><types:Lemma xmi:id="12173" sofa="431" begin="973" end="975" value="of"/><types:Lemma xmi:id="6007" sofa="431" begin="976" end="983" value="soluble"/><types:Lemma xmi:id="1430" sofa="431" begin="984" end="993" value="guanylate"/><types:Lemma xmi:id="4079" sofa="431" begin="994" end="1001" value="cyclase"/><types:Lemma xmi:id="1326" sofa="431" begin="1001" end="1002" value=","/><types:Lemma xmi:id="10022" sofa="431" begin="1003" end="1006" value="and"/><types:Lemma xmi:id="3345" sofa="431" begin="1007" end="1009" value="to"/><types:Lemma xmi:id="5478" sofa="431" begin="1010" end="1013" value="the"/><types:Lemma xmi:id="1873" sofa="431" begin="1014" end="1018" value="left"/><types:Lemma xmi:id="2030" sofa="431" begin="1019" end="1021" value="by"/><types:Lemma xmi:id="7642" sofa="431" begin="1022" end="1031" value="M&amp;B22,948"/><types:Lemma xmi:id="8368" sofa="431" begin="1032" end="1033" value="("/><types:Lemma xmi:id="3545" sofa="431" begin="1033" end="1034" value="3"/><types:Lemma xmi:id="6595" sofa="431" begin="1035" end="1036" value="x"/><types:Lemma xmi:id="7042" sofa="431" begin="1037" end="1043" value="10(-6)"/><types:Lemma xmi:id="5841" sofa="431" begin="1044" end="1045" value="-"/><types:Lemma xmi:id="3061" sofa="431" begin="1046" end="1049" value="3 x"/><types:Lemma xmi:id="5030" sofa="431" begin="1050" end="1056" value="10(-5)"/><types:Lemma 
xmi:id="5647" sofa="431" begin="1057" end="1058" value="m"/><types:Lemma xmi:id="4466" sofa="431" begin="1058" end="1059" value=")"/><types:Lemma xmi:id="2084" sofa="431" begin="1059" end="1060" value=","/><types:Lemma xmi:id="9858" sofa="431" begin="1061" end="1063" value="a"/><types:Lemma xmi:id="1251" sofa="431" begin="1064" end="1073" value="inhibitor"/><types:Lemma xmi:id="5163" sofa="431" begin="1074" end="1076" value="of"/><types:Lemma xmi:id="7541" sofa="431" begin="1077" end="1083" value="cyclic"/><types:Lemma xmi:id="12448" sofa="431" begin="1084" end="1087" value="gmp"/><types:Lemma xmi:id="3790" sofa="431" begin="1088" end="1105" value="phosphodiesterase"/><types:Lemma xmi:id="4243" sofa="431" begin="1105" end="1106" value="."/><types:Lemma xmi:id="10689" sofa="431" begin="1110" end="1115" value="after"/><types:Lemma xmi:id="12181" sofa="431" begin="1116" end="1124" value="exposure"/><types:Lemma xmi:id="4971" sofa="431" begin="1125" end="1127" value="of"/><types:Lemma xmi:id="7813" sofa="431" begin="1128" end="1136" value="coronary"/><types:Lemma xmi:id="5833" sofa="431" begin="1137" end="1145" value="artery"/><types:Lemma xmi:id="4584" sofa="431" begin="1146" end="1148" value="to"/><types:Lemma xmi:id="9907" sofa="431" begin="1149" end="1152" value="the"/><types:Lemma xmi:id="11425" sofa="431" begin="1153" end="1162" value="maximal"/><types:Lemma xmi:id="5549" sofa="431" begin="1162" end="1163" value="-"/><types:Lemma xmi:id="12356" sofa="431" begin="1163" end="1172" value="effective"/><types:Lemma xmi:id="5976" sofa="431" begin="1173" end="1186" value="concentration"/><types:Lemma xmi:id="6629" sofa="431" begin="1187" end="1189" value="of"/><types:Lemma xmi:id="9959" sofa="431" begin="1190" end="1203" value="nitroglycerin"/><types:Lemma xmi:id="7549" sofa="431" begin="1204" end="1205" value="("/><types:Lemma xmi:id="10771" sofa="431" begin="1205" end="1211" value="10(-4)"/><types:Lemma xmi:id="2068" sofa="431" begin="1212" end="1213" 
value="m"/><types:Lemma xmi:id="3487" sofa="431" begin="1213" end="1214" value=")"/><types:Lemma xmi:id="2513" sofa="431" begin="1214" end="1215" value=","/><types:Lemma xmi:id="1731" sofa="431" begin="1216" end="1219" value="the"/><types:Lemma xmi:id="1489" sofa="431" begin="1220" end="1224" value="mean"/><types:Lemma xmi:id="9767" sofa="431" begin="1225" end="1229" value="EC50"/><types:Lemma xmi:id="8328" sofa="431" begin="1230" end="1235" value="value"/><types:Lemma xmi:id="3200" sofa="431" begin="1236" end="1238" value="of"/><types:Lemma xmi:id="6874" sofa="431" begin="1239" end="1244" value="FK409"/><types:Lemma xmi:id="3798" sofa="431" begin="1245" end="1248" value="do"/><types:Lemma xmi:id="10747" sofa="431" begin="1249" end="1252" value="not"/><types:Lemma xmi:id="9951" sofa="431" begin="1253" end="1259" value="change"/><types:Lemma xmi:id="9507" sofa="431" begin="1260" end="1273" value="significant"/><types:Lemma xmi:id="8181" sofa="431" begin="1273" end="1274" value=","/><types:Lemma xmi:id="6493" sofa="431" begin="1275" end="1283" value="although"/><types:Lemma xmi:id="3638" sofa="431" begin="1284" end="1288" value="that"/><types:Lemma xmi:id="7900" sofa="431" begin="1289" end="1291" value="of"/><types:Lemma xmi:id="3455" sofa="431" begin="1292" end="1305" value="nitroglycerin"/><types:Lemma xmi:id="1354" sofa="431" begin="1306" end="1315" value="increase"/><types:Lemma xmi:id="4071" sofa="431" begin="1316" end="1321" value="about"/><types:Lemma xmi:id="4138" sofa="431" begin="1322" end="1324" value="60"/><types:Lemma xmi:id="2292" sofa="431" begin="1325" end="1329" value="fold"/><types:Lemma xmi:id="5375" sofa="431" begin="1329" end="1330" value="."/><types:Lemma xmi:id="8876" sofa="431" begin="1331" end="1336" value="after"/><types:Lemma xmi:id="2057" sofa="431" begin="1337" end="1345" value="exposure"/><types:Lemma xmi:id="8918" sofa="431" begin="1346" end="1348" value="to"/><types:Lemma xmi:id="4227" sofa="431" begin="1349" end="1352" 
value="the"/><types:Lemma xmi:id="9866" sofa="431" begin="1353" end="1362" value="maximal"/><types:Lemma xmi:id="8173" sofa="431" begin="1362" end="1363" value="-"/><types:Lemma xmi:id="4490" sofa="431" begin="1363" end="1372" value="effective"/><types:Lemma xmi:id="4664" sofa="431" begin="1373" end="1386" value="concentration"/><types:Lemma xmi:id="632" sofa="431" begin="1387" end="1389" value="of"/><types:Lemma xmi:id="10030" sofa="431" begin="1390" end="1395" value="FK409"/><types:Lemma xmi:id="966" sofa="431" begin="1396" end="1397" value="("/><types:Lemma xmi:id="3676" sofa="431" begin="1397" end="1403" value="10(-5)"/><types:Lemma xmi:id="5513" sofa="431" begin="1404" end="1405" value="m"/><types:Lemma xmi:id="10065" sofa="431" begin="1405" end="1406" value=")"/><types:Lemma xmi:id="4854" sofa="431" begin="1406" end="1407" value=","/><types:Lemma xmi:id="8457" sofa="431" begin="1408" end="1411" value="the"/><types:Lemma xmi:id="10787" sofa="431" begin="1412" end="1416" value="mean"/><types:Lemma xmi:id="9915" sofa="431" begin="1417" end="1421" value="EC50"/><types:Lemma xmi:id="1631" sofa="431" begin="1422" end="1427" value="value"/><types:Lemma xmi:id="3630" sofa="431" begin="1428" end="1430" value="of"/><types:Lemma xmi:id="10573" sofa="431" begin="1431" end="1436" value="FK409"/><types:Lemma xmi:id="1291" sofa="431" begin="1437" end="1446" value="increase"/><types:Lemma xmi:id="11712" sofa="431" begin="1447" end="1452" value="about"/><types:Lemma xmi:id="2211" sofa="431" begin="1453" end="1454" value="6"/><types:Lemma xmi:id="3132" sofa="431" begin="1455" end="1459" value="fold"/><types:Lemma xmi:id="1856" sofa="431" begin="1460" end="1463" value="and"/><types:Lemma xmi:id="8412" sofa="431" begin="1464" end="1468" value="that"/><types:Lemma xmi:id="5913" sofa="431" begin="1469" end="1471" value="of"/><types:Lemma xmi:id="6507" sofa="431" begin="1472" end="1485" value="nitroglycerin"/><types:Lemma xmi:id="1681" sofa="431" begin="1486" end="1491" 
value="about"/><types:Lemma xmi:id="4114" sofa="431" begin="1492" end="1494" value="11"/><types:Lemma xmi:id="8723" sofa="431" begin="1495" end="1499" value="fold"/><types:Lemma xmi:id="2943" sofa="431" begin="1499" end="1500" value="."/><types:Lemma xmi:id="1380" sofa="431" begin="1504" end="1509" value="these"/><types:Lemma xmi:id="8016" sofa="431" begin="1510" end="1517" value="result"/><types:Lemma xmi:id="1658" sofa="431" begin="1518" end="1525" value="suggest"/><types:Lemma xmi:id="3167" sofa="431" begin="1526" end="1530" value="that"/><types:Lemma xmi:id="8292" sofa="431" begin="1531" end="1534" value="the"/><types:Lemma xmi:id="1758" sofa="431" begin="1535" end="1547" value="vasorelaxant"/><types:Lemma xmi:id="5190" sofa="431" begin="1548" end="1554" value="effect"/><types:Lemma xmi:id="6709" sofa="431" begin="1555" end="1557" value="of"/><types:Lemma xmi:id="5565" sofa="431" begin="1558" end="1563" value="FK409"/><types:Lemma xmi:id="4333" sofa="431" begin="1563" end="1564" value=","/><types:Lemma xmi:id="4368" sofa="431" begin="1565" end="1569" value="like"/><types:Lemma xmi:id="9245" sofa="431" begin="1570" end="1574" value="that"/><types:Lemma xmi:id="5631" sofa="431" begin="1575" end="1577" value="of"/><types:Lemma xmi:id="9030" sofa="431" begin="1578" end="1591" value="nitroglycerin"/><types:Lemma xmi:id="7501" sofa="431" begin="1591" end="1592" value=","/><types:Lemma xmi:id="3479" sofa="431" begin="1593" end="1595" value="be"/><types:Lemma xmi:id="2046" sofa="431" begin="1596" end="1599" value="due"/><types:Lemma xmi:id="6540" sofa="431" begin="1600" end="1602" value="to"/><types:Lemma xmi:id="3140" sofa="431" begin="1603" end="1613" value="activation"/><types:Lemma xmi:id="2251" sofa="431" begin="1614" end="1616" value="of"/><types:Lemma xmi:id="12268" sofa="431" begin="1617" end="1624" value="soluble"/><types:Lemma xmi:id="930" sofa="431" begin="1625" end="1634" value="guanylate"/><types:Lemma xmi:id="3208" sofa="431" begin="1635" end="1642" 
value="cyclase"/><types:Lemma xmi:id="3306" sofa="431" begin="1643" end="1646" value="and"/><types:Lemma xmi:id="1243" sofa="431" begin="1647" end="1648" value="a"/><types:Lemma xmi:id="5417" sofa="431" begin="1649" end="1658" value="resultant"/><types:Lemma xmi:id="11812" sofa="431" begin="1659" end="1667" value="increase"/><types:Lemma xmi:id="10663" sofa="431" begin="1668" end="1670" value="in"/><types:Lemma xmi:id="7229" sofa="431" begin="1671" end="1684" value="intracellular"/><types:Lemma xmi:id="1318" sofa="431" begin="1685" end="1691" value="cyclic"/><types:Lemma xmi:id="11192" sofa="431" begin="1692" end="1695" value="gmp"/><types:Lemma xmi:id="2673" sofa="431" begin="1695" end="1696" value="."/><types:Lemma xmi:id="1388" sofa="431" begin="1697" end="1704" value="however"/><types:Lemma xmi:id="6275" sofa="431" begin="1704" end="1705" value=","/><types:Lemma xmi:id="9515" sofa="431" begin="1706" end="1714" value="compare"/><types:Lemma xmi:id="11022" sofa="431" begin="1715" end="1719" value="with"/><types:Lemma xmi:id="5502" sofa="431" begin="1720" end="1733" value="nitroglycerin"/><types:Lemma xmi:id="7789" sofa="431" begin="1733" end="1734" value=","/><types:Lemma xmi:id="624" sofa="431" begin="1735" end="1740" value="there"/><types:Lemma xmi:id="2850" sofa="431" begin="1741" end="1744" value="be"/><types:Lemma xmi:id="6924" sofa="431" begin="1745" end="1749" value="little"/><types:Lemma xmi:id="2127" sofa="431" begin="1750" end="1754" value="self"/><types:Lemma xmi:id="9110" sofa="431" begin="1754" end="1755" value="-"/><types:Lemma xmi:id="7927" sofa="431" begin="1755" end="1764" value="tolerance"/><types:Lemma xmi:id="958" sofa="431" begin="1765" end="1767" value="to"/><types:Lemma xmi:id="8610" sofa="431" begin="1768" end="1771" value="the"/><types:Lemma xmi:id="5235" sofa="431" begin="1772" end="1780" value="relaxant"/><types:Lemma xmi:id="4905" sofa="431" begin="1781" end="1788" value="effect"/><types:Lemma xmi:id="1299" sofa="431" begin="1789" 
end="1791" value="of"/><types:Lemma xmi:id="3646" sofa="431" begin="1792" end="1797" value="FK409"/><types:Lemma xmi:id="11468" sofa="431" begin="1798" end="1801" value="and"/><types:Lemma xmi:id="4171" sofa="431" begin="1802" end="1812" value="relative"/><types:Lemma xmi:id="7364" sofa="431" begin="1813" end="1819" value="little"/><types:Lemma xmi:id="5639" sofa="431" begin="1820" end="1825" value="cross"/><types:Lemma xmi:id="6717" sofa="431" begin="1825" end="1826" value="-"/><types:Lemma xmi:id="7427" sofa="431" begin="1826" end="1835" value="tolerance"/><types:Lemma xmi:id="699" sofa="431" begin="1836" end="1843" value="between"/><types:Lemma xmi:id="10234" sofa="431" begin="1844" end="1847" value="the"/><types:Lemma xmi:id="8982" sofa="431" begin="1848" end="1851" value="two"/><types:Lemma xmi:id="9038" sofa="431" begin="1852" end="1858" value="agent"/><types:Lemma xmi:id="11504" sofa="431" begin="1858" end="1859" value="."/><types:Token xmi:id="10292" sofa="431" begin="0" end="10" componentId="JULIE Token Boundary Detector" id="1" lemma="6476" posTag="3847"/><types:Token xmi:id="3911" sofa="431" begin="11" end="13" componentId="JULIE Token Boundary Detector" id="2" lemma="3749" posTag="4956"/><types:Token xmi:id="5801" sofa="431" begin="14" end="17" componentId="JULIE Token Boundary Detector" id="3" lemma="11958" posTag="12189"/><types:Token xmi:id="4146" sofa="431" begin="18" end="25" componentId="JULIE Token Boundary Detector" id="4" lemma="10615" posTag="11986"/><types:Token xmi:id="5359" sofa="431" begin="26" end="28" componentId="JULIE Token Boundary Detector" id="5" lemma="3069" posTag="8864"/><types:Token xmi:id="8189" sofa="431" begin="29" end="32" componentId="JULIE Token Boundary Detector" id="6" lemma="9532" posTag="3333"/><types:Token xmi:id="9735" sofa="431" begin="33" end="38" componentId="JULIE Token Boundary Detector" id="7" lemma="11704" posTag="1669"/><types:Token xmi:id="3529" sofa="431" begin="39" end="50" componentId="JULIE Token 
Boundary Detector" id="8" lemma="6138" posTag="1966"/><types:Token xmi:id="7509" sofa="431" begin="51" end="56" componentId="JULIE Token Boundary Detector" id="9" lemma="938" posTag="7358"/><types:Token xmi:id="6576" sofa="431" begin="57" end="61" componentId="JULIE Token Boundary Detector" id="10" lemma="9357" posTag="6592"/><types:Token xmi:id="8208" sofa="431" begin="62" end="67" componentId="JULIE Token Boundary Detector" id="11" lemma="11786" posTag="3517"/><types:Token xmi:id="7755" sofa="431" begin="68" end="70" componentId="JULIE Token Boundary Detector" id="12" lemma="2076" posTag="11631"/><types:Token xmi:id="2444" sofa="431" begin="71" end="84" componentId="JULIE Token Boundary Detector" id="13" lemma="1346" posTag="9822"/><types:Token xmi:id="2755" sofa="431" begin="85" end="87" componentId="JULIE Token Boundary Detector" id="14" lemma="6171" posTag="3024"/><types:Token xmi:id="3148" sofa="431" begin="88" end="96" componentId="JULIE Token Boundary Detector" id="15" lemma="6916" posTag="6195"/><types:Token xmi:id="7935" sofa="431" begin="97" end="105" componentId="JULIE Token Boundary Detector" id="16" lemma="7590" posTag="998"/><types:Token xmi:id="3861" sofa="431" begin="106" end="112" componentId="JULIE Token Boundary Detector" id="17" lemma="10847" posTag="8793"/><types:Token xmi:id="7525" sofa="431" begin="113" end="115" componentId="JULIE Token Boundary Detector" id="18" lemma="10170" posTag="6229"/><types:Token xmi:id="2259" sofa="431" begin="116" end="119" componentId="JULIE Token Boundary Detector" id="19" lemma="8768" posTag="1531"/><types:Token xmi:id="1569" sofa="431" begin="120" end="123" componentId="JULIE Token Boundary Detector" id="20" lemma="4235" posTag="786"/><types:Token xmi:id="4301" sofa="431" begin="123" end="124" componentId="JULIE Token Boundary Detector" id="21" lemma="6286" posTag="7473"/><types:Token xmi:id="10431" sofa="431" begin="128" end="131" componentId="JULIE Token Boundary Detector" id="22" lemma="7372" 
posTag="1655"/><types:Token xmi:id="12074" sofa="431" begin="132" end="144" componentId="JULIE Token Boundary Detector" id="23" lemma="11341" posTag="8109"/><types:Token xmi:id="12415" sofa="431" begin="145" end="152" componentId="JULIE Token Boundary Detector" id="24" lemma="2284" posTag="7832"/><types:Token xmi:id="9474" sofa="431" begin="153" end="155" componentId="JULIE Token Boundary Detector" id="25" lemma="3581" posTag="3113"/><types:Token xmi:id="9658" sofa="431" begin="156" end="161" componentId="JULIE Token Boundary Detector" id="26" lemma="8024" posTag="6364"/><types:Token xmi:id="10360" sofa="431" begin="161" end="162" componentId="JULIE Token Boundary Detector" id="27" lemma="11049" posTag="7325"/><types:Token xmi:id="9983" sofa="431" begin="163" end="164" componentId="JULIE Token Boundary Detector" id="28" lemma="8050" posTag="6548"/><types:Token xmi:id="4211" sofa="431" begin="165" end="168" componentId="JULIE Token Boundary Detector" id="29" lemma="9490" posTag="9206"/><types:Token xmi:id="4179" sofa="431" begin="169" end="185" componentId="JULIE Token Boundary Detector" id="30" lemma="7662" posTag="11090"/><types:Token xmi:id="4806" sofa="431" begin="186" end="197" componentId="JULIE Token Boundary Detector" id="31" lemma="3823" posTag="1177"/><types:Token xmi:id="5957" sofa="431" begin="198" end="202" componentId="JULIE Token Boundary Detector" id="32" lemma="11899" posTag="4562"/><types:Token xmi:id="3008" sofa="431" begin="203" end="204" componentId="JULIE Token Boundary Detector" id="33" lemma="2747" posTag="7984"/><types:Token xmi:id="663" sofa="431" begin="205" end="214" componentId="JULIE Token Boundary Detector" id="34" lemma="2167" posTag="10807"/><types:Token xmi:id="2521" sofa="431" begin="215" end="222" componentId="JULIE Token Boundary Detector" id="35" lemma="4862" posTag="10062"/><types:Token xmi:id="10259" sofa="431" begin="222" end="223" componentId="JULIE Token Boundary Detector" id="36" lemma="2175" posTag="10883"/><types:Token 
xmi:id="3831" sofa="431" begin="224" end="228" componentId="JULIE Token Boundary Detector" id="37" lemma="9457" posTag="2610"/><types:Token xmi:id="4940" sofa="431" begin="229" end="237" componentId="JULIE Token Boundary Detector" id="38" lemma="5326" posTag="2411"/><types:Token xmi:id="2960" sofa="431" begin="238" end="242" componentId="JULIE Token Boundary Detector" id="39" lemma="8465" posTag="9168"/><types:Token xmi:id="9967" sofa="431" begin="243" end="248" componentId="JULIE Token Boundary Detector" id="40" lemma="8599" posTag="2065"/><types:Token xmi:id="2151" sofa="431" begin="249" end="251" componentId="JULIE Token Boundary Detector" id="41" lemma="11528" posTag="707"/><types:Token xmi:id="5171" sofa="431" begin="252" end="265" componentId="JULIE Token Boundary Detector" id="42" lemma="2662" posTag="3654"/><types:Token xmi:id="10073" sofa="431" begin="266" end="268" componentId="JULIE Token Boundary Detector" id="43" lemma="11671" posTag="10056"/><types:Token xmi:id="8077" sofa="431" begin="269" end="277" componentId="JULIE Token Boundary Detector" id="44" lemma="4838" posTag="11937"/><types:Token xmi:id="5119" sofa="431" begin="278" end="286" componentId="JULIE Token Boundary Detector" id="45" lemma="5022" posTag="1969"/><types:Token xmi:id="8441" sofa="431" begin="287" end="293" componentId="JULIE Token Boundary Detector" id="46" lemma="10284" posTag="4734"/><types:Token xmi:id="8093" sofa="431" begin="294" end="299" componentId="JULIE Token Boundary Detector" id="47" lemma="5275" posTag="4672"/><types:Token xmi:id="5722" sofa="431" begin="300" end="302" componentId="JULIE Token Boundary Detector" id="48" lemma="4397" posTag="6198"/><types:Token xmi:id="8058" sofa="431" begin="303" end="306" componentId="JULIE Token Boundary Detector" id="49" lemma="7582" posTag="4454"/><types:Token xmi:id="2309" sofa="431" begin="307" end="310" componentId="JULIE Token Boundary Detector" id="50" lemma="3850" posTag="4420"/><types:Token xmi:id="1212" sofa="431" 
begin="311" end="321" componentId="JULIE Token Boundary Detector" id="51" lemma="2540" posTag="8205"/><types:Token xmi:id="9229" sofa="431" begin="322" end="326" componentId="JULIE Token Boundary Detector" id="52" lemma="10008" posTag="6328"/><types:Token xmi:id="1158" sofa="431" begin="327" end="333" componentId="JULIE Token Boundary Detector" id="53" lemma="6725" posTag="3042"/><types:Token xmi:id="3254" sofa="431" begin="334" end="335" componentId="JULIE Token Boundary Detector" id="54" lemma="8812" posTag="833"/><types:Token xmi:id="5664" sofa="431" begin="335" end="341" componentId="JULIE Token Boundary Detector" id="55" lemma="10495" posTag="8514"/><types:Token xmi:id="1909" sofa="431" begin="342" end="343" componentId="JULIE Token Boundary Detector" id="56" lemma="691" posTag="7740"/><types:Token xmi:id="2783" sofa="431" begin="343" end="344" componentId="JULIE Token Boundary Detector" id="57" lemma="10110" posTag="10819"/><types:Token xmi:id="10376" sofa="431" begin="344" end="345" componentId="JULIE Token Boundary Detector" id="58" lemma="7446" posTag="5084"/><types:Token xmi:id="2219" sofa="431" begin="349" end="354" componentId="JULIE Token Boundary Detector" id="59" lemma="11747" posTag="11030"/><types:Token xmi:id="3495" sofa="431" begin="355" end="356" componentId="JULIE Token Boundary Detector" id="60" lemma="8832" posTag="11774"/><types:Token xmi:id="6420" sofa="431" begin="356" end="362" componentId="JULIE Token Boundary Detector" id="61" lemma="6347" posTag="12456"/><types:Token xmi:id="1414" sofa="431" begin="362" end="363" componentId="JULIE Token Boundary Detector" id="62" lemma="4603" posTag="11952"/><types:Token xmi:id="10623" sofa="431" begin="363" end="364" componentId="JULIE Token Boundary Detector" id="63" lemma="6981" posTag="12475"/><types:Token xmi:id="7095" sofa="431" begin="364" end="370" componentId="JULIE Token Boundary Detector" id="64" lemma="12090" posTag="8322"/><types:Token xmi:id="8352" sofa="431" begin="371" end="372" 
componentId="JULIE Token Boundary Detector" id="65" lemma="11739" posTag="7033"/><types:Token xmi:id="839" sofa="431" begin="372" end="373" componentId="JULIE Token Boundary Detector" id="66" lemma="6701" posTag="10098"/><types:Token xmi:id="2568" sofa="431" begin="374" end="377" componentId="JULIE Token Boundary Detector" id="67" lemma="6034" posTag="4600"/><types:Token xmi:id="9055" sofa="431" begin="378" end="391" componentId="JULIE Token Boundary Detector" id="68" lemma="3806" posTag="1396"/><types:Token xmi:id="4737" sofa="431" begin="392" end="393" componentId="JULIE Token Boundary Detector" id="69" lemma="6042" posTag="1240"/><types:Token xmi:id="10599" sofa="431" begin="393" end="398" componentId="JULIE Token Boundary Detector" id="70" lemma="10447" posTag="6637"/><types:Token xmi:id="8258" sofa="431" begin="398" end="399" componentId="JULIE Token Boundary Detector" id="71" lemma="5940" posTag="8420"/><types:Token xmi:id="1594" sofa="431" begin="399" end="400" componentId="JULIE Token Boundary Detector" id="72" lemma="10639" posTag="2613"/><types:Token xmi:id="11139" sofa="431" begin="400" end="406" componentId="JULIE Token Boundary Detector" id="73" lemma="9775" posTag="3983"/><types:Token xmi:id="9699" sofa="431" begin="407" end="408" componentId="JULIE Token Boundary Detector" id="74" lemma="9449" posTag="4498"/><types:Token xmi:id="6066" sofa="431" begin="408" end="409" componentId="JULIE Token Boundary Detector" id="75" lemma="11536" posTag="10395"/><types:Token xmi:id="1553" sofa="431" begin="410" end="414" componentId="JULIE Token Boundary Detector" id="76" lemma="9718" posTag="12347"/><types:Token xmi:id="3715" sofa="431" begin="415" end="423" componentId="JULIE Token Boundary Detector" id="77" lemma="10965" posTag="4501"/><types:Token xmi:id="5680" sofa="431" begin="424" end="425" componentId="JULIE Token Boundary Detector" id="78" lemma="2584" posTag="2414"/><types:Token xmi:id="4822" sofa="431" begin="426" end="439" componentId="JULIE Token 
Boundary Detector" id="79" lemma="8165" posTag="4273"/><types:Token xmi:id="7237" sofa="431" begin="439" end="440" componentId="JULIE Token Boundary Detector" id="80" lemma="11556" posTag="5937"/><types:Token xmi:id="4438" sofa="431" begin="440" end="449" componentId="JULIE Token Boundary Detector" id="81" lemma="7269" posTag="7879"/><types:Token xmi:id="7557" sofa="431" begin="450" end="460" componentId="JULIE Token Boundary Detector" id="82" lemma="9751" posTag="8252"/><types:Token xmi:id="2334" sofa="431" begin="460" end="461" componentId="JULIE Token Boundary Detector" id="83" lemma="5103" posTag="11284"/><types:Token xmi:id="6235" sofa="431" begin="462" end="472" componentId="JULIE Token Boundary Detector" id="84" lemma="1310" posTag="660"/><types:Token xmi:id="4317" sofa="431" begin="473" end="475" componentId="JULIE Token Boundary Detector" id="85" lemma="8776" posTag="12276"/><types:Token xmi:id="2858" sofa="431" begin="476" end="480" componentId="JULIE Token Boundary Detector" id="86" lemma="1028" posTag="5399"/><types:Token xmi:id="1196" sofa="431" begin="481" end="487" componentId="JULIE Token Boundary Detector" id="87" lemma="9850" posTag="10535"/><types:Token xmi:id="4718" sofa="431" begin="488" end="494" componentId="JULIE Token Boundary Detector" id="88" lemma="3295" posTag="10059"/><types:Token xmi:id="9923" sofa="431" begin="495" end="499" componentId="JULIE Token Boundary Detector" id="89" lemma="5990" posTag="1470"/><types:Token xmi:id="1473" sofa="431" begin="500" end="505" componentId="JULIE Token Boundary Detector" id="90" lemma="11212" posTag="2183"/><types:Token xmi:id="7863" sofa="431" begin="506" end="509" componentId="JULIE Token Boundary Detector" id="91" lemma="607" posTag="5446"/><types:Token xmi:id="5135" sofa="431" begin="510" end="515" componentId="JULIE Token Boundary Detector" id="92" lemma="6082" posTag="6989"/><types:Token xmi:id="9071" sofa="431" begin="516" end="518" componentId="JULIE Token Boundary Detector" id="93" 
lemma="1925" posTag="640"/><types:Token xmi:id="3660" sofa="431" begin="519" end="524" componentId="JULIE Token Boundary Detector" id="94" lemma="7960" posTag="6698"/><types:Token xmi:id="4702" sofa="431" begin="525" end="529" componentId="JULIE Token Boundary Detector" id="95" lemma="4554" posTag="6251"/><types:Token xmi:id="3422" sofa="431" begin="530" end="536" componentId="JULIE Token Boundary Detector" id="96" lemma="9586" posTag="8392"/><types:Token xmi:id="11564" sofa="431" begin="537" end="541" componentId="JULIE Token Boundary Detector" id="97" lemma="1791" posTag="12353"/><types:Token xmi:id="3175" sofa="431" begin="542" end="555" componentId="JULIE Token Boundary Detector" id="98" lemma="2417" posTag="1063"/><types:Token xmi:id="9368" sofa="431" begin="555" end="556" componentId="JULIE Token Boundary Detector" id="99" lemma="8536" posTag="7036"/><types:Token xmi:id="6312" sofa="431" begin="560" end="570" componentId="JULIE Token Boundary Detector" id="100" lemma="9384" posTag="4254"/><types:Token xmi:id="4614" sofa="431" begin="571" end="585" componentId="JULIE Token Boundary Detector" id="101" lemma="6665" posTag="2650"/><types:Token xmi:id="3116" sofa="431" begin="586" end="588" componentId="JULIE Token Boundary Detector" id="102" lemma="3438" posTag="10596"/><types:Token xmi:id="6682" sofa="431" begin="589" end="602" componentId="JULIE Token Boundary Detector" id="103" lemma="10556" posTag="9354"/><types:Token xmi:id="6090" sofa="431" begin="603" end="604" componentId="JULIE Token Boundary Detector" id="104" lemma="7196" posTag="5449"/><types:Token xmi:id="4787" sofa="431" begin="604" end="610" componentId="JULIE Token Boundary Detector" id="105" lemma="10251" posTag="8560"/><types:Token xmi:id="9401" sofa="431" begin="611" end="612" componentId="JULIE Token Boundary Detector" id="106" lemma="7688" posTag="8130"/><types:Token xmi:id="1044" sofa="431" begin="612" end="613" componentId="JULIE Token Boundary Detector" id="107" lemma="919" 
posTag="4753"/><types:Token xmi:id="6106" sofa="431" begin="614" end="617" componentId="JULIE Token Boundary Detector" id="108" lemma="6612" posTag="7424"/><types:Token xmi:id="8796" sofa="431" begin="618" end="623" componentId="JULIE Token Boundary Detector" id="109" lemma="10831" posTag="2469"/><types:Token xmi:id="9338" sofa="431" begin="624" end="625" componentId="JULIE Token Boundary Detector" id="110" lemma="10989" posTag="9015"/><types:Token xmi:id="7150" sofa="431" begin="625" end="626" componentId="JULIE Token Boundary Detector" id="111" lemma="7127" posTag="7193"/><types:Token xmi:id="10407" sofa="431" begin="627" end="628" componentId="JULIE Token Boundary Detector" id="112" lemma="9212" posTag="8533"/><types:Token xmi:id="8233" sofa="431" begin="629" end="635" componentId="JULIE Token Boundary Detector" id="113" lemma="10779" posTag="12412"/><types:Token xmi:id="7968" sofa="431" begin="636" end="637" componentId="JULIE Token Boundary Detector" id="114" lemma="1708" posTag="9209"/><types:Token xmi:id="12300" sofa="431" begin="637" end="638" componentId="JULIE Token Boundary Detector" id="115" lemma="4932" posTag="8289"/><types:Token xmi:id="4513" sofa="431" begin="639" end="647" componentId="JULIE Token Boundary Detector" id="116" lemma="8008" posTag="6210"/><types:Token xmi:id="7253" sofa="431" begin="648" end="657" componentId="JULIE Token Boundary Detector" id="117" lemma="11882" posTag="10324"/><types:Token xmi:id="2834" sofa="431" begin="658" end="670" componentId="JULIE Token Boundary Detector" id="118" lemma="11260" posTag="7039"/><types:Token xmi:id="7476" sofa="431" begin="671" end="684" componentId="JULIE Token Boundary Detector" id="119" lemma="8856" posTag="1853"/><types:Token xmi:id="9292" sofa="431" begin="685" end="686" componentId="JULIE Token Boundary Detector" id="120" lemma="3225" posTag="3233"/><types:Token xmi:id="4039" sofa="431" begin="686" end="692" componentId="JULIE Token Boundary Detector" id="121" lemma="1949" 
posTag="3657"/><types:Token xmi:id="2195" sofa="431" begin="693" end="696" componentId="JULIE Token Boundary Detector" id="122" lemma="2642" posTag="10489"/><types:Token xmi:id="4873" sofa="431" begin="696" end="697" componentId="JULIE Token Boundary Detector" id="123" lemma="2681" posTag="1739"/><types:Token xmi:id="1180" sofa="431" begin="698" end="704" componentId="JULIE Token Boundary Detector" id="124" lemma="11305" posTag="11868"/><types:Token xmi:id="1933" sofa="431" begin="704" end="705" componentId="JULIE Token Boundary Detector" id="125" lemma="11014" posTag="12136"/><types:Token xmi:id="5283" sofa="431" begin="706" end="713" componentId="JULIE Token Boundary Detector" id="126" lemma="11236" posTag="11476"/><types:Token xmi:id="8376" sofa="431" begin="714" end="724" componentId="JULIE Token Boundary Detector" id="127" lemma="10839" posTag="6504"/><types:Token xmi:id="3553" sofa="431" begin="725" end="729" componentId="JULIE Token Boundary Detector" id="128" lemma="4592" posTag="710"/><types:Token xmi:id="6370" sofa="431" begin="730" end="744" componentId="JULIE Token Boundary Detector" id="129" lemma="11614" posTag="5984"/><types:Token xmi:id="4004" sofa="431" begin="744" end="745" componentId="JULIE Token Boundary Detector" id="130" lemma="2739" posTag="7743"/><types:Token xmi:id="11433" sofa="431" begin="746" end="755" componentId="JULIE Token Boundary Detector" id="131" lemma="4846" posTag="946"/><types:Token xmi:id="2927" sofa="431" begin="756" end="768" componentId="JULIE Token Boundary Detector" id="132" lemma="12210" posTag="2124"/><types:Token xmi:id="4529" sofa="431" begin="769" end="782" componentId="JULIE Token Boundary Detector" id="133" lemma="8395" posTag="4251"/><types:Token xmi:id="7204" sofa="431" begin="783" end="784" componentId="JULIE Token Boundary Detector" id="134" lemma="10948" posTag="3511"/><types:Token xmi:id="7169" sofa="431" begin="784" end="790" componentId="JULIE Token Boundary Detector" id="135" lemma="11066" 
posTag="2441"/><types:Token xmi:id="8691" sofa="431" begin="791" end="794" componentId="JULIE Token Boundary Detector" id="136" lemma="10423" posTag="4784"/><types:Token xmi:id="3886" sofa="431" begin="794" end="795" componentId="JULIE Token Boundary Detector" id="137" lemma="8591" posTag="1818"/><types:Token xmi:id="2135" sofa="431" begin="796" end="802" componentId="JULIE Token Boundary Detector" id="138" lemma="1700" posTag="983"/><types:Token xmi:id="2976" sofa="431" begin="803" end="807" componentId="JULIE Token Boundary Detector" id="139" lemma="9330" posTag="4929"/><types:Token xmi:id="6018" sofa="431" begin="808" end="818" componentId="JULIE Token Boundary Detector" id="140" lemma="750" posTag="9308"/><types:Token xmi:id="10973" sofa="431" begin="818" end="819" componentId="JULIE Token Boundary Detector" id="141" lemma="9759" posTag="6457"/><types:Token xmi:id="11220" sofa="431" begin="823" end="826" componentId="JULIE Token Boundary Detector" id="142" lemma="5470" posTag="6232"/><types:Token xmi:id="10858" sofa="431" begin="827" end="840" componentId="JULIE Token Boundary Detector" id="143" lemma="11975" posTag="3314"/><types:Token xmi:id="3279" sofa="431" begin="840" end="841" componentId="JULIE Token Boundary Detector" id="144" lemma="12440" posTag="4870"/><types:Token xmi:id="1078" sofa="431" begin="841" end="851" componentId="JULIE Token Boundary Detector" id="145" lemma="7050" posTag="8572"/><types:Token xmi:id="4474" sofa="431" begin="852" end="858" componentId="JULIE Token Boundary Detector" id="146" lemma="10997" posTag="7696"/><types:Token xmi:id="7405" sofa="431" begin="859" end="862" componentId="JULIE Token Boundary Detector" id="147" lemma="9087" posTag="7147"/><types:Token xmi:id="11655" sofa="431" begin="863" end="876" componentId="JULIE Token Boundary Detector" id="148" lemma="5557" posTag="9262"/><types:Token xmi:id="10697" sofa="431" begin="877" end="880" componentId="JULIE Token Boundary Detector" id="149" lemma="1689" 
posTag="747"/><types:Token xmi:id="12459" sofa="431" begin="881" end="886" componentId="JULIE Token Boundary Detector" id="150" lemma="5705" posTag="3303"/><types:Token xmi:id="4765" sofa="431" begin="887" end="891" componentId="JULIE Token Boundary Detector" id="151" lemma="2113" posTag="1276"/><types:Token xmi:id="7835" sofa="431" begin="892" end="899" componentId="JULIE Token Boundary Detector" id="152" lemma="9284" posTag="1399"/><types:Token xmi:id="1980" sofa="431" begin="900" end="902" componentId="JULIE Token Boundary Detector" id="153" lemma="5111" posTag="2353"/><types:Token xmi:id="12151" sofa="431" begin="903" end="906" componentId="JULIE Token Boundary Detector" id="154" lemma="1036" posTag="2670"/><types:Token xmi:id="7277" sofa="431" begin="907" end="912" componentId="JULIE Token Boundary Detector" id="155" lemma="2038" posTag="5546"/><types:Token xmi:id="8675" sofa="431" begin="913" end="915" componentId="JULIE Token Boundary Detector" id="156" lemma="7185" posTag="12148"/><types:Token xmi:id="1884" sofa="431" begin="916" end="925" componentId="JULIE Token Boundary Detector" id="157" lemma="9802" posTag="4611"/><types:Token xmi:id="8926" sofa="431" begin="926" end="930" componentId="JULIE Token Boundary Detector" id="158" lemma="652" posTag="5973"/><types:Token xmi:id="2425" sofa="431" begin="931" end="932" componentId="JULIE Token Boundary Detector" id="159" lemma="7438" posTag="11544"/><types:Token xmi:id="11033" sofa="431" begin="932" end="933" componentId="JULIE Token Boundary Detector" id="160" lemma="6804" posTag="9107"/><types:Token xmi:id="10519" sofa="431" begin="934" end="935" componentId="JULIE Token Boundary Detector" id="161" lemma="6948" posTag="11596"/><types:Token xmi:id="4568" sofa="431" begin="936" end="942" componentId="JULIE Token Boundary Detector" id="162" lemma="7824" posTag="11287"/><types:Token xmi:id="3687" sofa="431" begin="943" end="944" componentId="JULIE Token Boundary Detector" id="163" lemma="1972" 
posTag="9819"/><types:Token xmi:id="9683" sofa="431" begin="945" end="948" componentId="JULIE Token Boundary Detector" id="164" lemma="2826" posTag="6445"/><types:Token xmi:id="12364" sofa="431" begin="949" end="955" componentId="JULIE Token Boundary Detector" id="165" lemma="1259" posTag="9311"/><types:Token xmi:id="9190" sofa="431" begin="956" end="957" componentId="JULIE Token Boundary Detector" id="166" lemma="11874" posTag="12350"/><types:Token xmi:id="9825" sofa="431" begin="957" end="958" componentId="JULIE Token Boundary Detector" id="167" lemma="11989" posTag="11465"/><types:Token xmi:id="11836" sofa="431" begin="958" end="959" componentId="JULIE Token Boundary Detector" id="168" lemma="2634" posTag="1343"/><types:Token xmi:id="2714" sofa="431" begin="960" end="962" componentId="JULIE Token Boundary Detector" id="169" lemma="8958" posTag="9939"/><types:Token xmi:id="7699" sofa="431" begin="963" end="972" componentId="JULIE Token Boundary Detector" id="170" lemma="2548" posTag="11871"/><types:Token xmi:id="9268" sofa="431" begin="973" end="975" componentId="JULIE Token Boundary Detector" id="171" lemma="12173" posTag="11940"/><types:Token xmi:id="11123" sofa="431" begin="976" end="983" componentId="JULIE Token Boundary Detector" id="172" lemma="6007" posTag="12167"/><types:Token xmi:id="5259" sofa="431" begin="984" end="993" componentId="JULIE Token Boundary Detector" id="173" lemma="1430" posTag="9655"/><types:Token xmi:id="2698" sofa="431" begin="994" end="1001" componentId="JULIE Token Boundary Detector" id="174" lemma="4079" posTag="1799"/><types:Token xmi:id="1537" sofa="431" begin="1001" end="1002" componentId="JULIE Token Boundary Detector" id="175" lemma="1326" posTag="8607"/><types:Token xmi:id="2235" sofa="431" begin="1003" end="1006" componentId="JULIE Token Boundary Detector" id="176" lemma="10022" posTag="4405"/><types:Token xmi:id="713" sofa="431" begin="1007" end="1009" componentId="JULIE Token Boundary Detector" id="177" lemma="3345" 
posTag="11983"/><types:Token xmi:id="808" sofa="431" begin="1010" end="1013" componentId="JULIE Token Boundary Detector" id="178" lemma="5478" posTag="1666"/><types:Token xmi:id="7008" sofa="431" begin="1014" end="1018" componentId="JULIE Token Boundary Detector" id="179" lemma="1873" posTag="7435"/><types:Token xmi:id="6640" sofa="431" begin="1019" end="1021" componentId="JULIE Token Boundary Detector" id="180" lemma="2030" posTag="9636"/><types:Token xmi:id="5068" sofa="431" begin="1022" end="1031" componentId="JULIE Token Boundary Detector" id="181" lemma="7642" posTag="11102"/><types:Token xmi:id="11852" sofa="431" begin="1032" end="1033" componentId="JULIE Token Boundary Detector" id="182" lemma="8368" posTag="12170"/><types:Token xmi:id="6213" sofa="431" begin="1033" end="1034" componentId="JULIE Token Boundary Detector" id="183" lemma="3545" posTag="8319"/><types:Token xmi:id="8840" sofa="431" begin="1035" end="1036" componentId="JULIE Token Boundary Detector" id="184" lemma="6595" posTag="9892"/><types:Token xmi:id="12380" sofa="431" begin="1037" end="1043" componentId="JULIE Token Boundary Detector" id="185" lemma="7042" posTag="7337"/><types:Token xmi:id="8473" sofa="431" begin="1044" end="1045" componentId="JULIE Token Boundary Detector" id="186" lemma="5841" posTag="9624"/><types:Token xmi:id="11155" sofa="431" begin="1046" end="1049" componentId="JULIE Token Boundary Detector" id="187" lemma="3061" posTag="11313"/><types:Token xmi:id="5383" sofa="431" begin="1050" end="1056" componentId="JULIE Token Boundary Detector" id="188" lemma="5030" posTag="4781"/><types:Token xmi:id="11244" sofa="431" begin="1057" end="1058" componentId="JULIE Token Boundary Detector" id="189" lemma="5647" posTag="5047"/><types:Token xmi:id="8707" sofa="431" begin="1058" end="1059" componentId="JULIE Token Boundary Detector" id="190" lemma="4466" posTag="9715"/><types:Token xmi:id="7389" sofa="431" begin="1059" end="1060" componentId="JULIE Token Boundary Detector" id="191" 
lemma="2084" posTag="11379"/><types:Token xmi:id="7614" sofa="431" begin="1061" end="1063" componentId="JULIE Token Boundary Detector" id="192" lemma="9858" posTag="836"/><types:Token xmi:id="1094" sofa="431" begin="1064" end="1073" componentId="JULIE Token Boundary Detector" id="193" lemma="1251" posTag="7135"/><types:Token xmi:id="7309" sofa="431" begin="1074" end="1076" componentId="JULIE Token Boundary Detector" id="194" lemma="5163" posTag="5510"/><types:Token xmi:id="11720" sofa="431" begin="1077" end="1083" componentId="JULIE Token Boundary Detector" id="195" lemma="7541" posTag="4959"/><types:Token xmi:id="6992" sofa="431" begin="1084" end="1087" componentId="JULIE Token Boundary Detector" id="196" lemma="12448" posTag="10945"/><types:Token xmi:id="11580" sofa="431" begin="1088" end="1105" componentId="JULIE Token Boundary Detector" id="197" lemma="3790" posTag="2565"/><types:Token xmi:id="5615" sofa="431" begin="1105" end="1106" componentId="JULIE Token Boundary Detector" id="198" lemma="4243" posTag="10886"/><types:Token xmi:id="9314" sofa="431" begin="1110" end="1115" componentId="JULIE Token Boundary Detector" id="199" lemma="10689" posTag="5773"/><types:Token xmi:id="9786" sofa="431" begin="1116" end="1124" componentId="JULIE Token Boundary Detector" id="200" lemma="12181" posTag="5414"/><types:Token xmi:id="4913" sofa="431" begin="1125" end="1127" componentId="JULIE Token Boundary Detector" id="201" lemma="4971" posTag="3858"/><types:Token xmi:id="4122" sofa="431" begin="1128" end="1136" componentId="JULIE Token Boundary Detector" id="202" lemma="7813" posTag="8820"/><types:Token xmi:id="4020" sofa="431" begin="1137" end="1145" componentId="JULIE Token Boundary Detector" id="203" lemma="5833" posTag="10930"/><types:Token xmi:id="6050" sofa="431" begin="1146" end="1148" componentId="JULIE Token Boundary Detector" id="204" lemma="4584" posTag="4435"/><types:Token xmi:id="10308" sofa="431" begin="1149" end="1152" componentId="JULIE Token Boundary 
Detector" id="205" lemma="9907" posTag="3712"/><types:Token xmi:id="6179" sofa="431" begin="1153" end="1162" componentId="JULIE Token Boundary Detector" id="206" lemma="11425" posTag="12288"/><types:Token xmi:id="5207" sofa="431" begin="1162" end="1163" componentId="JULIE Token Boundary Detector" id="207" lemma="5549" posTag="1066"/><types:Token xmi:id="8489" sofa="431" begin="1163" end="1172" componentId="JULIE Token Boundary Detector" id="208" lemma="12356" posTag="6367"/><types:Token xmi:id="5530" sofa="431" begin="1173" end="1186" componentId="JULIE Token Boundary Detector" id="209" lemma="5976" posTag="11643"/><types:Token xmi:id="1742" sofa="431" begin="1187" end="1189" componentId="JULIE Token Boundary Detector" id="210" lemma="6629" posTag="5603"/><types:Token xmi:id="6956" sofa="431" begin="1190" end="1203" componentId="JULIE Token Boundary Detector" id="211" lemma="9959" posTag="10795"/><types:Token xmi:id="6122" sofa="431" begin="1204" end="1205" componentId="JULIE Token Boundary Detector" id="212" lemma="7549" posTag="2110"/><types:Token xmi:id="12396" sofa="431" begin="1205" end="1211" componentId="JULIE Token Boundary Detector" id="213" lemma="10771" posTag="1534"/><types:Token xmi:id="2472" sofa="431" begin="1212" end="1213" componentId="JULIE Token Boundary Detector" id="214" lemma="2068" posTag="11755"/><types:Token xmi:id="12098" sofa="431" begin="1213" end="1214" componentId="JULIE Token Boundary Detector" id="215" lemma="3487" posTag="1060"/><types:Token xmi:id="894" sofa="431" begin="1214" end="1215" componentId="JULIE Token Boundary Detector" id="216" lemma="2513" posTag="789"/><types:Token xmi:id="5776" sofa="431" begin="1216" end="1219" componentId="JULIE Token Boundary Detector" id="217" lemma="1731" posTag="5738"/><types:Token xmi:id="3362" sofa="431" begin="1220" end="1224" componentId="JULIE Token Boundary Detector" id="218" lemma="1489" posTag="11736"/><types:Token xmi:id="10713" sofa="431" begin="1225" end="1229" componentId="JULIE 
Token Boundary Detector" id="219" lemma="9767" posTag="7361"/><types:Token xmi:id="6560" sofa="431" begin="1230" end="1235" componentId="JULIE Token Boundary Detector" id="220" lemma="8328" posTag="1716"/><types:Token xmi:id="10647" sofa="431" begin="1236" end="1238" componentId="JULIE Token Boundary Detector" id="221" lemma="3200" posTag="1279"/><types:Token xmi:id="11074" sofa="431" begin="1239" end="1244" componentId="JULIE Token Boundary Detector" id="222" lemma="6874" posTag="11302"/><types:Token xmi:id="2497" sofa="431" begin="1245" end="1248" componentId="JULIE Token Boundary Detector" id="223" lemma="3798" posTag="4803"/><types:Token xmi:id="8517" sofa="431" begin="1249" end="1252" componentId="JULIE Token Boundary Detector" id="224" lemma="10747" posTag="7421"/><types:Token xmi:id="1515" sofa="431" begin="1253" end="1259" componentId="JULIE Token Boundary Detector" id="225" lemma="9951" posTag="8283"/><types:Token xmi:id="10755" sofa="431" begin="1260" end="1273" componentId="JULIE Token Boundary Detector" id="226" lemma="9507" posTag="5443"/><types:Token xmi:id="8544" sofa="431" begin="1273" end="1274" componentId="JULIE Token Boundary Detector" id="227" lemma="8181" posTag="2771"/><types:Token xmi:id="8942" sofa="431" begin="1275" end="1283" componentId="JULIE Token Boundary Detector" id="228" lemma="6493" posTag="2328"/><types:Token xmi:id="8133" sofa="431" begin="1284" end="1288" componentId="JULIE Token Boundary Detector" id="229" lemma="3638" posTag="758"/><types:Token xmi:id="10889" sofa="431" begin="1289" end="1291" componentId="JULIE Token Boundary Detector" id="230" lemma="7900" posTag="9556"/><types:Token xmi:id="11758" sofa="431" begin="1292" end="1305" componentId="JULIE Token Boundary Detector" id="231" lemma="3455" posTag="7882"/><types:Token xmi:id="2992" sofa="431" begin="1306" end="1315" componentId="JULIE Token Boundary Detector" id="232" lemma="1354" posTag="2908"/><types:Token xmi:id="6460" sofa="431" begin="1316" end="1321" 
componentId="JULIE Token Boundary Detector" id="233" lemma="4071" posTag="10855"/><types:Token xmi:id="12325" sofa="431" begin="1322" end="1324" componentId="JULIE Token Boundary Detector" id="234" lemma="4138" posTag="3578"/><types:Token xmi:id="5486" sofa="431" begin="1325" end="1329" componentId="JULIE Token Boundary Detector" id="235" lemma="2292" posTag="10336"/><types:Token xmi:id="6842" sofa="431" begin="1329" end="1330" componentId="JULIE Token Boundary Detector" id="236" lemma="5375" posTag="12341"/><types:Token xmi:id="8734" sofa="431" begin="1331" end="1336" componentId="JULIE Token Boundary Detector" id="237" lemma="8876" posTag="10222"/><types:Token xmi:id="12033" sofa="431" begin="1337" end="1345" componentId="JULIE Token Boundary Detector" id="238" lemma="2057" posTag="3514"/><types:Token xmi:id="10464" sofa="431" begin="1346" end="1348" componentId="JULIE Token Boundary Detector" id="239" lemma="8918" posTag="2350"/><types:Token xmi:id="1821" sofa="431" begin="1349" end="1352" componentId="JULIE Token Boundary Detector" id="240" lemma="4227" posTag="9171"/><types:Token xmi:id="1110" sofa="431" begin="1353" end="1362" componentId="JULIE Token Boundary Detector" id="241" lemma="9866" posTag="12117"/><types:Token xmi:id="12218" sofa="431" begin="1362" end="1363" componentId="JULIE Token Boundary Detector" id="242" lemma="8173" posTag="8074"/><types:Token xmi:id="9152" sofa="431" begin="1363" end="1372" componentId="JULIE Token Boundary Detector" id="243" lemma="4490" posTag="1697"/><types:Token xmi:id="12049" sofa="431" begin="1373" end="1386" componentId="JULIE Token Boundary Detector" id="244" lemma="4664" posTag="12344"/><types:Token xmi:id="1142" sofa="431" begin="1387" end="1389" componentId="JULIE Token Boundary Detector" id="245" lemma="632" posTag="1307"/><types:Token xmi:id="12120" sofa="431" begin="1390" end="1395" componentId="JULIE Token Boundary Detector" id="246" lemma="10030" posTag="3394"/><types:Token xmi:id="3045" sofa="431" 
begin="1396" end="1397" componentId="JULIE Token Boundary Detector" id="247" lemma="966" posTag="927"/><types:Token xmi:id="3955" sofa="431" begin="1397" end="1403" componentId="JULIE Token Boundary Detector" id="248" lemma="3676" posTag="9365"/><types:Token xmi:id="9136" sofa="431" begin="1404" end="1405" componentId="JULIE Token Boundary Detector" id="249" lemma="5513" posTag="10016"/><types:Token xmi:id="4257" sofa="431" begin="1405" end="1406" componentId="JULIE Token Boundary Detector" id="250" lemma="10065" posTag="7851"/><types:Token xmi:id="9540" sofa="431" begin="1406" end="1407" componentId="JULIE Token Boundary Detector" id="251" lemma="4854" posTag="6272"/><types:Token xmi:id="11688" sofa="431" begin="1408" end="1411" componentId="JULIE Token Boundary Detector" id="252" lemma="8457" posTag="679"/><types:Token xmi:id="1996" sofa="431" begin="1412" end="1416" componentId="JULIE Token Boundary Detector" id="253" lemma="10787" posTag="11608"/><types:Token xmi:id="6524" sofa="431" begin="1417" end="1421" componentId="JULIE Token Boundary Detector" id="254" lemma="9915" posTag="5867"/><types:Token xmi:id="5006" sofa="431" begin="1422" end="1427" componentId="JULIE Token Boundary Detector" id="255" lemma="1631" posTag="11955"/><types:Token xmi:id="1802" sofa="431" begin="1428" end="1430" componentId="JULIE Token Boundary Detector" id="256" lemma="3630" posTag="4423"/><types:Token xmi:id="6331" sofa="431" begin="1431" end="1436" componentId="JULIE Token Boundary Detector" id="257" lemma="10573" posTag="8249"/><types:Token xmi:id="5741" sofa="431" begin="1437" end="1446" componentId="JULIE Token Boundary Detector" id="258" lemma="1291" posTag="4408"/><types:Token xmi:id="7457" sofa="431" begin="1447" end="1452" componentId="JULIE Token Boundary Detector" id="259" lemma="11712" posTag="864"/><types:Token xmi:id="4889" sofa="431" begin="1453" end="1454" componentId="JULIE Token Boundary Detector" id="260" lemma="2211" posTag="10581"/><types:Token xmi:id="3397" 
sofa="431" begin="1455" end="1459" componentId="JULIE Token Boundary Detector" id="261" lemma="3132" posTag="10203"/><types:Token xmi:id="2883" sofa="431" begin="1460" end="1463" componentId="JULIE Token Boundary Detector" id="262" lemma="1856" posTag="1237"/><types:Token xmi:id="6858" sofa="431" begin="1464" end="1468" componentId="JULIE Token Boundary Detector" id="263" lemma="8412" posTag="3684"/><types:Token xmi:id="8300" sofa="431" begin="1469" end="1471" componentId="JULIE Token Boundary Detector" id="264" lemma="5913" posTag="9095"/><types:Token xmi:id="10145" sofa="431" begin="1472" end="1485" componentId="JULIE Token Boundary Detector" id="265" lemma="6507" posTag="10584"/><types:Token xmi:id="8990" sofa="431" begin="1486" end="1491" componentId="JULIE Token Boundary Detector" id="266" lemma="1681" posTag="11290"/><types:Token xmi:id="9174" sofa="431" begin="1492" end="1494" componentId="JULIE Token Boundary Detector" id="267" lemma="4114" posTag="2537"/><types:Token xmi:id="2365" sofa="431" begin="1495" end="1499" componentId="JULIE Token Boundary Detector" id="268" lemma="8723" posTag="5600"/><types:Token xmi:id="10503" sofa="431" begin="1499" end="1500" componentId="JULIE Token Boundary Detector" id="269" lemma="2943" posTag="7987"/><types:Token xmi:id="11316" sofa="431" begin="1504" end="1509" componentId="JULIE Token Boundary Detector" id="270" lemma="1380" posTag="7454"/><types:Token xmi:id="1438" sofa="431" begin="1510" end="1517" componentId="JULIE Token Boundary Detector" id="271" lemma="8016" posTag="7650"/><types:Token xmi:id="8575" sofa="431" begin="1518" end="1525" componentId="JULIE Token Boundary Detector" id="272" lemma="1658" posTag="7924"/><types:Token xmi:id="8618" sofa="431" begin="1526" end="1530" componentId="JULIE Token Boundary Detector" id="273" lemma="3167" posTag="3027"/><types:Token xmi:id="9639" sofa="431" begin="1531" end="1534" componentId="JULIE Token Boundary Detector" id="274" lemma="8292" posTag="7897"/><types:Token 
xmi:id="5757" sofa="431" begin="1535" end="1547" componentId="JULIE Token Boundary Detector" id="275" lemma="1758" posTag="12114"/><types:Token xmi:id="7076" sofa="431" begin="1548" end="1554" componentId="JULIE Token Boundary Detector" id="276" lemma="5190" posTag="5402"/><types:Token xmi:id="1454" sofa="431" begin="1555" end="1557" componentId="JULIE Token Boundary Detector" id="277" lemma="6709" posTag="7092"/><types:Token xmi:id="2911" sofa="431" begin="1558" end="1563" componentId="JULIE Token Boundary Detector" id="278" lemma="5565" posTag="1411"/><types:Token xmi:id="1766" sofa="431" begin="1563" end="1564" componentId="JULIE Token Boundary Detector" id="279" lemma="4333" posTag="1628"/><types:Token xmi:id="7111" sofa="431" begin="1565" end="1569" componentId="JULIE Token Boundary Detector" id="280" lemma="4368" posTag="8255"/><types:Token xmi:id="1639" sofa="431" begin="1570" end="1574" componentId="JULIE Token Boundary Detector" id="281" lemma="9245" posTag="10942"/><types:Token xmi:id="7724" sofa="431" begin="1575" end="1577" componentId="JULIE Token Boundary Detector" id="282" lemma="5631" posTag="2054"/><types:Token xmi:id="7598" sofa="431" begin="1578" end="1591" componentId="JULIE Token Boundary Detector" id="283" lemma="9030" posTag="8286"/><types:Token xmi:id="1837" sofa="431" begin="1591" end="1592" componentId="JULIE Token Boundary Detector" id="284" lemma="7501" posTag="2331"/><types:Token xmi:id="8634" sofa="431" begin="1593" end="1595" componentId="JULIE Token Boundary Detector" id="285" lemma="3479" posTag="986"/><types:Token xmi:id="8149" sofa="431" begin="1596" end="1599" componentId="JULIE Token Boundary Detector" id="286" lemma="2046" posTag="6821"/><types:Token xmi:id="5343" sofa="431" begin="1600" end="1602" componentId="JULIE Token Boundary Detector" id="287" lemma="6540" posTag="10492"/><types:Token xmi:id="6146" sofa="431" begin="1603" end="1613" componentId="JULIE Token Boundary Detector" id="288" lemma="3140" 
posTag="4036"/><types:Token xmi:id="4195" sofa="431" begin="1614" end="1616" componentId="JULIE Token Boundary Detector" id="289" lemma="2251" posTag="4394"/><types:Token xmi:id="3589" sofa="431" begin="1617" end="1624" componentId="JULIE Token Boundary Detector" id="290" lemma="12268" posTag="3952"/><types:Token xmi:id="6767" sofa="431" begin="1625" end="1634" componentId="JULIE Token Boundary Detector" id="291" lemma="930" posTag="11200"/><types:Token xmi:id="11268" sofa="431" begin="1635" end="1642" componentId="JULIE Token Boundary Detector" id="292" lemma="3208" posTag="10348"/><types:Token xmi:id="761" sofa="431" begin="1643" end="1646" componentId="JULIE Token Boundary Detector" id="293" lemma="3306" posTag="7639"/><types:Token xmi:id="10206" sofa="431" begin="1647" end="1648" componentId="JULIE Token Boundary Detector" id="294" lemma="1243" posTag="5232"/><types:Token xmi:id="11449" sofa="431" begin="1649" end="1658" componentId="JULIE Token Boundary Detector" id="295" lemma="5417" posTag="9265"/><types:Token xmi:id="4285" sofa="431" begin="1659" end="1667" componentId="JULIE Token Boundary Detector" id="296" lemma="11812" posTag="1719"/><types:Token xmi:id="10187" sofa="431" begin="1668" end="1670" componentId="JULIE Token Boundary Detector" id="297" lemma="10663" posTag="2325"/><types:Token xmi:id="11820" sofa="431" begin="1671" end="1684" componentId="JULIE Token Boundary Detector" id="298" lemma="7229" posTag="8325"/><types:Token xmi:id="11479" sofa="431" begin="1685" end="1691" componentId="JULIE Token Boundary Detector" id="299" lemma="1318" posTag="6501"/><types:Token xmi:id="3378" sofa="431" begin="1692" end="1695" componentId="JULIE Token Boundary Detector" id="300" lemma="11192" posTag="3164"/><types:Token xmi:id="3927" sofa="431" begin="1695" end="1696" componentId="JULIE Token Boundary Detector" id="301" lemma="2673" posTag="5987"/><types:Token xmi:id="5921" sofa="431" begin="1697" end="1704" componentId="JULIE Token Boundary Detector" id="302" 
lemma="1388" posTag="3778"/><types:Token xmi:id="5870" sofa="431" begin="1704" end="1705" componentId="JULIE Token Boundary Detector" id="303" lemma="6275" posTag="11925"/><types:Token xmi:id="7908" sofa="431" begin="1706" end="1714" componentId="JULIE Token Boundary Detector" id="304" lemma="9515" posTag="11611"/><types:Token xmi:id="6395" sofa="431" begin="1715" end="1719" componentId="JULIE Token Boundary Detector" id="305" lemma="11022" posTag="11376"/><types:Token xmi:id="8902" sofa="431" begin="1720" end="1733" componentId="JULIE Token Boundary Detector" id="306" lemma="5502" posTag="9018"/><types:Token xmi:id="5087" sofa="431" begin="1733" end="1734" componentId="JULIE Token Boundary Detector" id="307" lemma="7789" posTag="6792"/><types:Token xmi:id="6733" sofa="431" begin="1735" end="1740" componentId="JULIE Token Boundary Detector" id="308" lemma="624" posTag="8316"/><types:Token xmi:id="4055" sofa="431" begin="1741" end="1744" componentId="JULIE Token Boundary Detector" id="309" lemma="2850" posTag="9594"/><types:Token xmi:id="3317" sofa="431" begin="1745" end="1749" componentId="JULIE Token Boundary Detector" id="310" lemma="6924" posTag="5151"/><types:Token xmi:id="3463" sofa="431" begin="1750" end="1754" componentId="JULIE Token Boundary Detector" id="311" lemma="2127" posTag="8731"/><types:Token xmi:id="5243" sofa="431" begin="1754" end="1755" componentId="JULIE Token Boundary Detector" id="312" lemma="9110" posTag="11180"/><types:Token xmi:id="12252" sofa="431" begin="1755" end="1764" componentId="JULIE Token Boundary Detector" id="313" lemma="7927" posTag="3030"/><types:Token xmi:id="8966" sofa="431" begin="1765" end="1767" componentId="JULIE Token Boundary Detector" id="314" lemma="958" posTag="3766"/><types:Token xmi:id="9433" sofa="431" begin="1768" end="1771" componentId="JULIE Token Boundary Detector" id="315" lemma="8610" posTag="7894"/><types:Token xmi:id="7293" sofa="431" begin="1772" end="1780" componentId="JULIE Token Boundary Detector" 
id="316" lemma="5235" posTag="7166"/><types:Token xmi:id="11512" sofa="431" begin="1781" end="1788" componentId="JULIE Token Boundary Detector" id="317" lemma="4905" posTag="2121"/><types:Token xmi:id="10914" sofa="431" begin="1789" end="1791" componentId="JULIE Token Boundary Detector" id="318" lemma="1299" posTag="9783"/><types:Token xmi:id="7797" sofa="431" begin="1792" end="1797" componentId="JULIE Token Boundary Detector" id="319" lemma="3646" posTag="7821"/><types:Token xmi:id="11400" sofa="431" begin="1798" end="1801" componentId="JULIE Token Boundary Detector" id="320" lemma="11468" posTag="2408"/><types:Token xmi:id="8659" sofa="431" begin="1802" end="1812" componentId="JULIE Token Boundary Detector" id="321" lemma="4171" posTag="4565"/><types:Token xmi:id="4630" sofa="431" begin="1813" end="1819" componentId="JULIE Token Boundary Detector" id="322" lemma="7364" posTag="6015"/><types:Token xmi:id="792" sofa="431" begin="1820" end="1825" componentId="JULIE Token Boundary Detector" id="323" lemma="5639" posTag="5187"/><types:Token xmi:id="3605" sofa="431" begin="1825" end="1826" componentId="JULIE Token Boundary Detector" id="324" lemma="6717" posTag="6283"/><types:Token xmi:id="9417" sofa="431" begin="1826" end="1835" componentId="JULIE Token Boundary Detector" id="325" lemma="7427" posTag="1174"/><types:Token xmi:id="6932" sofa="431" begin="1836" end="1843" componentId="JULIE Token Boundary Detector" id="326" lemma="699" posTag="10019"/><types:Token xmi:id="6882" sofa="431" begin="1844" end="1847" componentId="JULIE Token Boundary Detector" id="327" lemma="10234" posTag="3980"/><types:Token xmi:id="1126" sofa="431" begin="1848" end="1851" componentId="JULIE Token Boundary Detector" id="328" lemma="8982" posTag="10392"/><types:Token xmi:id="8336" sofa="431" begin="1852" end="1858" componentId="JULIE Token Boundary Detector" id="329" lemma="9038" posTag="1881"/><types:Token xmi:id="5817" sofa="431" begin="1858" end="1859" componentId="JULIE Token Boundary 
Detector" id="330" lemma="11504" posTag="9895"/><tcas:DocumentAnnotation xmi:id="12478" sofa="431" begin="0" end="1859" language="x-unspecified"/><ext:DBProcessingMetaData xmi:id="12483" sofa="431" begin="0" end="0"><primaryKey>1681975</primaryKey></ext:DBProcessingMetaData><cas:Sofa xmi:id="431" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="Comparison of the effects of the novel vasodilator FK409 with those of nitroglycerin in isolated coronary artery of the dog.&#10;1. The vasorelaxant effects of FK409, a new nitrovasodilator synthesized from a microbial product, were compared with those of nitroglycerin in isolated coronary artery rings of the dog contracted with U46619 (10(-7) M). 2. FK409 (10(-11)-10(-5) M) and nitroglycerin (10(-9)-10(-4) M) each produced a concentration-dependent relaxation. Comparison of EC50 values showed that FK409 was about 25 times more potent than nitroglycerin. 3. Submaximum concentrations of nitroglycerin (10(-6) M) and FK409 (3 x 10(-8) M) elevated guanosine 3':5'-cyclic monophosphate (cyclic GMP) levels, effects associated with vasorelaxation. Adenosine 3':5'-cyclic monophosphate (cyclic AMP) levels were unaffected. 4. The concentration-relaxation curves for nitroglycerin and FK409 were shifted to the right by methylene blue (3 x 10(-6) - 3 x 10(-5) M), an inhibitor of soluble guanylate cyclase, and to the left by M&amp;B22,948 (3 x 10(-6) - 3 x 10(-5) M), an inhibitor of cyclic GMP phosphodiesterase. 5. After exposure of coronary arteries to the maximally-effective concentration of nitroglycerin (10(-4) M), the mean EC50 value of FK409 did not change significantly, although that of nitroglycerin increased about 60 fold. After exposure to the maximally-effective concentration of FK409 (10(-5) M), the mean EC50 value of FK409 increased about 6 fold and that of nitroglycerin about 11 fold. 6. 
These results suggest that the vasorelaxant effect of FK409, like that of nitroglycerin, is due to activation of soluble guanylate cyclase and a resultant increase in intracellular cyclic GMP. However, compared with nitroglycerin, there was less self-tolerance to the relaxant effects of FK409 and relatively little cross-tolerance between the two agents."/><cas:FSArray xmi:id="380" elements="398 60 449 95 258 420 409 38 367 49 269 128 27 117 106 179"/><cas:FSArray xmi:id="342" elements="315 324 9 18 86 333 440 225 297 489 306"/><cas:FSArray xmi:id="460" elements=""/><cas:FSArray xmi:id="438" elements=""/><cas:FSArray xmi:id="173" elements="151 355 498 139"/><cas:FSArray xmi:id="378" elements=""/><cas:FSArray xmi:id="3847" elements="6294"/><cas:FSArray xmi:id="4956" elements="4545"/><cas:FSArray xmi:id="12189" elements="5223"/><cas:FSArray xmi:id="11986" elements="6162"/><cas:FSArray xmi:id="8864" elements="12006"/><cas:FSArray xmi:id="3333" elements="5334"/><cas:FSArray xmi:id="1669" elements="9615"/><cas:FSArray xmi:id="1966" elements="1864"/><cas:FSArray xmi:id="7358" elements="1010"/><cas:FSArray xmi:id="6592" elements="10564"/><cas:FSArray xmi:id="3517" elements="11943"/><cas:FSArray xmi:id="11631" elements="4105"/><cas:FSArray xmi:id="9822" elements="11890"/><cas:FSArray xmi:id="3024" elements="12431"/><cas:FSArray xmi:id="6195" elements="2275"/><cas:FSArray xmi:id="998" elements="11005"/><cas:FSArray xmi:id="8793" elements="3353"/><cas:FSArray xmi:id="6229" elements="8121"/><cas:FSArray xmi:id="1531" elements="12015"/><cas:FSArray xmi:id="786" elements="3095"/><cas:FSArray xmi:id="7473" elements="3569"/><cas:FSArray xmi:id="1655" elements="7951"/><cas:FSArray xmi:id="8109" elements="2899"/><cas:FSArray xmi:id="7832" elements="6783"/><cas:FSArray xmi:id="3113" elements="5299"/><cas:FSArray xmi:id="6364" elements="3703"/><cas:FSArray xmi:id="7325" elements="11495"/><cas:FSArray xmi:id="6548" elements="11114"/><cas:FSArray xmi:id="9206" 
elements="974"/><cas:FSArray xmi:id="11090" elements="5895"/><cas:FSArray xmi:id="1177" elements="7670"/><cas:FSArray xmi:id="4562" elements="9942"/><cas:FSArray xmi:id="7984" elements="8505"/><cas:FSArray xmi:id="10807" elements="855"/><cas:FSArray xmi:id="10062" elements="3740"/><cas:FSArray xmi:id="10883" elements="4426"/><cas:FSArray xmi:id="2610" elements="2012"/><cas:FSArray xmi:id="2411" elements="8759"/><cas:FSArray xmi:id="9168" elements="3520"/><cas:FSArray xmi:id="2065" elements="10822"/><cas:FSArray xmi:id="707" elements="10455"/><cas:FSArray xmi:id="3654" elements="2625"/><cas:FSArray xmi:id="10056" elements="10047"/><cas:FSArray xmi:id="11937" elements="7573"/><cas:FSArray xmi:id="1969" elements="8274"/><cas:FSArray xmi:id="4734" elements="6411"/><cas:FSArray xmi:id="4672" elements="3877"/><cas:FSArray xmi:id="6198" elements="12065"/><cas:FSArray xmi:id="4454" elements="12234"/><cas:FSArray xmi:id="4420" elements="11391"/><cas:FSArray xmi:id="8205" elements="7746"/><cas:FSArray xmi:id="6328" elements="9597"/><cas:FSArray xmi:id="3042" elements="11928"/><cas:FSArray xmi:id="833" elements="6673"/><cas:FSArray xmi:id="8514" elements="5059"/><cas:FSArray xmi:id="7740" elements="6303"/><cas:FSArray xmi:id="10819" elements="10671"/><cas:FSArray xmi:id="5084" elements="11803"/><cas:FSArray xmi:id="11030" elements="5582"/><cas:FSArray xmi:id="11774" elements="5792"/><cas:FSArray xmi:id="12456" elements="5904"/><cas:FSArray xmi:id="11952" elements="6907"/><cas:FSArray xmi:id="12475" elements="11794"/><cas:FSArray xmi:id="8322" elements="2390"/><cas:FSArray xmi:id="7033" elements="10956"/><cas:FSArray xmi:id="10098" elements="11332"/><cas:FSArray xmi:id="4600" elements="12243"/><cas:FSArray xmi:id="1396" elements="2186"/><cas:FSArray xmi:id="1240" elements="7220"/><cas:FSArray xmi:id="6637" elements="3943"/><cas:FSArray xmi:id="8420" elements="5317"/><cas:FSArray xmi:id="2613" elements="11358"/><cas:FSArray xmi:id="3983" elements="5308"/><cas:FSArray 
xmi:id="4498" elements="9523"/><cas:FSArray xmi:id="10395" elements="598"/><cas:FSArray xmi:id="12347" elements="3781"/><cas:FSArray xmi:id="4501" elements="11349"/><cas:FSArray xmi:id="2414" elements="9874"/><cas:FSArray xmi:id="4273" elements="867"/><cas:FSArray xmi:id="5937" elements="1267"/><cas:FSArray xmi:id="7879" elements="2951"/><cas:FSArray xmi:id="8252" elements="4411"/><cas:FSArray xmi:id="11284" elements="4962"/><cas:FSArray xmi:id="660" elements="11093"/><cas:FSArray xmi:id="12276" elements="8884"/><cas:FSArray xmi:id="5399" elements="8650"/><cas:FSArray xmi:id="10535" elements="3814"/><cas:FSArray xmi:id="10059" elements="6603"/><cas:FSArray xmi:id="1470" elements="7679"/><cas:FSArray xmi:id="2183" elements="876"/><cas:FSArray xmi:id="5446" elements="1228"/><cas:FSArray xmi:id="6989" elements="6620"/><cas:FSArray xmi:id="640" elements="7138"/><cas:FSArray xmi:id="6698" elements="9810"/><cas:FSArray xmi:id="6251" elements="6484"/><cas:FSArray xmi:id="8392" elements="10089"/><cas:FSArray xmi:id="12353" elements="4350"/><cas:FSArray xmi:id="1063" elements="10127"/><cas:FSArray xmi:id="7036" elements="10874"/><cas:FSArray xmi:id="4254" elements="7058"/><cas:FSArray xmi:id="2650" elements="4655"/><cas:FSArray xmi:id="10596" elements="2592"/><cas:FSArray xmi:id="9354" elements="3216"/><cas:FSArray xmi:id="5449" elements="7854"/><cas:FSArray xmi:id="8560" elements="10242"/><cas:FSArray xmi:id="8130" elements="3986"/><cas:FSArray xmi:id="4753" elements="1334"/><cas:FSArray xmi:id="7424" elements="10225"/><cas:FSArray xmi:id="2469" elements="5154"/><cas:FSArray xmi:id="9015" elements="5521"/><cas:FSArray xmi:id="7193" elements="7885"/><cas:FSArray xmi:id="8533" elements="10398"/><cas:FSArray xmi:id="12412" elements="11599"/><cas:FSArray xmi:id="9209" elements="10101"/><cas:FSArray xmi:id="8289" elements="3757"/><cas:FSArray xmi:id="6210" elements="3270"/><cas:FSArray xmi:id="10324" elements="6448"/><cas:FSArray xmi:id="7039" elements="3077"/><cas:FSArray 
xmi:id="1853" elements="11057"/><cas:FSArray xmi:id="3233" elements="10729"/><cas:FSArray xmi:id="3657" elements="7990"/><cas:FSArray xmi:id="10489" elements="11634"/><cas:FSArray xmi:id="1739" elements="11367"/><cas:FSArray xmi:id="11868" elements="9006"/><cas:FSArray xmi:id="12136" elements="4359"/><cas:FSArray xmi:id="11476" elements="4276"/><cas:FSArray xmi:id="6504" elements="1610"/><cas:FSArray xmi:id="710" elements="2689"/><cas:FSArray xmi:id="5984" elements="4684"/><cas:FSArray xmi:id="7743" elements="2399"/><cas:FSArray xmi:id="946" elements="8893"/><cas:FSArray xmi:id="2124" elements="5434"/><cas:FSArray xmi:id="4251" elements="2874"/><cas:FSArray xmi:id="3511" elements="11646"/><cas:FSArray xmi:id="2441" elements="2556"/><cas:FSArray xmi:id="4784" elements="2300"/><cas:FSArray xmi:id="1818" elements="9253"/><cas:FSArray xmi:id="983" elements="3621"/><cas:FSArray xmi:id="4929" elements="7380"/><cas:FSArray xmi:id="9308" elements="10587"/><cas:FSArray xmi:id="6457" elements="910"/><cas:FSArray xmi:id="6232" elements="3446"/><cas:FSArray xmi:id="3314" elements="9568"/><cas:FSArray xmi:id="4870" elements="9898"/><cas:FSArray xmi:id="8572" elements="2021"/><cas:FSArray xmi:id="7696" elements="4693"/><cas:FSArray xmi:id="7147" elements="5849"/><cas:FSArray xmi:id="9262" elements="10538"/><cas:FSArray xmi:id="747" elements="3769"/><cas:FSArray xmi:id="3303" elements="11105"/><cas:FSArray xmi:id="1276" elements="10547"/><cas:FSArray xmi:id="1399" elements="12139"/><cas:FSArray xmi:id="2353" elements="3413"/><cas:FSArray xmi:id="2670" elements="6833"/><cas:FSArray xmi:id="5546" elements="7328"/><cas:FSArray xmi:id="12148" elements="9726"/><cas:FSArray xmi:id="4611" elements="1782"/><cas:FSArray xmi:id="5973" elements="9674"/><cas:FSArray xmi:id="11544" elements="9606"/><cas:FSArray xmi:id="9107" elements="9465"/><cas:FSArray xmi:id="11596" elements="5452"/><cas:FSArray xmi:id="11287" elements="2616"/><cas:FSArray xmi:id="9819" elements="1497"/><cas:FSArray 
xmi:id="6445" elements="2460"/><cas:FSArray xmi:id="9311" elements="3995"/><cas:FSArray xmi:id="12350" elements="643"/><cas:FSArray xmi:id="11465" elements="9118"/><cas:FSArray xmi:id="1343" elements="2774"/><cas:FSArray xmi:id="9939" elements="3191"/><cas:FSArray xmi:id="11871" elements="6551"/><cas:FSArray xmi:id="11940" elements="7780"/><cas:FSArray xmi:id="12167" elements="5998"/><cas:FSArray xmi:id="9655" elements="3086"/><cas:FSArray xmi:id="1799" elements="738"/><cas:FSArray xmi:id="8607" elements="9627"/><cas:FSArray xmi:id="4405" elements="4376"/><cas:FSArray xmi:id="11983" elements="824"/><cas:FSArray xmi:id="1666" elements="2730"/><cas:FSArray xmi:id="7435" elements="1672"/><cas:FSArray xmi:id="9636" elements="11293"/><cas:FSArray xmi:id="11102" elements="11777"/><cas:FSArray xmi:id="12170" elements="2488"/><cas:FSArray xmi:id="8319" elements="9999"/><cas:FSArray xmi:id="9892" elements="5038"/><cas:FSArray xmi:id="7337" elements="1362"/><cas:FSArray xmi:id="9624" elements="2356"/><cas:FSArray xmi:id="11313" elements="1506"/><cas:FSArray xmi:id="4781" elements="6355"/><cas:FSArray xmi:id="5047" elements="4096"/><cas:FSArray xmi:id="9715" elements="5425"/><cas:FSArray xmi:id="11379" elements="1371"/><cas:FSArray xmi:id="836" elements="5713"/><cas:FSArray xmi:id="7135" elements="1722"/><cas:FSArray xmi:id="5510" elements="6795"/><cas:FSArray xmi:id="4959" elements="6898"/><cas:FSArray xmi:id="10945" elements="6972"/><cas:FSArray xmi:id="2565" elements="6263"/><cas:FSArray xmi:id="10886" elements="6656"/><cas:FSArray xmi:id="5773" elements="9046"/><cas:FSArray xmi:id="5414" elements="9220"/><cas:FSArray xmi:id="3858" elements="989"/><cas:FSArray xmi:id="8820" elements="9559"/><cas:FSArray xmi:id="10930" elements="5886"/><cas:FSArray xmi:id="4435" elements="3902"/><cas:FSArray xmi:id="3712" elements="2101"/><cas:FSArray xmi:id="12288" elements="8867"/><cas:FSArray xmi:id="1066" elements="8224"/><cas:FSArray xmi:id="6367" elements="10680"/><cas:FSArray 
xmi:id="11643" elements="4646"/><cas:FSArray xmi:id="5603" elements="8784"/><cas:FSArray xmi:id="10795" elements="8032"/><cas:FSArray xmi:id="2110" elements="885"/><cas:FSArray xmi:id="1534" elements="7771"/><cas:FSArray xmi:id="11755" elements="5606"/><cas:FSArray xmi:id="1060" elements="6201"/><cas:FSArray xmi:id="789" elements="8750"/><cas:FSArray xmi:id="5738" elements="4087"/><cas:FSArray xmi:id="11736" elements="10118"/><cas:FSArray xmi:id="7361" elements="4504"/><cas:FSArray xmi:id="1716" elements="12024"/><cas:FSArray xmi:id="1279" elements="6254"/><cas:FSArray xmi:id="11302" elements="8041"/><cas:FSArray xmi:id="4803" elements="8423"/><cas:FSArray xmi:id="7421" elements="1019"/><cas:FSArray xmi:id="8283" elements="10933"/><cas:FSArray xmi:id="5443" elements="7630"/><cas:FSArray xmi:id="2771" elements="11183"/><cas:FSArray xmi:id="2328" elements="10905"/><cas:FSArray xmi:id="758" elements="9098"/><cas:FSArray xmi:id="9556" elements="11622"/><cas:FSArray xmi:id="7882" elements="5405"/><cas:FSArray xmi:id="2908" elements="4675"/><cas:FSArray xmi:id="10855" elements="5655"/><cas:FSArray xmi:id="3578" elements="6812"/><cas:FSArray xmi:id="10336" elements="9577"/><cas:FSArray xmi:id="12341" elements="4979"/><cas:FSArray xmi:id="10222" elements="5858"/><cas:FSArray xmi:id="3514" elements="6758"/><cas:FSArray xmi:id="2350" elements="4457"/><cas:FSArray xmi:id="9171" elements="6749"/><cas:FSArray xmi:id="12117" elements="1402"/><cas:FSArray xmi:id="8074" elements="1069"/><cas:FSArray xmi:id="1697" elements="7067"/><cas:FSArray xmi:id="12344" elements="5948"/><cas:FSArray xmi:id="1307" elements="3104"/><cas:FSArray xmi:id="3394" elements="3336"/><cas:FSArray xmi:id="927" elements="11382"/><cas:FSArray xmi:id="9365" elements="9498"/><cas:FSArray xmi:id="10016" elements="2817"/><cas:FSArray xmi:id="7851" elements="9392"/><cas:FSArray xmi:id="6272" elements="2653"/><cas:FSArray xmi:id="679" elements="11171"/><cas:FSArray xmi:id="11608" elements="7492"/><cas:FSArray 
xmi:id="5867" elements="11997"/><cas:FSArray xmi:id="11955" elements="5198"/><cas:FSArray xmi:id="4423" elements="10339"/><cas:FSArray xmi:id="8249" elements="8403"/><cas:FSArray xmi:id="4408" elements="12201"/><cas:FSArray xmi:id="864" elements="10136"/><cas:FSArray xmi:id="10581" elements="6386"/><cas:FSArray xmi:id="10203" elements="9127"/><cas:FSArray xmi:id="1237" elements="12279"/><cas:FSArray xmi:id="3684" elements="5591"/><cas:FSArray xmi:id="9095" elements="12192"/><cas:FSArray xmi:id="10584" elements="10798"/><cas:FSArray xmi:id="11290" elements="10480"/><cas:FSArray xmi:id="2537" elements="7715"/><cas:FSArray xmi:id="5600" elements="8823"/><cas:FSArray xmi:id="7987" elements="4385"/><cas:FSArray xmi:id="7454" elements="729"/><cas:FSArray xmi:id="7650" elements="1001"/><cas:FSArray xmi:id="7924" elements="5696"/><cas:FSArray xmi:id="3027" elements="6436"/><cas:FSArray xmi:id="7897" elements="7024"/><cas:FSArray xmi:id="12114" elements="2799"/><cas:FSArray xmi:id="5402" elements="12316"/><cas:FSArray xmi:id="7092" elements="3731"/><cas:FSArray xmi:id="1411" elements="7340"/><cas:FSArray xmi:id="1628" elements="10810"/><cas:FSArray xmi:id="8255" elements="5050"/><cas:FSArray xmi:id="10942" elements="3245"/><cas:FSArray xmi:id="2054" elements="1900"/><cas:FSArray xmi:id="8286" elements="4756"/><cas:FSArray xmi:id="2331" elements="8563"/><cas:FSArray xmi:id="986" elements="4988"/><cas:FSArray xmi:id="6821" elements="11966"/><cas:FSArray xmi:id="10492" elements="11916"/><cas:FSArray xmi:id="4036" elements="2808"/><cas:FSArray xmi:id="4394" elements="949"/><cas:FSArray xmi:id="3952" elements="2381"/><cas:FSArray xmi:id="11200" elements="10178"/><cas:FSArray xmi:id="10348" elements="7349"/><cas:FSArray xmi:id="7639" elements="12291"/><cas:FSArray xmi:id="5232" elements="4162"/><cas:FSArray xmi:id="9265" elements="7653"/><cas:FSArray xmi:id="1719" elements="3236"/><cas:FSArray xmi:id="2325" elements="777"/><cas:FSArray xmi:id="8325" elements="8432"/><cas:FSArray 
xmi:id="6501" elements="4997"/><cas:FSArray xmi:id="3164" elements="11679"/><cas:FSArray xmi:id="5987" elements="1619"/><cas:FSArray xmi:id="3778" elements="11547"/><cas:FSArray xmi:id="11925" elements="11203"/><cas:FSArray xmi:id="11611" elements="2092"/><cas:FSArray xmi:id="11376" elements="1585"/><cas:FSArray xmi:id="9018" elements="9883"/><cas:FSArray xmi:id="6792" elements="4341"/><cas:FSArray xmi:id="8316" elements="615"/><cas:FSArray xmi:id="9594" elements="6824"/><cas:FSArray xmi:id="5151" elements="10038"/><cas:FSArray xmi:id="8731" elements="10275"/><cas:FSArray xmi:id="11180" elements="1957"/><cas:FSArray xmi:id="3030" elements="11416"/><cas:FSArray xmi:id="3766" elements="3971"/><cas:FSArray xmi:id="7894" elements="5461"/><cas:FSArray xmi:id="7166" elements="5573"/><cas:FSArray xmi:id="2121" elements="10738"/><cas:FSArray xmi:id="9783" elements="2601"/><cas:FSArray xmi:id="7821" elements="682"/><cas:FSArray xmi:id="2408" elements="7999"/><cas:FSArray xmi:id="4565" elements="9021"/><cas:FSArray xmi:id="6015" elements="6515"/><cas:FSArray xmi:id="5187" elements="10351"/><cas:FSArray xmi:id="6283" elements="9841"/><cas:FSArray xmi:id="1174" elements="10161"/><cas:FSArray xmi:id="10019" elements="11907"/><cas:FSArray xmi:id="3980" elements="3033"/><cas:FSArray xmi:id="10392" elements="8112"/><cas:FSArray xmi:id="1881" elements="1282"/><cas:FSArray xmi:id="9895" elements="10327"/><cas:View sofa="431" members="1 9 18 86 225 297 306 315 324 333 440 489 27 38 49 60 95 106 117 128 179 258 269 367 398 409 420 449 71 139 151 355 498 163 215 462 190 198 280 472 234 526 582 534 542 590 558 510 574 550 566 518 6294 4545 5223 6162 12006 5334 9615 1864 1010 10564 11943 4105 11890 12431 2275 11005 3353 8121 12015 3095 3569 7951 2899 6783 5299 3703 11495 11114 974 5895 7670 9942 8505 855 3740 4426 2012 8759 3520 10822 10455 2625 10047 7573 8274 6411 3877 12065 12234 11391 7746 9597 11928 6673 5059 6303 10671 11803 5582 5792 5904 6907 11794 2390 10956 11332 12243 2186 
7220 3943 5317 11358 5308 9523 598 3781 11349 9874 867 1267 2951 4411 4962 11093 8884 8650 3814 6603 7679 876 1228 6620 7138 9810 6484 10089 4350 10127 10874 7058 4655 2592 3216 7854 10242 3986 1334 10225 5154 5521 7885 10398 11599 10101 3757 3270 6448 3077 11057 10729 7990 11634 11367 9006 4359 4276 1610 2689 4684 2399 8893 5434 2874 11646 2556 2300 9253 3621 7380 10587 910 3446 9568 9898 2021 4693 5849 10538 3769 11105 10547 12139 3413 6833 7328 9726 1782 9674 9606 9465 5452 2616 1497 2460 3995 643 9118 2774 3191 6551 7780 5998 3086 738 9627 4376 824 2730 1672 11293 11777 2488 9999 5038 1362 2356 1506 6355 4096 5425 1371 5713 1722 6795 6898 6972 6263 6656 9046 9220 989 9559 5886 3902 2101 8867 8224 10680 4646 8784 8032 885 7771 5606 6201 8750 4087 10118 4504 12024 6254 8041 8423 1019 10933 7630 11183 10905 9098 11622 5405 4675 5655 6812 9577 4979 5858 6758 4457 6749 1402 1069 7067 5948 3104 3336 11382 9498 2817 9392 2653 11171 7492 11997 5198 10339 8403 12201 10136 6386 9127 12279 5591 12192 10798 10480 7715 8823 4385 729 1001 5696 6436 7024 2799 12316 3731 7340 10810 5050 3245 1900 4756 8563 4988 11966 11916 2808 949 2381 10178 7349 12291 4162 7653 3236 777 8432 4997 11679 1619 11547 11203 2092 1585 9883 4341 615 6824 10038 10275 1957 11416 3971 5461 5573 10738 2601 682 7999 9021 6515 10351 9841 10161 11907 3033 8112 1282 10327 6476 3749 11958 10615 3069 9532 11704 6138 938 9357 11786 2076 1346 6171 6916 7590 10847 10170 8768 4235 6286 7372 11341 2284 3581 8024 11049 8050 9490 7662 3823 11899 2747 2167 4862 2175 9457 5326 8465 8599 11528 2662 11671 4838 5022 10284 5275 4397 7582 3850 2540 10008 6725 8812 10495 691 10110 7446 11747 8832 6347 4603 6981 12090 11739 6701 6034 3806 6042 10447 5940 10639 9775 9449 11536 9718 10965 2584 8165 11556 7269 9751 5103 1310 8776 1028 9850 3295 5990 11212 607 6082 1925 7960 4554 9586 1791 2417 8536 9384 6665 3438 10556 7196 10251 7688 919 6612 10831 10989 7127 9212 10779 1708 4932 8008 11882 11260 8856 3225 1949 2642 2681 
11305 11014 11236 10839 4592 11614 2739 4846 12210 8395 10948 11066 10423 8591 1700 9330 750 9759 5470 11975 12440 7050 10997 9087 5557 1689 5705 2113 9284 5111 1036 2038 7185 9802 652 7438 6804 6948 7824 1972 2826 1259 11874 11989 2634 8958 2548 12173 6007 1430 4079 1326 10022 3345 5478 1873 2030 7642 8368 3545 6595 7042 5841 3061 5030 5647 4466 2084 9858 1251 5163 7541 12448 3790 4243 10689 12181 4971 7813 5833 4584 9907 11425 5549 12356 5976 6629 9959 7549 10771 2068 3487 2513 1731 1489 9767 8328 3200 6874 3798 10747 9951 9507 8181 6493 3638 7900 3455 1354 4071 4138 2292 5375 8876 2057 8918 4227 9866 8173 4490 4664 632 10030 966 3676 5513 10065 4854 8457 10787 9915 1631 3630 10573 1291 11712 2211 3132 1856 8412 5913 6507 1681 4114 8723 2943 1380 8016 1658 3167 8292 1758 5190 6709 5565 4333 4368 9245 5631 9030 7501 3479 2046 6540 3140 2251 12268 930 3208 3306 1243 5417 11812 10663 7229 1318 11192 2673 1388 6275 9515 11022 5502 7789 624 2850 6924 2127 9110 7927 958 8610 5235 4905 1299 3646 11468 4171 7364 5639 6717 7427 699 10234 8982 9038 11504 10292 3911 5801 4146 5359 8189 9735 3529 7509 6576 8208 7755 2444 2755 3148 7935 3861 7525 2259 1569 4301 10431 12074 12415 9474 9658 10360 9983 4211 4179 4806 5957 3008 663 2521 10259 3831 4940 2960 9967 2151 5171 10073 8077 5119 8441 8093 5722 8058 2309 1212 9229 1158 3254 5664 1909 2783 10376 2219 3495 6420 1414 10623 7095 8352 839 2568 9055 4737 10599 8258 1594 11139 9699 6066 1553 3715 5680 4822 7237 4438 7557 2334 6235 4317 2858 1196 4718 9923 1473 7863 5135 9071 3660 4702 3422 11564 3175 9368 6312 4614 3116 6682 6090 4787 9401 1044 6106 8796 9338 7150 10407 8233 7968 12300 4513 7253 2834 7476 9292 4039 2195 4873 1180 1933 5283 8376 3553 6370 4004 11433 2927 4529 7204 7169 8691 3886 2135 2976 6018 10973 11220 10858 3279 1078 4474 7405 11655 10697 12459 4765 7835 1980 12151 7277 8675 1884 8926 2425 11033 10519 4568 3687 9683 12364 9190 9825 11836 2714 7699 9268 11123 5259 2698 1537 2235 713 808 7008 6640 5068 11852 
6213 8840 12380 8473 11155 5383 11244 8707 7389 7614 1094 7309 11720 6992 11580 5615 9314 9786 4913 4122 4020 6050 10308 6179 5207 8489 5530 1742 6956 6122 12396 2472 12098 894 5776 3362 10713 6560 10647 11074 2497 8517 1515 10755 8544 8942 8133 10889 11758 2992 6460 12325 5486 6842 8734 12033 10464 1821 1110 12218 9152 12049 1142 12120 3045 3955 9136 4257 9540 11688 1996 6524 5006 1802 6331 5741 7457 4889 3397 2883 6858 8300 10145 8990 9174 2365 10503 11316 1438 8575 8618 9639 5757 7076 1454 2911 1766 7111 1639 7724 7598 1837 8634 8149 5343 6146 4195 3589 6767 11268 761 10206 11449 4285 10187 11820 11479 3378 3927 5921 5870 7908 6395 8902 5087 6733 4055 3317 3463 5243 12252 8966 9433 7293 11512 10914 7797 11400 8659 4630 792 3605 9417 6932 6882 1126 8336 5817 12478 12483"/></xmi:XMI>
\ No newline at end of file
diff --git a/jcore-flair-token-embedding-ae/pom.xml b/jcore-flair-token-embedding-ae/pom.xml
index 483998eda..251ffedb9 100644
--- a/jcore-flair-token-embedding-ae/pom.xml
+++ b/jcore-flair-token-embedding-ae/pom.xml
@@ -33,8 +33,8 @@
             <version>1.0.1</version>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
         <dependency>
             <groupId>com.google.code.gson</groupId>
diff --git a/jcore-flair-token-embedding-ae/src/main/resources/de/julielab/jcore/ae/fte/python/getEmbeddingScript.py b/jcore-flair-token-embedding-ae/src/main/resources/de/julielab/jcore/ae/fte/python/getEmbeddingScript.py
index a262f84af..43095851a 100644
--- a/jcore-flair-token-embedding-ae/src/main/resources/de/julielab/jcore/ae/fte/python/getEmbeddingScript.py
+++ b/jcore-flair-token-embedding-ae/src/main/resources/de/julielab/jcore/ae/fte/python/getEmbeddingScript.py
@@ -1,15 +1,11 @@
-import os
-from flair.models import SequenceTagger
+import json
+import sys
+import time
 from flair.data import Sentence
-from typing import List
-
-from flair.embeddings import WordEmbeddings, CharacterEmbeddings, BytePairEmbeddings, FlairEmbeddings, BertEmbeddings, ELMoEmbeddings
 from flair.embeddings import StackedEmbeddings
-
-import sys
-import json
+from flair.embeddings import WordEmbeddings, CharacterEmbeddings, BytePairEmbeddings, FlairEmbeddings, BertEmbeddings, \
+    ELMoEmbeddings
 from struct import *
-import time
 
 
 def decodeString(buffer):
diff --git a/jcore-flair-token-embedding-ae/src/test/java/de/julielab/jcore/ae/fte/EmbeddingScriptTest.java b/jcore-flair-token-embedding-ae/src/test/java/de/julielab/jcore/ae/fte/EmbeddingScriptTest.java
index ee2ff04ae..d62ad9b4e 100644
--- a/jcore-flair-token-embedding-ae/src/test/java/de/julielab/jcore/ae/fte/EmbeddingScriptTest.java
+++ b/jcore-flair-token-embedding-ae/src/test/java/de/julielab/jcore/ae/fte/EmbeddingScriptTest.java
@@ -5,8 +5,8 @@
 import de.julielab.ipc.javabridge.ResultDecoders;
 import de.julielab.ipc.javabridge.StdioBridge;
 import org.assertj.core.data.Offset;
-import org.junit.BeforeClass;
-import org.junit.Test;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
 
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -20,11 +20,11 @@ public class EmbeddingScriptTest {
     private static final String SCRIPT_PATH = "src/main/resources/de/julielab/jcore/ae/fte/python/getEmbeddingScript.py";
     private static String pythonCommand;
 
-    @BeforeClass
+    @BeforeAll
     public static void setup() {
         pythonCommand = System.getenv("PYTHON");
         if (pythonCommand == null)
-            pythonCommand = "python3.6";
+            pythonCommand = "python";
     }
 
     @Test
@@ -49,11 +49,7 @@ public void testPythonEmbeddingScriptSimple() throws Exception {
         final double[][] vectors = response.map(ResultDecoders.decodeVectors).findAny().get();
         bridge.stop();
 
-        assertThat(vectors).hasSize(10);
-        for (double[] vector : vectors) {
-            // The vectors should all have a dimensionality of 1024
-            assertThat(vector.length).isEqualTo(1024);
-        }
+        assertThat(vectors).hasDimensions(10, 1024);
 
         // Those values were output using print(token.embedding.numpy(), file=sys.stderr) in the script
         assertThat(vectors[0][0]).isCloseTo(1.8812446e-01, Offset.offset(0.000001));
@@ -86,11 +82,7 @@ public void testPythonEmbeddingScriptSpecificVectorsResponse() throws Exception
         final double[][] vectors = response.map(ResultDecoders.decodeVectors).findAny().get();
         bridge.stop();
 
-        assertThat(vectors).hasSize(2);
-        for (int i = 0; i < vectors.length; i++) {
-            // The vectors should all have a dimensionality of 1024
-            assertThat(vectors[i].length).isEqualTo(1024);
-        }
+        assertThat(vectors).hasDimensions(2, 1024);
 
         // Those values were output using print(token.embedding.numpy(), file=sys.stderr) in the script
         assertThat(vectors[0][0]).isCloseTo(-0.16511102, Offset.offset(0.000001));
@@ -128,7 +120,7 @@ public void testPythonEmbeddingScriptMultipleSentences() throws Exception {
         final double[][] vectors = response.map(ResultDecoders.decodeVectors).findAny().get();
         bridge.stop();
 
-        assertThat(vectors).hasSize(12);
+        assertThat(vectors.length).isEqualTo(12);
 
     }
 }
diff --git a/jcore-flair-token-embedding-ae/src/test/java/de/julielab/jcore/ae/fte/FlairTokenEmbeddingAnnotatorTest.java b/jcore-flair-token-embedding-ae/src/test/java/de/julielab/jcore/ae/fte/FlairTokenEmbeddingAnnotatorTest.java
index 200bb491c..d67615d3e 100644
--- a/jcore-flair-token-embedding-ae/src/test/java/de/julielab/jcore/ae/fte/FlairTokenEmbeddingAnnotatorTest.java
+++ b/jcore-flair-token-embedding-ae/src/test/java/de/julielab/jcore/ae/fte/FlairTokenEmbeddingAnnotatorTest.java
@@ -8,7 +8,7 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.util.Collection;
 
@@ -29,7 +29,9 @@ public void testEmbeddingAnnotator() throws Exception {
         addTokens(jCas);
 
         final String embeddingPath = "flair:src/test/resources/gene_small_best_lm.pt";
-        final AnalysisEngine engine = AnalysisEngineFactory.createEngine("de.julielab.jcore.ae.fte.desc.jcore-flair-token-embedding-ae", FlairTokenEmbeddingAnnotator.PARAM_EMBEDDING_PATH, embeddingPath);
+        final AnalysisEngine engine = AnalysisEngineFactory.createEngine("de.julielab.jcore.ae.fte.desc.jcore-flair-token-embedding-ae",
+                FlairTokenEmbeddingAnnotator.PARAM_EMBEDDING_PATH, embeddingPath,
+                FlairTokenEmbeddingAnnotator.PARAM_PYTHON_EXECUTABLE, "python");
 
         engine.process(jCas);
 
@@ -58,7 +60,10 @@ public void testEmbeddingAnnotatorWithFilterAnnotation() throws Exception {
         new Gene(jCas, 75, 91).addToIndexes();
 
         final String embeddingPath = "flair:src/test/resources/gene_small_best_lm.pt";
-        final AnalysisEngine engine = AnalysisEngineFactory.createEngine("de.julielab.jcore.ae.fte.desc.jcore-flair-token-embedding-ae", FlairTokenEmbeddingAnnotator.PARAM_EMBEDDING_PATH, embeddingPath, FlairTokenEmbeddingAnnotator.PARAM_COMPUTATION_FILTER, "de.julielab.jcore.types.Gene");
+        final AnalysisEngine engine = AnalysisEngineFactory.createEngine("de.julielab.jcore.ae.fte.desc.jcore-flair-token-embedding-ae",
+                FlairTokenEmbeddingAnnotator.PARAM_EMBEDDING_PATH, embeddingPath,
+                FlairTokenEmbeddingAnnotator.PARAM_COMPUTATION_FILTER, "de.julielab.jcore.types.Gene",
+                FlairTokenEmbeddingAnnotator.PARAM_PYTHON_EXECUTABLE, "python");
 
         engine.process(jCas);
 
diff --git a/jcore-flow-controllers/pom.xml b/jcore-flow-controllers/pom.xml
index d17ecac74..fe3e3ff4e 100644
--- a/jcore-flow-controllers/pom.xml
+++ b/jcore-flow-controllers/pom.xml
@@ -24,7 +24,6 @@
         <dependency>
             <groupId>org.junit.jupiter</groupId>
             <artifactId>junit-jupiter-engine</artifactId>
-            <scope>test</scope>
         </dependency>
         <dependency>
             <groupId>ch.qos.logback</groupId>
diff --git a/jcore-iexml-consumer/pom.xml b/jcore-iexml-consumer/pom.xml
index 8924c020c..5d7a199a2 100644
--- a/jcore-iexml-consumer/pom.xml
+++ b/jcore-iexml-consumer/pom.xml
@@ -77,8 +77,8 @@
             <version>2.6.0-SNAPSHOT</version>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <organization>
diff --git a/jcore-iexml-reader/pom.xml b/jcore-iexml-reader/pom.xml
index 2ce284fda..3d1e90378 100644
--- a/jcore-iexml-reader/pom.xml
+++ b/jcore-iexml-reader/pom.xml
@@ -78,8 +78,8 @@
             <version>2.6.0-SNAPSHOT</version>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <description>Reader for IEXML files as used in the Mantra project/challenge</description>
diff --git a/jcore-ign-reader/pom.xml b/jcore-ign-reader/pom.xml
index 423a3fbce..8bd754eaa 100644
--- a/jcore-ign-reader/pom.xml
+++ b/jcore-ign-reader/pom.xml
@@ -35,8 +35,8 @@
             <scope>test</scope>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <organization>
diff --git a/jcore-ign-reader/src/test/java/de/julielab/jcore/reader/ign/IGNReaderTest.java b/jcore-ign-reader/src/test/java/de/julielab/jcore/reader/ign/IGNReaderTest.java
index 11e48e537..e0fdec94c 100644
--- a/jcore-ign-reader/src/test/java/de/julielab/jcore/reader/ign/IGNReaderTest.java
+++ b/jcore-ign-reader/src/test/java/de/julielab/jcore/reader/ign/IGNReaderTest.java
@@ -19,11 +19,11 @@
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.FSArray;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.util.Collection;
 
-import static org.junit.Assert.*;
+import static org.junit.jupiter.api.Assertions.*;
 
 public class IGNReaderTest {
 	private static final String READER_DESCRIPTOR = "de.julielab.jcore.reader.ign.desc.jcore-ign-reader";
diff --git a/jcore-iob-consumer/pom.xml b/jcore-iob-consumer/pom.xml
index e09d8591a..b1a21c3b7 100644
--- a/jcore-iob-consumer/pom.xml
+++ b/jcore-iob-consumer/pom.xml
@@ -34,8 +34,8 @@
             <version>1.0.7</version>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
         <dependency>
             <groupId>commons-io</groupId>
diff --git a/jcore-iob-consumer/src/main/java/de/julielab/jcore/consumer/cas2iob/utils/UIMAUtils.java b/jcore-iob-consumer/src/main/java/de/julielab/jcore/consumer/cas2iob/utils/UIMAUtils.java
index 3e6affd02..fa06059f6 100644
--- a/jcore-iob-consumer/src/main/java/de/julielab/jcore/consumer/cas2iob/utils/UIMAUtils.java
+++ b/jcore-iob-consumer/src/main/java/de/julielab/jcore/consumer/cas2iob/utils/UIMAUtils.java
@@ -9,6 +9,7 @@
 package de.julielab.jcore.consumer.cas2iob.utils;
 
 import org.apache.uima.cas.FSIterator;
+import org.apache.uima.cas.text.AnnotationIndex;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
 
diff --git a/jcore-iob-consumer/src/test/java/de/julielab/jcore/consumer/cas2iob/main/ToIOBConsumerTest.java b/jcore-iob-consumer/src/test/java/de/julielab/jcore/consumer/cas2iob/main/ToIOBConsumerTest.java
index fefa5975a..e1d926452 100644
--- a/jcore-iob-consumer/src/test/java/de/julielab/jcore/consumer/cas2iob/main/ToIOBConsumerTest.java
+++ b/jcore-iob-consumer/src/test/java/de/julielab/jcore/consumer/cas2iob/main/ToIOBConsumerTest.java
@@ -30,7 +30,7 @@
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.File;
 import java.io.FileInputStream;
diff --git a/jcore-jnet-ae/pom.xml b/jcore-jnet-ae/pom.xml
index ea8a89340..31f7e544b 100644
--- a/jcore-jnet-ae/pom.xml
+++ b/jcore-jnet-ae/pom.xml
@@ -117,8 +117,8 @@
             <artifactId>julielab-java-utilities</artifactId>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <organization>
diff --git a/jcore-jnet-ae/src/test/java/de/julielab/jcore/ae/jnet/cli/JNETApplicationTest.java b/jcore-jnet-ae/src/test/java/de/julielab/jcore/ae/jnet/cli/JNETApplicationTest.java
index cdfe60693..4cc449a62 100644
--- a/jcore-jnet-ae/src/test/java/de/julielab/jcore/ae/jnet/cli/JNETApplicationTest.java
+++ b/jcore-jnet-ae/src/test/java/de/julielab/jcore/ae/jnet/cli/JNETApplicationTest.java
@@ -7,11 +7,11 @@
 package de.julielab.jcore.ae.jnet.cli;
 
 import org.junit.After;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.File;
 
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class JNETApplicationTest {
 	private static final String PREFIX = "src/test/resources/de/julielab/jcore/ae/jnet/cli/";
diff --git a/jcore-jnet-ae/src/test/java/de/julielab/jcore/ae/jnet/tagger/NETaggerTest.java b/jcore-jnet-ae/src/test/java/de/julielab/jcore/ae/jnet/tagger/NETaggerTest.java
index f21a11d09..e05e6a6c1 100644
--- a/jcore-jnet-ae/src/test/java/de/julielab/jcore/ae/jnet/tagger/NETaggerTest.java
+++ b/jcore-jnet-ae/src/test/java/de/julielab/jcore/ae/jnet/tagger/NETaggerTest.java
@@ -2,7 +2,7 @@
 
 import cc.mallet.types.Instance;
 import cc.mallet.types.InstanceList;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.InputStream;
 import java.util.ArrayList;
diff --git a/jcore-jnet-ae/src/test/java/de/julielab/jcore/ae/jnet/uima/MiniTestapp.java b/jcore-jnet-ae/src/test/java/de/julielab/jcore/ae/jnet/uima/MiniTestapp.java
index 1b1ed323f..006328391 100644
--- a/jcore-jnet-ae/src/test/java/de/julielab/jcore/ae/jnet/uima/MiniTestapp.java
+++ b/jcore-jnet-ae/src/test/java/de/julielab/jcore/ae/jnet/uima/MiniTestapp.java
@@ -35,8 +35,8 @@
 import org.apache.uima.util.CasCreationUtils;
 import org.apache.uima.util.XMLInputSource;
 import org.apache.uima.util.XMLSerializer;
-import org.junit.After;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.xml.sax.SAXException;
@@ -47,7 +47,7 @@
 import java.io.IOException;
 import java.nio.charset.Charset;
 
-import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 
 public class MiniTestapp {
 
@@ -61,7 +61,7 @@ public class MiniTestapp {
 
     private static final String ANNOTATOR_DESC = PREFIX + "EntityAnnotatorTest.xml";
 
-    @After
+    @AfterEach
     public void clean() {
         if (new File(TEST_XMI_OUT).isFile()) {
             new File(TEST_XMI_OUT).delete();
diff --git a/jcore-jpos-ae/pom.xml b/jcore-jpos-ae/pom.xml
index 87cbc7fc5..4f195e62d 100644
--- a/jcore-jpos-ae/pom.xml
+++ b/jcore-jpos-ae/pom.xml
@@ -114,8 +114,8 @@
             <version>2.1.2</version>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <organization>
diff --git a/jcore-jpos-ae/src/test/java/de/julielab/jcore/ae/jpos/postagger/POSAnnotatorTest.java b/jcore-jpos-ae/src/test/java/de/julielab/jcore/ae/jpos/postagger/POSAnnotatorTest.java
index c7a03c06d..50c639d51 100644
--- a/jcore-jpos-ae/src/test/java/de/julielab/jcore/ae/jpos/postagger/POSAnnotatorTest.java
+++ b/jcore-jpos-ae/src/test/java/de/julielab/jcore/ae/jpos/postagger/POSAnnotatorTest.java
@@ -17,9 +17,9 @@
 import org.apache.uima.jcas.tcas.Annotation;
 import org.apache.uima.resource.ResourceSpecifier;
 import org.apache.uima.util.XMLInputSource;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
-import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 
 public class POSAnnotatorTest {
 
diff --git a/jcore-jsbd-ae/pom.xml b/jcore-jsbd-ae/pom.xml
index 964b14ef9..e21b02e2b 100644
--- a/jcore-jsbd-ae/pom.xml
+++ b/jcore-jsbd-ae/pom.xml
@@ -76,7 +76,6 @@
         <dependency>
             <groupId>org.assertj</groupId>
             <artifactId>assertj-core</artifactId>
-            <version>3.9.1</version>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
@@ -103,6 +102,10 @@
             <artifactId>mallet</artifactId>
             <version>2.0.8</version>
         </dependency>
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-lang3</artifactId>
+        </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-utilities</artifactId>
@@ -112,6 +115,10 @@
             <groupId>de.julielab</groupId>
             <artifactId>jcore-descriptor-creator</artifactId>
         </dependency>
+        <dependency>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter</artifactId>
+        </dependency>
     </dependencies>
     <organization>
         <name>JULIE Lab Jena, Germany</name>
diff --git a/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/Abstract2UnitPipeTest.java b/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/Abstract2UnitPipeTest.java
index 3d7f63cc7..91ffa9f45 100644
--- a/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/Abstract2UnitPipeTest.java
+++ b/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/Abstract2UnitPipeTest.java
@@ -15,8 +15,8 @@
 import cc.mallet.types.Token;
 import cc.mallet.types.TokenSequence;
 import org.assertj.core.data.Offset;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 import java.util.ArrayList;
 import java.util.List;
@@ -27,7 +27,7 @@ public class Abstract2UnitPipeTest {
 
     protected static Pipe pipe;
 
-    @Before
+    @BeforeEach
     public void init() {
         pipe = new Abstract2UnitPipe(false);
     }
diff --git a/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/SentenceSplitterTest.java b/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/SentenceSplitterTest.java
index 8715c714b..a3ce21a17 100644
--- a/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/SentenceSplitterTest.java
+++ b/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/SentenceSplitterTest.java
@@ -18,7 +18,7 @@
 import cc.mallet.pipe.Pipe;
 import cc.mallet.types.Instance;
 import cc.mallet.types.InstanceList;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -26,8 +26,8 @@
 import java.util.ArrayList;
 import java.util.List;
 
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 /**
  * Test for the class {@link SentenceSplitter}
diff --git a/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotatorTest.java b/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotatorTest.java
index 5506d38b8..0f0870ae8 100644
--- a/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotatorTest.java
+++ b/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotatorTest.java
@@ -34,7 +34,7 @@
 import org.apache.uima.resource.ResourceInitializationException;
 import org.apache.uima.resource.ResourceSpecifier;
 import org.apache.uima.util.XMLInputSource;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -44,7 +44,7 @@
 import java.util.stream.Collectors;
 
 import static org.assertj.core.api.Assertions.assertThat;
-import static org.junit.Assert.*;
+import static org.junit.jupiter.api.Assertions.*;
 public class SentenceAnnotatorTest {
 
 	/**
@@ -224,7 +224,7 @@ public void testSentenceDelimiterTypes() throws Exception {
 		while (it.hasNext()) {
 			Annotation sentence = it.next();
 			Range<Integer> sentenceRange = Range.between(sentence.getBegin(), sentence.getEnd());
-			assertTrue("Range " + sentenceRange + " was not expected", expectedSpans.remove(sentenceRange));
+			assertTrue(expectedSpans.remove(sentenceRange), "Range " + sentenceRange + " was not expected");
 		}
 		assertTrue(expectedSpans.isEmpty());
 	}
diff --git a/jcore-jtbd-ae/pom.xml b/jcore-jtbd-ae/pom.xml
index 0c7e7d127..54671bfc1 100644
--- a/jcore-jtbd-ae/pom.xml
+++ b/jcore-jtbd-ae/pom.xml
@@ -91,8 +91,8 @@
             <version>2.0.8</version>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <name>JCoRe Token Annotator</name>
diff --git a/jcore-jtbd-ae/src/test/java/de/julielab/jcore/ae/jtbd/TokenizerTest.java b/jcore-jtbd-ae/src/test/java/de/julielab/jcore/ae/jtbd/TokenizerTest.java
index c953307c1..e99c1f2f2 100644
--- a/jcore-jtbd-ae/src/test/java/de/julielab/jcore/ae/jtbd/TokenizerTest.java
+++ b/jcore-jtbd-ae/src/test/java/de/julielab/jcore/ae/jtbd/TokenizerTest.java
@@ -24,7 +24,7 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -39,7 +39,7 @@
 import java.util.List;
 import java.util.stream.Collectors;
 
-import static org.junit.Assert.*;
+import static org.junit.jupiter.api.Assertions.*;
 
 /**
  * Test for the class {@link Tokenizer}
diff --git a/jcore-jtbd-ae/src/test/java/de/julielab/jcore/ae/jtbd/main/TokenAnnotatorTest.java b/jcore-jtbd-ae/src/test/java/de/julielab/jcore/ae/jtbd/main/TokenAnnotatorTest.java
index 4e3dfe9b3..37d8571f9 100644
--- a/jcore-jtbd-ae/src/test/java/de/julielab/jcore/ae/jtbd/main/TokenAnnotatorTest.java
+++ b/jcore-jtbd-ae/src/test/java/de/julielab/jcore/ae/jtbd/main/TokenAnnotatorTest.java
@@ -26,7 +26,7 @@
 import org.apache.uima.resource.ResourceInitializationException;
 import org.apache.uima.resource.ResourceSpecifier;
 import org.apache.uima.util.XMLInputSource;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
diff --git a/jcore-julielab-entity-evaluator-consumer/pom.xml b/jcore-julielab-entity-evaluator-consumer/pom.xml
index 35ae8b960..e0e543814 100644
--- a/jcore-julielab-entity-evaluator-consumer/pom.xml
+++ b/jcore-julielab-entity-evaluator-consumer/pom.xml
@@ -45,8 +45,8 @@
             <artifactId>julielab-java-utilities</artifactId>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
         <dependency>
             <groupId>org.apache.commons</groupId>
diff --git a/jcore-julielab-entity-evaluator-consumer/src/test/java/de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumerTest.java b/jcore-julielab-entity-evaluator-consumer/src/test/java/de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumerTest.java
index 69010da56..ca29657b9 100644
--- a/jcore-julielab-entity-evaluator-consumer/src/test/java/de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumerTest.java
+++ b/jcore-julielab-entity-evaluator-consumer/src/test/java/de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumerTest.java
@@ -22,7 +22,7 @@
 import org.apache.uima.jcas.cas.DoubleArray;
 import org.apache.uima.jcas.cas.FSArray;
 import org.apache.uima.jcas.cas.StringArray;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.ByteArrayInputStream;
 import java.io.File;
@@ -34,8 +34,8 @@
 import java.util.zip.GZIPInputStream;
 
 import static de.julielab.jcore.consumer.entityevaluator.EntityEvaluatorConsumer.*;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class EntityEvaluatorConsumerTest {
 
diff --git a/jcore-likelihood-assignment-ae/pom.xml b/jcore-likelihood-assignment-ae/pom.xml
index d053fef46..0ab512b9b 100644
--- a/jcore-likelihood-assignment-ae/pom.xml
+++ b/jcore-likelihood-assignment-ae/pom.xml
@@ -33,8 +33,8 @@
             <version>${jcore-types-version}</version>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <name>JCoRe Likelihood Assignment AE</name>
diff --git a/jcore-likelihood-assignment-ae/src/test/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotatorTest.java b/jcore-likelihood-assignment-ae/src/test/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotatorTest.java
index 5caf84f55..6fe9746f5 100644
--- a/jcore-likelihood-assignment-ae/src/test/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotatorTest.java
+++ b/jcore-likelihood-assignment-ae/src/test/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotatorTest.java
@@ -12,14 +12,14 @@
 import org.apache.uima.resource.ResourceSpecifier;
 import org.apache.uima.util.InvalidXMLException;
 import org.apache.uima.util.XMLInputSource;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 import java.util.Iterator;
 
-import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 
 
 /**
diff --git a/jcore-likelihood-detection-ae/pom.xml b/jcore-likelihood-detection-ae/pom.xml
index eb4aaa51e..1bee1538d 100644
--- a/jcore-likelihood-detection-ae/pom.xml
+++ b/jcore-likelihood-detection-ae/pom.xml
@@ -42,8 +42,8 @@
             <artifactId>julielab-java-utilities</artifactId>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <name>JCoRe Likelihood Detection AE</name>
diff --git a/jcore-likelihood-detection-ae/src/test/java/de/julielab/jcore/ae/likelihooddetection/LikelihoodDetectionAnnotatorTest.java b/jcore-likelihood-detection-ae/src/test/java/de/julielab/jcore/ae/likelihooddetection/LikelihoodDetectionAnnotatorTest.java
index 864b0c431..814ce9755 100644
--- a/jcore-likelihood-detection-ae/src/test/java/de/julielab/jcore/ae/likelihooddetection/LikelihoodDetectionAnnotatorTest.java
+++ b/jcore-likelihood-detection-ae/src/test/java/de/julielab/jcore/ae/likelihooddetection/LikelihoodDetectionAnnotatorTest.java
@@ -11,7 +11,7 @@
 import org.apache.uima.resource.ResourceSpecifier;
 import org.apache.uima.util.InvalidXMLException;
 import org.apache.uima.util.XMLInputSource;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -19,7 +19,7 @@
 import java.util.ArrayList;
 import java.util.Iterator;
 
-import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 
 
 /**
diff --git a/jcore-line-multiplier/pom.xml b/jcore-line-multiplier/pom.xml
index f81a228ca..2bd30a4d4 100644
--- a/jcore-line-multiplier/pom.xml
+++ b/jcore-line-multiplier/pom.xml
@@ -29,8 +29,8 @@
             <version>${jcore-types-version}</version>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
         <dependency>
             <groupId>org.assertj</groupId>
diff --git a/jcore-line-multiplier/src/test/java/de/julielab/jcore/multiplier/line/LineMultiplierTest.java b/jcore-line-multiplier/src/test/java/de/julielab/jcore/multiplier/line/LineMultiplierTest.java
index 23b7e9ea3..5ecd2c19a 100644
--- a/jcore-line-multiplier/src/test/java/de/julielab/jcore/multiplier/line/LineMultiplierTest.java
+++ b/jcore-line-multiplier/src/test/java/de/julielab/jcore/multiplier/line/LineMultiplierTest.java
@@ -5,13 +5,13 @@
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.util.ArrayList;
 import java.util.List;
 
 import static org.assertj.core.api.Assertions.assertThat;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 /**
  * Unit tests for jcore-line-multiplier.
  */
diff --git a/jcore-line-multiplier/target/test-classes/de/julielab/jcore/multiplier/line/LineMultiplierTest.class b/jcore-line-multiplier/target/test-classes/de/julielab/jcore/multiplier/line/LineMultiplierTest.class
index e654ed056e0b31d6e11df028b557de2de00bdb72..f32ad510b6fe0df92dcaa8afe7ba2bbfd652919f 100644
GIT binary patch
delta 64
zcmX>seqVe;A(Oa_PkvFlepYE-W(kli$Sg@M(oZbN++4#nkwr)WLvHe1cFD~e?6X+_
D1M?S4

delta 47
vcmcaFepq}%ArrTNPkvFlepYE-X36Fbrim<q0&u>*V{vh6QORa2_N6QUm>>|&

diff --git a/jcore-lingpipe-porterstemmer-ae/pom.xml b/jcore-lingpipe-porterstemmer-ae/pom.xml
index 6df6ba486..615c960a0 100644
--- a/jcore-lingpipe-porterstemmer-ae/pom.xml
+++ b/jcore-lingpipe-porterstemmer-ae/pom.xml
@@ -22,8 +22,8 @@
             <version>4.1.2-JL1.0</version>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <url>https://github.com/JULIELab/jcore-base/tree/master/jcore-lingpipe-porterstemmer-ae</url>
diff --git a/jcore-lingpipe-porterstemmer-ae/src/test/java/de/julielab/jcore/ae/lingpipe/porterstemmer/LingpipePorterstemmerAnnotatorTest.java b/jcore-lingpipe-porterstemmer-ae/src/test/java/de/julielab/jcore/ae/lingpipe/porterstemmer/LingpipePorterstemmerAnnotatorTest.java
index 58eb08a15..5bc2d85dd 100644
--- a/jcore-lingpipe-porterstemmer-ae/src/test/java/de/julielab/jcore/ae/lingpipe/porterstemmer/LingpipePorterstemmerAnnotatorTest.java
+++ b/jcore-lingpipe-porterstemmer-ae/src/test/java/de/julielab/jcore/ae/lingpipe/porterstemmer/LingpipePorterstemmerAnnotatorTest.java
@@ -16,10 +16,10 @@
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
 
 public class LingpipePorterstemmerAnnotatorTest {
 	@Test
diff --git a/jcore-lingpipegazetteer-ae/pom.xml b/jcore-lingpipegazetteer-ae/pom.xml
index 686f9ae80..3941b37bd 100644
--- a/jcore-lingpipegazetteer-ae/pom.xml
+++ b/jcore-lingpipegazetteer-ae/pom.xml
@@ -52,15 +52,14 @@
         <dependency>
             <groupId>org.apache.commons</groupId>
             <artifactId>commons-lang3</artifactId>
-            <version>3.4</version>
         </dependency>
         <dependency>
             <groupId>org.assertj</groupId>
             <artifactId>assertj-core</artifactId>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <organization>
diff --git a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/StringNormalizerForChunkingTest.java b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/StringNormalizerForChunkingTest.java
index a1bbadf8c..06cc79ca0 100644
--- a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/StringNormalizerForChunkingTest.java
+++ b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/StringNormalizerForChunkingTest.java
@@ -9,11 +9,11 @@
 import de.julielab.jcore.ae.lingpipegazetteer.utils.StringNormalizerForChunking;
 import de.julielab.jcore.ae.lingpipegazetteer.utils.StringNormalizerForChunking.NormalizedString;
 import org.apache.commons.lang3.Range;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.util.List;
 
-import static org.junit.Assert.*;
+import static org.junit.jupiter.api.Assertions.*;
 
 public class StringNormalizerForChunkingTest {
 	@Test
@@ -23,25 +23,25 @@ public void testTextNormalization() {
 
 		term = "\"Call\" - postponed";
 		ns = StringNormalizerForChunking.normalizeString(term);
-		assertEquals("Term normalization was not correct", "Call  postponed", ns.string);
+		assertEquals( "Call  postponed",  ns.string, "Term normalization was not correct");
 
 		term = "\"Light-for-dates\" with signs of fetal malnutrition, 1,000-1,249 grams";
 		ns = StringNormalizerForChunking.normalizeString(term);
-		assertEquals("Term normalization was not correct",
-				"Lightfordates with signs of fetal malnutrition 10001249 grams", ns.string);
+		assertEquals("Lightfordates with signs of fetal malnutrition 10001249 grams",
+				ns.string, "Term normalization was not correct");
 
 		term = "#Tarsal &/or metatarsal bones";
 		ns = StringNormalizerForChunking.normalizeString(term);
-		assertEquals("Term normalization was not correct", "Tarsal or metatarsal bones", ns.string);
+		assertEquals( "Tarsal or metatarsal bones",  ns.string, "Term normalization was not correct");
 
 		term = "% <poverty line Neighborhood PhenX";
 		ns = StringNormalizerForChunking.normalizeString(term);
-		assertEquals("Term normalization was not correct", " poverty line Neighborhood PhenX", ns.string);
+		assertEquals( " poverty line Neighborhood PhenX",  ns.string, "Term normalization was not correct");
 
 		term = "'DP-1:E2F1 complex [nucleoplasm]' positively regulates 'Transactivation of NOXA by E2F1'";
 		ns = StringNormalizerForChunking.normalizeString(term);
-		assertEquals("Term normalization was not correct",
-				"DP1E2F1 complex nucleoplasm positively regulates Transactivation of NOXA by E2F1", ns.string);
+		assertEquals("DP1E2F1 complex nucleoplasm positively regulates Transactivation of NOXA by E2F1",
+				ns.string, "Term normalization was not correct");
 	}
 
 	@Test
@@ -52,23 +52,23 @@ public void testNormalizedOffsets() {
 		text = "-aa :+bb";
 		// Outcome: "aabb";
 		ns = StringNormalizerForChunking.normalizeString(text);
-		assertEquals("The original offset is computed wrong", Integer.valueOf(0), ns.getOriginalOffset(0));
-		assertEquals("The original offset is computed wrong", Integer.valueOf(2), ns.getOriginalOffset(1));
-		assertEquals("The original offset is computed wrong", Integer.valueOf(3), ns.getOriginalOffset(2));
-		assertEquals("The original offset is computed wrong", Integer.valueOf(6), ns.getOriginalOffset(3));
-		assertEquals("The original offset is computed wrong", Integer.valueOf(7), ns.getOriginalOffset(4));
-		assertNull("There are more offset mappings than should be", ns.getOffsetMap().get(5));
+		assertEquals( Integer.valueOf(0),  ns.getOriginalOffset(0), "The original offset is computed wrong");
+		assertEquals( Integer.valueOf(2),  ns.getOriginalOffset(1), "The original offset is computed wrong");
+		assertEquals( Integer.valueOf(3),  ns.getOriginalOffset(2), "The original offset is computed wrong");
+		assertEquals( Integer.valueOf(6),  ns.getOriginalOffset(3), "The original offset is computed wrong");
+		assertEquals( Integer.valueOf(7),  ns.getOriginalOffset(4), "The original offset is computed wrong");
+		assertNull(ns.getOffsetMap().get(5), "There are more offset mappings than should be");
 
 		text = "((2-n-butyl-6,7-dichloro-2-cyclopentyl-2,3-dihydro-1-oxo-1H-inden-5-yl)oxy)acetic acid";
 		// Outcome:
 		// "2nbutyl67dichloro2cyclopentyl23dihydro1oxo1Hinden5yloxyacetic acid";
 		ns = StringNormalizerForChunking.normalizeString(text);
-		assertEquals("The original offset is computed wrong", Integer.valueOf(0), ns.getOriginalOffset(0));
-		assertEquals("The original offset is computed wrong", Integer.valueOf(4), ns.getOriginalOffset(1));
-		assertEquals("The original offset is computed wrong", Integer.valueOf(6), ns.getOriginalOffset(2));
-		assertEquals("The original offset is computed wrong", Integer.valueOf(16), ns.getOriginalOffset(9));
-		assertEquals("The original offset is computed wrong", Integer.valueOf(82), ns.getOriginalOffset(62));
-		assertNull("There are more offset mappings than should be", ns.getOffsetMap().get(66));
+		assertEquals( Integer.valueOf(0),  ns.getOriginalOffset(0), "The original offset is computed wrong");
+		assertEquals( Integer.valueOf(4),  ns.getOriginalOffset(1), "The original offset is computed wrong");
+		assertEquals( Integer.valueOf(6),  ns.getOriginalOffset(2), "The original offset is computed wrong");
+		assertEquals( Integer.valueOf(16),  ns.getOriginalOffset(9), "The original offset is computed wrong");
+		assertEquals( Integer.valueOf(82),  ns.getOriginalOffset(62), "The original offset is computed wrong");
+		assertNull(ns.getOffsetMap().get(66), "There are more offset mappings than should be");
 	}
 
 	@Test
@@ -84,9 +84,9 @@ public void testNormalizedOffsetsTransliterate() {
 		Transliterator t = Transliterator.getInstance("NFD; [:Nonspacing Mark:] Remove; NFC; Lower");
 		assertTrue(t.transform(text).length() == text.length() - 2);
 		ns = StringNormalizerForChunking.normalizeString(text, new IndoEuropeanTokenizerFactory(), t);
-		assertEquals("Transliteration wasn't done correctly",
-				"each node either a sensor or a beacon is noted as nodep, p ∈ 𝕊 ∪ 𝔹, and vector vp is used to represent the coordinate of nodep. beacons are placed onto the map with fixed coordinates vj, where j ∈ 𝔹. we assume that each beacon is aware of its own absolute location.",
-				ns.string);
+		assertEquals("each node either a sensor or a beacon is noted as nodep, p ∈ 𝕊 ∪ 𝔹, and vector vp is used to represent the coordinate of nodep. beacons are placed onto the map with fixed coordinates vj, where j ∈ 𝔹. we assume that each beacon is aware of its own absolute location.",
+				ns.string,
+				"Transliteration wasn't done correctly");
 		assertEquals(Integer.valueOf(83), ns.getOriginalOffset(82));
 		assertEquals(Integer.valueOf(188), ns.getOriginalOffset(186));
 	}
@@ -104,7 +104,7 @@ public void testNormalizedOffetsTransliterator2() {
 		// in the original string, the substring from 0 to 10 would be "b⃗1,2,
 		// res", since the first character is a two-byte character. In the
 		// (transliterated) normalized string, it is only one byte. This is why we expect to get one character more.
-		assertEquals("b⃗1,2, resp", text.substring(0, ns.getOriginalOffset(10)));
+		assertEquals("b⃗1,2, resp", text.substring(0, ns.getOriginalOffset(10)));
 	}
 
 	@Test
@@ -115,31 +115,31 @@ public void testNormalizeWithTokenizer() {
 		PorterStemmerTokenizerFactory tokenizerFactory = new PorterStemmerTokenizerFactory(
 				IndoEuropeanTokenizerFactory.INSTANCE);
 		ns = StringNormalizerForChunking.normalizeString(str, tokenizerFactory);
-		assertEquals("Normalization was wrong: ",
-				"We saw Parkinson Diseas and S(H)P 1 in a sadli-form circumvent of applic.", ns.string);
-		assertEquals("Offset wrong: ", Integer.valueOf(0), ns.getOriginalOffset(Integer.valueOf(0)));
-		assertEquals("Offset wrong: ", Integer.valueOf(16), ns.getOriginalOffset(Integer.valueOf(16)));
-		assertEquals("Offset wrong: ", Integer.valueOf(19), ns.getOriginalOffset(Integer.valueOf(17)));
-		assertEquals("Offset wrong: ", Integer.valueOf(26), ns.getOriginalOffset(Integer.valueOf(23)));
-		assertEquals("Offset wrong: ", Integer.valueOf(49), ns.getOriginalOffset(Integer.valueOf(46)));
-		assertEquals("Offset wrong: ", Integer.valueOf(50), ns.getOriginalOffset(Integer.valueOf(47)));
-		assertEquals("Offset wrong: ", Integer.valueOf(56), ns.getOriginalOffset(Integer.valueOf(51)));
+		assertEquals("We saw Parkinson Diseas and S(H)P 1 in a sadli-form circumvent of applic.",
+				ns.string, "Normalization was wrong: ");
+		assertEquals( Integer.valueOf(0),  ns.getOriginalOffset(Integer.valueOf(0)), "Offset wrong: ");
+		assertEquals( Integer.valueOf(16),  ns.getOriginalOffset(Integer.valueOf(16)), "Offset wrong: ");
+		assertEquals( Integer.valueOf(19),  ns.getOriginalOffset(Integer.valueOf(17)), "Offset wrong: ");
+		assertEquals( Integer.valueOf(26),  ns.getOriginalOffset(Integer.valueOf(23)), "Offset wrong: ");
+		assertEquals( Integer.valueOf(49),  ns.getOriginalOffset(Integer.valueOf(46)), "Offset wrong: ");
+		assertEquals( Integer.valueOf(50),  ns.getOriginalOffset(Integer.valueOf(47)), "Offset wrong: ");
+		assertEquals( Integer.valueOf(56),  ns.getOriginalOffset(Integer.valueOf(51)), "Offset wrong: ");
 		str = "We go to James' to have some coffee'ses.";
 		ns = StringNormalizerForChunking.normalizeString(str, tokenizerFactory);
-		assertEquals("Normalization was wrong: ", "We go to Jame' to have some coffe'se.", ns.string);
-		assertEquals("Offset wrong: ", Integer.valueOf(0), ns.getOriginalOffset(Integer.valueOf(0)));
-		assertEquals("Offset wrong: ", Integer.valueOf(9), ns.getOriginalOffset(Integer.valueOf(9)));
-		assertEquals("Offset wrong: ", Integer.valueOf(14), ns.getOriginalOffset(Integer.valueOf(13)));
-		assertEquals("Offset wrong: ", Integer.valueOf(35), ns.getOriginalOffset(Integer.valueOf(33)));
+		assertEquals( "We go to Jame' to have some coffe'se.",  ns.string, "Normalization was wrong: ");
+		assertEquals( Integer.valueOf(0),  ns.getOriginalOffset(Integer.valueOf(0)), "Offset wrong: ");
+		assertEquals( Integer.valueOf(9),  ns.getOriginalOffset(Integer.valueOf(9)), "Offset wrong: ");
+		assertEquals( Integer.valueOf(14),  ns.getOriginalOffset(Integer.valueOf(13)), "Offset wrong: ");
+		assertEquals( Integer.valueOf(35),  ns.getOriginalOffset(Integer.valueOf(33)), "Offset wrong: ");
 		str = "We have some 'serious things' to talk about.";
 		ns = StringNormalizerForChunking.normalizeString(str, tokenizerFactory);
-		assertEquals("Normalization was wrong: ", "We have some 'seriou thing' to talk about.", ns.string);
-		assertEquals("Offset wrong: ", Integer.valueOf(0), ns.getOriginalOffset(Integer.valueOf(0)));
-		assertEquals("Offset wrong: ", Integer.valueOf(12), ns.getOriginalOffset(Integer.valueOf(12)));
-		assertEquals("Offset wrong: ", Integer.valueOf(13), ns.getOriginalOffset(Integer.valueOf(13)));
-		assertEquals("Offset wrong: ", Integer.valueOf(28), ns.getOriginalOffset(Integer.valueOf(26)));
-		assertEquals("Offset wrong: ", Integer.valueOf(29), ns.getOriginalOffset(Integer.valueOf(27)));
-		assertEquals("Offset wrong: ", Integer.valueOf(30), ns.getOriginalOffset(Integer.valueOf(28)));
+		assertEquals( "We have some 'seriou thing' to talk about.",  ns.string, "Normalization was wrong: ");
+		assertEquals( Integer.valueOf(0),  ns.getOriginalOffset(Integer.valueOf(0)), "Offset wrong: ");
+		assertEquals( Integer.valueOf(12),  ns.getOriginalOffset(Integer.valueOf(12)), "Offset wrong: ");
+		assertEquals( Integer.valueOf(13),  ns.getOriginalOffset(Integer.valueOf(13)), "Offset wrong: ");
+		assertEquals( Integer.valueOf(28),  ns.getOriginalOffset(Integer.valueOf(26)), "Offset wrong: ");
+		assertEquals( Integer.valueOf(29),  ns.getOriginalOffset(Integer.valueOf(27)), "Offset wrong: ");
+		assertEquals( Integer.valueOf(30),  ns.getOriginalOffset(Integer.valueOf(28)), "Offset wrong: ");
 
 		str = "test dosing unit KLRg1 killer cell lectin like receptor G2 Parkinson's Disease";
 		ns = StringNormalizerForChunking.normalizeString(str, tokenizerFactory);
diff --git a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/desc/ConfigurableDescriptorTest.java b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/desc/ConfigurableDescriptorTest.java
index ad55afffb..cf308750a 100644
--- a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/desc/ConfigurableDescriptorTest.java
+++ b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/desc/ConfigurableDescriptorTest.java
@@ -8,11 +8,11 @@
 import org.apache.uima.resource.ResourceSpecifier;
 import org.apache.uima.resource.metadata.ConfigurationParameterSettings;
 import org.apache.uima.util.InvalidXMLException;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.IOException;
 
-import static org.junit.Assert.*;
+import static org.junit.jupiter.api.Assertions.*;
 
 public class ConfigurableDescriptorTest {
     @Test
diff --git a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java
index f7b1a6e8f..7e3197bd6 100644
--- a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java
+++ b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java
@@ -21,7 +21,6 @@
 import de.julielab.jcore.ae.lingpipegazetteer.chunking.ConfigurableChunkerProviderImplAlt;
 import de.julielab.jcore.ae.lingpipegazetteer.chunking.OverlappingChunk;
 import de.julielab.jcore.types.*;
-import junit.framework.TestCase;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -40,7 +39,7 @@
 import org.apache.uima.resource.metadata.TypeSystemDescription;
 import org.apache.uima.util.InvalidXMLException;
 import org.apache.uima.util.XMLInputSource;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.xml.sax.SAXException;
@@ -51,7 +50,9 @@
 
 import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.assertj.core.api.Assertions.assertThat;
-public class GazetteerAnnotatorTest extends TestCase {
+import static org.junit.jupiter.api.Assertions.*;
+
+public class GazetteerAnnotatorTest {
 
 	private static final Logger LOGGER = LoggerFactory.getLogger(GazetteerAnnotatorTest.class);
 
@@ -122,6 +123,7 @@ public void setAbbreviations(JCas myCAS) {
 	 * tests whether the expected number of entities is found for both exact and
 	 * approximate matching
 	 */
+	@Test
 	public void testProcess() throws AnalysisEngineProcessException, CASException, ResourceConfigurationException,
 			InvalidXMLException, ResourceInitializationException, IOException, SAXException {
 		AnalysisEngine gazetteerAnnotator = null;
@@ -294,19 +296,19 @@ public void testAnnotatorWithTextNormalization()
 
 		FSIterator<org.apache.uima.jcas.tcas.Annotation> it = jCas.getAnnotationIndex(EntityMention.type).iterator();
 
-		assertTrue("There are no entity annotations in the CAS.", it.hasNext());
+		assertTrue(it.hasNext(), "There are no entity annotations in the CAS.");
 		EntityMention em = (EntityMention) it.next();
-		assertEquals("Start wrong: ", new Integer(0), new Integer(em.getBegin()));
-		assertEquals("End wrong: ", new Integer(5), new Integer(em.getEnd()));
-		assertEquals("Wrong type: ", "SHP-1", em.getSpecificType());
+		assertEquals( new Integer(0),  new Integer(em.getBegin()), "Start wrong: ");
+		assertEquals( new Integer(5),  new Integer(em.getEnd()), "End wrong: ");
+		assertEquals( "SHP-1",  em.getSpecificType(), "Wrong type: ");
 
-		assertTrue("The secnond entity annotations is missing.", it.hasNext());
+		assertTrue(it.hasNext(), "The secnond entity annotations is missing.");
 		em = (EntityMention) it.next();
-		assertEquals("Start wrong: ", new Integer(10), new Integer(em.getBegin()));
-		assertEquals("End wrong: ", new Integer(45), new Integer(em.getEnd()));
-		assertEquals("Wrong type: ", "KLRG2", em.getSpecificType());
+		assertEquals( new Integer(10),  new Integer(em.getBegin()), "Start wrong: ");
+		assertEquals( new Integer(45),  new Integer(em.getEnd()), "End wrong: ");
+		assertEquals( "KLRG2",  em.getSpecificType(), "Wrong type: ");
 
-		assertFalse("There are too many annotations.", it.hasNext());
+		assertFalse(it.hasNext(), "There are too many annotations.");
 
 		jCas.reset();
 		jCas.setDocumentText(
@@ -314,13 +316,13 @@ public void testAnnotatorWithTextNormalization()
 		annotator.process(jCas);
 		it = jCas.getAnnotationIndex(EntityMention.type).iterator();
 
-		assertTrue("There are no entity annotations in the CAS.", it.hasNext());
+		assertTrue(it.hasNext(), "There are no entity annotations in the CAS.");
 		em = (EntityMention) it.next();
-		assertEquals("Start wrong: ", new Integer(17), new Integer(em.getBegin()));
-		assertEquals("End wrong: ", new Integer(103), new Integer(em.getEnd()));
-		assertEquals("Wrong type: ", "CHEM", em.getSpecificType());
+		assertEquals( new Integer(17),  new Integer(em.getBegin()), "Start wrong: ");
+		assertEquals( new Integer(103),  new Integer(em.getEnd()), "End wrong: ");
+		assertEquals( "CHEM",  em.getSpecificType(), "Wrong type: ");
 
-		assertFalse("There are too many annotations.", it.hasNext());
+		assertFalse(it.hasNext(), "There are too many annotations.");
 
 		jCas.reset();
 		jCas.setDocumentText(
@@ -328,13 +330,13 @@ public void testAnnotatorWithTextNormalization()
 		annotator.process(jCas);
 		it = jCas.getAnnotationIndex(EntityMention.type).iterator();
 
-		assertTrue("There are no entity annotations in the CAS.", it.hasNext());
+		assertTrue(it.hasNext(), "There are no entity annotations in the CAS.");
 		em = (EntityMention) it.next();
-		assertEquals("Start wrong: ", new Integer(17), new Integer(em.getBegin()));
-		assertEquals("End wrong: ", new Integer(103), new Integer(em.getEnd()));
-		assertEquals("Wrong type: ", "CHEM", em.getSpecificType());
+		assertEquals( new Integer(17),  new Integer(em.getBegin()), "Start wrong: ");
+		assertEquals( new Integer(103),  new Integer(em.getEnd()), "End wrong: ");
+		assertEquals( "CHEM",  em.getSpecificType(), "Wrong type: ");
 
-		assertFalse("There are too many annotations.", it.hasNext());
+		assertFalse(it.hasNext(), "There are too many annotations.");
 
 		jCas.reset();
 		jCas.setDocumentText(
@@ -342,7 +344,7 @@ public void testAnnotatorWithTextNormalization()
 		annotator.process(jCas);
 		it = jCas.getAnnotationIndex(EntityMention.type).iterator();
 
-		assertFalse("There is an annotation in CAS although there shouldnt be.", it.hasNext());
+		assertFalse(it.hasNext(), "There is an annotation in CAS although there shouldnt be.");
 
 		jCas.reset();
 		jCas.setDocumentText("Test-dosing unit KLRg1 killer cell lectin like receptor G2 Parkinson's Disease");
@@ -354,7 +356,7 @@ public void testAnnotatorWithTextNormalization()
 			System.out.println(it.next().getCoveredText());
 			counter++;
 		}
-		assertEquals("Wrong entity count: ", new Integer(4), counter);
+		assertEquals( new Integer(4),  counter, "Wrong entity count: ");
 
 	}
 
@@ -378,10 +380,10 @@ public void testAnnotatorWithPluralNormalization()
 		annotator.process(jCas);
 
 		Collection<EntityMention> entityMentions = JCasUtil.select(jCas, EntityMention.class);
-		assertEquals("Expected a single entity", 2, entityMentions.size());
+		assertEquals(2, entityMentions.size(), "Expected two entities");
 		Iterator<EntityMention> iterator = entityMentions.iterator();
-		assertEquals("Unexpected covered entity text", "lipoprotein", iterator.next().getCoveredText());
-		assertEquals("Unexpected covered entity text", "lipoproteins", iterator.next().getCoveredText());
+		assertEquals( "lipoprotein",  iterator.next().getCoveredText(), "Unexpected covered entity text");
+		assertEquals( "lipoproteins",  iterator.next().getCoveredText(), "Unexpected covered entity text");
 	}
 
 	@Test
@@ -416,7 +418,7 @@ public void testAnnotateAcronymsWithFullFormEntity() throws Exception {
 			it.next();
 			counter++;
 		}
-		assertEquals("Wrong entity count: ", new Integer(1), counter);
+		assertEquals( new Integer(1),  counter, "Wrong entity count: ");
 
 		jCas.reset();
 		jCas.setDocumentText(
@@ -454,7 +456,7 @@ public void testAnnotateAcronymsWithFullFormEntity() throws Exception {
 			}
 			assertEquals("GENE", next.getSpecificType());
 		}
-		assertEquals("Wrong entity count: ", new Integer(1), counter);
+		assertEquals( new Integer(1),  counter, "Wrong entity count: ");
 	}
 
 	@Test
@@ -599,9 +601,9 @@ public void testGroupOvecrlappingChunks() {
 			assertEquals(1, bestChunkList.size());
 			Chunk bestChunk = bestChunkList.get(0);
 			assertFalse(
+					bestChunks.contains(bestChunk),
 					"Duplicate best chunk: " + bestChunk + " (\""
-							+ chunkedText.subSequence(bestChunk.start(), bestChunk.end()) + "\")",
-					bestChunks.contains(bestChunk));
+							+ chunkedText.subSequence(bestChunk.start(), bestChunk.end()) + "\")");
 			bestChunks.add(bestChunk);
 		}
 	}
diff --git a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/OverlappingChunkTest.java b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/OverlappingChunkTest.java
index 078f62ecb..c700ff26f 100644
--- a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/OverlappingChunkTest.java
+++ b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/OverlappingChunkTest.java
@@ -3,12 +3,12 @@
 import com.aliasi.chunk.Chunk;
 import com.aliasi.chunk.ChunkFactory;
 import de.julielab.jcore.ae.lingpipegazetteer.chunking.OverlappingChunk;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.util.ArrayList;
 import java.util.List;
 
-import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 
 public class OverlappingChunkTest {
 	@Test
diff --git a/jcore-lingscope-ae/pom.xml b/jcore-lingscope-ae/pom.xml
index 4c5a15b41..60cdb8dd6 100644
--- a/jcore-lingscope-ae/pom.xml
+++ b/jcore-lingscope-ae/pom.xml
@@ -47,8 +47,8 @@
             <artifactId>jcore-descriptor-creator</artifactId>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
diff --git a/jcore-lingscope-ae/src/main/java/de/julielab/jcore/ae/lingscope/LingscopePosAnnotator.java b/jcore-lingscope-ae/src/main/java/de/julielab/jcore/ae/lingscope/LingscopePosAnnotator.java
index ff34b56ad..aaaae3656 100644
--- a/jcore-lingscope-ae/src/main/java/de/julielab/jcore/ae/lingscope/LingscopePosAnnotator.java
+++ b/jcore-lingscope-ae/src/main/java/de/julielab/jcore/ae/lingscope/LingscopePosAnnotator.java
@@ -22,8 +22,8 @@
 
 import java.io.File;
 import java.io.IOException;
-import java.util.*;
 import java.util.List;
+import java.util.*;
 import java.util.function.Supplier;
 import java.util.stream.Collectors;
 
diff --git a/jcore-lingscope-ae/src/test/java/de/julielab/LingscopePosAnnotatorTest.java b/jcore-lingscope-ae/src/test/java/de/julielab/LingscopePosAnnotatorTest.java
index 1e5d75496..7089675df 100644
--- a/jcore-lingscope-ae/src/test/java/de/julielab/LingscopePosAnnotatorTest.java
+++ b/jcore-lingscope-ae/src/test/java/de/julielab/LingscopePosAnnotatorTest.java
@@ -8,7 +8,7 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.util.stream.Collectors;
 
diff --git a/jcore-linnaeus-species-ae/pom.xml b/jcore-linnaeus-species-ae/pom.xml
index 68c29ba14..354a3c751 100644
--- a/jcore-linnaeus-species-ae/pom.xml
+++ b/jcore-linnaeus-species-ae/pom.xml
@@ -41,8 +41,8 @@
             <artifactId>jcore-descriptor-creator</artifactId>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <licenses>
diff --git a/jcore-linnaeus-species-ae/src/main/java/de/julielab/jcore/ae/linnaeus/LinnaeusMatcherProvider.java b/jcore-linnaeus-species-ae/src/main/java/de/julielab/jcore/ae/linnaeus/LinnaeusMatcherProvider.java
index bdccc500e..0bf56eb18 100644
--- a/jcore-linnaeus-species-ae/src/main/java/de/julielab/jcore/ae/linnaeus/LinnaeusMatcherProvider.java
+++ b/jcore-linnaeus-species-ae/src/main/java/de/julielab/jcore/ae/linnaeus/LinnaeusMatcherProvider.java
@@ -1,7 +1,5 @@
 package de.julielab.jcore.ae.linnaeus;
 
-import org.apache.uima.resource.DataResource;
-import org.apache.uima.resource.ResourceInitializationException;
 import org.apache.uima.resource.SharedResourceObject;
 import uk.ac.man.entitytagger.matching.Matcher;
 
diff --git a/jcore-linnaeus-species-ae/src/test/java/de/julielab/jcore/ae/linnaeus/LinnaeusSpeciesAnnotatorTest.java b/jcore-linnaeus-species-ae/src/test/java/de/julielab/jcore/ae/linnaeus/LinnaeusSpeciesAnnotatorTest.java
index 58a46dec9..16bcd3e2c 100644
--- a/jcore-linnaeus-species-ae/src/test/java/de/julielab/jcore/ae/linnaeus/LinnaeusSpeciesAnnotatorTest.java
+++ b/jcore-linnaeus-species-ae/src/test/java/de/julielab/jcore/ae/linnaeus/LinnaeusSpeciesAnnotatorTest.java
@@ -20,11 +20,10 @@
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
 import org.apache.uima.resource.ExternalResourceDescription;
-import org.apache.uima.resource.metadata.ExternalResourceBinding;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class LinnaeusSpeciesAnnotatorTest {
 	@Test
diff --git a/jcore-medxn-ae/pom.xml b/jcore-medxn-ae/pom.xml
index aac277c21..0eaff3697 100644
--- a/jcore-medxn-ae/pom.xml
+++ b/jcore-medxn-ae/pom.xml
@@ -25,8 +25,8 @@
             <scope>test</scope>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <licenses>
diff --git a/jcore-medxn-ae/src/test/java/de/julielab/jcore/ae/medxn/MedAttrAnnotatorTest.java b/jcore-medxn-ae/src/test/java/de/julielab/jcore/ae/medxn/MedAttrAnnotatorTest.java
index 110de0875..4f4e08302 100644
--- a/jcore-medxn-ae/src/test/java/de/julielab/jcore/ae/medxn/MedAttrAnnotatorTest.java
+++ b/jcore-medxn-ae/src/test/java/de/julielab/jcore/ae/medxn/MedAttrAnnotatorTest.java
@@ -21,16 +21,17 @@
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.JFSIndexRepository;
 import org.apache.uima.jcas.tcas.Annotation;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Ignore;
-import org.junit.Test;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
 
 import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
 public class MedAttrAnnotatorTest {
 
 	private static final String AE_DESCRIPTOR = "de.julielab.jcore.ae.medxn.desc.jcore-medxn-ae-attributes-german";
@@ -66,11 +67,11 @@ private void check(String[] goldlines, JCas tcas) {
 		
 		
 		Boolean lengthEqual = (goldlines.length == menCount);
-		Assert.assertTrue("Expression count differs; should be '" + 
-				Integer.toString(goldlines.length) + "' but is '" + menCount.toString() +"'.",
-				lengthEqual);
+		assertTrue(lengthEqual,
+				"Expression count differs; should be '" +
+				goldlines.length + "' but is '" + menCount.toString() +"'.");
-		Boolean arrayEqual = (goldlines.equals(actLines.toArray(new String[actLines.size()])));
-		Assert.assertTrue("Expressions differ", arrayEqual);
+		Boolean arrayEqual = java.util.Arrays.equals(goldlines, actLines.toArray(new String[actLines.size()])); // element-wise; array equals() only compares identity
+		assertTrue(arrayEqual, "Expressions differ");
 	}
 	
 	private void reset() {
@@ -78,7 +79,7 @@ private void reset() {
 	}
 	
 	
-	@Before
+	@BeforeEach
 	public void initializeComponents() throws IOException, UIMAException {
 		if (setUpIsDone) {
 	        return;
@@ -90,7 +91,7 @@ public void initializeComponents() throws IOException, UIMAException {
 		setUpIsDone = true;
 	}
 	
-	@Ignore
+	@Disabled
 	@Test
 	public void testDuration() {
 			String text;
@@ -113,7 +114,7 @@ public void testDuration() {
 			}
 	}
 	
-	@Ignore
+	@Disabled
 	@Test
 	public void testDose() {
 			String text;
@@ -136,7 +137,7 @@ public void testDose() {
 			}
 	}
 	
-	@Ignore
+	@Disabled
 	@Test
 	public void testFrequency() {
 			String text;
@@ -159,7 +160,7 @@ public void testFrequency() {
 			}
 	}
 	
-	@Ignore
+	@Disabled
 	@Test
 	public void testModus() {
 			String text;
diff --git a/jcore-msdoc-reader/pom.xml b/jcore-msdoc-reader/pom.xml
index 74d9d3daa..c162caa94 100644
--- a/jcore-msdoc-reader/pom.xml
+++ b/jcore-msdoc-reader/pom.xml
@@ -46,8 +46,8 @@
             <version>3.16</version>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <licenses>
diff --git a/jcore-msdoc-reader/src/test/java/de/julielab/jcore/reader/msdoc/main/MSdocReaderTest.java b/jcore-msdoc-reader/src/test/java/de/julielab/jcore/reader/msdoc/main/MSdocReaderTest.java
index d3945a6db..68942199c 100644
--- a/jcore-msdoc-reader/src/test/java/de/julielab/jcore/reader/msdoc/main/MSdocReaderTest.java
+++ b/jcore-msdoc-reader/src/test/java/de/julielab/jcore/reader/msdoc/main/MSdocReaderTest.java
@@ -27,17 +27,17 @@
 import org.apache.uima.util.CasCreationUtils;
 import org.apache.uima.util.InvalidXMLException;
 import org.apache.uima.util.XMLInputSource;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
 
 import java.io.File;
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Paths;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class MSdocReaderTest {
 	/**
@@ -69,7 +69,7 @@ public class MSdocReaderTest {
 	private static final String DOC_DUMMY_NAME = "dummy.doc";
 	private static final String DOC_DUMMY_FILE = "src/test/resources/" + DOC_DUMMY_NAME;
 
-	@BeforeClass
+	@BeforeAll
 	public static void setUp() throws Exception {
 		/**
 		 * Create dummies of *.doc-files.
@@ -161,7 +161,7 @@ private static void writeArtifact(String file_name) throws IOException {
 		}
 	}
 
-	@AfterClass
+	@AfterAll
 	public static void tearDown() throws Exception {
 		/**
 		 * Delete dummies from setUp.
diff --git a/jcore-mstparser-ae/pom.xml b/jcore-mstparser-ae/pom.xml
index 83f9017af..08d948e99 100644
--- a/jcore-mstparser-ae/pom.xml
+++ b/jcore-mstparser-ae/pom.xml
@@ -80,8 +80,8 @@
             <scope>provided</scope>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <organization>
diff --git a/jcore-mstparser-ae/src/test/java/de/julielab/jcore/ae/mstparser/main/MSTParserTest.java b/jcore-mstparser-ae/src/test/java/de/julielab/jcore/ae/mstparser/main/MSTParserTest.java
index 297b93cb5..46a6fe3a9 100644
--- a/jcore-mstparser-ae/src/test/java/de/julielab/jcore/ae/mstparser/main/MSTParserTest.java
+++ b/jcore-mstparser-ae/src/test/java/de/julielab/jcore/ae/mstparser/main/MSTParserTest.java
@@ -19,7 +19,6 @@
 import de.julielab.jcore.types.DependencyRelation;
 import de.julielab.jcore.types.Sentence;
 import de.julielab.jcore.types.Token;
-import junit.framework.TestCase;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -36,7 +35,8 @@
 import org.apache.uima.resource.ResourceSpecifier;
 import org.apache.uima.util.InvalidXMLException;
 import org.apache.uima.util.XMLInputSource;
-import org.junit.Ignore;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.xml.sax.SAXException;
@@ -46,12 +46,15 @@
 import java.io.FileOutputStream;
 import java.io.IOException;
 
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.fail;
+
 /**
  * This is the JUnit test for the MST Parser Annotator.
  *
  * @author Lichtenwald
  */
-public class MSTParserTest extends TestCase {
+public class MSTParserTest  {
     private static final String LOGGER_PROPERTIES = "src/test/java/log4j.properties";
 
     public static final String PARAM_MAX_NUM_TOKENS = "MaxNumTokens";
@@ -68,7 +71,7 @@ public class MSTParserTest extends TestCase {
 
     /*--------------------------------------------------------------------------------------------*/
 
-    @Ignore
+//    @Disabled  // kept commented out with testCAS; a live annotation here would bind to the next test method
 //    public void testCAS() throws Exception {
 //        // String[] heads = new String[] { "have", "Migrants", "drown", "coast", "off", "40", "40", "migrants", "have",
 //        // "have", "drowned", "Sea", "Sea", "in", "drowned", "coast", "coast", "off", "coast", "of", "drowned",
@@ -174,6 +177,7 @@ public class MSTParserTest extends TestCase {
     // jcas.reset();
     // } // of initCas
 
+    @Test
     public void testThreads() throws Exception {
         try {
             int count = 3;
@@ -188,7 +192,7 @@ public void testThreads() throws Exception {
             x.run();
             Thread.sleep(5000);
         } catch (RuntimeException e) {
-            fail("Errorin Threads");
+            fail("Error in Threads");
         }
     }
 
@@ -230,6 +234,7 @@ public void testThreads() throws Exception {
      * @throws AnalysisEngineProcessException
      * @throws SAXException
      */
+    @Test
     public void testProcess() throws IOException, InvalidXMLException, ResourceInitializationException, CASException,
             AnalysisEngineProcessException, SAXException {
         XMLInputSource descriptor = new XMLInputSource(DESCRIPTOR_MST_PARSER);
@@ -245,9 +250,10 @@ public void testProcess() throws IOException, InvalidXMLException, ResourceIniti
         FileOutputStream fos = new FileOutputStream(OUTPUT_DIR + File.separator + "test.xmi");
         XmiCasSerializer.serialize(jcas.getCas(), fos);
 
-        assertTrue("Invalid JCas!", checkAnnotations(jcas, null));
+        assertTrue(checkAnnotations(jcas, null), "Invalid JCas!");
     } // of testProcess
 
+    @Test
     public void testProcessWithNumTokensRestriction()
             throws IOException, InvalidXMLException, ResourceInitializationException, CASException,
             AnalysisEngineProcessException, SAXException, ResourceConfigurationException {
@@ -263,7 +269,7 @@ public void testProcessWithNumTokensRestriction()
         ae.process(jcas);
         FileOutputStream fos = new FileOutputStream(OUTPUT_DIR + File.separator + "test.xmi");
         XmiCasSerializer.serialize(jcas.getCas(), fos);
-        assertTrue("Invalid JCas!", checkAnnotations(jcas, MAX_NUM_TOKENS));
+        assertTrue(checkAnnotations(jcas, MAX_NUM_TOKENS), "Invalid JCas!");
     }
 
     /**
diff --git a/jcore-muc7-reader/pom.xml b/jcore-muc7-reader/pom.xml
index a1461b459..b06e1cbbf 100644
--- a/jcore-muc7-reader/pom.xml
+++ b/jcore-muc7-reader/pom.xml
@@ -22,8 +22,8 @@
             <version>${jcore-types-version}</version>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <organization>
diff --git a/jcore-muc7-reader/scripts/muc7_SGML2XML.py b/jcore-muc7-reader/scripts/muc7_SGML2XML.py
index b015b9342..9dbed485a 100644
--- a/jcore-muc7-reader/scripts/muc7_SGML2XML.py
+++ b/jcore-muc7-reader/scripts/muc7_SGML2XML.py
@@ -5,9 +5,7 @@
 # - `<p>`: needs to be closed with `</p>`
 
 import re
-import os
 import sys
-import glob
 
 
 def close_paragraphs(line):
diff --git a/jcore-muc7-reader/src/test/java/de/julielab/jcore/reader/muc7/MUC7ReaderTest.java b/jcore-muc7-reader/src/test/java/de/julielab/jcore/reader/muc7/MUC7ReaderTest.java
index 77f12db5e..b2e97da26 100644
--- a/jcore-muc7-reader/src/test/java/de/julielab/jcore/reader/muc7/MUC7ReaderTest.java
+++ b/jcore-muc7-reader/src/test/java/de/julielab/jcore/reader/muc7/MUC7ReaderTest.java
@@ -11,7 +11,6 @@
 import de.julielab.jcore.types.muc7.ENAMEX;
 import de.julielab.jcore.types.muc7.NUMEX;
 import de.julielab.jcore.types.muc7.TIMEX;
-import junit.framework.TestCase;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.analysis_engine.metadata.AnalysisEngineMetaData;
 import org.apache.uima.cas.CAS;
@@ -24,6 +23,8 @@
 import org.apache.uima.util.CasCreationUtils;
 import org.apache.uima.util.InvalidXMLException;
 import org.apache.uima.util.XMLInputSource;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
 import org.xml.sax.SAXException;
 
 import javax.xml.parsers.ParserConfigurationException;
@@ -31,7 +32,9 @@
 import java.util.ArrayList;
 import java.util.Iterator;
 
-public class MUC7ReaderTest extends TestCase {
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class MUC7ReaderTest  {
 	/**
 	 * Path to the MedlineReader descriptor
 	 */
@@ -40,12 +43,11 @@ public class MUC7ReaderTest extends TestCase {
 	/**
 	 * Object to be tested
 	 */
-	private CollectionReader muc7Reader;
+	private static CollectionReader muc7Reader;
+	private static CAS cas;
+
 
-	
-	private CAS cas;
 
-	
 	/**
 	 * Test data
 	 */
@@ -87,12 +89,11 @@ public class MUC7ReaderTest extends TestCase {
 	/**
 	 *    * CAS array with CAS objects that where processed by the muc7Reader
 	 */
-	private ArrayList<JCas> cases = new ArrayList<JCas>();
+	private static ArrayList<JCas> cases = new ArrayList<JCas>();
 	
 	
-	@Override
-	protected void setUp() throws Exception {
-		super.setUp();
+	@BeforeAll
+	protected static void setUp() throws Exception {
 		muc7Reader = produceCollectionReader(MUC7_READER_DESCRIPTOR);
 		processAllCases();
 	}
@@ -105,7 +106,7 @@ protected void setUp() throws Exception {
 	 * @throws SAXException 
 	 * @throws ParserConfigurationException 
 	 */
-	private void processAllCases() throws CASException, SAXException, ParserConfigurationException {
+	private static void processAllCases() throws CASException, SAXException, ParserConfigurationException {
 		try {
 		      while (muc7Reader.hasNext()) {
 		    	 cas = CasCreationUtils.createCas((AnalysisEngineMetaData) muc7Reader.getMetaData());
@@ -123,20 +124,21 @@ private void processAllCases() throws CASException, SAXException, ParserConfigur
 	} 
 	/**
 	 * Test if method getNextCas() has done its job
-	 */	 
+	 */
+	@Test
 	public void testGetNextCas() {
  		
 		//check for a TIMEX entity
  		String[] timexData = getTimexData(DOC_ID);
- 		assertTrue("TIMEX", checkTimex(timexData));
+ 		assertTrue(checkTimex(timexData), "TIMEX");
 		
 		//check for a ENAMEX entity
  		String[] enamexData = getEnamexData(DOC_ID);
- 		assertTrue("ENAMEX", checkEnamex(enamexData));
+ 		assertTrue(checkEnamex(enamexData), "ENAMEX");
 		
 		//check for a NUMEX entity
  		String[] numexData = getNumexData(DOC_ID);
- 		assertTrue("NUMEX", checkNumex(numexData));
+ 		assertTrue(checkNumex(numexData), "NUMEX");
  		
  		//TODO coreference doesn't works as of now
 		//check for a coref chain
@@ -337,7 +339,7 @@ private void buildCorefChain(int corefID, ArrayList<String> corefChain, JCas jca
 	/**
 	   * Gets an Iterator over the the CAS for the specific type
 	   * 
-	   * @param cas (the CAS)
+	   * @param jcas (the CAS)
 	   * @param type (the type)
 	   * @return the iterator
 	   */
@@ -371,7 +373,7 @@ private String[] toStringArray(ArrayList<String> stringArray) {
 	 * @throws InvalidXMLException 
 	 * @throws ResourceInitializationException 
 	  */
-	 private CollectionReader produceCollectionReader(String descriptor) throws InvalidXMLException, IOException, ResourceInitializationException {
+	 private static CollectionReader produceCollectionReader(String descriptor) throws InvalidXMLException, IOException, ResourceInitializationException {
 		 CollectionReader collectionReader;
 		 ResourceSpecifier spec;
 		 spec = UIMAFramework.getXMLParser().parseResourceSpecifier(new XMLInputSource(descriptor));
diff --git a/jcore-mutationfinder-ae/pom.xml b/jcore-mutationfinder-ae/pom.xml
index 62b3a5d5b..b6d707627 100644
--- a/jcore-mutationfinder-ae/pom.xml
+++ b/jcore-mutationfinder-ae/pom.xml
@@ -23,8 +23,8 @@
             <version>2.0.8</version>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
@@ -35,7 +35,16 @@
             <groupId>de.julielab</groupId>
             <artifactId>jcore-descriptor-creator</artifactId>
         </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.assertj</groupId>
+            <artifactId>assertj-core</artifactId>
+        </dependency>
     </dependencies>
+
     <licenses>
         <license>
             <name>BSD-2-Clause</name>
diff --git a/jcore-mutationfinder-ae/src/test/java/de/julielab/jcore/ae/mutationfinder/MutationAnnotatorTest.java b/jcore-mutationfinder-ae/src/test/java/de/julielab/jcore/ae/mutationfinder/MutationAnnotatorTest.java
index 5291c51fa..c877fdc14 100644
--- a/jcore-mutationfinder-ae/src/test/java/de/julielab/jcore/ae/mutationfinder/MutationAnnotatorTest.java
+++ b/jcore-mutationfinder-ae/src/test/java/de/julielab/jcore/ae/mutationfinder/MutationAnnotatorTest.java
@@ -6,12 +6,11 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
-import org.hamcrest.CoreMatchers;
-import org.junit.Assert;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.util.Collection;
 
+import static org.assertj.core.api.Assertions.assertThat;
 public class MutationAnnotatorTest {
 
     @Test
@@ -21,8 +20,8 @@ public void testAnnotator() throws Exception {
         jCas.setDocumentText("A covalently bound catalytic intermediate in Escherichia coli asparaginase: crystal structure of a Thr-89-Val mutant.");
         annotator.process(jCas);
         final Collection<PointMutation> mutations = JCasUtil.select(jCas, PointMutation.class);
-        Assert.assertThat(mutations.size(), CoreMatchers.is(1));
-        Assert.assertThat(mutations.stream().findAny().get().getCoveredText(), CoreMatchers.equalTo("Thr-89-Val"));
-        Assert.assertThat(mutations.stream().findAny().get().getSpecificType(), CoreMatchers.equalTo("T89V"));
+        assertThat(mutations).hasSize(1);
+        assertThat(mutations.stream().findAny().get().getCoveredText()).isEqualTo("Thr-89-Val");
+        assertThat(mutations.stream().findAny().get().getSpecificType()).isEqualTo("T89V");
     }
 }
diff --git a/jcore-mutationfinder-ae/src/test/java/edu/uchsc/ccp/nlp/ei/mutation/MutationFinderTest.java b/jcore-mutationfinder-ae/src/test/java/edu/uchsc/ccp/nlp/ei/mutation/MutationFinderTest.java
index 51aa04218..4bc918ef2 100644
--- a/jcore-mutationfinder-ae/src/test/java/edu/uchsc/ccp/nlp/ei/mutation/MutationFinderTest.java
+++ b/jcore-mutationfinder-ae/src/test/java/edu/uchsc/ccp/nlp/ei/mutation/MutationFinderTest.java
@@ -1,24 +1,27 @@
 package edu.uchsc.ccp.nlp.ei.mutation;
 
-import junit.framework.TestCase;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
 
 import java.io.File;
 import java.io.FileInputStream;
 import java.util.*;
 
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
 /*
  * Copyright (c) 2007 Regents of the University of Colorado
  * Please refer to the licensing agreement at MUTATIONFINDER_HOME/doc/license.txt
  */
 
-public class MutationFinderTest extends TestCase {
+public class MutationFinderTest  {
 
-    private List<String> regularExpressions;
+    private static List<String> regularExpressions;
 
-    private MutationFinder mf;
+    private static MutationFinder mf;
 
-    @Override
-    protected void setUp() throws Exception {
+    @BeforeAll
+    protected static void setUp() {
         /* The first four default regular expressions */
         regularExpressions = new ArrayList<String>();
         regularExpressions
@@ -32,8 +35,6 @@ protected void setUp() throws Exception {
                 .add("(^|[\\s\\(\\[\\'\"/,\\-])(?P<wt_res>(CYS|ILE|SER|GLN|MET|ASN|PRO|LYS|ASP|THR|PHE|ALA|GLY|HIS|LEU|ARG|TRP|VAL|GLU|TYR)|(GLUTAMINE|GLUTAMIC ACID|LEUCINE|VALINE|ISOLEUCINE|LYSINE|ALANINE|GLYCINE|ASPARTATE|METHIONINE|THREONINE|HISTIDINE|ASPARTIC ACID|ARGININE|ASPARAGINE|TRYPTOPHAN|PROLINE|PHENYLALANINE|CYSTEINE|SERINE|GLUTAMATE|TYROSINE))(?P<pos>[1-9][0-9]*) to (?P<mut_res>(CYS|ILE|SER|GLN|MET|ASN|PRO|LYS|ASP|THR|PHE|ALA|GLY|HIS|LEU|ARG|TRP|VAL|GLU|TYR)|(GLUTAMINE|GLUTAMIC ACID|LEUCINE|VALINE|ISOLEUCINE|LYSINE|ALANINE|GLYCINE|ASPARTATE|METHIONINE|THREONINE|HISTIDINE|ASPARTIC ACID|ARGININE|ASPARAGINE|TRYPTOPHAN|PROLINE|PHENYLALANINE|CYSTEINE|SERINE|GLUTAMATE|TYROSINE))(?=([.,\\s)\\]\\'\":;\\-?!/]|$))");
 
         mf = new MutationFinder(new HashSet<String>(regularExpressions));
-
-        super.setUp();
     }
 
     /**
@@ -41,6 +42,7 @@ protected void setUp() throws Exception {
      * 
      * @throws Exception
      */
+    @Test
     public void testConstructor() throws Exception {
         mf = new MutationFinder(new HashSet<String>());
         mf = new MutationFinder(new HashSet<String>(regularExpressions));
@@ -62,6 +64,7 @@ public void testConstructor() throws Exception {
      * 
      * @throws Exception
      */
+    @Test
     public void testExtractMappingsFromPythonRegex() throws Exception {
         Map<String, Integer> groupMappings = MutationFinder.extractMappingsFromPythonRegex(regularExpressions.get(0));
         assertEquals(new Integer(2), groupMappings.get(MutationFinder.WT_RES));
@@ -80,6 +83,7 @@ public void testExtractMappingsFromPythonRegex() throws Exception {
      * 
      * @throws Exception
      */
+    @Test
     public void testRemoveTagsFromPythonRegex() throws Exception {
         String regex0WithoutTags = "(^|[\\s\\(\\[\\'\"/,\\-])([CISQMNPKDTFAGHLRWVEY])([1-9][0-9]+)([CISQMNPKDTFAGHLRWVEY])(?=([.,\\s)\\]\\'\":;\\-?!/]|$))[CASE_SENSITIVE]";
         assertEquals(regex0WithoutTags, MutationFinder.removeTagsFromPythonRegex(regularExpressions.get(0)));
@@ -95,6 +99,7 @@ public void testRemoveTagsFromPythonRegex() throws Exception {
      * 
      * @throws Exception
      */
+    @Test
     public void testExtractionNoMutations() throws Exception {
         Map<Mutation, Set<int[]>> mutations = mf.extractMutations("");
         assertEquals(0, mutations.size());
@@ -117,6 +122,7 @@ public void testExtractionNoMutations() throws Exception {
      * 
      * @throws Exception
      */
+    @Test
     public void testExtractSingleMutation() throws Exception {
         Map<Mutation, Set<int[]>> mutations = mf.extractMutations("S42T");
         Set<Mutation> expectedPMs = new HashSet<Mutation>();
@@ -141,6 +147,7 @@ public void testExtractSingleMutation() throws Exception {
      * 
      * @throws Exception
      */
+    @Test
     public void testExtractMultipleMutations() throws Exception {
         Map<Mutation, Set<int[]>> mutations = mf.extractMutations("S42T and W36Y");
         Set<Mutation> expectedPMs = new HashSet<Mutation>();
@@ -173,6 +180,7 @@ public void testExtractMultipleMutations() throws Exception {
      * 
      * @throws Exception
      */
+    @Test
     public void testExtractMultipleMutationsWithPositiveLookahead() throws Exception {
         Map<Mutation, Set<int[]>> mutations = mf.extractMutations("S42T W36Y");
         Set<Mutation> expectedPMs = new HashSet<Mutation>();
@@ -191,6 +199,7 @@ public void testExtractMultipleMutationsWithPositiveLookahead() throws Exception
      * 
      * @throws Exception
      */
+    @Test
     public void testExtractionSpanCalculations() throws Exception {
         Map<Mutation, Set<int[]>> mutations = mf.extractMutations("S42T and W36Y");
         Mutation expectedPM = new PointMutation(42, "S", "T");
@@ -248,6 +257,7 @@ public void testExtractionSpanCalculations() throws Exception {
      * 
      * @throws Exception
      */
+    @Test
     public void testExtractionOfVariousFormats() throws Exception {
         Map<Mutation, Set<int[]>> mutations = mf.extractMutations("The A42G mutation was made.");
         Mutation expectedPM = new PointMutation(42, "A", "G");
@@ -296,6 +306,7 @@ public void testExtractionOfVariousFormats() throws Exception {
      * 
      * @throws Exception
      */
+    @Test
     public void testRegexCaseInsensitiveFlag() throws Exception {
         Map<Mutation, Set<int[]>> mutations = mf.extractMutations("a64t");
         assertEquals(0, mutations.size());
@@ -323,6 +334,7 @@ public void testRegexCaseInsensitiveFlag() throws Exception {
      * 
      * @throws Exception
      */
+    @Test
     public void testCaseInsensitiveCases() throws Exception {
         Map<Mutation, Set<int[]>> mutations = mf.extractMutations("ala64gly");
         assertEquals(1, mutations.size());
@@ -346,6 +358,7 @@ public void testCaseInsensitiveCases() throws Exception {
      * 
      * @throws Exception
      */
+    @Test
     public void testPostProcessing() throws Exception {
         Map<Mutation, Set<int[]>> mutations = mf.extractMutations("A64G");
         assertEquals(1, mutations.size());
@@ -366,6 +379,7 @@ public void testPostProcessing() throws Exception {
      * 
      * @throws Exception
      */
+    @Test
     public void testVariedDigitLength() throws Exception {
         Map<Mutation, Set<int[]>> mutations = mf.extractMutations("ala64gly");
         assertEquals(1, mutations.size());
@@ -388,6 +402,7 @@ public void testVariedDigitLength() throws Exception {
      * 
      * @throws Exception
      */
+    @Test
     public void testUnacceptableGeneralWordBoundaries() throws Exception {
         String startCharacters = "abcdefghijklmnopqrstuvwxyz0123456789~@#$%^&*_+=])";
         String endCharacters = "abcdefghijklmnopqrstuvwxyz0123456789~@#$%^&*_+=(['";
@@ -408,6 +423,7 @@ public void testUnacceptableGeneralWordBoundaries() throws Exception {
          * 
          * @throws Exception
          */
+    @Test
     public void testAcceptableGeneralWordBoundaries() throws Exception {
         char[] endCharacters = { '.', ',', ' ', '\t', '\n', ')', ']', '"', '\'', ':', ';', '?', '!', '/', '-' };
         char[] startCharacters = { ' ', '\t', '\n', '"', '\'', '(', '[', '/', ',', '-' };
@@ -429,6 +445,7 @@ public void testAcceptableGeneralWordBoundaries() throws Exception {
      * 
      * @throws Exception
      */
+    @Test
     public void testMixOneAndThreeLetterStrings() throws Exception {
         Map<Mutation, Set<int[]>> mutations = mf.extractMutations("A64Gly");
         assertEquals(0, mutations.size());
@@ -442,6 +459,7 @@ public void testMixOneAndThreeLetterStrings() throws Exception {
      * 
      * @throws Exception
      */
+    @Test
     public void testFullNameMethods() throws Exception {
         Map<Mutation, Set<int[]>> mutations = mf.extractMutations("alanine64-->Gly");
         assertEquals(1, mutations.size());
@@ -455,6 +473,7 @@ public void testFullNameMethods() throws Exception {
      * 
      * @throws Exception
      */
+    @Test
     public void testOneLetterAbbreviationFailsNon_wNmFormat() throws Exception {
         Map<Mutation, Set<int[]>> mutations = mf.extractMutations("A64-->glycine");
         assertEquals(0, mutations.size());
@@ -471,6 +490,7 @@ public void testOneLetterAbbreviationFailsNon_wNmFormat() throws Exception {
      * 
      * @throws Exception
      */
+    @Test
     public void testTextBasedMatches() throws Exception {
         String[] mutationTexts = { "Ala64 to Gly", "Alanine64 to Glycine", "Ala64 to Glycine", "alanine64 to Gly",
                 "The Ala64 to Gly substitution", "The Ala64 to glycine substitution", "The Ala64 to Gly substitution" };
@@ -490,6 +510,7 @@ public void testTextBasedMatches() throws Exception {
      * 
      * @throws Exception
      */
+    @Test
     public void testTextMatchSpacing() throws Exception {
         Map<Mutation, Set<int[]>> mutations = mf.extractMutations("TheAla40toGlymutation");
         assertEquals(0, mutations.size());
diff --git a/jcore-mutationfinder-ae/src/test/java/edu/uchsc/ccp/nlp/ei/mutation/MutationTest.java b/jcore-mutationfinder-ae/src/test/java/edu/uchsc/ccp/nlp/ei/mutation/MutationTest.java
index 671baf314..465898ff9 100644
--- a/jcore-mutationfinder-ae/src/test/java/edu/uchsc/ccp/nlp/ei/mutation/MutationTest.java
+++ b/jcore-mutationfinder-ae/src/test/java/edu/uchsc/ccp/nlp/ei/mutation/MutationTest.java
@@ -1,19 +1,23 @@
 package edu.uchsc.ccp.nlp.ei.mutation;
 
-import junit.framework.TestCase;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.fail;
 
 /*
  * Copyright (c) 2007 Regents of the University of Colorado
  * Please refer to the licensing agreement at MUTATIONFINDER_HOME/doc/license.txt
  */
 
-public class MutationTest extends TestCase {
+public class MutationTest  {
 
     /**
      * Test the the constructor works for input of both int's and String's
      * 
      * @throws Exception
      */
+    @Test
     public void testConstructor() throws Exception {
         Mutation m = new Mutation(42);
         assertEquals(42, m.getPosition());
@@ -32,6 +36,7 @@ public void testConstructor() throws Exception {
      * 
      * @throws Exception
      */
+    @Test
     public void testUnsupportedMethods() throws Exception {
         Mutation m = new Mutation(42);
         try {
diff --git a/jcore-mutationfinder-ae/src/test/java/edu/uchsc/ccp/nlp/ei/mutation/PointMutationTest.java b/jcore-mutationfinder-ae/src/test/java/edu/uchsc/ccp/nlp/ei/mutation/PointMutationTest.java
index ec5704846..73bb0df0b 100644
--- a/jcore-mutationfinder-ae/src/test/java/edu/uchsc/ccp/nlp/ei/mutation/PointMutationTest.java
+++ b/jcore-mutationfinder-ae/src/test/java/edu/uchsc/ccp/nlp/ei/mutation/PointMutationTest.java
@@ -1,23 +1,26 @@
 package edu.uchsc.ccp.nlp.ei.mutation;
 
-import junit.framework.TestCase;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
 
 import java.util.HashMap;
 import java.util.Map;
 import java.util.Set;
 
+import static org.junit.jupiter.api.Assertions.*;
+
 /*
  * Copyright (c) 2007 Regents of the University of Colorado
  * Please refer to the licensing agreement at MUTATIONFINDER_HOME/doc/license.txt
  */
 
-public class PointMutationTest extends TestCase {
-    private PointMutation pointMutation;
+public class PointMutationTest  {
+    private static PointMutation pointMutation;
 
-    private Map<String, String> aminoAcidCodeLookup;
+    private static Map<String, String> aminoAcidCodeLookup;
 
-    @Override
-    protected void setUp() throws Exception {
+    @BeforeAll
+    protected static void setUp() throws Exception {
         pointMutation = new PointMutation(42, "W", "G");
 
         aminoAcidCodeLookup = new HashMap<String, String>();
@@ -84,7 +87,6 @@ protected void setUp() throws Exception {
         aminoAcidCodeLookup.put("D", "D");
         aminoAcidCodeLookup.put("E", "E");
 
-        super.setUp();
     }
 
     /**
@@ -92,6 +94,7 @@ protected void setUp() throws Exception {
      * 
      * @throws Exception
      */
+    @Test
     public void testConstructor() throws Exception {
         PointMutation pm = new PointMutation(42, "A", "C");
         assertEquals(42, pm.getPosition());
@@ -125,6 +128,7 @@ public void testConstructor() throws Exception {
      * 
      * @throws Exception
      */
+    @Test
     public void testHashcode() throws Exception {
         PointMutation pm = new PointMutation(42, "W", "G");
         assertEquals((pm.getClass().getName() + pm.toString()).hashCode(), pm.hashCode());
@@ -135,6 +139,7 @@ public void testHashcode() throws Exception {
      * 
      * @throws Exception
      */
+    @Test
     public void testInvalidInit() throws Exception {
         PointMutation pm;
         try {
@@ -178,6 +183,7 @@ public void testInvalidInit() throws Exception {
      * 
      * @throws Exception
      */
+    @Test
     public void testEquals() throws Exception {
         PointMutation pm = new PointMutation(42, "W", "G");
         assertTrue(pointMutation.equals(pm));
@@ -200,6 +206,7 @@ public void testEquals() throws Exception {
      * 
      * @throws Exception
      */
+    @Test
     public void testNormalizationOfResidue() throws Exception {
         Set<String> residuesToNormalize = aminoAcidCodeLookup.keySet();
         for (String residue : residuesToNormalize) {
@@ -212,6 +219,7 @@ public void testNormalizationOfResidue() throws Exception {
      * 
      * @throws Exception
      */
+    @Test
     public void testNormalizationOfInvalidResidue() throws Exception {
         try {
             pointMutation.normalizeResidueIdentity("");
@@ -260,6 +268,7 @@ public void testNormalizationOfInvalidResidue() throws Exception {
      * Test the static method which enables creation of a PointMutation object from a String in the wNm format
      * @throws Exception
      */
+    @Test
     public void testCreateNewPointMutationFrom_wNm() throws Exception {
         PointMutation pm = PointMutation.createPointMutationFrom_wNm("W42G");
         assertEquals(pointMutation, pm);
diff --git a/jcore-neo4j-relations-consumer/pom.xml b/jcore-neo4j-relations-consumer/pom.xml
index 92fc5f29b..dca3293f1 100644
--- a/jcore-neo4j-relations-consumer/pom.xml
+++ b/jcore-neo4j-relations-consumer/pom.xml
@@ -64,6 +64,12 @@
             <groupId>de.julielab</groupId>
             <artifactId>jcore-descriptor-creator</artifactId>
         </dependency>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>4.13.1</version>
+            <scope>test</scope>
+        </dependency>
     </dependencies>
     <name>JCoRe Neo4j Relations Consumer</name>
     <organization>
diff --git a/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerIntegrationTest.java b/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerIntegrationTest.java
index 9a1fc1905..174a19537 100644
--- a/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerIntegrationTest.java
+++ b/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerIntegrationTest.java
@@ -32,7 +32,7 @@
 import static de.julielab.neo4j.plugins.constants.semedico.SemanticRelationConstants.PROP_DOC_IDS;
 import static de.julielab.neo4j.plugins.datarepresentation.constants.ConceptConstants.PROP_SRC_IDS;
 import static org.assertj.core.api.Assertions.assertThat;
-import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.neo4j.configuration.GraphDatabaseSettings.DEFAULT_DATABASE_NAME;
 
 /**
diff --git a/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerTest.java b/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerTest.java
index 28ba51f74..6e242d25d 100644
--- a/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerTest.java
+++ b/jcore-neo4j-relations-consumer/src/test/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumerTest.java
@@ -14,7 +14,7 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.fit.factory.UimaContextFactory;
 import org.apache.uima.jcas.JCas;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.lang.reflect.Method;
 import java.util.List;
diff --git a/jcore-opennlp-chunk-ae/pom.xml b/jcore-opennlp-chunk-ae/pom.xml
index 1ec6f1917..c89174100 100644
--- a/jcore-opennlp-chunk-ae/pom.xml
+++ b/jcore-opennlp-chunk-ae/pom.xml
@@ -53,8 +53,8 @@
             <artifactId>julielab-java-utilities</artifactId>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <organization>
diff --git a/jcore-opennlp-chunk-ae/src/test/java/de/julielab/jcore/ae/opennlp/chunk/ChunkAnnotatorTest.java b/jcore-opennlp-chunk-ae/src/test/java/de/julielab/jcore/ae/opennlp/chunk/ChunkAnnotatorTest.java
index b87f1ab61..08be7f7ab 100644
--- a/jcore-opennlp-chunk-ae/src/test/java/de/julielab/jcore/ae/opennlp/chunk/ChunkAnnotatorTest.java
+++ b/jcore-opennlp-chunk-ae/src/test/java/de/julielab/jcore/ae/opennlp/chunk/ChunkAnnotatorTest.java
@@ -21,7 +21,6 @@
 import de.julielab.jcore.types.PennBioIEPOSTag;
 import de.julielab.jcore.types.Sentence;
 import de.julielab.jcore.types.Token;
-import junit.framework.TestCase;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
@@ -33,6 +32,7 @@
 import org.apache.uima.resource.ResourceInitializationException;
 import org.apache.uima.resource.ResourceSpecifier;
 import org.apache.uima.util.XMLInputSource;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -41,13 +41,13 @@
 import java.util.function.BiConsumer;
 import java.util.stream.Collectors;
 
-public class ChunkAnnotatorTest extends TestCase {
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class ChunkAnnotatorTest {
 
 	private static final Logger LOGGER = LoggerFactory.getLogger(ChunkAnnotatorTest.class);
 
-	protected void setUp() throws Exception {
-		super.setUp();
-	}
 
 	String text = "A study on the Prethcamide hydroxylation system in rat hepatic microsomes .";
 
@@ -84,6 +84,7 @@ private void initCas(JCas jcas) {
 		}
 	}
 
+	@Test
 	public void testProcess() {
 
 		XMLInputSource chunkerXML = null;
@@ -134,7 +135,7 @@ public void testProcess() {
 		assertEquals(chunks, predictedChunks);
 
 	}
-
+	@Test
 	public void testProcessWithDefaultMappings() {
 
 		XMLInputSource chunkerXML = null;
@@ -185,7 +186,7 @@ public void testProcessWithDefaultMappings() {
 		assertEquals(chunks, predictedChunks);
 
 	}
-
+	@Test
 	public void testPunctuation() throws Exception {
 		JCas jcas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types");
 		AnalysisEngine chunker = AnalysisEngineFactory.createEngine("ChunkAnnotatorTest");
diff --git a/jcore-opennlp-parser-ae/pom.xml b/jcore-opennlp-parser-ae/pom.xml
index 5a2ef3229..07da362a9 100644
--- a/jcore-opennlp-parser-ae/pom.xml
+++ b/jcore-opennlp-parser-ae/pom.xml
@@ -102,8 +102,8 @@
             <version>1.6.0</version>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <organization>
diff --git a/jcore-opennlp-parser-ae/src/test/java/de/julielab/jcore/ae/opennlpparser/main/ParseAnnotatorTest.java b/jcore-opennlp-parser-ae/src/test/java/de/julielab/jcore/ae/opennlpparser/main/ParseAnnotatorTest.java
index 0f0cd1315..6955ce7c3 100644
--- a/jcore-opennlp-parser-ae/src/test/java/de/julielab/jcore/ae/opennlpparser/main/ParseAnnotatorTest.java
+++ b/jcore-opennlp-parser-ae/src/test/java/de/julielab/jcore/ae/opennlpparser/main/ParseAnnotatorTest.java
@@ -18,7 +18,6 @@
 package de.julielab.jcore.ae.opennlpparser.main;
 
 import de.julielab.jcore.types.*;
-import junit.framework.TestCase;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.jcas.JCas;
@@ -26,23 +25,19 @@
 import org.apache.uima.resource.ResourceInitializationException;
 import org.apache.uima.resource.ResourceSpecifier;
 import org.apache.uima.util.XMLInputSource;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.util.Iterator;
 
-public class ParseAnnotatorTest extends TestCase {
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class ParseAnnotatorTest  {
     private static final Logger LOGGER = LoggerFactory.getLogger(ParseAnnotatorTest.class);
 
     private static final String LOGGER_PROPERTIES = "src/test/java/log4j.properties";
 
-    @Override
-    protected void setUp() throws Exception {
-        super.setUp();
-        // set log4j properties file
-        // PropertyConfigurator.configure(LOGGER_PROPERTIES);
-    }
-
     String text = "A study on the Prethcamide hydroxylation system in rat hepatic microsomes .";
 
     String wantedCons = "NP NP PP NP NP PP NP ";
@@ -68,6 +63,7 @@ public void initCas(JCas jcas) {
         }
     }
 
+    @Test
     public void testProcess() {
 
         boolean annotationsOK = true;
diff --git a/jcore-opennlp-postag-ae/pom.xml b/jcore-opennlp-postag-ae/pom.xml
index cadd08079..77ca254eb 100644
--- a/jcore-opennlp-postag-ae/pom.xml
+++ b/jcore-opennlp-postag-ae/pom.xml
@@ -76,8 +76,8 @@
             <scope>provided</scope>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <name>JCoRe OpenNLP POS Tagger</name>
diff --git a/jcore-opennlp-postag-ae/src/test/java/de/julielab/jcore/ae/opennlp/postag/PosTagAnnotatorTest.java b/jcore-opennlp-postag-ae/src/test/java/de/julielab/jcore/ae/opennlp/postag/PosTagAnnotatorTest.java
index d2db4293f..ebdeb2c5b 100644
--- a/jcore-opennlp-postag-ae/src/test/java/de/julielab/jcore/ae/opennlp/postag/PosTagAnnotatorTest.java
+++ b/jcore-opennlp-postag-ae/src/test/java/de/julielab/jcore/ae/opennlp/postag/PosTagAnnotatorTest.java
@@ -33,15 +33,15 @@
 import org.apache.uima.jcas.cas.FSArray;
 import org.apache.uima.resource.ResourceSpecifier;
 import org.apache.uima.util.XMLInputSource;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.util.Collection;
 import java.util.Iterator;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class PosTagAnnotatorTest {
 
diff --git a/jcore-opennlp-postag-ae/src/test/java/de/julielab/jcore/ae/opennlp/postag/PosTagDictCreatorTest.java b/jcore-opennlp-postag-ae/src/test/java/de/julielab/jcore/ae/opennlp/postag/PosTagDictCreatorTest.java
index 22dd88ad2..d7b8f6742 100644
--- a/jcore-opennlp-postag-ae/src/test/java/de/julielab/jcore/ae/opennlp/postag/PosTagDictCreatorTest.java
+++ b/jcore-opennlp-postag-ae/src/test/java/de/julielab/jcore/ae/opennlp/postag/PosTagDictCreatorTest.java
@@ -13,7 +13,7 @@
 import opennlp.tools.postag.POSDictionary;
 import opennlp.tools.postag.POSSample;
 import org.apache.commons.io.FileUtils;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.File;
 import java.io.IOException;
@@ -23,8 +23,8 @@
 import java.util.List;
 import java.util.Set;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class PosTagDictCreatorTest {
 	@Test
diff --git a/jcore-opennlp-sentence-ae/pom.xml b/jcore-opennlp-sentence-ae/pom.xml
index c1c0c2b03..d1c2cd2c3 100644
--- a/jcore-opennlp-sentence-ae/pom.xml
+++ b/jcore-opennlp-sentence-ae/pom.xml
@@ -41,8 +41,8 @@
             <artifactId>slf4j-api</artifactId>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <name>JCoRe OpenNLP Sentence Splitter</name>
diff --git a/jcore-opennlp-sentence-ae/src/test/java/de/julielab/jcore/ae/jsentsplit/SentenceAnnotatorTest.java b/jcore-opennlp-sentence-ae/src/test/java/de/julielab/jcore/ae/jsentsplit/SentenceAnnotatorTest.java
index 3dcbbef41..6aacdf297 100644
--- a/jcore-opennlp-sentence-ae/src/test/java/de/julielab/jcore/ae/jsentsplit/SentenceAnnotatorTest.java
+++ b/jcore-opennlp-sentence-ae/src/test/java/de/julielab/jcore/ae/jsentsplit/SentenceAnnotatorTest.java
@@ -18,7 +18,6 @@
 package de.julielab.jcore.ae.jsentsplit;
 
 import de.julielab.jcore.types.Sentence;
-import junit.framework.TestCase;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.jcas.JCas;
@@ -26,12 +25,15 @@
 import org.apache.uima.resource.ResourceInitializationException;
 import org.apache.uima.resource.ResourceSpecifier;
 import org.apache.uima.util.XMLInputSource;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.util.Iterator;
 
-public class SentenceAnnotatorTest extends TestCase {
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class SentenceAnnotatorTest  {
 
 	/**
 	 * Logger for this class
@@ -43,10 +45,7 @@ public class SentenceAnnotatorTest extends TestCase {
 
 	String offsets = "0-15;16-32;";
 
-	protected void setUp() throws Exception {
-		super.setUp();
-	}
-	
+	@Test
 	public void testProcess() {
 
 		XMLInputSource sentenceXML = null;
diff --git a/jcore-opennlp-token-ae/pom.xml b/jcore-opennlp-token-ae/pom.xml
index 306972531..419b52446 100644
--- a/jcore-opennlp-token-ae/pom.xml
+++ b/jcore-opennlp-token-ae/pom.xml
@@ -40,8 +40,8 @@
             <scope>test</scope>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <name>JCoRe OpenNLP Tokenizer</name>
diff --git a/jcore-opennlp-token-ae/src/test/java/de/julielab/jcore/ae/opennlp/token/TokenAnnotatorTest.java b/jcore-opennlp-token-ae/src/test/java/de/julielab/jcore/ae/opennlp/token/TokenAnnotatorTest.java
index 04ab72c43..f42582429 100644
--- a/jcore-opennlp-token-ae/src/test/java/de/julielab/jcore/ae/opennlp/token/TokenAnnotatorTest.java
+++ b/jcore-opennlp-token-ae/src/test/java/de/julielab/jcore/ae/opennlp/token/TokenAnnotatorTest.java
@@ -19,7 +19,6 @@
 
 import de.julielab.jcore.types.Sentence;
 import de.julielab.jcore.types.Token;
-import junit.framework.TestCase;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.jcas.JCas;
@@ -32,7 +31,9 @@
 
 import java.util.Iterator;
 
-public class TokenAnnotatorTest extends TestCase {
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class TokenAnnotatorTest  {
 
 	private static final Logger LOGGER = LoggerFactory
 			.getLogger(TokenAnnotatorTest.class);
diff --git a/jcore-pmc-reader/pom.xml b/jcore-pmc-reader/pom.xml
index 976a1b456..b0eee7fe3 100644
--- a/jcore-pmc-reader/pom.xml
+++ b/jcore-pmc-reader/pom.xml
@@ -154,8 +154,8 @@
             <artifactId>jcore-descriptor-creator</artifactId>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <organization>
diff --git a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/NXMLURIIteratorTest.java b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/NXMLURIIteratorTest.java
index 8c328c2ac..f1e6bd11c 100644
--- a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/NXMLURIIteratorTest.java
+++ b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/NXMLURIIteratorTest.java
@@ -1,6 +1,6 @@
 package de.julielab.jcore.reader.pmc;
 
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.File;
 import java.io.FileNotFoundException;
@@ -15,7 +15,7 @@
 import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.assertj.core.api.Assertions.assertThatCode;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class NXMLURIIteratorTest {
     @Test
diff --git a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCMultiplierTest.java b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCMultiplierTest.java
index 8a8527930..27339365b 100644
--- a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCMultiplierTest.java
+++ b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCMultiplierTest.java
@@ -11,7 +11,7 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.IOException;
 import java.util.HashSet;
diff --git a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCReaderTest.java b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCReaderTest.java
index 9d5d91007..7d5547754 100644
--- a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCReaderTest.java
+++ b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCReaderTest.java
@@ -19,7 +19,7 @@
 import org.apache.uima.fit.util.CasUtil;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.util.List;
 import java.util.*;
@@ -28,7 +28,7 @@
 
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.assertj.core.api.Assertions.assertThatCode;
-import static org.junit.Assert.*;
+import static org.junit.jupiter.api.Assertions.*;
 
 public class PMCReaderTest {
     @Test
@@ -288,8 +288,8 @@ public void testKeywords() throws Exception {
         Set<String> expectedKeywords = new HashSet<>(Arrays.asList("Baltic Sea Action Plan (BSAP)", "Costs", "Review",
                 "Eutrophication", "Hazardous substances"));
         IntStream.range(0, md.getKeywordList().size())
-                .forEach(i -> assertTrue("The keyword \"" + md.getKeywordList(i).getName() + "\" was not expected",
-                        expectedKeywords.remove(md.getKeywordList(i).getName())));
+                .forEach(i -> assertTrue(expectedKeywords.remove(md.getKeywordList(i).getName()),
+                        "The keyword \"" + md.getKeywordList(i).getName() + "\" was not expected"));
         assertTrue(expectedKeywords.isEmpty());
     }
 
diff --git a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/parser/ContribGroupParserTest.java b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/parser/ContribGroupParserTest.java
index 911500480..324a653dc 100644
--- a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/parser/ContribGroupParserTest.java
+++ b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/parser/ContribGroupParserTest.java
@@ -12,12 +12,12 @@
 
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.File;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
 
 public class ContribGroupParserTest {
 	@Test
diff --git a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/parser/ContribParserTest.java b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/parser/ContribParserTest.java
index 667e85812..fc3f81489 100644
--- a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/parser/ContribParserTest.java
+++ b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/parser/ContribParserTest.java
@@ -13,11 +13,11 @@
 import de.julielab.jcore.types.AuthorInfo;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.File;
 
-import static org.junit.Assert.*;
+import static org.junit.jupiter.api.Assertions.*;
 
 public class ContribParserTest {
 	@Test
diff --git a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/parser/FrontParserTest.java b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/parser/FrontParserTest.java
index c5ac41078..a3ba75ae7 100644
--- a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/parser/FrontParserTest.java
+++ b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/parser/FrontParserTest.java
@@ -16,11 +16,11 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.File;
 
-import static org.junit.Assert.*;
+import static org.junit.jupiter.api.Assertions.*;
 
 public class FrontParserTest {
 	@Test
diff --git a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/parser/NxmlElementParserTest.java b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/parser/NxmlElementParserTest.java
index 256ac33a0..de3fca292 100644
--- a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/parser/NxmlElementParserTest.java
+++ b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/parser/NxmlElementParserTest.java
@@ -12,11 +12,11 @@
 
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.File;
 
-import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 
 public class NxmlElementParserTest {
 	@Test
diff --git a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/parser/SectionParserTest.java b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/parser/SectionParserTest.java
index 8d2baf7fb..136420616 100644
--- a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/parser/SectionParserTest.java
+++ b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/parser/SectionParserTest.java
@@ -15,7 +15,7 @@
 import org.apache.commons.io.IOUtils;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -23,7 +23,7 @@
 import java.io.FileInputStream;
 import java.util.zip.GZIPInputStream;
 
-import static org.junit.Assert.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
 
 public class SectionParserTest {
 
diff --git a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/parser/XRefParserTest.java b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/parser/XRefParserTest.java
index 46c79e5fb..72d94b03b 100644
--- a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/parser/XRefParserTest.java
+++ b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/parser/XRefParserTest.java
@@ -6,12 +6,12 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.File;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
 
 public class XRefParserTest {
 	@Test
diff --git a/jcore-ppd-writer/pom.xml b/jcore-ppd-writer/pom.xml
index 8e409735b..c5dc78e43 100644
--- a/jcore-ppd-writer/pom.xml
+++ b/jcore-ppd-writer/pom.xml
@@ -42,8 +42,8 @@
             <scope>test</scope>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <name>JCoRe Piped Format Writer</name>
diff --git a/jcore-ppd-writer/src/test/java/de/julielab/jcore/consumer/ppd/PPDWriterTest.java b/jcore-ppd-writer/src/test/java/de/julielab/jcore/consumer/ppd/PPDWriterTest.java
index 0327f1b26..0603851fc 100644
--- a/jcore-ppd-writer/src/test/java/de/julielab/jcore/consumer/ppd/PPDWriterTest.java
+++ b/jcore-ppd-writer/src/test/java/de/julielab/jcore/consumer/ppd/PPDWriterTest.java
@@ -7,13 +7,13 @@
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.File;
 import java.io.FileInputStream;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 
 /**
diff --git a/jcore-pubtator-reader/pom.xml b/jcore-pubtator-reader/pom.xml
index 84661f424..3440bb1fc 100644
--- a/jcore-pubtator-reader/pom.xml
+++ b/jcore-pubtator-reader/pom.xml
@@ -41,8 +41,8 @@
             <scope>test</scope>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <organization>
diff --git a/jcore-pubtator-reader/src/test/java/de/julielab/jcore/reader/pubtator/PubtatorReaderTest.java b/jcore-pubtator-reader/src/test/java/de/julielab/jcore/reader/pubtator/PubtatorReaderTest.java
index afece0a59..9ad0c4efc 100644
--- a/jcore-pubtator-reader/src/test/java/de/julielab/jcore/reader/pubtator/PubtatorReaderTest.java
+++ b/jcore-pubtator-reader/src/test/java/de/julielab/jcore/reader/pubtator/PubtatorReaderTest.java
@@ -16,14 +16,14 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.HashSet;
 import java.util.Set;
 
-import static org.junit.Assert.*;
+import static org.junit.jupiter.api.Assertions.*;
 
 public class PubtatorReaderTest {
 	@Test
@@ -73,6 +73,6 @@ public void testDocumentDirectory() throws Exception {
 			}
 			jcas.reset();
 		}
-		assertTrue("The following IDs have not been read: " + expectedDocIds, expectedDocIds.isEmpty());
+		assertTrue(expectedDocIds.isEmpty(), "The following IDs have not been read: " + expectedDocIds);
 	}
 }
diff --git a/jcore-stanford-lemmatizer-ae/pom.xml b/jcore-stanford-lemmatizer-ae/pom.xml
index 39eda0c8b..33da8f8c2 100644
--- a/jcore-stanford-lemmatizer-ae/pom.xml
+++ b/jcore-stanford-lemmatizer-ae/pom.xml
@@ -37,8 +37,8 @@
             <scope>test</scope>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <organization>
diff --git a/jcore-stanford-lemmatizer-ae/src/test/java/de/julielab/jcore/ae/stanford/lemma/StanfordLemmatizerTest.java b/jcore-stanford-lemmatizer-ae/src/test/java/de/julielab/jcore/ae/stanford/lemma/StanfordLemmatizerTest.java
index 3e8b94fc2..ca0e0138b 100644
--- a/jcore-stanford-lemmatizer-ae/src/test/java/de/julielab/jcore/ae/stanford/lemma/StanfordLemmatizerTest.java
+++ b/jcore-stanford-lemmatizer-ae/src/test/java/de/julielab/jcore/ae/stanford/lemma/StanfordLemmatizerTest.java
@@ -13,7 +13,6 @@
 import de.julielab.jcore.types.PennBioIEPOSTag;
 import de.julielab.jcore.types.Sentence;
 import de.julielab.jcore.types.Token;
-import junit.framework.TestCase;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.jcas.JCas;
@@ -22,12 +21,15 @@
 import org.apache.uima.resource.ResourceInitializationException;
 import org.apache.uima.resource.ResourceSpecifier;
 import org.apache.uima.util.XMLInputSource;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.util.Iterator;
 
-public class StanfordLemmatizerTest extends TestCase {
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+public class StanfordLemmatizerTest  {
 
 	private static final Logger LOGGER = LoggerFactory
 			.getLogger(StanfordLemmatizerTest.class);
@@ -66,6 +68,7 @@ public void initCas(JCas aJCas) {
 	}
 
 	@SuppressWarnings("rawtypes")
+	@Test
 	public void testProcess() {
 
 		XMLInputSource lemmaXML = null;
diff --git a/jcore-topic-indexing-ae/pom.xml b/jcore-topic-indexing-ae/pom.xml
index b378f818d..be99da6f5 100644
--- a/jcore-topic-indexing-ae/pom.xml
+++ b/jcore-topic-indexing-ae/pom.xml
@@ -96,8 +96,8 @@
             </exclusions>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <name>JCoRe-Topic-Labeling-AE</name>
diff --git a/jcore-topic-indexing-ae/src/test/java/de/julielab/jcore/ae/TopicIndexingTest.java b/jcore-topic-indexing-ae/src/test/java/de/julielab/jcore/ae/TopicIndexingTest.java
index f42a8368b..756bad437 100644
--- a/jcore-topic-indexing-ae/src/test/java/de/julielab/jcore/ae/TopicIndexingTest.java
+++ b/jcore-topic-indexing-ae/src/test/java/de/julielab/jcore/ae/TopicIndexingTest.java
@@ -21,7 +21,7 @@
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.StringArray;
 import org.apache.uima.jcas.tcas.Annotation;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.File;
 import java.io.IOException;
@@ -30,8 +30,8 @@
 import java.util.List;
 
 import static org.assertj.core.api.Assertions.assertThat;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 /**
  * Unit tests for jcore-topic-labeling-ae.
diff --git a/jcore-topics-writer/pom.xml b/jcore-topics-writer/pom.xml
index 19752ec2e..644c48f2b 100644
--- a/jcore-topics-writer/pom.xml
+++ b/jcore-topics-writer/pom.xml
@@ -42,8 +42,8 @@
             <scope>test</scope>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <name>JCoRe Topics Writer</name>
diff --git a/jcore-topics-writer/src/test/java/de/julielab/jcore/consumer/topics/TopicsWriterTest.java b/jcore-topics-writer/src/test/java/de/julielab/jcore/consumer/topics/TopicsWriterTest.java
index 4db31fa61..693ce2c58 100644
--- a/jcore-topics-writer/src/test/java/de/julielab/jcore/consumer/topics/TopicsWriterTest.java
+++ b/jcore-topics-writer/src/test/java/de/julielab/jcore/consumer/topics/TopicsWriterTest.java
@@ -9,9 +9,9 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.DoubleArray;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
 
 import java.io.File;
 import java.io.IOException;
@@ -26,8 +26,8 @@
  */
 public class TopicsWriterTest {
 
-    @BeforeClass
-    @AfterClass
+    @BeforeAll
+    @AfterAll
     public static void setup() {
         FileUtils.deleteQuietly(new File("src/test/resources/output"));
     }
diff --git a/jcore-txt-consumer/pom.xml b/jcore-txt-consumer/pom.xml
index 07b878cab..3c4fdb273 100644
--- a/jcore-txt-consumer/pom.xml
+++ b/jcore-txt-consumer/pom.xml
@@ -44,8 +44,8 @@
             <scope>test</scope>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <licenses>
diff --git a/jcore-txt-consumer/src/test/java/de/julielab/jcore/consumer/txt/SentenceTokenConsumerTest.java b/jcore-txt-consumer/src/test/java/de/julielab/jcore/consumer/txt/SentenceTokenConsumerTest.java
index 82b76eef9..29197eac6 100644
--- a/jcore-txt-consumer/src/test/java/de/julielab/jcore/consumer/txt/SentenceTokenConsumerTest.java
+++ b/jcore-txt-consumer/src/test/java/de/julielab/jcore/consumer/txt/SentenceTokenConsumerTest.java
@@ -20,7 +20,7 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.FSArray;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.*;
 import java.nio.charset.StandardCharsets;
@@ -32,7 +32,7 @@
 
 import static de.julielab.jcore.consumer.txt.SentenceTokenConsumer.*;
 import static org.assertj.core.api.Assertions.assertThat;
-import static org.junit.Assert.*;
+import static org.junit.jupiter.api.Assertions.*;
 public class SentenceTokenConsumerTest {
 	/**
 	 * just tests if there is an error with an empty CAS
diff --git a/jcore-utilities/pom.xml b/jcore-utilities/pom.xml
index aafbe74fe..5a6ad681f 100644
--- a/jcore-utilities/pom.xml
+++ b/jcore-utilities/pom.xml
@@ -19,8 +19,8 @@
 			<artifactId>slf4j-api</artifactId>
 		</dependency>
 		<dependency>
-			<groupId>junit</groupId>
-			<artifactId>junit</artifactId>
+			<groupId>org.junit.jupiter</groupId>
+			<artifactId>junit-jupiter-engine</artifactId>
 		</dependency>
 		<dependency>
 			<groupId>org.apache.commons</groupId>
diff --git a/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReAnnotationToolsTest.java b/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReAnnotationToolsTest.java
index 42a374b7d..9a101452b 100644
--- a/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReAnnotationToolsTest.java
+++ b/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReAnnotationToolsTest.java
@@ -18,7 +18,6 @@
 package de.julielab.jcore.utility;
 
 import de.julielab.jcore.types.*;
-import junit.framework.TestCase;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.cas.CAS;
 import org.apache.uima.cas.FSIterator;
@@ -28,27 +27,30 @@
 import org.apache.uima.util.CasCreationUtils;
 import org.apache.uima.util.XMLInputSource;
 import org.apache.uima.util.XmlCasDeserializer;
-import org.junit.Test;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.FileInputStream;
-import java.lang.reflect.InvocationTargetException;
 import java.util.List;
 
+import static org.junit.jupiter.api.Assertions.*;
+
 // import de.julielab.jcore.types.Annotation;
 
-public class JCoReAnnotationToolsTest extends TestCase {
+public class JCoReAnnotationToolsTest {
 
 	/**
 	 * Logger for this class
 	 */
 	private static final Logger LOG = LoggerFactory.getLogger(JCoReAnnotationToolsTest.class);
 
-	JCas jcas;
-	public final String DESC_TEST_ANALYSIS_ENGINE = "src/test/resources/AETestDescriptor.xml";
+	static JCas jcas;
+	public final static String DESC_TEST_ANALYSIS_ENGINE = "src/test/resources/AETestDescriptor.xml";
 
-	protected void setUp() throws Exception {
+	@BeforeAll
+	protected static void setUp() throws Exception {
 
 		// get a CAS/JCas
 		CAS cas = CasCreationUtils.createCas(UIMAFramework.getXMLParser().parseAnalysisEngineDescription(
@@ -78,9 +80,8 @@ protected void setUp() throws Exception {
 		e4.addToIndexes();
 	}
 
-	// TODO only Exception werfen
-	public void testGetAnnotationAtOffset() throws SecurityException, IllegalArgumentException, ClassNotFoundException,
-			NoSuchMethodException, InstantiationException, IllegalAccessException, InvocationTargetException {
+	@Test
+	public void testGetAnnotationAtOffset() throws Exception {
 
 		LOG.debug("testGetAnnotationAtOffset() - testing getAnnotationAtOffset(..)");
 		Annotation entity = new Annotation(jcas);
@@ -94,10 +95,8 @@ public void testGetAnnotationAtOffset() throws SecurityException, IllegalArgumen
 		assertTrue(anno == null);
 	}
 
-	// TODO only Exception werfen
-	public void testGetOverlappingAnnotation() throws SecurityException, IllegalArgumentException,
-			ClassNotFoundException, NoSuchMethodException, InstantiationException, IllegalAccessException,
-			InvocationTargetException {
+	@Test
+	public void testGetOverlappingAnnotation() throws Exception {
 
 		LOG.debug("testGetOverlappingAnnotation() - testing getOverlappingAnnotation(..)");
 		Annotation entity = new Annotation(jcas);
@@ -119,10 +118,8 @@ public void testGetOverlappingAnnotation() throws SecurityException, IllegalArgu
 		assertTrue((anno != null) && (anno instanceof Annotation));
 	}
 
-	// TODO only Exception werfen
-	public void testGetAnnotationByClassName() throws SecurityException, IllegalArgumentException,
-			ClassNotFoundException, NoSuchMethodException, InstantiationException, IllegalAccessException,
-			InvocationTargetException {
+	@Test
+	public void testGetAnnotationByClassName() throws Exception {
 
 		LOG.debug("testGetAnnotationByClassName() - testing getAnnotationObject(..)");
 		Annotation entity = new Annotation(jcas);
@@ -130,6 +127,7 @@ public void testGetAnnotationByClassName() throws SecurityException, IllegalArgu
 		assertTrue(anno instanceof Annotation);
 	}
 
+	@Test
 	public void testGetPartiallyOverlappingAnnotationOtherType() throws Exception {
 		JCas jcas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-all-types");
 		jcas.setDocumentText("wort");
@@ -242,7 +240,7 @@ public void testIncludedAnnotations() throws Exception {
 
 		List<Token> includedAnnotations = JCoReAnnotationTools.getIncludedAnnotations(jcas, em, Token.class);
 
-		assertEquals("Wrong amount of included tokens returned", 4, includedAnnotations.size());
+		assertEquals(4, includedAnnotations.size(), "Wrong amount of included tokens returned");
 
 		for (int i = 0; i < includedAnnotations.size(); i++) {
 			Token includedToken = includedAnnotations.get(i);
diff --git a/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReCondensedDocumentTextTest.java b/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReCondensedDocumentTextTest.java
index 445b234e3..12672e122 100644
--- a/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReCondensedDocumentTextTest.java
+++ b/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReCondensedDocumentTextTest.java
@@ -3,12 +3,12 @@
 import de.julielab.jcore.types.InternalReference;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.util.Arrays;
 import java.util.HashSet;
 
-import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 
 public class JCoReCondensedDocumentTextTest {
 	@Test
diff --git a/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReFSListIteratorTest.java b/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReFSListIteratorTest.java
index bb11d9beb..1ebca1c68 100644
--- a/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReFSListIteratorTest.java
+++ b/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReFSListIteratorTest.java
@@ -13,14 +13,14 @@
 import de.julielab.jcore.types.Token;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.NoSuchElementException;
 
-import static org.junit.Assert.*;
+import static org.junit.jupiter.api.Assertions.*;
 
 
 public class JCoReFSListIteratorTest {
diff --git a/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReFeaturePathTest.java b/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReFeaturePathTest.java
index 8983aa858..bc01ec660 100644
--- a/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReFeaturePathTest.java
+++ b/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReFeaturePathTest.java
@@ -19,14 +19,14 @@
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.FSArray;
 import org.apache.uima.jcas.cas.StringArray;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
-import static org.junit.Assert.*;
+import static org.junit.jupiter.api.Assertions.*;
 
 public class JCoReFeaturePathTest {
 	@Test
@@ -275,12 +275,12 @@ public void testReplacePrimitiveValue() throws Exception {
 
 		assertEquals("originalValue", cm.getTextualRepresentation());
 		assertEquals("originalValue", fp.getValueAsString(cm));
-		assertEquals("replacementValue", fp.getValueAsString(cm, true));
+		assertEquals("replacementValue", fp.getValueAsString(cm, true));
 		assertEquals("replacementValue", fp.getValueAsString(cm));
 		assertEquals("replacementValue", cm.getTextualRepresentation());
 
 		// doing a replacement again should have no effect
-		assertEquals("replacementValue", fp.getValueAsString(cm, true));
+		assertEquals("replacementValue", fp.getValueAsString(cm, true));
 		assertEquals("replacementValue", fp.getValueAsString(cm));
 	}
 
@@ -303,11 +303,11 @@ public void testReplaceNotMappedPrimitiveValue() throws Exception {
 
 		assertEquals("originalValue", cm.getTextualRepresentation());
 		assertEquals("originalValue", fp.getValueAsString(cm));
-		assertEquals("replacementValue", fp.getValueAsString(cm, true));
+		assertEquals("replacementValue", fp.getValueAsString(cm, true));
 
 		assertEquals("unknownValue", cm2.getTextualRepresentation());
 		assertEquals("unknownValue", fp.getValueAsString(cm2));
-		assertEquals("not-mapped", fp.getValueAsString(cm2, true));
+		assertEquals("not-mapped", fp.getValueAsString(cm2, true));
 	}
 
 	@Test
@@ -328,7 +328,7 @@ public void testReplaceNotMappedPrimitiveValueWithNull() throws Exception {
 		assertEquals("unknownValue", cm.getTextualRepresentation());
 		assertEquals("unknownValue", fp.getValueAsString(cm));
 		assertEquals(null, fp.getValueAsString(cm, true));
-		assertNotSame("null", fp.getValueAsString(cm, true));
+		assertNotSame("null", fp.getValueAsString(cm, true));
 	}
 
 	@Test
@@ -350,7 +350,7 @@ public void testReplaceAllArrayElements() throws Exception {
 		fp.initialize("/semanticTypes", replacements);
 
 		assertEquals("entry1, entry2, entry3", fp.getValueAsString(ocm));
-		assertEquals("replacement1, replacement2, replacement3", fp.getValueAsString(ocm, true));
+		assertEquals("replacement1, replacement2, replacement3", fp.getValueAsString(ocm, true));
 	}
 
 	@Test
@@ -368,7 +368,7 @@ public void testReplaceAllArrayElementsFromFile() throws Exception {
 		fp.initialize("/semanticTypes");
 
 		assertEquals("entry1, entry2, entry3", fp.getValueAsString(ocm));
-		assertEquals("replacement1, replacement2, replacement3", fp.getValueAsString(ocm, true));
+		assertEquals("replacement1, replacement2, replacement3", fp.getValueAsString(ocm, true));
 
 	}
 
@@ -391,7 +391,7 @@ public void testReplaceSingleArrayElement() throws Exception {
 		JCoReFeaturePath fp = new JCoReFeaturePath();
 		fp.initialize("/semanticTypes[1]", replacements);
 
-		assertEquals("replacement2", fp.getValueAsString(ocm, true));
+		assertEquals("replacement2", fp.getValueAsString(ocm, true));
 
 		fp.initialize("/semanticTypes");
 		assertEquals("entry1, replacement2, entry3", fp.getValueAsString(ocm));
@@ -423,7 +423,7 @@ public void testReplaceValueOnDeepFeatureStructure() throws Exception {
 		JCoReFeaturePath fp = new JCoReFeaturePath();
 		fp.initialize("/resourceEntryList/entryId", replacements);
 
-		assertEquals("tid1, tid2", fp.getValueAsString(gene, true));
+		assertEquals("tid1, tid2", fp.getValueAsString(gene, true));
 
 	}
 
diff --git a/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReToolsTest.java b/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReToolsTest.java
index 3212f4c77..74619777f 100644
--- a/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReToolsTest.java
+++ b/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReToolsTest.java
@@ -16,13 +16,13 @@
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.FSArray;
 import org.apache.uima.jcas.tcas.Annotation;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.List;
 
-import static org.junit.Assert.*;
+import static org.junit.jupiter.api.Assertions.*;
 
 public class JCoReToolsTest {
 	@Test
@@ -36,7 +36,7 @@ public void testAddCollectionToFSArray1() throws Exception {
 		Annotation newElement = new Annotation(jCas);
 		Collection<Annotation> newElements = Lists.newArrayList(newElement);
 		FSArray joinedArray = JCoReTools.addToFSArray(fsArray, newElements);
-		assertEquals("A new FSArray was instantiated although the old one should have been kept", fsArray, joinedArray);
+		assertEquals( fsArray,  joinedArray, "A new FSArray was instantiated although the old one should have been kept");
 		assertEquals(newElement, joinedArray.get(1));
 	}
 
@@ -54,7 +54,7 @@ public void testAddCollectionToFSArray2() throws Exception {
 		Annotation newElement4 = new Annotation(jCas, 4, 4);
 		Collection<Annotation> newElements = Lists.newArrayList(newElement1, newElement2, newElement3, newElement4);
 		FSArray joinedArray = JCoReTools.addToFSArray(fsArray, newElements);
-		assertNotSame("The old FSArray was returned although a new one should have been created", fsArray, joinedArray);
+		assertNotSame( fsArray,  joinedArray, "The old FSArray was returned although a new one should have been created");
 		assertEquals(newElement1, joinedArray.get(1));
 		assertEquals(newElement2, joinedArray.get(2));
 		assertEquals(newElement3, joinedArray.get(3));
@@ -68,7 +68,7 @@ public void testAddCollectionToFSArray3() throws Exception {
 		Annotation newElement = new Annotation(jCas);
 		Collection<Annotation> newElements = Lists.newArrayList(newElement);
 		FSArray joinedArray = JCoReTools.addToFSArray(fsArray, newElements);
-		assertNotSame("The old FSArray was returned although a new one should have been created", fsArray, joinedArray);
+		assertNotSame( fsArray,  joinedArray, "The old FSArray was returned although a new one should have been created");
 		assertEquals(newElement, joinedArray.get(0));
 	}
 
@@ -79,7 +79,7 @@ public void testAddCollectionToFSArray4() throws Exception {
 		Annotation newElement = new Annotation(jCas);
 		Collection<Annotation> newElements = Lists.newArrayList(newElement);
 		FSArray joinedArray = JCoReTools.addToFSArray(fsArray, newElements);
-		assertEquals("A new FSArray was instantiated although the old one should have been kept", fsArray, joinedArray);
+		assertEquals( fsArray,  joinedArray, "A new FSArray was instantiated although the old one should have been kept");
 		assertEquals(newElement, joinedArray.get(0));
 	}
 
@@ -93,7 +93,7 @@ public void testAddElementToFSArray1() throws Exception {
 		assertNull(fsArray.get(1));
 		Annotation newElement = new Annotation(jCas);
 		FSArray joinedArray = JCoReTools.addToFSArray(fsArray, newElement);
-		assertEquals("A new FSArray was instantiated although the old one should have been kept", fsArray, joinedArray);
+		assertEquals( fsArray,  joinedArray, "A new FSArray was instantiated although the old one should have been kept");
 		assertEquals(newElement, joinedArray.get(1));
 	}
 
@@ -112,23 +112,23 @@ public void testAddElementToFSArray2() throws Exception {
 		List<Annotation> newElements = Lists.newArrayList(newElement1, newElement2, newElement3, newElement4);
 
 		FSArray joinedArray = JCoReTools.addToFSArray(fsArray, newElements.get(0));
-		assertEquals("A new FSArray was instantiated although the old one should have been kept", fsArray, joinedArray);
+		assertEquals( fsArray,  joinedArray, "A new FSArray was instantiated although the old one should have been kept");
 		assertEquals(2, joinedArray.size());
 		assertEquals(newElement1, joinedArray.get(1));
 		fsArray = joinedArray;
 
 		joinedArray = JCoReTools.addToFSArray(fsArray, newElements.get(1));
-		assertNotSame("The old FSArray was returned although a new one should have been created", fsArray, joinedArray);
+		assertNotSame( fsArray,  joinedArray, "The old FSArray was returned although a new one should have been created");
 		assertEquals(newElement2, joinedArray.get(2));
 		fsArray = joinedArray;
 
 		joinedArray = JCoReTools.addToFSArray(fsArray, newElements.get(2));
-		assertEquals("A new FSArray was instantiated although the old one should have been kept", fsArray, joinedArray);
+		assertEquals( fsArray,  joinedArray, "A new FSArray was instantiated although the old one should have been kept");
 		assertEquals(newElement3, joinedArray.get(3));
 		fsArray = joinedArray;
 
 		joinedArray = JCoReTools.addToFSArray(fsArray, newElements.get(3));
-		assertEquals("A new FSArray was instantiated although the old one should have been kept", fsArray, joinedArray);
+		assertEquals( fsArray,  joinedArray, "A new FSArray was instantiated although the old one should have been kept");
 		assertEquals(newElement4, joinedArray.get(4));
 
 	}
diff --git a/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/ComparatorsTest.java b/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/ComparatorsTest.java
index f5720c7c1..5b87d968a 100644
--- a/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/ComparatorsTest.java
+++ b/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/ComparatorsTest.java
@@ -13,10 +13,10 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class ComparatorsTest {
 	@Test
diff --git a/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/JCoReCoverAnnotationIndexTest.java b/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/JCoReCoverAnnotationIndexTest.java
index e81f2cd08..5e2b1d105 100644
--- a/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/JCoReCoverAnnotationIndexTest.java
+++ b/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/JCoReCoverAnnotationIndexTest.java
@@ -14,13 +14,13 @@
 import de.julielab.jcore.types.Token;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.util.Set;
 import java.util.stream.Collectors;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class JCoReCoverAnnotationIndexTest {
 	@Test
diff --git a/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/JCoReMapAnnotationIndexTest.java b/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/JCoReMapAnnotationIndexTest.java
index 4cd521007..088917946 100644
--- a/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/JCoReMapAnnotationIndexTest.java
+++ b/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/JCoReMapAnnotationIndexTest.java
@@ -14,7 +14,7 @@
 import de.julielab.jcore.types.Token;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.util.HashMap;
 import java.util.Set;
@@ -22,7 +22,7 @@
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
-import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 
 public class JCoReMapAnnotationIndexTest {
 	@Test
diff --git a/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/JCoReOverlapAnnotationIndexTest.java b/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/JCoReOverlapAnnotationIndexTest.java
index e2f7a39b2..942f32785 100644
--- a/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/JCoReOverlapAnnotationIndexTest.java
+++ b/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/JCoReOverlapAnnotationIndexTest.java
@@ -13,12 +13,12 @@
 import de.julielab.jcore.types.Token;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.util.List;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class JCoReOverlapAnnotationIndexTest {
 	@Test
diff --git a/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/JCoReSetAnnotationIndexTest.java b/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/JCoReSetAnnotationIndexTest.java
index cfb4f2374..111861268 100644
--- a/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/JCoReSetAnnotationIndexTest.java
+++ b/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/JCoReSetAnnotationIndexTest.java
@@ -15,13 +15,13 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.util.Set;
 import java.util.stream.Collectors;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class JCoReSetAnnotationIndexTest {
 	@Test
diff --git a/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/JCoReTreeMapAnnotationIndexTest.java b/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/JCoReTreeMapAnnotationIndexTest.java
index 1294407f2..208e8abb4 100644
--- a/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/JCoReTreeMapAnnotationIndexTest.java
+++ b/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/JCoReTreeMapAnnotationIndexTest.java
@@ -15,12 +15,12 @@
 import de.julielab.jcore.types.Token;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.util.Set;
 import java.util.stream.Collectors;
 
-import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 
 public class JCoReTreeMapAnnotationIndexTest {
 	@Test
diff --git a/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/TermGeneratorsTest.java b/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/TermGeneratorsTest.java
index 8595e5840..e3d269f83 100644
--- a/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/TermGeneratorsTest.java
+++ b/jcore-utilities/src/test/java/de/julielab/jcore/utility/index/TermGeneratorsTest.java
@@ -13,12 +13,12 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.util.function.BinaryOperator;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
 
 public class TermGeneratorsTest {
 	
diff --git a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/CasPopulator.java b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/CasPopulator.java
index 81dff7797..fd631e58f 100644
--- a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/CasPopulator.java
+++ b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/CasPopulator.java
@@ -14,7 +14,6 @@
 import de.julielab.xml.binary.BinaryJeDISNodeDecoder;
 import de.julielab.xml.binary.BinaryXmiBuilder;
 import org.apache.commons.lang.StringUtils;
-import org.apache.uima.cas.CASException;
 import org.apache.uima.cas.FSIterator;
 import org.apache.uima.cas.impl.XmiCasDeserializer;
 import org.apache.uima.collection.CollectionException;
diff --git a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierDifferentNsSchemaTest.java b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierDifferentNsSchemaTest.java
index ff60e41a0..f7fa5f19a 100644
--- a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierDifferentNsSchemaTest.java
+++ b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierDifferentNsSchemaTest.java
@@ -16,9 +16,9 @@
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
 import org.testcontainers.containers.PostgreSQLContainer;
-import org.testng.annotations.AfterClass;
-import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
 import java.io.File;
@@ -35,7 +35,7 @@ public class XmiDBMultiplierDifferentNsSchemaTest {
     private static String costosysConfig;
     private static int subsetCounter;
 
-    @BeforeClass
+    @BeforeAll
     public static void setup() throws UIMAException, IOException, ConfigurationException {
         postgres.start();
         XmiDBSetupHelper.createDbcConfig(postgres);
@@ -49,7 +49,7 @@ public static void setup() throws UIMAException, IOException, ConfigurationExcep
         subsetCounter = 0;
     }
 
-    @AfterClass
+    @AfterAll
     public static void shutdown() {
         postgres.close();
     }
diff --git a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierTest.java b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierTest.java
index 2af097f43..c2398d503 100644
--- a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierTest.java
+++ b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierTest.java
@@ -16,9 +16,9 @@
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
 import org.testcontainers.containers.PostgreSQLContainer;
-import org.testng.annotations.AfterClass;
-import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
 import java.io.File;
@@ -35,7 +35,7 @@ public class XmiDBMultiplierTest {
     private static String costosysConfig;
     private static int subsetCounter;
 
-    @BeforeClass
+    @BeforeAll
     public static void setup() throws UIMAException, IOException, ConfigurationException {
         postgres.start();
         XmiDBSetupHelper.createDbcConfig(postgres);
@@ -49,7 +49,7 @@ public static void setup() throws UIMAException, IOException, ConfigurationExcep
         subsetCounter = 0;
     }
 
-    @AfterClass
+    @AfterAll
     public static void shutdown() {
         postgres.close();
     }
diff --git a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderBinaryFormatTest.java b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderBinaryFormatTest.java
index 309ab09a4..51d66d493 100644
--- a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderBinaryFormatTest.java
+++ b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderBinaryFormatTest.java
@@ -2,7 +2,9 @@
 
 import de.julielab.costosys.dbconnection.DataBaseConnector;
 import de.julielab.jcore.db.test.DBTestUtils;
-import de.julielab.jcore.types.*;
+import de.julielab.jcore.types.Header;
+import de.julielab.jcore.types.Sentence;
+import de.julielab.jcore.types.Token;
 import org.apache.commons.configuration2.ex.ConfigurationException;
 import org.apache.uima.UIMAException;
 import org.apache.uima.collection.CollectionReader;
@@ -10,9 +12,9 @@
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
 import org.testcontainers.containers.PostgreSQLContainer;
-import org.testng.annotations.AfterClass;
-import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
 import java.io.IOException;
@@ -20,15 +22,15 @@
 import java.util.ArrayList;
 import java.util.List;
 
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class XmiDBReaderBinaryFormatTest {
     public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer();
     private static String costosysConfig;
     private static String xmisubset;
 
-    @BeforeClass
+    @BeforeAll
     public static void setup() throws SQLException, UIMAException, IOException, ConfigurationException {
         postgres.start();
         XmiDBSetupHelper.createDbcConfig(postgres);
@@ -46,7 +48,7 @@ public static void setup() throws SQLException, UIMAException, IOException, Conf
     }
 
 
-    @AfterClass
+    @AfterAll
     public static void shutdown() {
         postgres.close();
     }
diff --git a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderDifferentNsSchemaTest.java b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderDifferentNsSchemaTest.java
index a8a15b58d..39b2639f0 100644
--- a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderDifferentNsSchemaTest.java
+++ b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderDifferentNsSchemaTest.java
@@ -1,22 +1,20 @@
 package de.julielab.jcore.reader.xmi;
 
 import de.julielab.costosys.dbconnection.DataBaseConnector;
-import de.julielab.jcore.consumer.xmi.XMIDBWriter;
 import de.julielab.jcore.db.test.DBTestUtils;
-import de.julielab.jcore.reader.db.TableReaderConstants;
-import de.julielab.jcore.types.*;
+import de.julielab.jcore.types.Header;
+import de.julielab.jcore.types.Sentence;
+import de.julielab.jcore.types.Token;
 import org.apache.commons.configuration2.ex.ConfigurationException;
 import org.apache.uima.UIMAException;
-import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.collection.CollectionReader;
-import org.apache.uima.fit.factory.AnalysisEngineFactory;
 import org.apache.uima.fit.factory.CollectionReaderFactory;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
 import org.testcontainers.containers.PostgreSQLContainer;
-import org.testng.annotations.AfterClass;
-import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
 import java.io.IOException;
@@ -24,15 +22,15 @@
 import java.util.ArrayList;
 import java.util.List;
 
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class XmiDBReaderDifferentNsSchemaTest {
     public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer();
     private static String costosysConfig;
     private static String xmisubset;
 
-    @BeforeClass
+    @BeforeAll
     public static void setup() throws SQLException, UIMAException, IOException, ConfigurationException {
         postgres.start();
         XmiDBSetupHelper.createDbcConfig(postgres);
@@ -50,7 +48,7 @@ public static void setup() throws SQLException, UIMAException, IOException, Conf
     }
 
 
-    @AfterClass
+    @AfterAll
     public static void shutdown() {
         postgres.close();
     }
diff --git a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderGzippedDataTest.java b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderGzippedDataTest.java
index 9a7fea0b3..018170026 100644
--- a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderGzippedDataTest.java
+++ b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderGzippedDataTest.java
@@ -12,9 +12,9 @@
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
 import org.testcontainers.containers.PostgreSQLContainer;
-import org.testng.annotations.AfterClass;
-import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
 import java.io.File;
@@ -35,7 +35,7 @@ public class XmiDBReaderGzippedDataTest {
     private static String costosysConfig;
     private static String xmisubset;
 
-    @BeforeClass
+    @BeforeAll
     public static void setup() throws SQLException, UIMAException, IOException, ConfigurationException {
         postgres.start();
         XmiDBSetupHelper.createDbcConfig(postgres);
@@ -53,7 +53,7 @@ public static void setup() throws SQLException, UIMAException, IOException, Conf
         dbc.close();
     }
 
-    @AfterClass
+    @AfterAll
     public static void shutdown() {
         postgres.close();
     }
diff --git a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderMonolithicDocumentsTest.java b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderMonolithicDocumentsTest.java
index e0ae7f3ed..decd4e840 100644
--- a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderMonolithicDocumentsTest.java
+++ b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderMonolithicDocumentsTest.java
@@ -12,9 +12,9 @@
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
 import org.testcontainers.containers.PostgreSQLContainer;
-import org.testng.annotations.AfterClass;
-import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
 import java.io.File;
@@ -32,7 +32,7 @@ public class XmiDBReaderMonolithicDocumentsTest {
     private static String costosysConfig;
     private static String xmisubset;
 
-    @BeforeClass
+    @BeforeAll
     public static void setup() throws SQLException, UIMAException, IOException, ConfigurationException {
         postgres.start();
         XmiDBSetupHelper.createDbcConfig(postgres);
@@ -50,7 +50,7 @@ public static void setup() throws SQLException, UIMAException, IOException, Conf
         dbc.close();
     }
 
-    @AfterClass
+    @AfterAll
     public static void shutdown() {
         postgres.close();
     }
diff --git a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderTest.java b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderTest.java
index 72bea54a6..1f8150274 100644
--- a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderTest.java
+++ b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderTest.java
@@ -14,9 +14,9 @@
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
 import org.testcontainers.containers.PostgreSQLContainer;
-import org.testng.annotations.AfterClass;
-import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
 import java.io.IOException;
@@ -24,15 +24,15 @@
 import java.util.ArrayList;
 import java.util.List;
 
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class XmiDBReaderTest {
     public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer();
     private static String costosysConfig;
     private static String xmisubset;
 
-    @BeforeClass
+    @BeforeAll
     public static void setup() throws SQLException, UIMAException, IOException, ConfigurationException {
         postgres.start();
         XmiDBSetupHelper.createDbcConfig(postgres);
@@ -50,7 +50,7 @@ public static void setup() throws SQLException, UIMAException, IOException, Conf
     }
 
 
-    @AfterClass
+    @AfterAll
     public static void shutdown() {
         postgres.close();
     }
diff --git a/jcore-xmi-db-writer/pom.xml b/jcore-xmi-db-writer/pom.xml
index 657e06c16..2b4a326f4 100644
--- a/jcore-xmi-db-writer/pom.xml
+++ b/jcore-xmi-db-writer/pom.xml
@@ -159,6 +159,12 @@
             <artifactId>logback-classic</artifactId>
             <scope>test</scope>
         </dependency>
+        <dependency>
+            <groupId>org.jetbrains</groupId>
+            <artifactId>annotations</artifactId>
+            <version>RELEASE</version>
+            <scope>compile</scope>
+        </dependency>
     </dependencies>
     <url>https://github.com/JULIELab/jcore-base/tree/master/jcore-xmi-db-writer</url>
     <licenses>
diff --git a/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterBinaryFormatTest.java b/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterBinaryFormatTest.java
index f89ce94e5..4df9efaaa 100644
--- a/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterBinaryFormatTest.java
+++ b/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterBinaryFormatTest.java
@@ -4,9 +4,7 @@
 import de.julielab.costosys.dbconnection.DataBaseConnector;
 import de.julielab.jcore.db.test.DBTestUtils;
 import de.julielab.jcore.types.*;
-import de.julielab.jcore.types.ext.DBProcessingMetaData;
 import de.julielab.xml.XmiSplitConstants;
-import de.julielab.xml.XmiSplitter;
 import de.julielab.xml.binary.BinaryDecodingResult;
 import de.julielab.xml.binary.BinaryJeDISNodeDecoder;
 import org.apache.commons.configuration2.ex.ConfigurationException;
@@ -17,7 +15,11 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.StringArray;
-import org.junit.*;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
 import org.testcontainers.containers.PostgreSQLContainer;
 
 import java.io.ByteArrayInputStream;
@@ -26,15 +28,15 @@
 import java.sql.ResultSet;
 import java.sql.SQLException;
 import java.sql.Statement;
-import java.util.*;
 import java.util.List;
+import java.util.*;
 import java.util.stream.Collectors;
 
 import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.assertj.core.api.Assertions.assertThatCode;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class XmiDBWriterBinaryFormatTest {
     @ClassRule
@@ -43,7 +45,7 @@ public class XmiDBWriterBinaryFormatTest {
     private static String xmlSubsetTable;
     private static DataBaseConnector dbc;
 
-    @BeforeClass
+    @BeforeAll
     public static void setup() throws SQLException, UIMAException, IOException, ConfigurationException {
         dbc = DBTestUtils.getDataBaseConnector(postgres);
         dbc.reserveConnection();
@@ -52,7 +54,7 @@ public static void setup() throws SQLException, UIMAException, IOException, Conf
         dbc.releaseConnections();
     }
 
-    @AfterClass
+    @AfterAll
     public static void shutDown() {
         dbc.close();
     }
@@ -65,7 +67,7 @@ public static JCas getJCasWithRequiredTypes() throws UIMAException {
                 "de.julielab.jcore.types.jcore-xmi-splitter-types");
     }
 
-    @Before
+    @BeforeEach
     public void cleanForTest() throws SQLException {
         String binaryMappingTable = "public." + MetaTableManager.BINARY_MAPPING_TABLE;
         String binaryFeaturesToMapTable = "public." + MetaTableManager.BINARY_FEATURES_TO_MAP_TABLE;
diff --git a/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterMonolithicDocumentTest.java b/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterMonolithicDocumentTest.java
index 10684230b..84e35a027 100644
--- a/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterMonolithicDocumentTest.java
+++ b/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterMonolithicDocumentTest.java
@@ -3,7 +3,9 @@
 import de.julielab.costosys.dbconnection.CoStoSysConnection;
 import de.julielab.costosys.dbconnection.DataBaseConnector;
 import de.julielab.jcore.db.test.DBTestUtils;
-import de.julielab.jcore.types.*;
+import de.julielab.jcore.types.Header;
+import de.julielab.jcore.types.Sentence;
+import de.julielab.jcore.types.Token;
 import org.apache.commons.configuration2.ex.ConfigurationException;
 import org.apache.uima.UIMAException;
 import org.apache.uima.analysis_engine.AnalysisEngine;
@@ -12,24 +14,18 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
 import org.junit.ClassRule;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
 import org.testcontainers.containers.PostgreSQLContainer;
 
 import java.io.ByteArrayInputStream;
-import java.io.IOException;
 import java.sql.ResultSet;
-import java.sql.SQLException;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.stream.Collectors;
 
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.assertj.core.api.Assertions.assertThatCode;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class XmiDBWriterMonolithicDocumentTest {
     @ClassRule
@@ -37,14 +33,14 @@ public class XmiDBWriterMonolithicDocumentTest {
     private static String costosysConfig;
     private static DataBaseConnector dbc;
 
-    @BeforeClass
+    @BeforeAll
     public static void setup() throws ConfigurationException {
         dbc = DBTestUtils.getDataBaseConnector(postgres);
         costosysConfig = DBTestUtils.createTestCostosysConfig("medline_2017", 1, postgres);
         DBTestUtils.createAndSetHiddenConfig("src/test/resources/hiddenConfig.txt", postgres);
     }
 
-    @AfterClass
+    @AfterAll
     public static void shutDown() {
         dbc.close();
     }
diff --git a/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterTest.java b/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterTest.java
index 866d0ddf8..00230cda6 100644
--- a/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterTest.java
+++ b/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterTest.java
@@ -11,10 +11,10 @@
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
 import org.junit.ClassRule;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
 import org.testcontainers.containers.PostgreSQLContainer;
 
 import java.io.IOException;
@@ -34,7 +34,7 @@ public class XmiDBWriterTest {
     private static String xmlSubsetTable;
     private static DataBaseConnector dbc;
 
-    @BeforeClass
+    @BeforeAll
     public static void setup() throws SQLException, UIMAException, IOException, ConfigurationException {
         dbc = DBTestUtils.getDataBaseConnector(postgres);
         dbc.reserveConnection();
@@ -43,7 +43,7 @@ public static void setup() throws SQLException, UIMAException, IOException, Conf
         dbc.releaseConnections();
     }
 
-    @AfterClass
+    @AfterAll
     public static void shutDown() {
         dbc.close();
     }
diff --git a/jcore-xmi-reader/pom.xml b/jcore-xmi-reader/pom.xml
index 9e3df5b4c..e7630643a 100644
--- a/jcore-xmi-reader/pom.xml
+++ b/jcore-xmi-reader/pom.xml
@@ -24,8 +24,8 @@
             <version>${jcore-types-version}</version>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <organization>
diff --git a/jcore-xmi-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiCollectionReaderTest.java b/jcore-xmi-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiCollectionReaderTest.java
index 17fda0be8..2d360f427 100644
--- a/jcore-xmi-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiCollectionReaderTest.java
+++ b/jcore-xmi-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiCollectionReaderTest.java
@@ -16,9 +16,9 @@
 import org.apache.uima.fit.factory.CollectionReaderFactory;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class XmiCollectionReaderTest {
 	@Test
diff --git a/jcore-xmi-writer/pom.xml b/jcore-xmi-writer/pom.xml
index 65dd58b07..586126e26 100644
--- a/jcore-xmi-writer/pom.xml
+++ b/jcore-xmi-writer/pom.xml
@@ -29,8 +29,8 @@
             <scope>test</scope>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <organization>
diff --git a/jcore-xmi-writer/src/test/java/de/julielab/jcore/consumer/xmi/CasToXmiConsumerTest.java b/jcore-xmi-writer/src/test/java/de/julielab/jcore/consumer/xmi/CasToXmiConsumerTest.java
index 1242372d6..e6b7006e2 100644
--- a/jcore-xmi-writer/src/test/java/de/julielab/jcore/consumer/xmi/CasToXmiConsumerTest.java
+++ b/jcore-xmi-writer/src/test/java/de/julielab/jcore/consumer/xmi/CasToXmiConsumerTest.java
@@ -24,15 +24,15 @@
 import org.apache.uima.util.CasCreationUtils;
 import org.apache.uima.util.InvalidXMLException;
 import org.apache.uima.util.XMLInputSource;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 import java.io.File;
 import java.io.FilenameFilter;
 import java.io.IOException;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 /**
  * Test for class {@link CasToXmiConsumer}
@@ -91,7 +91,7 @@ public boolean accept(File file, String name) {
 	 * Delete all files ending with "xmi" or "xmi.gzip" in the output directory, 
 	 * and do the same for all subdirectories of outputDir, recursively
 	 */
-	@Before	
+	@BeforeEach
 	public void clearDirectory() {
 		File outputDir = new File(OUTPUT_FOLDER_XMI);
 		removeXmiGzipAndZipFiles(outputDir);
@@ -121,7 +121,7 @@ private void removeXmiGzipAndZipFiles(File dir) {
 	/**
 	 * Create the CasConsumer under test
 	 */
-	@Before	
+	@BeforeEach
 	public void createConsumer() {
 //		XMLInputSource source;
 		try {
diff --git a/jcore-xml-db-reader/pom.xml b/jcore-xml-db-reader/pom.xml
index 8447584a9..3342d08b7 100644
--- a/jcore-xml-db-reader/pom.xml
+++ b/jcore-xml-db-reader/pom.xml
@@ -66,7 +66,6 @@
         <dependency>
             <groupId>org.junit.jupiter</groupId>
             <artifactId>junit-jupiter-engine</artifactId>
-            <scope>test</scope>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
diff --git a/jcore-xml-mapper/pom.xml b/jcore-xml-mapper/pom.xml
index 7264e7079..c2fa73802 100644
--- a/jcore-xml-mapper/pom.xml
+++ b/jcore-xml-mapper/pom.xml
@@ -34,8 +34,8 @@
             <scope>test</scope>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <organization>
diff --git a/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/genericTypes/TypeTemplate.java b/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/genericTypes/TypeTemplate.java
index 383dc3215..466350e8c 100644
--- a/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/genericTypes/TypeTemplate.java
+++ b/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/genericTypes/TypeTemplate.java
@@ -24,7 +24,6 @@
 
 import java.util.*;
 
-import static org.fest.reflect.core.Reflection.constructor;
 
 /**
  * Represents a Template for a type which Contains a List of Feature Templates
@@ -119,14 +118,17 @@ public void setFullClassName(String fullClassName) {
 	public void setParser(String trim) throws CollectionException {
 		if (trim != null) {
 			externalParser = true;
-			Class<?> externalParserClass;
+			Class<?> externalParserClass = null;
 			try {
 				externalParserClass = Class.forName(trim);
+			this.parser = (TypeParser) externalParserClass.getConstructor().newInstance();
 			} catch (ClassNotFoundException e) {
 				LOGGER.error("ExternalParser " + trim + " for type or feature " + fullClassName + " returns a ClassNotFoundException", e);
 				throw new CollectionException(e);
+			} catch (Exception e) {
+				LOGGER.error("Could not create instance of class {}: ", externalParserClass, e);
+				throw new CollectionException(e);
 			}
-			this.parser = (TypeParser) constructor().in(externalParserClass).newInstance();
 		}else{
 			this.parser = null;
 		}
diff --git a/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/mapper/DocumentTextHandler.java b/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/mapper/DocumentTextHandler.java
index 02218ee8b..4ef868e6f 100644
--- a/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/mapper/DocumentTextHandler.java
+++ b/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/mapper/DocumentTextHandler.java
@@ -25,9 +25,6 @@
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
-import java.util.concurrent.atomic.AtomicInteger;
-
-import static org.fest.reflect.core.Reflection.constructor;
 
 /**
  * Handels to parse the DocumentText
@@ -128,16 +125,19 @@ public void setXPathForPartOfDocumentText(int id, String xpath) {
 
     public void setExternalParserForPartOfDocument(int id, String externalParserClassName) throws CollectionException {
         if (externalParserClassName != null) {
-            Class<?> externalParserClass;
+            Class<?> externalParserClass = null;
+            DocumentTextPartParser parser;
             try {
                 externalParserClass = Class.forName(externalParserClassName.trim());
+                parser = (DocumentTextPartParser) externalParserClass.getConstructor().newInstance();
             } catch (ClassNotFoundException e) {
                 LOGGER.error("ExternalParser " + externalParserClassName + " for document text part " + id + " returns a ClassNotFoundException", e);
                 throw new CollectionException(e);
+            } catch (Exception e) {
+                LOGGER.error("Could not create instance of {}: ", externalParserClass, e);
+                throw new CollectionException(e);
             }
-            DocumentTextPartParser parser = (DocumentTextPartParser) constructor().in(externalParserClass).newInstance();
             this.docTextData.get(id).setParser(parser);
         }
     }
-
 }
diff --git a/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/typeBuilder/StandardTypeBuilder.java b/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/typeBuilder/StandardTypeBuilder.java
index 9fa1c46b2..a010092c1 100644
--- a/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/typeBuilder/StandardTypeBuilder.java
+++ b/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/typeBuilder/StandardTypeBuilder.java
@@ -27,8 +27,6 @@
 
 import java.util.HashMap;
 
-import static org.fest.reflect.core.Reflection.method;
-
 /**
  * In this class, the actual UIMA types are built from the templates which have
  * been filled with values by the type parsers before. The standard type builder
@@ -150,8 +148,7 @@ private Annotation buildSingleInstance(ConcreteType concreteType, JCas jcas) thr
 					// itself.
 					if (standardJavaTypesMap.get(concreteFeature.getFullClassName()) != null) {
 						featureClass = standardJavaTypesMap.get(concreteFeature.getFullClassName());
-						method(methodName).withParameterTypes(featureClass).in(type)
-								.invoke(parseValueStringToValueType(concreteFeature.getValue(), concreteFeature.getFullClassName()));
+						type.getClass().getMethod(methodName, featureClass).invoke(type, parseValueStringToValueType(concreteFeature.getValue(), concreteFeature.getFullClassName()));
 					} else if (concreteFeature.getFullClassName().equals("String") || concreteFeature.getFullClassName().equals("java.lang.String")) {
 						featureClass = Class.forName(concreteFeature.getFullClassName());
 						typeClass.getMethod(methodName, featureClass).invoke(type, concreteFeature.getValue());
@@ -163,7 +160,7 @@ private Annotation buildSingleInstance(ConcreteType concreteType, JCas jcas) thr
 									+ "\" the feature value class (e.g. String, Integer, another type...) was not defined in the mapping file.");
 						featureClass = Class.forName(featureClassName);
 						TOP top = concreteFeature.getTypeTemplate().getParser().getTypeBuilder().buildType(concreteFeature, jcas);
-						method(methodName).withParameterTypes(featureClass).in(type).invoke(top);
+						type.getClass().getMethod(methodName, featureClass).invoke(type, top);
 					}
 				} catch (Throwable e) {
 					LOGGER.error("Wrong Feature Type: " + concreteFeature.getFullClassName(), e);
diff --git a/jcore-xml-mapper/src/test/java/de/julielab/jcore/reader/xmlmapper/EncodingTest.java b/jcore-xml-mapper/src/test/java/de/julielab/jcore/reader/xmlmapper/EncodingTest.java
index 23a256259..9d61cd532 100644
--- a/jcore-xml-mapper/src/test/java/de/julielab/jcore/reader/xmlmapper/EncodingTest.java
+++ b/jcore-xml-mapper/src/test/java/de/julielab/jcore/reader/xmlmapper/EncodingTest.java
@@ -13,9 +13,9 @@
 import com.ximpleware.AutoPilot;
 import com.ximpleware.VTDGen;
 import com.ximpleware.VTDNav;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
-import static org.junit.Assert.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 
 public class EncodingTest {
 	@Test
diff --git a/jcore-xml-mapper/src/test/java/de/julielab/jcore/reader/xmlmapper/XMLMapperTest.java b/jcore-xml-mapper/src/test/java/de/julielab/jcore/reader/xmlmapper/XMLMapperTest.java
index 8b3efcb59..a3e682208 100644
--- a/jcore-xml-mapper/src/test/java/de/julielab/jcore/reader/xmlmapper/XMLMapperTest.java
+++ b/jcore-xml-mapper/src/test/java/de/julielab/jcore/reader/xmlmapper/XMLMapperTest.java
@@ -33,13 +33,13 @@
 import org.apache.uima.resource.metadata.ExternalResourceBinding;
 import org.apache.uima.util.CasCreationUtils;
 import org.apache.uima.util.XMLInputSource;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.IOException;
 import java.util.HashMap;
 import java.util.Map;
 
-import static org.junit.Assert.*;
+import static org.junit.jupiter.api.Assertions.*;
 
 /**
  * TODO insert description
diff --git a/jcore-xml-mapper/src/test/java/de/julielab/jcore/reader/xmlmapper/XMLReaderTest.java b/jcore-xml-mapper/src/test/java/de/julielab/jcore/reader/xmlmapper/XMLReaderTest.java
index 1ecb95ccd..ef926761a 100644
--- a/jcore-xml-mapper/src/test/java/de/julielab/jcore/reader/xmlmapper/XMLReaderTest.java
+++ b/jcore-xml-mapper/src/test/java/de/julielab/jcore/reader/xmlmapper/XMLReaderTest.java
@@ -6,8 +6,8 @@
 
 package de.julielab.jcore.reader.xmlmapper;
 
-import de.julielab.jcore.types.*;
 import de.julielab.jcore.types.Date;
+import de.julielab.jcore.types.*;
 import de.julielab.jcore.types.pubmed.Header;
 import de.julielab.jcore.types.pubmed.ManualDescriptor;
 import org.apache.uima.UIMAException;
@@ -29,7 +29,7 @@
 import org.apache.uima.util.CasCreationUtils;
 import org.apache.uima.util.InvalidXMLException;
 import org.apache.uima.util.XMLInputSource;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.xml.sax.SAXException;
@@ -38,10 +38,10 @@
 import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.IOException;
-import java.util.*;
 import java.util.List;
+import java.util.*;
 
-import static org.junit.Assert.*;
+import static org.junit.jupiter.api.Assertions.*;
 
 /**
  * Test for class MedlineReader
@@ -183,7 +183,7 @@ public void testSingleEntityData() throws Throwable {
 				if (DEBUG_MODE) {
 					serializeCas(cas);
 				}
-				assertTrue("test documenttext", cas.getDocumentText() != null && cas.getDocumentText().length() > 0);
+				assertTrue(cas.getDocumentText() != null && cas.getDocumentText().length() > 0, "test documenttext");
 				assertEquals(
 						"Mitigation of graft-versus-host disease in rats treated with allogeneic and xenogeneic antilymphocytic sera.\nThis is a very short test abstract.",
 						cas.getDocumentText());
@@ -191,7 +191,7 @@ public void testSingleEntityData() throws Throwable {
 				int counter = 0;
 				String[] types = new String[] { ":::diso:2,3", ":::spe", ":::pgn" };
 				String[] texts = new String[] { "graft-versus-host disease", "rats", "sera" };
-				assertTrue("No entity mentions found in the CAS", iter.hasNext());
+				assertTrue(iter.hasNext(), "No entity mentions found in the CAS");
 				while (iter.hasNext()) {
 					EntityMention text = (EntityMention) iter.next();
 					String coveredText = text.getCoveredText();
@@ -398,8 +398,8 @@ public void testMissingInputDirectory() {
 			medlineReader = getCollectionReader(DESC_XML_READER_MISSING_INPUT_DIR);
 			fail("Expected exception was not thrown");
 		} catch (Exception e) {
-			assertTrue("Exception should be an instance of ResourceInitializationException , but was "
-					+ e.getClass().getName(), e instanceof ResourceInitializationException);
+			assertTrue(e instanceof ResourceInitializationException, "Exception should be an instance of ResourceInitializationException , but was "
+					+ e.getClass().getName());
 		}
 	}
 
@@ -497,71 +497,71 @@ private void checkElements() {
 			String pmid = getPMID(cas);
 			if (pmid.equals("11119751")) {
 				checkCount++;
-				assertTrue("Invalid keyWordList", checkKeywords(cas, EXPECTED_KEYWORDS));
-				assertTrue("Invalid Authors", checkAuthors(cas, EXPECTED_AUTHORS));
-				assertTrue("Invalid DBInfoList", ckeckDBInfos(cas, EXPECTED_DB_INFO));
-				assertTrue("Invalid MeshHeading", checkMeshHeadings(cas, EXPECTED_MESH_HEADINGS));
-				assertTrue("Invalid GeneSymbol", checkGeneSymbols(cas, EXPECTED_GENE_SYMBOLS));
-				assertTrue("Invalid Chemical", checkChemicals(cas, EXPECTED_CHEMICALS));
-				assertTrue("Invalid Header", checkHeader(cas, EXPECTED_HEADER));
-				assertTrue("Invalid ManualDescriptor", checkManualDescriptor(cas));
-				assertTrue("Invalid Journal", ckeckJournal(cas, EXPECTED_JOURNAL));
-				assertTrue("Invalid DocumentText", checkDocumentText(cas, EXPECTED_DOCUMENT_TEXT));
-				assertTrue("Invalid AbstractText", checkAbstractText(cas, EXPECTED_ABSTRACT_TEXT));
-				assertTrue("Invalid Title", checkTitle(cas, EXPECTED_TITLE));
-				assertTrue("Sentences Found", !checkSentences(cas));
+				assertTrue(checkKeywords(cas, EXPECTED_KEYWORDS), "Invalid keyWordList");
+				assertTrue(checkAuthors(cas, EXPECTED_AUTHORS), "Invalid Authors");
+				assertTrue(ckeckDBInfos(cas, EXPECTED_DB_INFO), "Invalid DBInfoList");
+				assertTrue(checkMeshHeadings(cas, EXPECTED_MESH_HEADINGS), "Invalid MeshHeading");
+				assertTrue(checkGeneSymbols(cas, EXPECTED_GENE_SYMBOLS), "Invalid GeneSymbol");
+				assertTrue(checkChemicals(cas, EXPECTED_CHEMICALS), "Invalid Chemical");
+				assertTrue(checkHeader(cas, EXPECTED_HEADER), "Invalid Header");
+				assertTrue(checkManualDescriptor(cas), "Invalid ManualDescriptor");
+				assertTrue(ckeckJournal(cas, EXPECTED_JOURNAL), "Invalid Journal");
+				assertTrue(checkDocumentText(cas, EXPECTED_DOCUMENT_TEXT), "Invalid DocumentText");
+				assertTrue(checkAbstractText(cas, EXPECTED_ABSTRACT_TEXT), "Invalid AbstractText");
+				assertTrue(checkTitle(cas, EXPECTED_TITLE), "Invalid Title");
+				assertTrue(!checkSentences(cas), "Sentences Found");
 			}
 			// check medline XML without most lists (gene, keywords,...)
 			if (pmid.equals("11119751-a")) {
 				checkCount++;
-				assertTrue("Invalid Authors", checkAuthors(cas, EXPECTED_AUTHORS));
-				assertTrue("Sentences Found", !checkSentences(cas));
+				assertTrue(checkAuthors(cas, EXPECTED_AUTHORS), "Invalid Authors");
+				assertTrue(!checkSentences(cas), "Sentences Found");
 			}
 			// check medline XML with pub date: <MedlineDate>2000
 			// Spring-Summer</MedlineDate>
 			if (pmid.equals("11119751-b")) {
 				checkCount++;
-				assertTrue("Invalid Authors", checkAuthors(cas, EXPECTED_AUTHORS));
-				assertTrue("Invalid GeneSymbol", checkGeneSymbols(cas, EXPECTED_GENE_SYMBOLS));
-				assertTrue("Invalid Journal", ckeckJournal(cas, EXPECTED_JOURNAL));
-				assertTrue("Invalid PubDate", checkPubDate(cas, EXPECTED_DATE_1));
-				assertTrue("Sentences Found", !checkSentences(cas));
+				assertTrue(checkAuthors(cas, EXPECTED_AUTHORS), "Invalid Authors");
+				assertTrue(checkGeneSymbols(cas, EXPECTED_GENE_SYMBOLS), "Invalid GeneSymbol");
+				assertTrue(ckeckJournal(cas, EXPECTED_JOURNAL), "Invalid Journal");
+				assertTrue(checkPubDate(cas, EXPECTED_DATE_1), "Invalid PubDate");
+				assertTrue(!checkSentences(cas), "Sentences Found");
 			}
 			// check medline XML with pub date: <MedlineDate>2000 Dec
 			// 23-30</MedlineDate>
 			if (pmid.equals("11119751-c")) {
 				checkCount++;
-				assertTrue("Invalid Authors", checkAuthors(cas, EXPECTED_AUTHORS));
-				assertTrue("Invalid GeneSymbol", checkGeneSymbols(cas, EXPECTED_GENE_SYMBOLS));
-				assertTrue("Invalid Journal", ckeckJournal(cas, EXPECTED_JOURNAL));
-				assertTrue("Invalid PubDate", checkPubDate(cas, EXPECTED_DATE_2));
-				assertTrue("Sentences Found", !checkSentences(cas));
+				assertTrue(checkAuthors(cas, EXPECTED_AUTHORS), "Invalid Authors");
+				assertTrue(checkGeneSymbols(cas, EXPECTED_GENE_SYMBOLS), "Invalid GeneSymbol");
+				assertTrue(ckeckJournal(cas, EXPECTED_JOURNAL), "Invalid Journal");
+				assertTrue(checkPubDate(cas, EXPECTED_DATE_2), "Invalid PubDate");
+				assertTrue(!checkSentences(cas), "Sentences Found");
 			}
 			// check medline XML pub date: <MedlineDate>2000 Oct-2001
 			// Mar</MedlineDate>
 			if (pmid.equals("11119751-d")) {
-				assertTrue("Sentences Found", !checkSentences(cas));
+				assertTrue(!checkSentences(cas), "Sentences Found");
 				checkCount++;
-				assertTrue("Invalid Authors", checkAuthors(cas, EXPECTED_AUTHORS));
-				assertTrue("Invalid GeneSymbol", checkGeneSymbols(cas, EXPECTED_GENE_SYMBOLS));
-				assertTrue("Invalid Journal", ckeckJournal(cas, EXPECTED_JOURNAL));
-				assertTrue("Invalid PubDate", checkPubDate(cas, EXPECTED_DATE_3));
-				assertTrue("Sentences Found", !checkSentences(cas));
+				assertTrue(checkAuthors(cas, EXPECTED_AUTHORS), "Invalid Authors");
+				assertTrue(checkGeneSymbols(cas, EXPECTED_GENE_SYMBOLS), "Invalid GeneSymbol");
+				assertTrue(ckeckJournal(cas, EXPECTED_JOURNAL), "Invalid Journal");
+				assertTrue(checkPubDate(cas, EXPECTED_DATE_3), "Invalid PubDate");
+				assertTrue(!checkSentences(cas), "Sentences Found");
 			}
 			if (pmid.equals("8045680")) {
 				checkCount++;
-				assertTrue("No Sentences Found", checkSentences(cas));
+				assertTrue(checkSentences(cas), "No Sentences Found");
 				// assertTrue("Invalid Header", checkHeader(cas,
 				// EXPECTED_HEADER_OTHER_LANGUAGE));
 			}
 			if (pmid.equals("12626969")) {
 				checkCount++;
-				assertTrue("No Sentences Found", checkSentences(cas));
+				assertTrue(checkSentences(cas), "No Sentences Found");
 				// assertTrue("Invalid Header", checkHeader(cas,
 				// EXPECTED_HEADER_OTHER_LANGUAGE));
 			}
 			if (pmid.equals("11119751-e")) {
-				assertTrue("Sentences Found", !checkSentences(cas));
+				assertTrue(!checkSentences(cas), "Sentences Found");
 				checkCount++;
 				// assertTrue("Invalid Header", checkHeader(cas,
 				// EXPECTED_HEADER_OTHER_LANGUAGE));
@@ -569,25 +569,25 @@ private void checkElements() {
 			// test the case that only a title is found and no abstractText
 			// (documentText should be equal to title in this case)
 			if (pmid.equals("17276851")) {
-				assertTrue("Sentences Found", !checkSentences(cas));
+				assertTrue(!checkSentences(cas), "Sentences Found");
 				checkCount++;
-				assertTrue("Invalid Document Title", checkTitle(cas, EXPECTED_TITLE_2));
-				assertTrue("Invalid Document Text", checkDocumentText(cas, EXPECTED_TITLE_2));
+				assertTrue(checkTitle(cas, EXPECTED_TITLE_2), "Invalid Document Title");
+				assertTrue(checkDocumentText(cas, EXPECTED_TITLE_2), "Invalid Document Text");
 			}
 			// PubMed has changed the XML element ForeName to FirstName, but
 			// foreName should still
 			// be supported
 			if (pmid.equals("18439884")) {
-				assertTrue("Sentences Found", !checkSentences(cas));
+				assertTrue(!checkSentences(cas), "Sentences Found");
 				checkCount++;
-				assertTrue("Invalid foreName", checkForeNames(cas, EXPECTED_FORE_NAMES));
+				assertTrue(checkForeNames(cas, EXPECTED_FORE_NAMES), "Invalid foreName");
 				checkJournalTitle(cas, EXPECTED_JOURNAL_TITLE);
 			}
 			if (pmid.equals("17306504")) {
-				assertTrue("Sentences Found", !checkSentences(cas));
+				assertTrue(!checkSentences(cas), "Sentences Found");
 				checkCount++;
-				assertTrue("Invalid pubTypeList", checkPubTypeList(cas, EXPECTED_PUBTYPES));
-				assertTrue("Invalid DOI", checkDoi(cas, EXPECTED_DOI));
+				assertTrue(checkPubTypeList(cas, EXPECTED_PUBTYPES), "Invalid pubTypeList");
+				assertTrue(checkDoi(cas, EXPECTED_DOI), "Invalid DOI");
 			}
 		}
 		assertEquals(11, checkCount);
@@ -668,7 +668,7 @@ private boolean checkAbstractText(CAS cas, String abstractTextString2) {
 	 * 
 	 * @param cas
 	 *            The CAS
-	 * @param title
+	 * @param expectedTitle
 	 *            The correct title
 	 * @return true if the correct title is contained in the CAS
 	 */
@@ -1006,9 +1006,9 @@ private boolean checkSentences(CAS cas) {
 		int count = 0;
 		while (iter.hasNext()) {
 			Sentence s = (Sentence) iter.next();
-			assertTrue("Sentence has an ID", s.getId() != null);
-			assertTrue("Sentence has an Begin", s.getBegin() >= 0);
-			assertTrue("Sentence has an End", s.getEnd() >= 0);
+			assertTrue(s.getId() != null, "Sentence has an ID");
+			assertTrue(s.getBegin() >= 0, "Sentence has an Begin");
+			assertTrue(s.getEnd() >= 0, "Sentence has an End");
 			count++;
 		}
 		if (count == 0)
@@ -1052,7 +1052,7 @@ private boolean checkAuthors(CAS cas, String[][] authors) {
 	 * foreName, but both should be supported)
 	 * 
 	 * @param cas
-	 * @param foreName
+	 * @param foreNames
 	 * @return
 	 */
 	private boolean checkForeNames(CAS cas, String[] foreNames) {
diff --git a/jcore-xml-reader/pom.xml b/jcore-xml-reader/pom.xml
index 1719c5c73..7d2ec2b1f 100644
--- a/jcore-xml-reader/pom.xml
+++ b/jcore-xml-reader/pom.xml
@@ -137,8 +137,8 @@
             <artifactId>assertj-core</artifactId>
         </dependency>
         <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
     </dependencies>
     <organization>
diff --git a/jcore-xml-reader/src/test/java/de/julielab/jcore/reader/XMLMultiplierReaderTest.java b/jcore-xml-reader/src/test/java/de/julielab/jcore/reader/XMLMultiplierReaderTest.java
index 67faae92f..875be49ce 100644
--- a/jcore-xml-reader/src/test/java/de/julielab/jcore/reader/XMLMultiplierReaderTest.java
+++ b/jcore-xml-reader/src/test/java/de/julielab/jcore/reader/XMLMultiplierReaderTest.java
@@ -21,7 +21,6 @@
 import de.julielab.jcore.types.Journal;
 import de.julielab.jcore.types.casmultiplier.JCoReURI;
 import de.julielab.jcore.types.pubmed.Header;
-import junit.framework.TestCase;
 import org.apache.uima.UIMAException;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.cas.CAS;
@@ -37,6 +36,7 @@
 import org.apache.uima.resource.ResourceSpecifier;
 import org.apache.uima.util.InvalidXMLException;
 import org.apache.uima.util.XMLInputSource;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -45,10 +45,13 @@
 import java.util.*;
 
 import static org.assertj.core.api.Assertions.assertThat;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
 /**
  * Test for class XML Reader
  */
-public class XMLMultiplierReaderTest extends TestCase {
+public class XMLMultiplierReaderTest  {
 
     private static final Logger LOGGER = LoggerFactory.getLogger(XMLMultiplierReaderTest.class);
 
@@ -80,6 +83,7 @@ public XMLMultiplierReaderTest() {
         }
     }
 
+    @Test
     public void testZipInput() throws UIMAException, IOException {
         JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types",
                 "org.apache.uima.ducc.FlowControllerTS");
@@ -97,10 +101,10 @@ public void testZipInput() throws UIMAException, IOException {
                 String fileName = it.next();
                 if (jCoReURI.getUri().endsWith(fileName)) {
                     found = true;
-                    assertTrue("File name " + fileName + " was already found", foundFileNames.add(fileName));
+                    assertTrue(foundFileNames.add(fileName), "File name " + fileName + " was already found");
                 }
             }
-            assertTrue("The URI " + jCoReURI.getUri()+ " was not matched by any expected file names", found);
+            assertTrue(found, "The URI " + jCoReURI.getUri()+ " was not matched by any expected file names");
             jCas.reset();
         }
         assertThat(expectedFileNames).isEqualTo(foundFileNames);
@@ -111,6 +115,7 @@ public void testZipInput() throws UIMAException, IOException {
      *
      * @throws ResourceInitializationException
      */
+    @Test
     public void testGetNextCas_singleFile() throws Exception {
         xmlMultiplierReader = CollectionReaderFactory.createReader(DESC_XML_MULTIPLIER_READER_DIR,
                 XMLMultiplierReader.PARAM_INPUT_FILE, "src/test/resources/pubmedXML/pubmedsample18n0001.xml.gz");
@@ -125,6 +130,7 @@ public void testGetNextCas_singleFile() throws Exception {
 
     }
 
+    @Test
     public void testGetNextCas_directory() throws Exception {
         xmlMultiplierReader = CollectionReaderFactory.createReader(DESC_XML_MULTIPLIER_READER_DIR,
                 XMLMultiplierReader.PARAM_INPUT_DIR, "src/test/resources/pubmedXML/");
diff --git a/jcore-xml-reader/src/test/java/de/julielab/jcore/reader/XMLMultiplierTest.java b/jcore-xml-reader/src/test/java/de/julielab/jcore/reader/XMLMultiplierTest.java
index c3913c702..c757166ba 100644
--- a/jcore-xml-reader/src/test/java/de/julielab/jcore/reader/XMLMultiplierTest.java
+++ b/jcore-xml-reader/src/test/java/de/julielab/jcore/reader/XMLMultiplierTest.java
@@ -10,12 +10,12 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 import java.io.File;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class XMLMultiplierTest {
 
diff --git a/jcore-xml-reader/src/test/java/de/julielab/jcore/reader/XMLReaderTest.java b/jcore-xml-reader/src/test/java/de/julielab/jcore/reader/XMLReaderTest.java
index d1b67539f..cf54882b1 100644
--- a/jcore-xml-reader/src/test/java/de/julielab/jcore/reader/XMLReaderTest.java
+++ b/jcore-xml-reader/src/test/java/de/julielab/jcore/reader/XMLReaderTest.java
@@ -18,11 +18,10 @@
 package de.julielab.jcore.reader;
 
 import de.julielab.jcore.reader.xml.XMLReader;
-import de.julielab.jcore.types.*;
 import de.julielab.jcore.types.Date;
+import de.julielab.jcore.types.*;
 import de.julielab.jcore.types.pubmed.Header;
 import de.julielab.jcore.types.pubmed.ManualDescriptor;
-import junit.framework.TestCase;
 import org.apache.commons.lang3.ArrayUtils;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.analysis_engine.metadata.AnalysisEngineMetaData;
@@ -49,13 +48,16 @@
 import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.IOException;
-import java.util.*;
 import java.util.List;
+import java.util.*;
+
+import static org.assertj.core.api.Fail.fail;
+import static org.junit.jupiter.api.Assertions.*;
 
 /**
  * Test for class XML Reader
  */
-public class XMLReaderTest extends TestCase {
+public class XMLReaderTest {
 
     private static final Logger LOGGER = LoggerFactory.getLogger(XMLReaderTest.class);
 
@@ -228,7 +230,7 @@ public void testGetNextCas_singleFile() throws ResourceInitializationException {
             LOGGER.error(e.getMessage(), e);
             e.printStackTrace();
         }
-        assertEquals("reading single file", EXPECTED_DOCUMENT_TEXT, cas.getDocumentText());
+        assertEquals( EXPECTED_DOCUMENT_TEXT,  cas.getDocumentText(), "reading single file");
     }
 
     /**
@@ -239,8 +241,8 @@ public void testMissingInputDirectory() {
             medlineReader = getCollectionReader(DESC_MEDLINE_READER_MISSING_INPUT_DIR);
             fail("Expected exception was not thrown");
         } catch (Exception e) {
-            assertTrue("Exception should be an instance of ResourceInitializationException , but was "
-                    + e.getClass().getName(), e instanceof ResourceInitializationException);
+            assertTrue(e instanceof ResourceInitializationException, "Exception should be an instance of ResourceInitializationException , but was "
+                    + e.getClass().getName());
         }
     }
 
@@ -332,25 +334,25 @@ private void checkElements() {
             // check medline XML with all items
             if (getPMID(cas).equals("11119751")) {
                 checkCount++;
-                assertTrue("Invalid keyWordList", checkKeywords(cas, EXPECTED_KEYWORDS));
-                assertTrue("Invalid Authors", checkAuthors(cas, EXPECTED_AUTHORS));
-                assertTrue("Invalid DBInfoList", ckeckDBInfos(cas, EXPECTED_DB_INFO));
-                assertTrue("Invalid MeshHeading", checkMeshHeadings(cas, EXPECTED_MESH_HEADINGS));
-                assertTrue("Invalid GeneSymbol", checkGeneSymbols(cas, EXPECTED_GENE_SYMBOLS));
-                assertTrue("Invalid Chemical", checkChemicals(cas, EXPECTED_CHEMICALS));
-                assertTrue("Invalid Header in document " + getPMID(cas), checkHeader(cas, EXPECTED_HEADER));
-                assertTrue("Invalid ManualDescriptor", checkManualDescriptor(cas));
-                assertTrue("Invalid Journal", ckeckJournal(cas, EXPECTED_JOURNAL));
-                assertTrue("Invalid DocumentText in document " + getPMID(cas),
-                        checkDocumentText(cas, EXPECTED_DOCUMENT_TEXT));
-                assertTrue("Invalid AbstractText", checkAbstractText(cas, EXPECTED_ABSTRACT_TEXT));
-                assertTrue("Invalid Title", checkTitle(cas, EXPECTED_TITLE));
+                assertTrue(checkKeywords(cas, EXPECTED_KEYWORDS), "Invalid keyWordList");
+                assertTrue(checkAuthors(cas, EXPECTED_AUTHORS), "Invalid Authors");
+                assertTrue(ckeckDBInfos(cas, EXPECTED_DB_INFO), "Invalid DBInfoList");
+                assertTrue(checkMeshHeadings(cas, EXPECTED_MESH_HEADINGS), "Invalid MeshHeading");
+                assertTrue(checkGeneSymbols(cas, EXPECTED_GENE_SYMBOLS), "Invalid GeneSymbol");
+                assertTrue(checkChemicals(cas, EXPECTED_CHEMICALS), "Invalid Chemical");
+                assertTrue(checkHeader(cas, EXPECTED_HEADER), "Invalid Header in document " + getPMID(cas));
+                assertTrue(checkManualDescriptor(cas), "Invalid ManualDescriptor");
+                assertTrue(ckeckJournal(cas, EXPECTED_JOURNAL), "Invalid Journal");
+                assertTrue(checkDocumentText(cas, EXPECTED_DOCUMENT_TEXT),
+                        "Invalid DocumentText in document " + getPMID(cas));
+                assertTrue(checkAbstractText(cas, EXPECTED_ABSTRACT_TEXT), "Invalid AbstractText");
+                assertTrue(checkTitle(cas, EXPECTED_TITLE), "Invalid Title");
             }
 
             // check medline XML without most lists (gene, keywords,...)
             if (getPMID(cas).equals("11119751-a")) {
                 checkCount++;
-                assertTrue("Invalid Authors", checkAuthors(cas, EXPECTED_AUTHORS));
+                assertTrue(checkAuthors(cas, EXPECTED_AUTHORS), "Invalid Authors");
 
             }
 
@@ -358,30 +360,30 @@ private void checkElements() {
             // Spring-Summer</MedlineDate>
             if (getPMID(cas).equals("11119751-b")) {
                 checkCount++;
-                assertTrue("Invalid Authors", checkAuthors(cas, EXPECTED_AUTHORS));
-                assertTrue("Invalid GeneSymbol", checkGeneSymbols(cas, EXPECTED_GENE_SYMBOLS));
-                assertTrue("Invalid Journal in document " + getPMID(cas), ckeckJournal(cas, EXPECTED_JOURNAL));
-                assertTrue("Invalid PubDate", checkPubDate(cas, EXPECTED_DATE_1));
+                assertTrue(checkAuthors(cas, EXPECTED_AUTHORS), "Invalid Authors");
+                assertTrue(checkGeneSymbols(cas, EXPECTED_GENE_SYMBOLS), "Invalid GeneSymbol");
+                assertTrue(ckeckJournal(cas, EXPECTED_JOURNAL), "Invalid Journal in document " + getPMID(cas));
+                assertTrue(checkPubDate(cas, EXPECTED_DATE_1), "Invalid PubDate");
             }
 
             // check medline XML with pub date: <MedlineDate>2000 Dec
             // 23-30</MedlineDate>
             if (getPMID(cas).equals("11119751-c")) {
                 checkCount++;
-                assertTrue("Invalid Authors", checkAuthors(cas, EXPECTED_AUTHORS));
-                assertTrue("Invalid GeneSymbol", checkGeneSymbols(cas, EXPECTED_GENE_SYMBOLS));
-                assertTrue("Invalid Journal", ckeckJournal(cas, EXPECTED_JOURNAL));
-                assertTrue("Invalid PubDate", checkPubDate(cas, EXPECTED_DATE_2));
+                assertTrue(checkAuthors(cas, EXPECTED_AUTHORS), "Invalid Authors");
+                assertTrue(checkGeneSymbols(cas, EXPECTED_GENE_SYMBOLS), "Invalid GeneSymbol");
+                assertTrue(ckeckJournal(cas, EXPECTED_JOURNAL), "Invalid Journal");
+                assertTrue(checkPubDate(cas, EXPECTED_DATE_2), "Invalid PubDate");
             }
 
             // check medline XML pub date: <MedlineDate>2000 Oct-2001
             // Mar</MedlineDate>
             if (getPMID(cas).equals("11119751-d")) {
                 checkCount++;
-                assertTrue("Invalid Authors", checkAuthors(cas, EXPECTED_AUTHORS));
-                assertTrue("Invalid GeneSymbol", checkGeneSymbols(cas, EXPECTED_GENE_SYMBOLS));
-                assertTrue("Invalid Journal", ckeckJournal(cas, EXPECTED_JOURNAL));
-                assertTrue("Invalid PubDate", checkPubDate(cas, EXPECTED_DATE_3));
+                assertTrue(checkAuthors(cas, EXPECTED_AUTHORS), "Invalid Authors");
+                assertTrue(checkGeneSymbols(cas, EXPECTED_GENE_SYMBOLS), "Invalid GeneSymbol");
+                assertTrue(ckeckJournal(cas, EXPECTED_JOURNAL), "Invalid Journal");
+                assertTrue(checkPubDate(cas, EXPECTED_DATE_3), "Invalid PubDate");
             }
 
             if (getPMID(cas).equals("11119751-e")) {
@@ -394,22 +396,22 @@ private void checkElements() {
             // (documentText should be equal to title in this case)
             if (getPMID(cas).equals("17276851")) {
                 checkCount++;
-                assertTrue("Invalid Document Title", checkTitle(cas, EXPECTED_TITLE_2));
-                assertTrue("Invalid Document Text", checkDocumentText(cas, EXPECTED_TITLE_2));
+                assertTrue(checkTitle(cas, EXPECTED_TITLE_2), "Invalid Document Title");
+                assertTrue(checkDocumentText(cas, EXPECTED_TITLE_2), "Invalid Document Text");
             }
 
             // PubMed has changed the XML element ForeName to FirstName, but
             // foreName should still be supported
             if (getPMID(cas).equals("18439884")) {
                 checkCount++;
-                assertTrue("Invalid foreName", checkForeNames(cas, EXPECTED_FORE_NAMES));
+                assertTrue(checkForeNames(cas, EXPECTED_FORE_NAMES), "Invalid foreName");
                 checkJournalTitle(cas, EXPECTED_JOURNAL_TITLE);
             }
 
             if (getPMID(cas).equals("17306504")) {
                 checkCount++;
-                assertTrue("Invalid pubTypeList", checkPubTypeList(cas, EXPECTED_PUBTYPES));
-                assertTrue("Invalid DOI in document " + getPMID(cas), checkDoi(cas, EXPECTED_DOI));
+                assertTrue(checkPubTypeList(cas, EXPECTED_PUBTYPES), "Invalid pubTypeList");
+                assertTrue(checkDoi(cas, EXPECTED_DOI), "Invalid DOI in document " + getPMID(cas));
             }
         }
         assertEquals(9, checkCount);
@@ -491,7 +493,7 @@ private boolean checkAbstractText(CAS cas, String abstractTextString2) {
      *
      * @param cas
      *            The CAS
-     * @param title
+     * @param expectedTitle
      *            The correct title
      * @return true if the correct title is contained in the CAS
      */
@@ -896,7 +898,7 @@ private boolean checkAuthors(CAS cas, String[][] authors) {
      * Check if foreName was correctly parsed (PubMed changed firstName to foreName, but both should be supported)
      *
      * @param cas
-     * @param foreName
+     * @param foreNames
      * @return
      */
     private boolean checkForeNames(CAS cas, String[] foreNames) {

From b0ca7688df47a8d7328d7f20b3f4dd568c5d5395 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 8 Jun 2021 07:54:39 +0200
Subject: [PATCH 063/269] Fixed XMIDBReader tests which actually rely on TestNG
 for multiple, parallel execution of a single test. I don't know if and how
 this would work with JUnit 5.

---
 .../xmi/XmiDBMultiplierDifferentNsSchemaTest.java      |  8 ++++----
 .../julielab/jcore/reader/xmi/XmiDBMultiplierTest.java |  8 ++++----
 .../jcore/reader/xmi/XmiDBReaderBinaryFormatTest.java  | 10 +++++-----
 .../reader/xmi/XmiDBReaderDifferentNsSchemaTest.java   | 10 +++++-----
 .../jcore/reader/xmi/XmiDBReaderGzippedDataTest.java   |  8 ++++----
 .../reader/xmi/XmiDBReaderMonolithicDocumentsTest.java |  8 ++++----
 .../de/julielab/jcore/reader/xmi/XmiDBReaderTest.java  | 10 +++++-----
 .../consumer/xmi/XmiDBWriterBinaryFormatTest.java      |  8 +++++---
 .../xmi/XmiDBWriterMonolithicDocumentTest.java         |  6 ++++--
 .../julielab/jcore/consumer/xmi/XmiDBWriterTest.java   |  6 ++++--
 10 files changed, 44 insertions(+), 38 deletions(-)

diff --git a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierDifferentNsSchemaTest.java b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierDifferentNsSchemaTest.java
index f7fa5f19a..ff60e41a0 100644
--- a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierDifferentNsSchemaTest.java
+++ b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierDifferentNsSchemaTest.java
@@ -16,9 +16,9 @@
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
-import org.junit.jupiter.api.AfterAll;
-import org.junit.jupiter.api.BeforeAll;
 import org.testcontainers.containers.PostgreSQLContainer;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
 import java.io.File;
@@ -35,7 +35,7 @@ public class XmiDBMultiplierDifferentNsSchemaTest {
     private static String costosysConfig;
     private static int subsetCounter;
 
-    @BeforeAll
+    @BeforeClass
     public static void setup() throws UIMAException, IOException, ConfigurationException {
         postgres.start();
         XmiDBSetupHelper.createDbcConfig(postgres);
@@ -49,7 +49,7 @@ public static void setup() throws UIMAException, IOException, ConfigurationExcep
         subsetCounter = 0;
     }
 
-    @AfterAll
+    @AfterClass
     public static void shutdown() {
         postgres.close();
     }
diff --git a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierTest.java b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierTest.java
index c2398d503..2af097f43 100644
--- a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierTest.java
+++ b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierTest.java
@@ -16,9 +16,9 @@
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
-import org.junit.jupiter.api.AfterAll;
-import org.junit.jupiter.api.BeforeAll;
 import org.testcontainers.containers.PostgreSQLContainer;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
 import java.io.File;
@@ -35,7 +35,7 @@ public class XmiDBMultiplierTest {
     private static String costosysConfig;
     private static int subsetCounter;
 
-    @BeforeAll
+    @BeforeClass
     public static void setup() throws UIMAException, IOException, ConfigurationException {
         postgres.start();
         XmiDBSetupHelper.createDbcConfig(postgres);
@@ -49,7 +49,7 @@ public static void setup() throws UIMAException, IOException, ConfigurationExcep
         subsetCounter = 0;
     }
 
-    @AfterAll
+    @AfterClass
     public static void shutdown() {
         postgres.close();
     }
diff --git a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderBinaryFormatTest.java b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderBinaryFormatTest.java
index 51d66d493..d2fc88444 100644
--- a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderBinaryFormatTest.java
+++ b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderBinaryFormatTest.java
@@ -12,9 +12,9 @@
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
-import org.junit.jupiter.api.AfterAll;
-import org.junit.jupiter.api.BeforeAll;
 import org.testcontainers.containers.PostgreSQLContainer;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
 import java.io.IOException;
@@ -30,7 +30,7 @@ public class XmiDBReaderBinaryFormatTest {
     private static String costosysConfig;
     private static String xmisubset;
 
-    @BeforeAll
+    @BeforeClass
     public static void setup() throws SQLException, UIMAException, IOException, ConfigurationException {
         postgres.start();
         XmiDBSetupHelper.createDbcConfig(postgres);
@@ -38,7 +38,7 @@ public static void setup() throws SQLException, UIMAException, IOException, Conf
         DataBaseConnector dbc = DBTestUtils.getDataBaseConnector(postgres);
         costosysConfig = DBTestUtils.createTestCostosysConfig("xmi_text", 2, postgres);
         XmiDBSetupHelper.processAndSplitData(costosysConfig, false, true,"public");
-        assertTrue("The data document table exists", dbc.withConnectionQueryBoolean(c -> c.tableExists("_data.documents")));
+        assertTrue(dbc.withConnectionQueryBoolean(c -> c.tableExists("_data.documents")), "The data document table exists");
         xmisubset = "xmisubset";
         dbc.setActiveTableSchema("xmi_text");
         dbc.reserveConnection();
@@ -48,7 +48,7 @@ public static void setup() throws SQLException, UIMAException, IOException, Conf
     }
 
 
-    @AfterAll
+    @AfterClass
     public static void shutdown() {
         postgres.close();
     }
diff --git a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderDifferentNsSchemaTest.java b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderDifferentNsSchemaTest.java
index 39b2639f0..d592bec9e 100644
--- a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderDifferentNsSchemaTest.java
+++ b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderDifferentNsSchemaTest.java
@@ -12,9 +12,9 @@
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
-import org.junit.jupiter.api.AfterAll;
-import org.junit.jupiter.api.BeforeAll;
 import org.testcontainers.containers.PostgreSQLContainer;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
 import java.io.IOException;
@@ -30,7 +30,7 @@ public class XmiDBReaderDifferentNsSchemaTest {
     private static String costosysConfig;
     private static String xmisubset;
 
-    @BeforeAll
+    @BeforeClass
     public static void setup() throws SQLException, UIMAException, IOException, ConfigurationException {
         postgres.start();
         XmiDBSetupHelper.createDbcConfig(postgres);
@@ -38,7 +38,7 @@ public static void setup() throws SQLException, UIMAException, IOException, Conf
         DataBaseConnector dbc = DBTestUtils.getDataBaseConnector(postgres);
         costosysConfig = DBTestUtils.createTestCostosysConfig("xmi_text", 2, postgres);
         XmiDBSetupHelper.processAndSplitData(costosysConfig, false, false, "someotherschema");
-        assertTrue("The data document table exists", dbc.withConnectionQueryBoolean(c -> c.tableExists("_data.documents")));
+        assertTrue(dbc.withConnectionQueryBoolean(c -> c.tableExists("_data.documents")), "The data document table exists");
         xmisubset = "xmisubset";
         dbc.setActiveTableSchema("xmi_text");
         dbc.reserveConnection();
@@ -48,7 +48,7 @@ public static void setup() throws SQLException, UIMAException, IOException, Conf
     }
 
 
-    @AfterAll
+    @AfterClass
     public static void shutdown() {
         postgres.close();
     }
diff --git a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderGzippedDataTest.java b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderGzippedDataTest.java
index 018170026..9a7fea0b3 100644
--- a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderGzippedDataTest.java
+++ b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderGzippedDataTest.java
@@ -12,9 +12,9 @@
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
-import org.junit.jupiter.api.AfterAll;
-import org.junit.jupiter.api.BeforeAll;
 import org.testcontainers.containers.PostgreSQLContainer;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
 import java.io.File;
@@ -35,7 +35,7 @@ public class XmiDBReaderGzippedDataTest {
     private static String costosysConfig;
     private static String xmisubset;
 
-    @BeforeAll
+    @BeforeClass
     public static void setup() throws SQLException, UIMAException, IOException, ConfigurationException {
         postgres.start();
         XmiDBSetupHelper.createDbcConfig(postgres);
@@ -53,7 +53,7 @@ public static void setup() throws SQLException, UIMAException, IOException, Conf
         dbc.close();
     }
 
-    @AfterAll
+    @AfterClass
     public static void shutdown() {
         postgres.close();
     }
diff --git a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderMonolithicDocumentsTest.java b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderMonolithicDocumentsTest.java
index decd4e840..e0ae7f3ed 100644
--- a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderMonolithicDocumentsTest.java
+++ b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderMonolithicDocumentsTest.java
@@ -12,9 +12,9 @@
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
-import org.junit.jupiter.api.AfterAll;
-import org.junit.jupiter.api.BeforeAll;
 import org.testcontainers.containers.PostgreSQLContainer;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
 import java.io.File;
@@ -32,7 +32,7 @@ public class XmiDBReaderMonolithicDocumentsTest {
     private static String costosysConfig;
     private static String xmisubset;
 
-    @BeforeAll
+    @BeforeClass
     public static void setup() throws SQLException, UIMAException, IOException, ConfigurationException {
         postgres.start();
         XmiDBSetupHelper.createDbcConfig(postgres);
@@ -50,7 +50,7 @@ public static void setup() throws SQLException, UIMAException, IOException, Conf
         dbc.close();
     }
 
-    @AfterAll
+    @AfterClass
     public static void shutdown() {
         postgres.close();
     }
diff --git a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderTest.java b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderTest.java
index 1f8150274..cf1d089ef 100644
--- a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderTest.java
+++ b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderTest.java
@@ -14,9 +14,9 @@
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
-import org.junit.jupiter.api.AfterAll;
-import org.junit.jupiter.api.BeforeAll;
 import org.testcontainers.containers.PostgreSQLContainer;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
 import java.io.IOException;
@@ -32,7 +32,7 @@ public class XmiDBReaderTest {
     private static String costosysConfig;
     private static String xmisubset;
 
-    @BeforeAll
+    @BeforeClass
     public static void setup() throws SQLException, UIMAException, IOException, ConfigurationException {
         postgres.start();
         XmiDBSetupHelper.createDbcConfig(postgres);
@@ -40,7 +40,7 @@ public static void setup() throws SQLException, UIMAException, IOException, Conf
         DataBaseConnector dbc = DBTestUtils.getDataBaseConnector(postgres);
         costosysConfig = DBTestUtils.createTestCostosysConfig("xmi_text", 2, postgres);
         XmiDBSetupHelper.processAndSplitData(costosysConfig, false, false,"public");
-        assertTrue("The data document table exists", dbc.withConnectionQueryBoolean(c -> c.tableExists("_data.documents")));
+        assertTrue(dbc.withConnectionQueryBoolean(c -> c.tableExists("_data.documents")), "The data document table exists");
         xmisubset = "xmisubset";
         dbc.setActiveTableSchema("xmi_text");
         dbc.reserveConnection();
@@ -50,7 +50,7 @@ public static void setup() throws SQLException, UIMAException, IOException, Conf
     }
 
 
-    @AfterAll
+    @AfterClass
     public static void shutdown() {
         postgres.close();
     }
diff --git a/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterBinaryFormatTest.java b/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterBinaryFormatTest.java
index 4df9efaaa..15b5fc5c9 100644
--- a/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterBinaryFormatTest.java
+++ b/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterBinaryFormatTest.java
@@ -15,12 +15,13 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.StringArray;
-import org.junit.ClassRule;
-import org.junit.Test;
 import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 import org.testcontainers.containers.PostgreSQLContainer;
+import org.testcontainers.junit.jupiter.Container;
+import org.testcontainers.junit.jupiter.Testcontainers;
 
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
@@ -38,8 +39,9 @@
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
+@Testcontainers
 public class XmiDBWriterBinaryFormatTest {
-    @ClassRule
+    @Container
     public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer();
     private static String costosysConfig;
     private static String xmlSubsetTable;
diff --git a/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterMonolithicDocumentTest.java b/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterMonolithicDocumentTest.java
index 84e35a027..6af2d578d 100644
--- a/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterMonolithicDocumentTest.java
+++ b/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterMonolithicDocumentTest.java
@@ -14,11 +14,12 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
-import org.junit.ClassRule;
 import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
 import org.testcontainers.containers.PostgreSQLContainer;
+import org.testcontainers.junit.jupiter.Container;
+import org.testcontainers.junit.jupiter.Testcontainers;
 
 import java.io.ByteArrayInputStream;
 import java.sql.ResultSet;
@@ -27,8 +28,9 @@
 import static org.assertj.core.api.Assertions.assertThatCode;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
+@Testcontainers
 public class XmiDBWriterMonolithicDocumentTest {
-    @ClassRule
+    @Container
     public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer();
     private static String costosysConfig;
     private static DataBaseConnector dbc;
diff --git a/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterTest.java b/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterTest.java
index 00230cda6..5f3a979bb 100644
--- a/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterTest.java
+++ b/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterTest.java
@@ -11,11 +11,12 @@
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
-import org.junit.ClassRule;
 import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
 import org.testcontainers.containers.PostgreSQLContainer;
+import org.testcontainers.junit.jupiter.Container;
+import org.testcontainers.junit.jupiter.Testcontainers;
 
 import java.io.IOException;
 import java.sql.SQLException;
@@ -27,8 +28,9 @@
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.assertj.core.api.Assertions.assertThatCode;
 
+@Testcontainers
 public class XmiDBWriterTest {
-    @ClassRule
+    @Container
     public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer();
     private static String costosysConfig;
     private static String xmlSubsetTable;

From 930850c85973f9affad3a2a23fe36be0fc6f1283 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 8 Jun 2021 08:52:30 +0200
Subject: [PATCH 064/269] Added advanced logic to find the fitting Python
 executable for the flair components.

---
 .../jcore/ae/flairner/FlairNerAnnotator.java  | 33 +++++++++++++-
 .../ae/fte/FlairTokenEmbeddingAnnotator.java  | 45 +++++++++++++++----
 .../fte/FlairTokenEmbeddingAnnotatorTest.java |  7 ++-
 3 files changed, 71 insertions(+), 14 deletions(-)

diff --git a/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java b/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java
index 4aea01797..8ce44a6f5 100644
--- a/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java
+++ b/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java
@@ -1,5 +1,6 @@
 package de.julielab.jcore.ae.flairner;
 
+import de.julielab.java.utilities.IOStreamUtilities;
 import de.julielab.jcore.ae.annotationadder.AnnotationAdderAnnotator;
 import de.julielab.jcore.ae.annotationadder.AnnotationAdderConfiguration;
 import de.julielab.jcore.ae.annotationadder.AnnotationAdderHelper;
@@ -30,6 +31,8 @@
 import java.io.IOException;
 import java.lang.reflect.InvocationTargetException;
 import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 import java.util.stream.StreamSupport;
 
@@ -95,9 +98,35 @@ public void initialize(final UimaContext aContext) throws ResourceInitialization
             pythonExecutable = pythonExecutableOpt.get();
             log.info("Python executable: {} (from descriptor)", pythonExecutable);
         }
+        List<String> pythonCommands = List.of("python3", "python3.6", "python36", "python3.7", "python37", "python");
+        for (int i = 0; i < pythonCommands.size() && pythonExecutable == null; i++) {
+            String currentPythonExecutable = pythonCommands.get(i);
+            log.debug("Trying Python executable: {}", currentPythonExecutable);
+            try {
+                try {
+                    Process exec = new ProcessBuilder(List.of(currentPythonExecutable, "--version")).redirectErrorStream(true).start();
+                    List<String> pythonOutput = IOStreamUtilities.getLinesFromInputStream(exec.getInputStream());
+                    int exitCode = exec.waitFor();
+                    if (exitCode == 0 && !pythonOutput.isEmpty()) {
+                        String versionLine = pythonOutput.get(0);
+                        Matcher m = Pattern.compile("3\\..*$").matcher(versionLine);
+                        if (m.find()) {
+                            pythonExecutable = currentPythonExecutable;
+                            log.info("Found Python {} with command {}.", m.group(), pythonExecutable);
+                        }
+                    }
+                } catch (IOException e) {
+                    log.trace("Python command {} does not exist. Trying the next.", currentPythonExecutable);
+                }
+            } catch (InterruptedException e) {
+                log.error("Error why trying to call python.", e);
+                throw new ResourceInitializationException(e);
+            }
+        }
         if (pythonExecutable == null) {
-            pythonExecutable = "python";
-            log.info("Python executable: {} (default)", pythonExecutable);
+            String msg = String.format("Could not find Python 3.x installation. The following commands were tried: %s. Please make Python 3.x available under one of those commands or specify the Python executable explicitly in the component descriptor.", String.join(", ", pythonCommands));
+            log.error(msg);
+            throw new ResourceInitializationException(new IllegalArgumentException(msg));
         }
         try {
             connector = new StdioPythonConnector(flairModel, pythonExecutable, storeEmbeddings, gpuNum);
diff --git a/jcore-flair-token-embedding-ae/src/main/java/de/julielab/jcore/ae/fte/FlairTokenEmbeddingAnnotator.java b/jcore-flair-token-embedding-ae/src/main/java/de/julielab/jcore/ae/fte/FlairTokenEmbeddingAnnotator.java
index d41381bc9..a268d48fd 100644
--- a/jcore-flair-token-embedding-ae/src/main/java/de/julielab/jcore/ae/fte/FlairTokenEmbeddingAnnotator.java
+++ b/jcore-flair-token-embedding-ae/src/main/java/de/julielab/jcore/ae/fte/FlairTokenEmbeddingAnnotator.java
@@ -4,6 +4,7 @@
 import de.julielab.ipc.javabridge.Options;
 import de.julielab.ipc.javabridge.ResultDecoders;
 import de.julielab.ipc.javabridge.StdioBridge;
+import de.julielab.java.utilities.IOStreamUtilities;
 import de.julielab.jcore.types.EmbeddingVector;
 import de.julielab.jcore.types.Sentence;
 import de.julielab.jcore.types.Token;
@@ -30,6 +31,8 @@
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 @ResourceMetaData(name = "JCoRe Flair Token Embedding Annotator", description = "Adds the Flair compatible embedding vectors to the token annotations.")
 @TypeCapability(inputs = {"de.julielab.jcore.types.Sentence", "de.julielab.jcore.types.Token"}, outputs = {"de.julielab.jcore.types.EmbeddingVector"})
@@ -37,7 +40,7 @@ public class FlairTokenEmbeddingAnnotator extends JCasAnnotator_ImplBase {
 
     public static final String PARAM_EMBEDDING_PATH = "EmbeddingPath";
     public static final String PARAM_COMPUTATION_FILTER = "ComputationFilter";
-    public static final String PARAM_EMBEDDING_SOURCE  = "EmbeddingSource";
+    public static final String PARAM_EMBEDDING_SOURCE = "EmbeddingSource";
     public static final String PARAM_PYTHON_EXECUTABLE = "PythonExecutable";
     private final static Logger log = LoggerFactory.getLogger(FlairTokenEmbeddingAnnotator.class);
     /**
@@ -48,9 +51,9 @@ public class FlairTokenEmbeddingAnnotator extends JCasAnnotator_ImplBase {
     private String embeddingPath;
     @ConfigurationParameter(name = PARAM_COMPUTATION_FILTER, mandatory = false, description = "This parameter may be set to a fully qualified annotation type. If given, only for documents containing at least one annotation of this type embeddings will be retrieved from the computing flair python script. However, for contextualized embeddings, all embedding vectors are computed anyway and the the I/O cost is minor in comparison to the embedding computation. Thus, setting this parameter will most probably only result in small time savings.")
     private String computationFilter;
-    @ConfigurationParameter(name=PARAM_EMBEDDING_SOURCE, mandatory =  false, description = "The value of this parameter will be set to the source feature of the EmbeddingVector annotation instance created on the tokens. If left blank, the value of the " + PARAM_EMBEDDING_PATH + " will be used.")
+    @ConfigurationParameter(name = PARAM_EMBEDDING_SOURCE, mandatory = false, description = "The value of this parameter will be set to the source feature of the EmbeddingVector annotation instance created on the tokens. If left blank, the value of the " + PARAM_EMBEDDING_PATH + " will be used.")
     private String embeddingSource;
-    @ConfigurationParameter(name=PARAM_PYTHON_EXECUTABLE, mandatory = false, description = "The path to the python executable. Required is a python verion >=3.6.")
+    @ConfigurationParameter(name = PARAM_PYTHON_EXECUTABLE, mandatory = false, description = "The path to the python executable. Required is a python version >=3.6.")
     private String pythonExecutable;
     private StdioBridge<byte[]> flairBridge;
     private Gson gson;
@@ -68,9 +71,9 @@ public void initialize(final UimaContext aContext) throws ResourceInitialization
         computationFilter = (String) aContext.getConfigParameterValue(PARAM_COMPUTATION_FILTER);
         embeddingSource = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_EMBEDDING_SOURCE)).orElse(embeddingPath);
 
-        Optional<String>  pythonExecutableOpt = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_PYTHON_EXECUTABLE));
+        Optional<String> pythonExecutableOpt = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_PYTHON_EXECUTABLE));
         if (!pythonExecutableOpt.isPresent()) {
-            log.debug("No python executable given in the component descriptor, trying to read PYTHON environment variable." );
+            log.debug("No Python executable given in the component descriptor, trying to read PYTHON environment variable.");
             final String pythonExecutableEnv = System.getenv("PYTHON");
             if (pythonExecutableEnv != null) {
                 pythonExecutable = pythonExecutableEnv;
@@ -80,9 +83,35 @@ public void initialize(final UimaContext aContext) throws ResourceInitialization
             pythonExecutable = pythonExecutableOpt.get();
             log.info("Python executable: {} (from descriptor)", pythonExecutable);
         }
+        List<String> pythonCommands = List.of("python3", "python3.6", "python36", "python3.7", "python37", "python");
+        for (int i = 0; i < pythonCommands.size() && pythonExecutable == null; i++) {
+            String currentPythonExecutable = pythonCommands.get(i);
+            log.debug("Trying Python executable: {}", currentPythonExecutable);
+            try {
+                try {
+                    Process exec = new ProcessBuilder(List.of(currentPythonExecutable, "--version")).redirectErrorStream(true).start();
+                    List<String> pythonOutput = IOStreamUtilities.getLinesFromInputStream(exec.getInputStream());
+                    int exitCode = exec.waitFor();
+                    if (exitCode == 0 && !pythonOutput.isEmpty()) {
+                        String versionLine = pythonOutput.get(0);
+                        Matcher m = Pattern.compile("3\\..*$").matcher(versionLine);
+                        if (m.find()) {
+                            pythonExecutable = currentPythonExecutable;
+                            log.info("Found Python {} with command {}.", m.group(), pythonExecutable);
+                        }
+                    }
+                } catch (IOException e) {
+                    log.trace("Python command {} does not exist. Trying the next.", currentPythonExecutable);
+                }
+            } catch (InterruptedException e) {
+                log.error("Error why trying to call python.", e);
+                throw new ResourceInitializationException(e);
+            }
+        }
         if (pythonExecutable == null) {
-            pythonExecutable = "python3.6";
-            log.info("Python executable: {} (default)", pythonExecutable);
+            String msg = String.format("Could not find Python 3.x installation. The following commands were tried: %s. Please make Python 3.x available under one of those commands or specify the Python executable explicitly in the component descriptor.", String.join(", ", pythonCommands));
+            log.error(msg);
+            throw new ResourceInitializationException(new IllegalArgumentException(msg));
         }
 
         try {
@@ -183,7 +212,7 @@ private String constructEmbeddingRequest(JCas aJCas, List<Token> tokenToAddEmbed
                 }
                 ++tokenIndex;
             }
-            sentenceTextSb.deleteCharAt(sentenceTextSb.length()-1);
+            sentenceTextSb.deleteCharAt(sentenceTextSb.length() - 1);
             Map<String, Object> sentenceAndIndices = new HashMap<>();
             sentenceAndIndices.put("sentence", sentenceTextSb.toString());
             sentenceAndIndices.put("tokenIndicesToReturn", tokenIndicesToSet);
diff --git a/jcore-flair-token-embedding-ae/src/test/java/de/julielab/jcore/ae/fte/FlairTokenEmbeddingAnnotatorTest.java b/jcore-flair-token-embedding-ae/src/test/java/de/julielab/jcore/ae/fte/FlairTokenEmbeddingAnnotatorTest.java
index d67615d3e..f6ef8acce 100644
--- a/jcore-flair-token-embedding-ae/src/test/java/de/julielab/jcore/ae/fte/FlairTokenEmbeddingAnnotatorTest.java
+++ b/jcore-flair-token-embedding-ae/src/test/java/de/julielab/jcore/ae/fte/FlairTokenEmbeddingAnnotatorTest.java
@@ -18,6 +18,7 @@
  * Unit tests for jcore-flair-token-embedding-ae.
  */
 public class FlairTokenEmbeddingAnnotatorTest {
+
     @Test
     public void testEmbeddingAnnotator() throws Exception {
         final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types");
@@ -30,8 +31,7 @@ public void testEmbeddingAnnotator() throws Exception {
 
         final String embeddingPath = "flair:src/test/resources/gene_small_best_lm.pt";
         final AnalysisEngine engine = AnalysisEngineFactory.createEngine("de.julielab.jcore.ae.fte.desc.jcore-flair-token-embedding-ae",
-                FlairTokenEmbeddingAnnotator.PARAM_EMBEDDING_PATH, embeddingPath,
-                FlairTokenEmbeddingAnnotator.PARAM_PYTHON_EXECUTABLE, "python");
+                FlairTokenEmbeddingAnnotator.PARAM_EMBEDDING_PATH, embeddingPath);
 
         engine.process(jCas);
 
@@ -62,8 +62,7 @@ public void testEmbeddingAnnotatorWithFilterAnnotation() throws Exception {
         final String embeddingPath = "flair:src/test/resources/gene_small_best_lm.pt";
         final AnalysisEngine engine = AnalysisEngineFactory.createEngine("de.julielab.jcore.ae.fte.desc.jcore-flair-token-embedding-ae",
                 FlairTokenEmbeddingAnnotator.PARAM_EMBEDDING_PATH, embeddingPath,
-                FlairTokenEmbeddingAnnotator.PARAM_COMPUTATION_FILTER, "de.julielab.jcore.types.Gene",
-                FlairTokenEmbeddingAnnotator.PARAM_PYTHON_EXECUTABLE, "python");
+                FlairTokenEmbeddingAnnotator.PARAM_COMPUTATION_FILTER, "de.julielab.jcore.types.Gene");
 
         engine.process(jCas);
 

From b14b0725a9a9a3c779a433b02105c1c65e718c86 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 8 Jun 2021 10:16:02 +0200
Subject: [PATCH 065/269] Documentation updates. Travis CI is working.
 Finishing the current line of work.

Fixes #120,#119,#118.
---
 README.md | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 9035ccbb1..79c1fbc99 100644
--- a/README.md
+++ b/README.md
@@ -12,24 +12,29 @@ In order to automate the builds of complex NLP pipelines and properly represent
 A description for each individual component can be found in their respective `README.md`.
 
 ### Requirements & Dependencies
-In order to use our components you need at least [JDK 11](https://www.oracle.com/technetwork/java/javase/downloads/jdk11-downloads-5066655.html) (Java SE Development Kit 11), [UIMA 2.10](https://uima.apache.org/index.html) & [Maven 3](https://maven.apache.org/). We develop with the [Eclipse IDE for Java Developers](http://www.eclipse.org/downloads/) and [IntelliJ IDEA](https://www.jetbrains.com/idea/) Java IDEs. If course you're free to try it with different versions or tools than those mentioned, but we can't make promises for a flawless functioning of our components in these cases.
+In order to use our components you need at least [JDK 11](https://www.oracle.com/technetwork/java/javase/downloads/jdk11-downloads-5066655.html) (Java SE Development Kit 11), [UIMA 2.x](https://uima.apache.org/index.html) & [Maven 3](https://maven.apache.org/). We develop with the [Eclipse IDE for Java Developers](http://www.eclipse.org/downloads/) and [IntelliJ IDEA](https://www.jetbrains.com/idea/) Java IDEs. Of course you're free to try it with different versions or tools than those mentioned, but we can't make promises for a flawless functioning of our components in these cases.
 
 ### UIMA's Collection Processing Engine (CPE)
-UIMA features a relatively easy way to combine UIMA components together in order to analyze a collection of artifacts. If you're not firm or willing to deal with Java Code, the usage of a CPE might be the right choice.
+UIMA offers a relatively easy way to combine UIMA components together in order to analyze a collection of artifacts. If you're not familiar with Java code or not willing to deal with it, the usage of a CPE might be the right choice.
 For more detailed information see [UIMA's CPE Documentation](https://uima.apache.org/downloads/releaseDocs/2.1.0-incubating/docs/html/tutorials_and_users_guides/tutorials_and_users_guides.html#ugr.tug.cpe).
 
-We're also working on a simple [Python script](https://github.com/JULIELab/jcore-misc/tree/master/jcore-cpe-builder) that builds rudimentary and preconfigured CPEs of your choice. It's working but still work in progress so please bear with us and post issues.
+A newer alternative is [UIMA AS](https://uima.apache.org/doc-uimaas-what.html). It is today's officially recommended way to use and scale UIMA pipelines. Our existing CPE infrastructure serves us well, however, so we mostly stick to those for the time being.
+
+### JCoRe UIMA Pipeline Builder
+
+Most CPE configurations consisting of JCoRe components can be easily built using the [JCoRe UIMA Pipeline Builder](https://github.com/JULIELab/jcore-pipeline-modules).
+This is a Java program that offers a simple command line interface for the creation of CPEs. There is also support for UIMA AS.
 
 ### Maven Artifacts
 If not stated otherwise, all the components found in this project are at least in their latest release version also available as Maven artifacts:
 ```
 <dependency>
     <groupId>de.julielab</groupId>
-    <artifactId>#COMPONENT-NAME</artifactId>
+    <artifactId>COMPONENT-NAME</artifactId>
     <version>${jcore-version}</version>
 </dependency>
 ```
-Where `#COMPONENT-NAME` is exactly the same as the name on GitHub.
+Where `COMPONENT-NAME` is exactly the same as the name on GitHub.
 For instance, to get the Acronym Resolver, include this in your Maven dependencies:
 ```
 <dependency>

From 2317cfa0011351f23ea6df834b068daf7fe95b93 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 8 Jun 2021 15:17:41 +0200
Subject: [PATCH 066/269] Adding trace-level logging to the
 `AnnotationDefinedFlow` in order to see the flow when actually running all
 components together.

---
 .../AnnotationDefinedFlow.java                | 24 +++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlow.java b/jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlow.java
index 0243a7f36..e93616aab 100644
--- a/jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlow.java
+++ b/jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlow.java
@@ -1,14 +1,18 @@
 package de.julielab.jcore.flow.annotationdefined;
 
 import de.julielab.jcore.types.casflow.ToVisit;
+import de.julielab.jcore.utility.JCoReTools;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.analysis_engine.metadata.FixedFlow;
 import org.apache.uima.analysis_engine.metadata.FlowConstraints;
+import org.apache.uima.cas.CASException;
 import org.apache.uima.flow.FinalStep;
 import org.apache.uima.flow.JCasFlow_ImplBase;
 import org.apache.uima.flow.SimpleStep;
 import org.apache.uima.flow.Step;
 import org.jetbrains.annotations.Nullable;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * <p>Returns steps according an existing {@link ToVisit} annotation of the CAS or, if not present, the default aggregate flow.</p>
@@ -16,6 +20,7 @@
  * the currently read document already exists in the database.</p>
  */
 public class AnnotationDefinedFlow extends JCasFlow_ImplBase {
+    private final static Logger log = LoggerFactory.getLogger(AnnotationDefinedFlow.class);
     private String[] toVisitKeys;
     private String[] fixedFlow;
     private int currentPos;
@@ -24,7 +29,8 @@ public class AnnotationDefinedFlow extends JCasFlow_ImplBase {
      * <p>Creates a flow that follows to entries in {@link ToVisit#getDelegateKeys()} of <tt>toVisit</tt> or, if
      * <tt>toVisit</tt> is null, falls back to the default fixed flow.</p>
      * <p>If <tt>toVisit</tt> is not null but the <tt>delegateKeys</tt> are null or empty, no component in the aggregate using this flow will process the respective CAS.</p>
-     * @param toVisit An annotation containing the keys of the delegate AEs to visit. May be null which case the default fixed flow will be used.
+     *
+     * @param toVisit         An annotation containing the keys of the delegate AEs to visit. May be null which case the default fixed flow will be used.
      * @param flowConstraints The default fixed flow of the aggregate analysis engine.
      * @throws AnalysisEngineProcessException If <tt>flowConstraints</tt> is not a fixed flow.
      */
@@ -36,7 +42,21 @@ public AnnotationDefinedFlow(@Nullable ToVisit toVisit, FlowConstraints flowCons
         // 1. There are given keys to visit, use them.
         // 2. There are no keys given but the ToVisit annotation is not null, skip all components.
         // 3. There is not ToVisit annotation at all, use the default fixed flow.
-        if(toVisit != null && toVisit.getDelegateKeys() != null)
+        if (log.isTraceEnabled()) {
+            try {
+            String docId = JCoReTools.getDocId(toVisit.getCAS().getJCas());
+                if (toVisit != null) {
+                    String[] delegateKeys = toVisit.getDelegateKeys() != null ? toVisit.getDelegateKeys().toArray() : null;
+                    log.trace("Found ToVisit annotation for document {} with the following component keys: {}", docId, delegateKeys);
+                } else {
+                    log.trace("Got no ToVisit annotation for document {}.", docId);
+                }
+            } catch (CASException e) {
+                log.error("Could not retrieve JCas from ToVisit annotation.", e);
+                throw new AnalysisEngineProcessException(e);
+            }
+        }
+        if (toVisit != null && toVisit.getDelegateKeys() != null)
             toVisitKeys = toVisit.getDelegateKeys().toArray();
         else if (toVisit != null)
             toVisitKeys = new String[0];

From 362255c1c75e7d8c01d1036020a5aacc06c6ab6a Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 10 Jun 2021 17:48:20 +0200
Subject: [PATCH 067/269] Created a project for an integration test with
 XML to XMI database writing with hash comparison and flow control.

Fixed a few bugs on the way, now everything is working fine.
---
 .../jcore/ae/checkpoint/DBCheckpointAE.java   |   1 +
 .../jcore/reader/db/DBMultiplier.java         |   4 +
 .../jcore/reader/db/DBMultiplierReader.java   |   5 +
 .../jcore/reader/db/DBReaderBase.java         |   2 +
 .../AnnotationDefinedFlow.java                |  37 +-
 .../AnnotationDefinedFlowController.java      |   2 +-
 jcore-jedis-integration-tests/pom.xml         |  64 +++
 .../UpdateWithHashComparison.java             | 257 ++++++++++
 .../src/test/resources/logback-test.xml       |  19 +
 .../src/test/resources/medlineMappingFile.xml | 457 ++++++++++++++++++
 .../pubmed21n1016_excerpt_original.xml.gz     | Bin 0 -> 3038 bytes
 ...ed21n1016_excerpt_partially_changed.xml.gz | Bin 0 -> 3075 bytes
 .../src/test/resources/pubmedMappingFile.xml  | 436 +++++++++++++++++
 .../jcore/consumer/xmi/XMIDBWriter.java       |  17 +-
 .../jcore/reader/xml/XMLDBMultiplier.java     |   8 +-
 .../jcore/reader/xml/XMLDBMultiplierTest.java |   2 +-
 pom.xml                                       |   3 +-
 17 files changed, 1285 insertions(+), 29 deletions(-)
 create mode 100644 jcore-jedis-integration-tests/pom.xml
 create mode 100644 jcore-jedis-integration-tests/src/test/java/de/julielab/jcore/jedis/integrationtests/UpdateWithHashComparison.java
 create mode 100644 jcore-jedis-integration-tests/src/test/resources/logback-test.xml
 create mode 100644 jcore-jedis-integration-tests/src/test/resources/medlineMappingFile.xml
 create mode 100644 jcore-jedis-integration-tests/src/test/resources/pubmed21n1016_excerpt_original.xml.gz
 create mode 100644 jcore-jedis-integration-tests/src/test/resources/pubmed21n1016_excerpt_partially_changed.xml.gz
 create mode 100644 jcore-jedis-integration-tests/src/test/resources/pubmedMappingFile.xml

diff --git a/jcore-db-checkpoint-ae/src/main/java/de/julielab/jcore/ae/checkpoint/DBCheckpointAE.java b/jcore-db-checkpoint-ae/src/main/java/de/julielab/jcore/ae/checkpoint/DBCheckpointAE.java
index 1a70c23cd..264c32999 100644
--- a/jcore-db-checkpoint-ae/src/main/java/de/julielab/jcore/ae/checkpoint/DBCheckpointAE.java
+++ b/jcore-db-checkpoint-ae/src/main/java/de/julielab/jcore/ae/checkpoint/DBCheckpointAE.java
@@ -69,6 +69,7 @@ public class DBCheckpointAE extends JCasAnnotator_ImplBase {
      */
     @Override
     public void initialize(final UimaContext aContext) throws ResourceInitializationException {
+        super.initialize(aContext);
         componentDbName = (String) aContext.getConfigParameterValue(PARAM_CHECKPOINT_NAME);
         dbcConfigPath = (String) aContext.getConfigParameterValue(PARAM_COSTOSYS_CONFIG);
         indicateFinished = Optional.ofNullable((Boolean) aContext.getConfigParameterValue(PARAM_INDICATE_FINISHED)).orElse(false);
diff --git a/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplier.java b/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplier.java
index 17040c15e..c83fcaebb 100644
--- a/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplier.java
+++ b/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplier.java
@@ -72,6 +72,10 @@ private DataBaseConnector getDataBaseConnector(String costosysConfig) throws Ana
     @Override
     public void process(JCas aJCas) throws AnalysisEngineProcessException {
         RowBatch rowbatch = JCasUtil.selectSingle(aJCas, RowBatch.class);
+        if (rowbatch.getIdentifiers() == null)
+            throw new AnalysisEngineProcessException(new IllegalArgumentException("The identifiers of the passed row batch are null."));
+        if (rowbatch.getIdentifiers().size() == 0)
+            throw new AnalysisEngineProcessException(new IllegalArgumentException("The identifiers of the passed row batch are empty."));
         tables = rowbatch.getTables().toStringArray();
         schemaNames = rowbatch.getTableSchemas().toStringArray();
         tableName = rowbatch.getTableName();
diff --git a/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplierReader.java b/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplierReader.java
index 83370feae..bfe474de8 100644
--- a/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplierReader.java
+++ b/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplierReader.java
@@ -49,6 +49,9 @@ public class DBMultiplierReader extends DBSubsetReader {
     @Override
     public void initialize(UimaContext context) throws ResourceInitializationException {
         super.initialize(context);
+        // reset the state in case of reconfigure()
+        retriever = null;
+        dataTableDocumentIds = null;
 
         // Check whether a subset table name or a data table name was given.
         if (readDataTable) {
@@ -65,6 +68,8 @@ public void initialize(UimaContext context) throws ResourceInitializationExcepti
     public void getNext(JCas jCas) throws CollectionException {
         log.trace("Requesting next batch of document IDs from the database.");
         List<Object[]> idList = getNextDocumentIdBatch();
+        if (idList.isEmpty())
+            throw new CollectionException(new IllegalStateException("There are no documents to read in the database. Please call hasNext() to check if there is more data to read."));
         log.trace("Received a list of {} ID from the database.", idList.size());
         RowBatch rowbatch = new RowBatch(jCas);
         FSArray ids = new FSArray(jCas, idList.size());
diff --git a/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBReaderBase.java b/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBReaderBase.java
index 082909cb5..c46d6a105 100644
--- a/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBReaderBase.java
+++ b/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBReaderBase.java
@@ -95,6 +95,8 @@ public void initialize(UimaContext context) throws ResourceInitializationExcepti
         } catch (FileNotFoundException e) {
             throw new ResourceInitializationException(e);
         }
+
+        numberFetchedDocIDs = 0;
     }
 
     private void checkTableExists() throws ResourceInitializationException {
diff --git a/jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlow.java b/jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlow.java
index e93616aab..c945ef0eb 100644
--- a/jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlow.java
+++ b/jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlow.java
@@ -5,15 +5,19 @@
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.analysis_engine.metadata.FixedFlow;
 import org.apache.uima.analysis_engine.metadata.FlowConstraints;
-import org.apache.uima.cas.CASException;
 import org.apache.uima.flow.FinalStep;
 import org.apache.uima.flow.JCasFlow_ImplBase;
 import org.apache.uima.flow.SimpleStep;
 import org.apache.uima.flow.Step;
+import org.apache.uima.jcas.JCas;
 import org.jetbrains.annotations.Nullable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.util.Arrays;
+import java.util.Set;
+import java.util.stream.Collectors;
+
 /**
  * <p>Returns steps according an existing {@link ToVisit} annotation of the CAS or, if not present, the default aggregate flow.</p>
  * <p>This is, for example, used by the <tt>XMLDBMultiplier</tt> to let CASes skip large parts of the pipeline when
@@ -24,6 +28,7 @@ public class AnnotationDefinedFlow extends JCasFlow_ImplBase {
     private String[] toVisitKeys;
     private String[] fixedFlow;
     private int currentPos;
+    private String docId;
 
     /**
      * <p>Creates a flow that follows to entries in {@link ToVisit#getDelegateKeys()} of <tt>toVisit</tt> or, if
@@ -32,9 +37,10 @@ public class AnnotationDefinedFlow extends JCasFlow_ImplBase {
      *
      * @param toVisit         An annotation containing the keys of the delegate AEs to visit. May be null which case the default fixed flow will be used.
      * @param flowConstraints The default fixed flow of the aggregate analysis engine.
+     * @param jCas            The CAS this flow is computed for; used to look up the document ID for trace logging.
      * @throws AnalysisEngineProcessException If <tt>flowConstraints</tt> is not a fixed flow.
      */
-    public AnnotationDefinedFlow(@Nullable ToVisit toVisit, FlowConstraints flowConstraints) throws AnalysisEngineProcessException {
+    public AnnotationDefinedFlow(@Nullable ToVisit toVisit, FlowConstraints flowConstraints, JCas jCas) throws AnalysisEngineProcessException {
         if (!(flowConstraints instanceof FixedFlow))
             throw new AnalysisEngineProcessException(new IllegalArgumentException("This flow requires the FixedFlow to determine the default processing order. However, the flow constraints are of type " + flowConstraints.getClass().getCanonicalName()));
         this.fixedFlow = ((FixedFlow) flowConstraints).getFixedFlow();
@@ -43,22 +49,19 @@ public AnnotationDefinedFlow(@Nullable ToVisit toVisit, FlowConstraints flowCons
         // 2. There are no keys given but the ToVisit annotation is not null, skip all components.
         // 3. There is not ToVisit annotation at all, use the default fixed flow.
         if (log.isTraceEnabled()) {
-            try {
-            String docId = JCoReTools.getDocId(toVisit.getCAS().getJCas());
-                if (toVisit != null) {
-                    String[] delegateKeys = toVisit.getDelegateKeys() != null ? toVisit.getDelegateKeys().toArray() : null;
-                    log.trace("Found ToVisit annotation for document {} with the following component keys: {}", docId, delegateKeys);
-                } else {
-                    log.trace("Got no ToVisit annotation for document {}.", docId);
-                }
-            } catch (CASException e) {
-                log.error("Could not retrieve JCas from ToVisit annotation.", e);
-                throw new AnalysisEngineProcessException(e);
+            docId = JCoReTools.getDocId(jCas);
+            if (toVisit != null) {
+                String[] delegateKeys = toVisit.getDelegateKeys() != null ? toVisit.getDelegateKeys().toArray() : null;
+                log.trace("Found ToVisit annotation for document {} with the following component keys: {}", docId, delegateKeys);
+            } else {
+                log.trace("Got no ToVisit annotation for document {}, the CAS is routed through the aggregate in the default order.", docId);
             }
         }
-        if (toVisit != null && toVisit.getDelegateKeys() != null)
-            toVisitKeys = toVisit.getDelegateKeys().toArray();
-        else if (toVisit != null)
+        if (toVisit != null && toVisit.getDelegateKeys() != null) {
+            // filter for delegates actually contained in the current AAE.
+            Set<String> knownKeys = Arrays.stream(this.fixedFlow).collect(Collectors.toSet());
+            toVisitKeys = Arrays.stream(toVisit.getDelegateKeys().toArray()).filter(knownKeys::contains).toArray(String[]::new);
+        } else if (toVisit != null)
             toVisitKeys = new String[0];
         else
             toVisitKeys = null;
@@ -77,8 +80,10 @@ public Step next() {
         if ((toVisitKeys == null && currentPos < fixedFlow.length) || (toVisitKeys != null && currentPos < toVisitKeys.length)) {
             String nextAEKey = toVisitKeys != null ? toVisitKeys[currentPos] : fixedFlow[currentPos];
             ++currentPos;
+            log.trace("Next component key to visit for document {}: {}", docId, nextAEKey);
             return new SimpleStep(nextAEKey);
         }
+        log.trace("Flow finished for document {}.", docId);
         return new FinalStep();
     }
 }
diff --git a/jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlowController.java b/jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlowController.java
index 77a803e23..4158059a3 100644
--- a/jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlowController.java
+++ b/jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlowController.java
@@ -20,6 +20,6 @@ public Flow computeFlow(JCas jCas) throws AnalysisEngineProcessException {
         boolean exists = JCasUtil.exists(jCas, ToVisit.class);
         ToVisit toVisit = exists ? JCasUtil.selectSingle(jCas, ToVisit.class) : null;
         // When toVisit is null, the default, fixed flow is used.
-        return new AnnotationDefinedFlow(toVisit, getContext().getAggregateMetadata().getFlowConstraints());
+        return new AnnotationDefinedFlow(toVisit, getContext().getAggregateMetadata().getFlowConstraints(), jCas);
     }
 }
diff --git a/jcore-jedis-integration-tests/pom.xml b/jcore-jedis-integration-tests/pom.xml
new file mode 100644
index 000000000..2bcc39022
--- /dev/null
+++ b/jcore-jedis-integration-tests/pom.xml
@@ -0,0 +1,64 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <parent>
+        <artifactId>jedis-parent</artifactId>
+        <groupId>de.julielab</groupId>
+        <version>2.6.0-SNAPSHOT</version>
+        <relativePath>../jedis-parent</relativePath>
+    </parent>
+    <modelVersion>4.0.0</modelVersion>
+
+    <artifactId>jcore-jedis-integration-tests</artifactId>
+
+    <dependencies>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-xml-db-reader</artifactId>
+            <version>${project.parent.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-xmi-db-writer</artifactId>
+            <version>${project.parent.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-db-checkpoint-ae</artifactId>
+            <version>${project.parent.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-flow-controllers</artifactId>
+            <version>${project.parent.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-types</artifactId>
+            <version>${jcore-types-version}</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>costosys</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.assertj</groupId>
+            <artifactId>assertj-core</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-db-test-utilities</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
+        </dependency>
+    </dependencies>
+
+</project>
\ No newline at end of file
diff --git a/jcore-jedis-integration-tests/src/test/java/de/julielab/jcore/jedis/integrationtests/UpdateWithHashComparison.java b/jcore-jedis-integration-tests/src/test/java/de/julielab/jcore/jedis/integrationtests/UpdateWithHashComparison.java
new file mode 100644
index 000000000..52754055b
--- /dev/null
+++ b/jcore-jedis-integration-tests/src/test/java/de/julielab/jcore/jedis/integrationtests/UpdateWithHashComparison.java
@@ -0,0 +1,257 @@
+package de.julielab.jcore.jedis.integrationtests;
+
+import de.julielab.costosys.dbconnection.DataBaseConnector;
+import de.julielab.costosys.dbconnection.SubsetStatus;
+import de.julielab.jcore.ae.checkpoint.DBCheckpointAE;
+import de.julielab.jcore.consumer.xmi.XMIDBWriter;
+import de.julielab.jcore.db.test.DBTestUtils;
+import de.julielab.jcore.flow.annotationdefined.AnnotationDefinedFlowController;
+import de.julielab.jcore.reader.db.DBMultiplierReader;
+import de.julielab.jcore.reader.xml.XMLDBMultiplier;
+import de.julielab.jcore.types.Annotation;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.factory.*;
+import org.apache.uima.flow.FlowControllerDescription;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.metadata.TypeSystemDescription;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Nested;
+import org.junit.jupiter.api.Test;
+import org.testcontainers.containers.PostgreSQLContainer;
+import org.testcontainers.junit.jupiter.Container;
+import org.testcontainers.junit.jupiter.Testcontainers;
+
+import java.io.File;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.EnumSet;
+import java.util.List;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+@Testcontainers
+public class UpdateWithHashComparison {
+    private static final String SOURCE_XML_TABLE = "_data.source_xml_table";
+    private static final String TARGET_XMI_TABLE = "_data_xmi.target_xmi_table";
+    private static final String XML_SUBSET_TABLE = "test_subset";
+    private static final String XMI_MIRROR_TABLE = "test_xmi_mirror";
+    @Container
+    public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:11.12");
+    private static String costosysConfigSourceTable;
+    private static String costosysConfigTargetTable;
+    /**
+     * The collection reader that feeds the XMLDBMultiplier the database rows to read.
+     */
+    private static CollectionReader testCr;
+    /**
+     * The top-level aggregate containing the XMLDBMultiplier and two "child" aggregates, one for the analysis engines
+     * and one for the CAS consumers. In this test, the aggregate delegates are all realized by instances of {@link TestAnnotator}.
+     */
+    private static AnalysisEngine testAggregate;
+    private static JCas cas;
+    private static DataBaseConnector dbc;
+    private static List<String> namesOfRunComponents = new ArrayList<>();
+
+    @BeforeAll
+    public static void setup() throws Exception {
+        DBTestUtils.createAndSetHiddenConfig(Path.of("src", "test", "resources", "hiddenConfig").toString(), postgres);
+
+        dbc = DBTestUtils.getDataBaseConnector(postgres);
+        dbc.setActiveTableSchema("medline_2017");
+        costosysConfigSourceTable = DBTestUtils.createTestCostosysConfig("medline_2017", 1, postgres);
+        costosysConfigTargetTable = DBTestUtils.createTestCostosysConfig("xmi_text", 1, postgres);
+        new File(costosysConfigSourceTable).deleteOnExit();
+        new File(costosysConfigTargetTable).deleteOnExit();
+        prepareSourceXMLTable(dbc);
+        dbc.defineMirrorSubset(XML_SUBSET_TABLE, SOURCE_XML_TABLE, true, "Test subset");
+        assertThat(dbc.getNumRows(SOURCE_XML_TABLE)).isEqualTo(3);
+        createTestPipelineComponents();
+    }
+
+    @AfterAll
+    public static void shutdown() {
+        dbc.close();
+    }
+
+    private static void prepareSourceXMLTable(DataBaseConnector dbc) throws Exception {
+        dbc.createTable(SOURCE_XML_TABLE, "Test XML Table");
+        dbc.importFromXMLFile(Path.of("src", "test", "resources", "pubmed21n1016_excerpt_original.xml.gz").toString(), SOURCE_XML_TABLE);
+    }
+
+    /**
+     * <p>Creates test components in a structure that mimics the structure used by the <tt>jcore-pipeline-builder</tt>.</p>
+     * <p>This consists of:
+     * <ol>
+     *     <li>a <tt>CollectionReader</tt></li>
+     *     <li>an AAE containing all other components:
+     *     <ol>
+     *         <li>an optional <tt>CAS multiplier</tt></li>
+     *         <li>an aggregate containing all AEs</li>
+     *         <li>an aggregate containing all CAS consumers</li>
+     *     </ol>
+     *     </li>
+     * </ol>
+     *     The CAS consumers in this test consist of two "mock" CCs, a "real" XMI Writer and DB Checkpoint AE.
+     * We here want to test if we can successfully route the CAS through those inner AAEs when the multiplier adds
+     * the correct {@link de.julielab.jcore.types.casflow.ToVisit} annotation using a {@link de.julielab.jcore.flow.annotationdefined.AnnotationDefinedFlowController}.
+     * </p>
+     */
+    private static void createTestPipelineComponents() throws Exception {
+        TypeSystemDescription tsDesc = TypeSystemDescriptionFactory.createTypeSystemDescription("de.julielab.jcore.types.jcore-document-meta-types", "de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types", "de.julielab.jcore.types.extensions.jcore-document-meta-extension-types", "de.julielab.jcore.types.jcore-casflow-types", "de.julielab.jcore.types.jcore-xmi-splitter-types");
+
+        testCr = CollectionReaderFactory.createReader(DBMultiplierReader.class,
+                tsDesc,
+                DBMultiplierReader.PARAM_TABLE, XML_SUBSET_TABLE,
+                DBMultiplierReader.PARAM_RESET_TABLE, false,
+                DBMultiplierReader.PARAM_COSTOSYS_CONFIG_NAME, costosysConfigSourceTable,
+                // We set a batch size of 1 to have more refined testing.
+                // Otherwise, the multiplier would receive all 3 test documents at once and
+                // would process them all in one batch
+                DBMultiplierReader.PARAM_BATCH_SIZE, 1
+        );
+
+        AnalysisEngineDescription testAe1 = AnalysisEngineFactory.createEngineDescription(TestAnnotator.class, tsDesc, "name", "TestAE 1");
+        AnalysisEngineDescription testAe2 = AnalysisEngineFactory.createEngineDescription(TestAnnotator.class, tsDesc, "name", "TestAE 2");
+        AnalysisEngineDescription testCc1 = AnalysisEngineFactory.createEngineDescription(TestAnnotator.class, tsDesc, "name", "TestCC 1");
+        AnalysisEngineDescription testCc2 = AnalysisEngineFactory.createEngineDescription(TestAnnotator.class, tsDesc, "name", "TestCC 2");
+        AnalysisEngineDescription xmiDbWriter = AnalysisEngineFactory.createEngineDescription(XMIDBWriter.class,
+                XMIDBWriter.PARAM_TABLE_DOCUMENT, TARGET_XMI_TABLE,
+                XMIDBWriter.PARAM_ANNOS_TO_STORE, new String[]{"de.julielab.jcore.types.Annotation"},
+                XMIDBWriter.PARAM_STORE_ALL, false,
+                XMIDBWriter.PARAM_STORE_BASE_DOCUMENT, true,
+                XMIDBWriter.PARAM_STORE_RECURSIVELY, false,
+                XMIDBWriter.PARAM_ADD_SHA_HASH, "document_text",
+                XMIDBWriter.PARAM_COSTOSYS_CONFIG, costosysConfigTargetTable,
+                XMIDBWriter.PARAM_UPDATE_MODE, true,
+                XMIDBWriter.PARAM_DO_GZIP, false
+        );
+        AnalysisEngineDescription dbCheckpointAe = AnalysisEngineFactory.createEngineDescription(DBCheckpointAE.class,
+                DBCheckpointAE.PARAM_CHECKPOINT_NAME, "end",
+                DBCheckpointAE.PARAM_COSTOSYS_CONFIG, costosysConfigSourceTable,
+                DBCheckpointAE.PARAM_INDICATE_FINISHED, true
+        );
+
+        FlowControllerDescription flowControllerDescription = FlowControllerFactory.createFlowControllerDescription(AnnotationDefinedFlowController.class);
+        AnalysisEngineDescription aeAaeDesc = AnalysisEngineFactory.createEngineDescription(List.of(testAe1, testAe2), List.of("TestAE 1", "TestAE 2"), null, null, flowControllerDescription);
+        AnalysisEngineDescription ccAaeDesc = AnalysisEngineFactory.createEngineDescription(List.of(testCc1, testCc2, xmiDbWriter, dbCheckpointAe), List.of("TestCC 1", "TestCC 2", "XMI Writer", "Checkpoint Writer"), null, null, flowControllerDescription);
+
+        AnalysisEngineDescription multiplierDescription = AnalysisEngineFactory.createEngineDescription(XMLDBMultiplier.class,
+                tsDesc,
+                XMLDBMultiplier.PARAM_MAPPING_FILE, Path.of("src", "test", "resources", "medlineMappingFile.xml").toString(),
+                // The core of this whole test: The components to be visited in case of matching hash codes.
+                // We want to skip all components except the checkpoint writer that marks the document as
+                // "processed" in the XML subset table
+                XMLDBMultiplier.PARAM_TO_VISIT_KEYS, new String[]{"Checkpoint Writer"},
+                // The next three parameters are required for the hash comparison
+                XMLDBMultiplier.PARAM_ADD_SHA_HASH, "document_text",
+                XMLDBMultiplier.PARAM_TABLE_DOCUMENT, TARGET_XMI_TABLE,
+                XMLDBMultiplier.PARAM_TABLE_DOCUMENT_SCHEMA, "xmi_text");
+
+        testAggregate = AnalysisEngineFactory.createEngine(List.of(multiplierDescription, aeAaeDesc, ccAaeDesc), List.of("multiplier", "AeAAE", "CcAAE"), null, null);
+
+        cas = JCasFactory.createJCas(tsDesc);
+    }
+
+    @Test
+    public void testInitialProcessingProcessing() throws Exception {
+        assertThat(testCr.hasNext()).isTrue(); // a bare assertThat(...) without an assertion call verifies nothing
+        while (testCr.hasNext()) {
+            testCr.getNext(cas.getCas());
+            testAggregate.process(cas);
+            // Check that all components have been visited as expected from a normal, fixed flow
+            assertThat(namesOfRunComponents).containsExactly("TestAE 1", "TestAE 2", "TestCC 1", "TestCC 2");
+            namesOfRunComponents.clear();
+            cas.reset();
+        }
+        testAggregate.collectionProcessComplete();
+        assertThat(dbc.tableExists(TARGET_XMI_TABLE)).isTrue(); // the mirror subset defined below requires this table
+        // After this first processing, the XMI document table exists. We can now create a mirror on it. This is important
+        // because we want to see that the mirror is only reset for rows that have actually changed in subsequent tests.
+        dbc.defineMirrorSubset(XMI_MIRROR_TABLE, TARGET_XMI_TABLE, true, "The XMI test mirror table.", "xmi_text");
+        // We mark the XMI mirror subset as completely processed. This simulates a state where the initial batch of
+        // documents has been completely processed, before the update comes in.
+        dbc.markAsProcessed(XMI_MIRROR_TABLE);
+        SubsetStatus status = dbc.status(XML_SUBSET_TABLE, EnumSet.of(DataBaseConnector.StatusElement.IS_PROCESSED, DataBaseConnector.StatusElement.IN_PROCESS));
+        // Check that all rows have been processed in the XML source subset table.
+        assertThat(status.isProcessed).isEqualTo(3);
+        assertThat(status.inProcess).isEqualTo(0);
+    }
+
+    /**
+     * Adds its name to {@link #namesOfRunComponents}.
+     */
+    public static class TestAnnotator extends JCasAnnotator_ImplBase {
+        @ConfigurationParameter(name = "name")
+        private String name;
+
+        @Override
+        public void initialize(UimaContext aContext) throws ResourceInitializationException {
+            super.initialize(aContext);
+            this.name = (String) aContext.getConfigParameterValue("name");
+        }
+
+        @Override
+        public void process(JCas jCas) {
+            namesOfRunComponents.add(name);
+            new Annotation(jCas).addToIndexes();
+        }
+    }
+
+    @Nested
+    class AfterInitialProcessing {
+        @Test
+        public void updateXML() throws Exception {
+            dbc.updateFromXML(Path.of("src", "test", "resources", "pubmed21n1016_excerpt_partially_changed.xml.gz").toString(), SOURCE_XML_TABLE, true);
+            // The update contains all three originally imported XML documents. Only that the second has not been changed.
+            // But the XML mirror should have been reset completely.
+            SubsetStatus status = dbc.status(XML_SUBSET_TABLE, EnumSet.of(DataBaseConnector.StatusElement.IS_PROCESSED, DataBaseConnector.StatusElement.IN_PROCESS));
+            // Check that the XML mirror subset has been reset due to the update
+            assertThat(status.isProcessed).isEqualTo(0);
+            assertThat(status.inProcess).isEqualTo(0);
+        }
+
+        @Nested
+        class AfterUpdatingXML {
+            @Test
+            public void testOnlyNewDocumentsProcessed() throws Exception {
+
+                testCr.reconfigure();
+                testAggregate.reconfigure();
+                assertThat(testCr.hasNext()).withFailMessage("The XML DB Collection reader does not report any non-processed rows.").isTrue();
+                // Run the whole pipeline again. This time we expect only two documents to pass through all the components.
+                List<String> allNamesOfRunComponents = new ArrayList<>();
+                while (testCr.hasNext()) {
+                    cas.reset();
+                    testCr.getNext(cas.getCas());
+                    testAggregate.process(cas);
+                    // Collect the names of the components run for this batch; the accumulated list is checked after the loop
+                    allNamesOfRunComponents.addAll(namesOfRunComponents);
+                    namesOfRunComponents.clear();
+                    cas.reset();
+                }
+                testAggregate.collectionProcessComplete();
+                // There should be exactly two documents now that have visited all components
+                assertThat(allNamesOfRunComponents).containsExactly("TestAE 1", "TestAE 2", "TestCC 1", "TestCC 2", "TestAE 1", "TestAE 2", "TestCC 1", "TestCC 2");
+                testAggregate.collectionProcessComplete(); // NOTE(review): second call, already invoked right after the loop - confirm this is intentional
+                // Check again that all the XML documents have been processed.
+                SubsetStatus status = dbc.status(XML_SUBSET_TABLE, EnumSet.of(DataBaseConnector.StatusElement.IS_PROCESSED));
+                // Check that all rows have been processed in the XML source subset table.
+                assertThat(status.isProcessed).isEqualTo(3);
+
+                // Now the more interesting part: In the XMI mirror there should now be two unprocessed rows, namely
+                // the two documents with a changed document text. The unchanged document should still be marked as
+                // processed.
+                SubsetStatus xmiMirrorStatus = dbc.status(XMI_MIRROR_TABLE, EnumSet.of(DataBaseConnector.StatusElement.IS_PROCESSED));
+                // Check that exactly one row is still marked as processed in the XMI mirror subset table.
+                assertThat(xmiMirrorStatus.isProcessed).isEqualTo(1);
+            }
+        }
+    }
+}
diff --git a/jcore-jedis-integration-tests/src/test/resources/logback-test.xml b/jcore-jedis-integration-tests/src/test/resources/logback-test.xml
new file mode 100644
index 000000000..e2ec34c57
--- /dev/null
+++ b/jcore-jedis-integration-tests/src/test/resources/logback-test.xml
@@ -0,0 +1,19 @@
+<configuration>
+
+    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
+        <!-- encoders are assigned the type
+             ch.qos.logback.classic.encoder.PatternLayoutEncoder by default -->
+        <encoder>
+            <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
+        </encoder>
+    </appender>
+
+    <logger name="de.julielab" level="DEBUG"/>
+    <logger name="de.julielab.jcore.flow.annotationdefined.AnnotationDefinedFlow" level="TRACE"/>
+    <logger name="de.julielab.jcore.reader.xml.XMLDBMultiplier" level="TRACE"/>
+    <logger name="de.julielab.jcore.consumer.xmi.XmiDataInserter" level="TRACE"/>
+
+    <root level="INFO">
+        <appender-ref ref="STDOUT" />
+    </root>
+</configuration>
\ No newline at end of file
diff --git a/jcore-jedis-integration-tests/src/test/resources/medlineMappingFile.xml b/jcore-jedis-integration-tests/src/test/resources/medlineMappingFile.xml
new file mode 100644
index 000000000..cd9892953
--- /dev/null
+++ b/jcore-jedis-integration-tests/src/test/resources/medlineMappingFile.xml
@@ -0,0 +1,457 @@
+<mappings>
+	<documentText>
+		<partOfDocumentText id="0">
+			<xPath>/MedlineCitation/Article/ArticleTitle</xPath>
+		</partOfDocumentText>
+		<partOfDocumentText id="1">
+			<xPath>/MedlineCitation/Article/Abstract</xPath>
+			<externalParser>de.julielab.jcore.reader.xmlmapper.mapper.StructuredAbstractParser
+			</externalParser>
+		</partOfDocumentText>
+		<partOfDocumentText id="2">
+			<xPath>/MedlineCitation/OtherAbstract</xPath>
+		</partOfDocumentText>
+		<partOfDocumentText id="3">
+			<xPath>/MedlineCitation/Article/VernacularTitle</xPath>
+		</partOfDocumentText>
+	</documentText>
+
+	<tsType>
+		<tsFullClassName>de.julielab.jcore.types.Title</tsFullClassName>
+		<offset>
+			<partOfDocumentText>
+				<id>0</id>
+			</partOfDocumentText>
+		</offset>
+		<tsFeature>
+			<tsFeatureName>titleType</tsFeatureName>
+			<tsFullClassName>java.lang.String</tsFullClassName>
+			<valueMapping>
+				<default>document</default>
+			</valueMapping>
+		</tsFeature>
+	</tsType>
+
+	<tsType>
+		<tsFullClassName>
+			de.julielab.jcore.types.pubmed.AbstractText
+		</tsFullClassName>
+		<offset>
+			<partOfDocumentText>
+				<id>2</id>
+			</partOfDocumentText>
+		</offset>
+		<tsFeature>
+			<tsFeatureName>abstractType</tsFeatureName>
+			<tsFullClassName>java.lang.String</tsFullClassName>
+			<valueMapping>
+				<default>other</default>
+			</valueMapping>
+		</tsFeature>
+	</tsType>
+
+    <tsType>
+        <tsFullClassName>de.julielab.jcore.types.Title</tsFullClassName>
+        <offset>
+            <partOfDocumentText>
+                <id>3</id>
+            </partOfDocumentText>
+        </offset>
+        <tsFeature>
+            <tsFeatureName>titleType</tsFeatureName>
+            <tsFullClassName>java.lang.String</tsFullClassName>
+            <valueMapping>
+                <default>document_vernacular</default>
+            </valueMapping>
+        </tsFeature>
+    </tsType>
+
+	<tsType>
+		<tsFullClassName>
+			de.julielab.jcore.types.pubmed.Header
+		</tsFullClassName>
+		<tsFeature>
+			<xPath>
+				/MedlineCitation/ArticleIdList/ArticleId[@IdType="doi"]
+			</xPath>
+			<tsFeatureName>doi</tsFeatureName>
+			<tsFullClassName>java.lang.String</tsFullClassName>
+		</tsFeature>
+		<tsFeature>
+			<xPath>/MedlineCitation/PMID</xPath>
+			<tsFeatureName>docId</tsFeatureName>
+			<tsFullClassName>java.lang.String</tsFullClassName>
+		</tsFeature>
+		<tsFeature>
+			<xPath>/MedlineCitation/@Status</xPath>
+			<tsFeatureName>citationStatus</tsFeatureName>
+			<tsFullClassName>java.lang.String</tsFullClassName>
+		</tsFeature>
+		<tsFeature>
+			<xPath>
+				/MedlineCitation/Article/Language
+			</xPath>
+			<tsFeatureName>language</tsFeatureName>
+			<tsFullClassName>java.lang.String</tsFullClassName>
+			<valueMapping>
+				<xmlElement>de</xmlElement>
+			</valueMapping>
+			<valueMapping>
+				<xmlElement>en</xmlElement>
+			</valueMapping>
+			<valueMapping>
+				<xmlElement>es</xmlElement>
+			</valueMapping>
+			<valueMapping>
+				<xmlElement>fr</xmlElement>
+			</valueMapping>
+			<valueMapping>
+				<xmlElement>it</xmlElement>
+			</valueMapping>
+			<valueMapping>
+				<xmlElement>pt</xmlElement>
+			</valueMapping>
+			<valueMapping>
+				<xmlElement>eng</xmlElement>
+			</valueMapping>
+			<valueMapping>
+				<xmlElement>ger</xmlElement>
+			</valueMapping>
+			<valueMapping>
+				<xmlElement>fre</xmlElement>
+			</valueMapping>
+			<valueMapping>
+				<xmlElement>ita</xmlElement>
+			</valueMapping>
+			<valueMapping>
+				<default>other</default>
+			</valueMapping>
+		</tsFeature>
+		<tsFeature>
+			<tsFeatureName>source</tsFeatureName>
+			<tsFullClassName>java.lang.String</tsFullClassName>
+			<externalParser>
+				de.julielab.jcore.reader.xmlmapper.typeParser.SourceParser
+			</externalParser>
+		</tsFeature>
+		<tsFeature>
+			<tsFeatureName>authors</tsFeatureName>
+			<tsFullClassName>
+				org.apache.uima.jcas.cas.FSArray
+			</tsFullClassName>
+			<externalParser>de.julielab.jcore.reader.xmlmapper.typeParser.FSArrayParser
+			</externalParser>
+			<isType>true</isType>
+			<tsFeature>
+				<tsFeatureName>authorInfo</tsFeatureName>
+				<tsFullClassName>
+					de.julielab.jcore.types.AuthorInfo
+				</tsFullClassName>
+				<isType>true</isType>
+				<!-- Give us only author elements that have a LastName-child-element; 
+					sometimes there are general source elements 'CollectiveName' without particular 
+					persons and thus without real names. This leads to empty strings as author 
+					names in some applications, e.g. Semedico. -->
+				<xPath>
+					/MedlineCitation/Article/AuthorList/Author[LastName]
+				</xPath>
+				<tsFeature>
+					<tsFeatureName>foreName</tsFeatureName>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<xPath>ForeName</xPath>
+				</tsFeature>
+				<tsFeature>
+					<tsFeatureName>foreName</tsFeatureName>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<xPath>FirstName</xPath>
+				</tsFeature>
+				<tsFeature>
+					<tsFeatureName>lastName</tsFeatureName>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<xPath>LastName</xPath>
+				</tsFeature>
+				<tsFeature>
+					<tsFeatureName>initials</tsFeatureName>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<xPath>Initials</xPath>
+				</tsFeature>
+				<tsFeature>
+					<tsFeatureName>affiliation</tsFeatureName>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<xPath>
+						AffiliationInfo/Affiliation
+					</xPath>
+				</tsFeature>
+			</tsFeature>
+		</tsFeature>
+		<tsFeature>
+			<tsFullClassName>
+				org.apache.uima.jcas.cas.FSArray
+			</tsFullClassName>
+			<tsFeatureName>pubTypeList</tsFeatureName>
+			<isType>true</isType>
+			<tsFeature>
+				<tsFullClassName>
+					de.julielab.jcore.types.Journal
+				</tsFullClassName>
+				<xPath>
+					/MedlineCitation/Article/PublicationTypeList/PublicationType
+				</xPath>
+				<tsFeatureName>Journal</tsFeatureName>
+				<isType>true</isType>
+				<tsFeature>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<tsFeatureName>name</tsFeatureName>
+					<xPath>.</xPath>
+				</tsFeature>
+				<tsFeature>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<tsFeatureName>ISSN</tsFeatureName>
+					<xPath>
+						/MedlineCitation/Article/Journal/ISSN
+					</xPath>
+				</tsFeature>
+				<tsFeature>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<tsFeatureName>Volume</tsFeatureName>
+					<xPath>
+						/MedlineCitation/Article/Journal/JournalIssue/Volume
+					</xPath>
+				</tsFeature>
+				<tsFeature>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<tsFeatureName>Issue</tsFeatureName>
+					<xPath>
+						/MedlineCitation/Article/Journal/JournalIssue/Issue
+					</xPath>
+				</tsFeature>
+				<tsFeature>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<tsFeatureName>Title</tsFeatureName>
+					<xPath>
+						/MedlineCitation/Article/Journal/Title
+					</xPath>
+				</tsFeature>
+				<tsFeature>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<tsFeatureName>ShortTitle</tsFeatureName>
+					<xPath>
+						/MedlineCitation/MedlineJournalInfo/MedlineTA
+					</xPath>
+				</tsFeature>
+				<tsFeature>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<tsFeatureName>nlmId</tsFeatureName>
+					<xPath>
+						/MedlineCitation/MedlineJournalInfo/NlmUniqueID
+					</xPath>
+				</tsFeature>
+				<tsFeature>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<tsFeatureName>Pages</tsFeatureName>
+					<xPath>
+						/MedlineCitation/Article/Pagination/MedlinePgn
+					</xPath>
+				</tsFeature>
+				<tsFeature>
+					<isType>true</isType>
+					<tsFullClassName>
+						de.julielab.jcore.types.Date
+					</tsFullClassName>
+					<tsFeatureName>PubDate</tsFeatureName>
+					<externalParser>
+						de.julielab.jcore.reader.xmlmapper.typeParser.PubDateParser
+					</externalParser>
+					<xPath>
+						/MedlineCitation/Article/Journal/JournalIssue/PubDate
+					</xPath>
+					<tsFeature>
+						<tsFullClassName>int</tsFullClassName>
+						<tsFeatureName>month</tsFeatureName>
+					</tsFeature>
+					<tsFeature>
+						<tsFullClassName>int</tsFullClassName>
+						<tsFeatureName>year</tsFeatureName>
+					</tsFeature>
+					<tsFeature>
+						<tsFullClassName>int</tsFullClassName>
+						<tsFeatureName>day</tsFeatureName>
+					</tsFeature>
+				</tsFeature>
+			</tsFeature>
+		</tsFeature>
+		<tsFeature>
+			<tsFullClassName>org.apache.uima.jcas.cas.FSArray</tsFullClassName>
+			<tsFeatureName>otherIDs</tsFeatureName>
+			<isType>true</isType>
+			<tsFeature>
+				<tsFullClassName>de.julielab.jcore.types.pubmed.OtherID
+				</tsFullClassName>
+				<xPath>/MedlineCitation/OtherID</xPath>
+				<isType>true</isType>
+				<tsFeature>
+					<tsFeatureName>id</tsFeatureName>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<xPath>.</xPath>
+				</tsFeature>
+				<tsFeature>
+					<tsFeatureName>source</tsFeatureName>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<xPath>@Source</xPath>
+				</tsFeature>
+			</tsFeature>
+		</tsFeature>
+	</tsType>
+	<tsType>
+		<tsFullClassName>
+			de.julielab.jcore.types.pubmed.ManualDescriptor
+		</tsFullClassName>
+		<tsFeature>
+			<xPath>/MedlineCitation/GeneSymbolList</xPath>
+			<tsFeatureName>GeneSymbolList</tsFeatureName>
+			<isType>true</isType>
+			<tsFullClassName>
+				org.apache.uima.jcas.cas.StringArray
+			</tsFullClassName>
+		</tsFeature>
+		<tsFeature>
+			<tsFeatureName>KeywordList</tsFeatureName>
+			<isType>true</isType>
+			<tsFullClassName>
+				org.apache.uima.jcas.cas.FSArray
+			</tsFullClassName>
+			<tsFeature>
+				<tsFeatureName>Keyword</tsFeatureName>
+				<isType>true</isType>
+				<xPath>
+					/MedlineCitation/KeywordList/Keyword
+				</xPath>
+				<tsFullClassName>
+					de.julielab.jcore.types.Keyword
+				</tsFullClassName>
+				<tsFeature>
+					<tsFeatureName>Name</tsFeatureName>
+					<xPath>.</xPath>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+				</tsFeature>
+			</tsFeature>
+		</tsFeature>
+		<tsFeature>
+			<tsFeatureName>ChemicalList</tsFeatureName>
+			<isType>true</isType>
+			<tsFullClassName>
+				org.apache.uima.jcas.cas.FSArray
+			</tsFullClassName>
+			<tsFeature>
+				<tsFeatureName>Chemical</tsFeatureName>
+				<isType>true</isType>
+				<xPath>
+					/MedlineCitation/ChemicalList/Chemical
+				</xPath>
+				<tsFullClassName>
+					de.julielab.jcore.types.Chemical
+				</tsFullClassName>
+				<tsFeature>
+					<tsFeatureName>RegistryNumber</tsFeatureName>
+					<xPath>RegistryNumber</xPath>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+				</tsFeature>
+				<tsFeature>
+					<tsFeatureName>NameOfSubstance</tsFeatureName>
+					<xPath>NameOfSubstance</xPath>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+				</tsFeature>
+			</tsFeature>
+		</tsFeature>
+		<tsFeature>
+			<tsFeatureName>DBInfoList</tsFeatureName>
+			<isType>true</isType>
+			<tsFullClassName>
+				org.apache.uima.jcas.cas.FSArray
+			</tsFullClassName>
+			<tsFeature>
+				<tsFeatureName>DBInfo</tsFeatureName>
+				<isType>true</isType>
+				<xPath>
+					/MedlineCitation/DataBankList/DataBank
+				</xPath>
+				<tsFullClassName>
+					de.julielab.jcore.types.DBInfo
+				</tsFullClassName>
+				<tsFeature>
+					<tsFeatureName>Name</tsFeatureName>
+					<xPath>DataBankName</xPath>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+				</tsFeature>
+				<tsFeature>
+					<tsFeatureName>AcList</tsFeatureName>
+					<xPath>
+						AccessionNumberList
+						<!-- /MedlineCitation/DataBankList/DataBank/ -->
+					</xPath>
+					<isType>true</isType>
+					<tsFullClassName>
+						org.apache.uima.jcas.cas.StringArray
+					</tsFullClassName>
+				</tsFeature>
+			</tsFeature>
+		</tsFeature>
+		<tsFeature>
+			<tsFeatureName>MeSHList</tsFeatureName>
+			<isType>true</isType>
+			<tsFullClassName>
+				org.apache.uima.jcas.cas.FSArray
+			</tsFullClassName>
+			<tsFeature>
+				<tsFeatureName>meshHeading</tsFeatureName>
+				<isType>true</isType>
+				<xPath>
+					/MedlineCitation/MeshHeadingList/MeshHeading
+				</xPath>
+				<tsFullClassName>
+					de.julielab.jcore.types.MeshHeading
+				</tsFullClassName>
+				<tsFeature>
+					<tsFeatureName>DescriptorName</tsFeatureName>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<xPath>DescriptorName</xPath>
+				</tsFeature>
+				<tsFeature>
+					<tsFeatureName>
+						DescriptorNameMajorTopic
+					</tsFeatureName>
+					<xPath>DescriptorName/@MajorTopicYN</xPath>
+					<tsFullClassName>boolean</tsFullClassName>
+					<valueMapping>
+						<xmlElement>Y</xmlElement>
+						<value>true</value>
+					</valueMapping>
+					<valueMapping>
+						<xmlElement>N</xmlElement>
+						<value>false</value>
+					</valueMapping>
+				</tsFeature>
+				<tsFeature>
+					<tsFeatureName>QualifierName</tsFeatureName>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<xPath>QualifierName</xPath>
+				</tsFeature>
+				<tsFeature>
+					<tsFeatureName>
+						QualifierNameMajorTopic
+					</tsFeatureName>
+					<xPath>QualifierName/@MajorTopicYN</xPath>
+					<tsFullClassName>boolean</tsFullClassName>
+					<valueMapping>
+						<xmlElement>Y</xmlElement>
+						<value>true</value>
+					</valueMapping>
+					<valueMapping>
+						<xmlElement>N</xmlElement>
+						<value>false</value>
+					</valueMapping>
+				</tsFeature>
+			</tsFeature>
+		</tsFeature>
+	</tsType>
+</mappings>
\ No newline at end of file
diff --git a/jcore-jedis-integration-tests/src/test/resources/pubmed21n1016_excerpt_original.xml.gz b/jcore-jedis-integration-tests/src/test/resources/pubmed21n1016_excerpt_original.xml.gz
new file mode 100644
index 0000000000000000000000000000000000000000..365b8d3e0e6dc6da6a0355a1a07f4791b1292c1b
GIT binary patch
literal 3038
zcmV<43nBC$iwFpSV!&Vk18{X>ZDnLKF>Wz1F*aXicw=R9aCBd9a%pF2ZeeULcx`L|
z?OWS++cp+`&sSjdl&odlEcwz|vbr+1QoFGnckCv0UuHohB%&q(mH_0)U)7iSf%&r8
z2P7rZmsn}4$?DWEH3b|ToI7xK(80GKlbFp#W~5dJjc%*cU_u2tlxlX+aCX|<Z+ts^
zG5FijyTi+?^AS6DlSG8CGb@8wTnNi9t}ZS|W7deQP5<6*hc;}fIB6*vwPt$WR>4HJ
zQ@^OKoh?59)%~W^?Q~n9Zh-q;ro$JE)dy(=V`CA<Qi(%pxrM;&0)L%3Xn+<|O{Hyf
zJUTi(IU6<DyBj64gT~qExRI|Wp5v1v_I{~tW7zBN@9ppQ`hzx2E$NQ96^A-WV_`*D
zj$-^u@NBr<>3%b4`_I+PSSuS1yW4|y{<WGp;)`LgGicM_l7IWb4g9wM6mw}F+H||~
z$flj{!<%;YAK3H``$1t}^6OZKB8Dw9scfUd>wE1o#pBfo#!fCS&e(}D>T;2agT^Qp
zfz7m%L1WnMeA93CJNtWsHc{1<6|At(2?j5U5N?w$IcS_HD>5aj8~YXSb?g!`+y=Me
zvp(-x4Yzl(ng6QKLMu|bKKdy?)(m?jf_s*&*mt{Evu|nT2lgDatBTiVE~Q0uzf0i-
z!^Fol6UGeM`DoqX?_N(P8NBBspH{F@Nq%?o#l4LyLxsolEk@HRJSXR(nRrJugP4b*
z2w5nF_o)on4WEmdlc6wdsxt;Z@knI$)=!w$pk3p*+UT6mq`D)}Du>T!YS`P~+wL~;
zUrNBrct!q)m<&dzI`Cu2=Fv%*$6%;sgT3dm46n`(8dnsEt!{6(-+ni0_4c>B`$&_@
z!m33k#%4UQH8IP<OYzYTFPAzaH^eN1e?ykW+`=G<VK>r7ERwT`iRn}Z5*2j8cqUko
zxj^z5s~BgKO!JUsT+Ia2Q^wOYmbo+r1VQYm=vK@d=p=EfB-z5skY`&=CS;Ns<0@oE
zD1+}7d#41u77NsFK8bTSL8Nr7XA34tz)(zR0g0alS}F3nRxNfJ(X`I4S(@oo8#viG
zueue@7}3Re95)}1hEPo5u#ioltRmI(P-1x~<yPUuj@HCN&B#g`#fE1YU(nvd2<Q+`
zo5rQ7Mi@bI?Ck>U5ZT^@6DMQK43@<U5?>Qb(1DGSFd$BW8$K0?OU)9gWFl{AW~L-O
z!IfovVsw`J;{t9-gypgE##<?c-?n1-X(oPk@S#L>vlDWiu^bk{Ua*U?v{f2uOZ5&e
zUSKw^*ky~^NHF>)brLv&D`^t)0}d!Q;i$h%6Wb*d^39B=P?nfmA2C!9@sWvXoIA^q
z!K(Hr^Muj_&Jaf^NBfeH2w94-$3h8o&{rmfyI}!@BrFw*1acUXEfpDsHdldYQJoQ)
zOhDr0F)p@P7U`Ru5$&1@B;GBgsE7yaW8*ow1{7E=A@BBX2Zuw4eRq7D2T0+|P{VoT
zu0+#XC^ONRMNC9`rpXYgj(H|;3l+$L;E&xhKBJK11|fqnYlU+8?SN<6fvnT&?6B}3
z?I3BfQ^beOLzaXpmXT_RL+jGqcM1O}6GX7cGMzOM4v>#q&L}CEOUeH|s4$VXZOE;n
zVjx5*4<ae%UKrNv?RZ_izkAIfSy`yXJ_t5-&=4kq?V*-WJ1+vhlu4wan+o~T;Zm0P
zau2@!w@rNEIP_}H!3$XtWZIBxDW6wcT-JUsYZhfYxO+?I1+Ca8lyn(Hisq?IW==Vz
zm{VjSVic4##MU9sZ-jR;7$T7clSnp0VXZBB8$QEqC_~c8pBxeCOQ~H6%}PNHWEMDS
z!I<hLd`{J?nrjcxl8rT;p4hFHx0EYINed|{0)Z2gq+Ln}PfdhE4YO6oIzNiR7U4D%
zbi7qhg38f!|JWG3b_mz(RH8<dx4YsnsvRw!s&-b@?+~XbGG`ojo$t6Fw3ol?L~meq
zKm0++U!)kcmjichNJ^5nX7~XYh4Q<0ImzYuog!YjrzQLe!8Mym^|XvXZA8X5?iX|n
zPs=&xhA-T;d|J-&M&vx>Gj5Gb))4g)mjV5>oR=H2Ckm(g__UOVQHEU5X|zUo9&SX`
z4>)7^0u|XMI9{$@yhc9Jk{fRSz>F$Ks4`SWPjMG?IKGCs8<AYaugVqLP0BkWU2^No
z@6WRzPYxPKo$hXbd%rO(uKujJp8L+EN?p5VKfclOe!m<jZt7)Eqo%sT>JVqH%@)HS
zl|N{yLl<aK%Hc}$nsd2)UGHU9!z<FWH~~A<c3j)Loo=_c*X^MBst&J+sp&kG>KgZo
zyS->RywvP*O$H^#aDm&fu)`B%{GFi}H6G<;F`T0pXaxC^<1wW|%>+4YQ8bLyf|qy(
z4ya7r)=!O3^sJAg4`r*mf3H;?97E|L`ey&P#!M=Q{<Q;U`ByzoRuZqHNsjmYKWLKV
ze*U_ubk|k|JnHdQEf%>>!!)HK6V>v+n7acIorHI4*K(Yd4S4D56{fbN`Sdy|8^iu~
zf3LT*?h@+tAL{S#t*#8M<oD`Uva|eMsdN8R^l|4IeSAhAEAxLD^zq!c^ohX5z$0P>
z7|_&Nh?dy5cpyNZR`mY3Pk59q;#uMwLL*Y1w{3($F$-Qzgb`o(W>qNT=Rm822n{bs
zJ^VtvvbRHcWxuy^ys}RY{=InRc%?C{B;r`+EppA2FAvXOF&7i0g{!HB|1Ciw`OsWy
zR0sBQ^yB+iEQZkugL7FRR$wPwRC1P<@Dw!Hoz{Mb5Rp*KMXXcoQa1il&4saYwnQ2d
zY~cONf=FhtgI##IAqV9k`V0vP>CrqWoe0HSZ1k`q{k^72?nK7R<KtH(NX7S_c8bpe
zCrc*Gq_meyNS@<kvwjq@mzPI}<5vVM7@R8v^!Wfzxy<P6+%^O4mw{Nz5Z7=IvZz@B
zO;jxhdYC!4@)o;^WDt?)g^dflb~}Vg)RmkG95UXZTeP4mZ2@o*@~C0WR0`uE!H6Kk
z>69Xc{7}WKVf%t2jW_|bk(}Cc=n*wSF(Y&gL{)f>gh5ZF31iIkd?ikGYn|hjD}E^b
z+QYbBgetqGVqR04cIsN`8?}Ufs0^Tv;B0+NV6t9+&uh8UVThL-LGUT9Nj!?}sQ9qM
z^Ln%s{n)`<@B}=c$)E(69@yHmoO;j^YD^Ghu{I)Ci9R4&5fvU|Cv+8&lC!IoK1gwV
zd^hdlm{<}#2ZeBGdVrMDE01Dr0-fe=q`{@B=c4>4gRhHao!B+7theZd*MK$NZUieh
zh9_qZ)#zzCN7A_KH9(EgMg)D!l|Xi`gA=_KYf8EqlU|5xdChOu0<^L}63==@T%Hk^
zXT;^tt;sXu@_C5M+VIH=aoPO>#Km_ge@?_@_Ze|{MqDcMe;LH(jhu+A>06gA-|$QH
zuK|0*C)uKTE;60}TX;6bC*d-f3muP}0iRQ+O7D$u@$)2^o~@7<Ph`Z5-au@zSl^)i
zMe{1C!+C?oo4Xocv^c(Tk%^1Z1_|v<^(}^P;=Hc(R)9JhE6@R=k;CsV1aSI&0&sTr
zHxA(J?S8?7v)63;`JQ??dUJmAYU?47B|P-)r{QTD(L0<QZrBvO=m?^hD&^a-FZ3!s
z&&prF)Sdg7GizSY8{vxLj1+oMpNRYCKQiwH=uMS>ZQ@CamuAp*cfZ5_^Y5g5EHvkV
zbIE<O3Kf~<&TAfAroBPRXIoY2{qvYowYn-N)-$WT>k0GcQlD)X7HAiT)cY?V78P&3
zusAK=+&#+3zc=&=h3IVd*!Y}#n(a=rw*$q0JN&w^z+oiR)7{}QmutNrIo?PYyPk|c
zpwanNQeXL$ls|0dsRPe@+9zKNeXXZLKSygx=wAzcT+5J~c`h_w1FrZvl9M&?ik~+k
z;OQC&)fHdIfL9xl?|ZqPpTF9Juq(&cNazc9yUxR<i)T#g8IyX(q@FRUXH4ooCbc%e
gw8EtJz5tW*P1hfUN!=On|6k&N0cG=RhA=q*0HA8@bpQYW

literal 0
HcmV?d00001

diff --git a/jcore-jedis-integration-tests/src/test/resources/pubmed21n1016_excerpt_partially_changed.xml.gz b/jcore-jedis-integration-tests/src/test/resources/pubmed21n1016_excerpt_partially_changed.xml.gz
new file mode 100644
index 0000000000000000000000000000000000000000..ee654253528e14515077c8f593bf2c0640e9770a
GIT binary patch
literal 3075
zcmV+e4E*ySiwFqCWx!wn18{X>ZDnLKF>Wz1F*aXicw=R9aCBdAVRCe7VQg%9Ut?%t
zZf9j=E_iKh0PS1da@#f*zRy!&b(PFmf0q1{9a$Z(Eyrps+pV3X?#&KFLLzFCU<p8u
z{8W3hFR)Lx-vLR9wB=ZBs?Buko0<X+4$d!t^X0Vn-RCG|Gm&bk;)7bd(W)^Kd&-yb
z^q^+Vq`qJKuK%j{x8sYW>zm5~yR_p-_=l;HUMQ}FVOKX-*MkwO1;!+QZ#I47H{vjA
z#4>11)vOtNW7$mHqNcJY|NLkBZL8gCH$Ytj_lHdTuNbQi(g?;z!VhIEj-=rR0<$ap
zwfdk2T1?erW$L5B@!9ZvP-7Q&u}BYU=Vzl@wwid3hR5vVLfcxu)860P-|cjJO`2NJ
z9djd&RFs6m2)`J`_>JIcf4kLw+iSYd<;+OMCg``fd(G@?IdjbC{Z6abq`w9K=E@D+
zwm*uQ)T=h#ZauMStG#;D*8Yl3AF%J`_9efLlrKWqGL^BZm3V!xY#Q@$IfAj_)zvv0
zY8_wC6LC-*gu*kailtZUw_9(!^=@l_uh%51%Cejl7FxmJMd8D3(nbfhVQfSii}J>9
z#YYv|Nc6YCE&r^}J68SeU2NvQs<Y6Fl&+3`$&VGo9t-c0WlQ$m?o{ksSb4>sy=GbQ
z%FMMii0+F7UeHW@PEw(DubGWj4SsPr9;fi0i+oza1_k-U$ybjyE)C@#&$bv$Ch(k`
ziF)K5QTIab`@(0w6was8V|RQerdIkwvx!O>{KNy1ntL~)8@*<Q<8q@*K9%tUftERZ
zIgR_B{k`pW9si{SERC1sU&W+1I8&Y*LpBeFeinnik~Q{`htj_}Kd9YMAU4{a-EQ+@
z(CF-MxA&1IrG;gSjI~L*XDVVAgV*A-=^vdQp1&I$vy+P-+4bP+dU*b>!LApEK*ES=
z3a9%l3AurlBF*lk30NSf0TYvn^dyeeIpe8dUTQtbL#$$)jZ?*amhyNin3^!2B%#bi
z)FALeODA#3=AMco8y6&7Sn2b0i^+)WG-W*YnHI6ecY|HTg58Qa4s<>aGd5ljt58kn
zOp<`U7}Ek0KlM}`Qz)nynguki(p#3KDp4Aq*Vfs?h<b>SV>}G&t3$*W6L`^QV<;Pw
z>REiTEWR?UaAHSmLJ?2NN(v_sPg6dpy@eLgA)M5;O%jFJg5=oSdd4E$oe9TQhK6Y@
z3+E)hB9@>78!cf#Tn%@8A`q;KMKYF=yr-FIEa3?r8^*_4rHQ*k;D$&T9%^U2rBe89
zN`{}L;#UhFN`yQclIyhLun_iwU9_RCl0X?c1mWU2X2X(QwwMV7qi<3tf+Kh=bwqx^
zt;NP1hcZ*dcEN;vGvx`CCFaIO3>}jANW~<~oTbTNW&4wPe5pOFi6fMweMv~fF-6!@
zp#(bUE2G@qumD05mhwdeIW)<Zh?GK`$DU}=;UiKRfyBw9ZD_D8P<I(4+BFr(#(PLn
z5)anL#xrsaD6m>W-tFBMZj2UtcY2=%NbXBt!FlAaNYPp-Ggg>IOayAG$PkGNc`EO7
z707|$kKIx}rI6zqA%ihvL~OGs15cF&S*y|7Vg5gwUesV`h!2y6ED4n_Bh?Ux#wMBX
zBK}!Mh+vVXDy<_NARjlJQBp9QkpDYSVJuBklUqf}K!{TA1yanMFs#$rak@Hxcj{iW
zv`~Y65^Q3jAxs3@LoJtfP6T|#I+B{6FXT&$ms;fR0PyX;uj31Mq*HSSUdReBRhnE&
z`MliXy7GH*W>L0-yR&q5pym7cl3s{N(JYn8%n64SGm0!kObjJ8v9*ZvJK>xRhDc<=
zB$7>2SSv%`hR-nTOP_Rd*GWM7Qfik%vvi<(GWD!9U`*#Fd`_oVIoDRul8qHzso1TO
zx0EaSk>*oUcmmfbNxP5^o*Ii7XP7B7R@wawwg|VWpgXU8J(TXN$9GBZ&>~#ZGl?^z
zc=F{pROP;LRFw;?`mi{|A#=|00D8w&uetbDB|3rC-S8(Nf03fsTns#THpP;()%{O+
zJrv)S7fUA3?-cRIJ}==<2(IZ^#?Q<6(?(=GvA>{<cwWvC*L-eo<@0imHX`RapK_yZ
zw1%kHcnqlL<-FdIJ#lc_&(BMF6r{)nl>}>q=g~$){e(M)&v7E#2-nN44cEvgT5`wD
zADB_a2%QY2(KEaSEv~OV-bN%>{;Tw^w&UWBNH5>&^2hV+!|<SX+-mQ3xA$xPydGfr
zo86TwW$Mb?`{|7qkN(9#{(LVAop_>3td4N!nsna(5W9<(%65Uer5G+%v>BJ{!)k%E
z9A1*1hY{G}Y{y%^-D<Zxd+in~vhwhfm@3gT8Q<bv@vuNGh8LPWtx2x{9<J~j7N$Q$
z#y=Q(RpC)g=EE7Jf$ESiIG$1}R7{Xz8F>{+b$J1D;DSoUef8G(LLvJ!BvI6_`;Y3^
z-YJysqnLJoE6ikUQPj4;Ec>d)$x`BVG|51q{|8N?%+KFemG;`IfJZgn%7;a!Q$I;)
zNC%bt&*vVXMnnI>*|ivFMO9w7dI_{GXuiBo%38m>-QDZ#th<B&fK^5Rqt&IMrTk9a
zQg#-<OXuAG6q4L|fh1ob$<q8^29msVC4D6DFmQ-i4hGa!>Z2rfB_0S+r{%Rj-V+XG
z3wRp2f>4WuXJs2<P|Sc=9bv@hu2>bZc5|RrUVws^<2?LI5VNyGAZE9-aS*dh4*tC$
z=4h!gtR%uvW+igPl&_C2-!K~zq=l!cf&UFbA=%JODx41N_29$DH!OtF34^n#CzfC*
zTvTwDhVT>=*R95Wix3fC%tWXX>{1l|GM))-<aB{FMA*RjmwACqVFxpJa6<;lLG&pS
z64Il1h*czF-e7~(igfqtar7WEUZ0-6Awf#MceE3H<~Uh2W;&t0Y((;$p6d0Zh`qi(
zJ{r9tU_s+vA)wC%aKdFuUzeutDYp#7O8R()JCG%w=Fmjha-fHqGb?YgyFhvYiJsdy
zw`;pam_${{slX-U47x=N%F^Zl7a@-d)=Z?(4iXFqGMr2(QpgWwyc)L8DUyH_FdN8;
zDTba<!xvLR$3Rqu=SUdTM2axRRLz#+l($wHUb*ClqhC1~*NISOw@}P!O4ByJjn!Se
zfPUy0z&V1u^(leLdfh#z<xYzsUhV|Jr?e(=D7K};haFv3qn+qS7T$s<;Q3T~1-P_g
zYsYfpKu4%CMv#R{i%cc@fM|JCIE<apRYXe0u5$E2iqq4FX%~jXlHfTggiF%_q?BG+
z6f5niBy%GTE=)ZW#XlK*TP$m6*1)pfqY_>N)_A`Wtl$)$oLZbl&&xTG+TN}KY790a
z=zAUuWam0K(R;C`q?<A6mAI9+{BA8kEBzz!tQW-P1#x*nT>jjeydW;W4RKi;K3O6z
zyI+C0xC-UZiMZ^(ATBS6OKJWugSec?u}JH#blGwRzeN4&u@gQ{=k-gGs_fsw(+NHa
zm%*HGd06-Oj4D;?H^Rfuk)&$6L|z<`7E|hh*kYl&L-~v1RZxX<g2J2I3SX2su5gi&
z4N(ROWp%k1!xeE(SLzj@ipB_3fGFhf`zryQZkGU@o&AjiID5Na(S>$+c6Gs4wxS)f
z$#3`J>%qz8@XgjLUq;+_g{kIA5>SWE9oK9EUUWH84@=QC_Lcso#;vR`rt0>m++2x=
ztVAv;&Pbt^`UKoH2Fa{Spl&PIhvG=`9yDmXyWe8}`FB*b5zV+~ZS<I|L`7zK(C33^
zwlhf4&{dY+HO>^ORaF@p$E?)x<7bUjzu7L#Q#SMo6<D+!<y~M{oaEhjPcm|yh%VI-
zxAhJiU2;dW-KuwXp!mOzzAa#I6o~lwy783DwK_{qH&WlOC*uz&fqs=#aXu&I51VO%
z;90f%Y#-6LY9jhMSW80JNA!6uLvE&#XtV~9@pB-@YakgvZ$!YeHL$B2zK#KJHX`5m
zay>tPvsSSi%hyQgD|^3AOVia0TJ?ffy`WVuXw?f^^%$*M8+uxzReN87R=INQ4??RR
R47mS|@xLZ-s&QXA006tW2Jip?

literal 0
HcmV?d00001

diff --git a/jcore-jedis-integration-tests/src/test/resources/pubmedMappingFile.xml b/jcore-jedis-integration-tests/src/test/resources/pubmedMappingFile.xml
new file mode 100644
index 000000000..9a76854ae
--- /dev/null
+++ b/jcore-jedis-integration-tests/src/test/resources/pubmedMappingFile.xml
@@ -0,0 +1,436 @@
+<mappings>
+	<documentText>
+		<partOfDocumentText id="0">
+			<xPath>/PubmedArticle/MedlineCitation/Article/ArticleTitle</xPath>
+		</partOfDocumentText>
+		<partOfDocumentText id="1">
+			<xPath>/PubmedArticle/MedlineCitation/Article/Abstract</xPath>
+			<externalParser>de.julielab.jcore.reader.xmlmapper.mapper.StructuredAbstractParser
+			</externalParser>
+		</partOfDocumentText>
+		<partOfDocumentText id="2">
+			<xPath>/PubmedArticle/MedlineCitation/OtherAbstract</xPath>
+		</partOfDocumentText>
+		<partOfDocumentText id="3">
+			<xPath>/PubmedArticle/MedlineCitation/Article/VernacularTitle</xPath>
+		</partOfDocumentText>
+	</documentText>
+
+    <tsType>
+        <tsFullClassName>de.julielab.jcore.types.Title</tsFullClassName>
+        <offset>
+            <partOfDocumentText>
+                <id>0</id>
+            </partOfDocumentText>
+        </offset>
+        <tsFeature>
+            <tsFeatureName>titleType</tsFeatureName>
+            <tsFullClassName>java.lang.String</tsFullClassName>
+            <valueMapping>
+                <default>document</default>
+            </valueMapping>
+        </tsFeature>
+    </tsType>
+
+    <tsType>
+        <tsFullClassName>
+            de.julielab.jcore.types.pubmed.AbstractText
+        </tsFullClassName>
+        <offset>
+            <partOfDocumentText>
+                <id>2</id>
+            </partOfDocumentText>
+        </offset>
+        <tsFeature>
+            <tsFeatureName>abstractType</tsFeatureName>
+            <tsFullClassName>java.lang.String</tsFullClassName>
+            <valueMapping>
+                <default>other</default>
+            </valueMapping>
+        </tsFeature>
+    </tsType>
+
+    <tsType>
+        <tsFullClassName>de.julielab.jcore.types.Title</tsFullClassName>
+        <offset>
+            <partOfDocumentText>
+                <id>3</id>
+            </partOfDocumentText>
+        </offset>
+        <tsFeature>
+            <tsFeatureName>titleType</tsFeatureName>
+            <tsFullClassName>java.lang.String</tsFullClassName>
+            <valueMapping>
+                <default>document_vernacular</default>
+            </valueMapping>
+        </tsFeature>
+    </tsType>
+
+	<tsType>
+		<tsFullClassName>
+			de.julielab.jcore.types.pubmed.Header
+		</tsFullClassName>
+		<tsFeature>
+			<!-- The DOI is listed in PubmedData/ArticleIdList; MedlineCitation has
+				no ArticleIdList child in the PubMed DTD, so that path never matches. -->
+			<xPath>/PubmedArticle/PubmedData/ArticleIdList/ArticleId[@IdType="doi"]</xPath>
+			<tsFeatureName>doi</tsFeatureName>
+			<tsFullClassName>java.lang.String</tsFullClassName>
+		</tsFeature>
+		<tsFeature>
+			<xPath>/PubmedArticle/MedlineCitation/PMID</xPath>
+			<tsFeatureName>docId</tsFeatureName>
+			<tsFullClassName>java.lang.String</tsFullClassName>
+		</tsFeature>
+		<tsFeature>
+			<xPath>/PubmedArticle/MedlineCitation/@Status</xPath>
+			<tsFeatureName>citationStatus</tsFeatureName>
+			<tsFullClassName>java.lang.String</tsFullClassName>
+		</tsFeature>
+		<tsFeature>
+			<xPath>
+				/PubmedArticle/MedlineCitation/Article/Language
+			</xPath>
+			<tsFeatureName>language</tsFeatureName>
+			<tsFullClassName>java.lang.String</tsFullClassName>
+			<valueMapping>
+				<xmlElement>de</xmlElement>
+			</valueMapping>
+			<valueMapping>
+				<xmlElement>en</xmlElement>
+			</valueMapping>
+			<valueMapping>
+				<xmlElement>es</xmlElement>
+			</valueMapping>
+			<valueMapping>
+				<xmlElement>fr</xmlElement>
+			</valueMapping>
+			<valueMapping>
+				<xmlElement>it</xmlElement>
+			</valueMapping>
+			<valueMapping>
+				<xmlElement>pt</xmlElement>
+			</valueMapping>
+			<valueMapping>
+				<xmlElement>eng</xmlElement>
+			</valueMapping>
+			<valueMapping>
+				<xmlElement>ger</xmlElement>
+			</valueMapping>
+			<valueMapping>
+				<xmlElement>fre</xmlElement>
+			</valueMapping>
+			<valueMapping>
+				<xmlElement>ita</xmlElement>
+			</valueMapping>
+			<valueMapping>
+				<default>other</default>
+			</valueMapping>
+		</tsFeature>
+		<tsFeature>
+			<tsFeatureName>source</tsFeatureName>
+			<tsFullClassName>java.lang.String</tsFullClassName>
+			<externalParser>
+				de.julielab.jcore.reader.xmlmapper.typeParser.SourceParser
+			</externalParser>
+		</tsFeature>
+		<tsFeature>
+			<tsFeatureName>authors</tsFeatureName>
+			<tsFullClassName>
+				org.apache.uima.jcas.cas.FSArray
+			</tsFullClassName>
+			<externalParser>de.julielab.jcore.reader.xmlmapper.typeParser.FSArrayParser
+			</externalParser>
+			<isType>true</isType>
+			<tsFeature>
+				<tsFeatureName>authorInfo</tsFeatureName>
+				<tsFullClassName>
+					de.julielab.jcore.types.AuthorInfo
+				</tsFullClassName>
+				<isType>true</isType>
+				<!-- Select only Author elements that have a LastName child element.
+					Some Author entries carry only a 'CollectiveName' (a group rather than an
+					individual person) and hence no personal name; mapping them would produce
+					empty author names in some applications, e.g. Semedico. -->
+				<xPath>
+					/PubmedArticle/MedlineCitation/Article/AuthorList/Author[LastName]
+				</xPath>
+				<tsFeature>
+					<tsFeatureName>foreName</tsFeatureName>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<xPath>ForeName</xPath>
+				</tsFeature>
+				<tsFeature>
+					<tsFeatureName>foreName</tsFeatureName>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<xPath>FirstName</xPath>
+				</tsFeature>
+				<tsFeature>
+					<tsFeatureName>lastName</tsFeatureName>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<xPath>LastName</xPath>
+				</tsFeature>
+				<tsFeature>
+					<tsFeatureName>initials</tsFeatureName>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<xPath>Initials</xPath>
+				</tsFeature>
+				<tsFeature>
+					<tsFeatureName>affiliation</tsFeatureName>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<xPath>
+						AffiliationInfo/Affiliation
+					</xPath>
+				</tsFeature>
+			</tsFeature>
+		</tsFeature>
+		<tsFeature>
+			<tsFullClassName>
+				org.apache.uima.jcas.cas.FSArray
+			</tsFullClassName>
+			<tsFeatureName>pubTypeList</tsFeatureName>
+			<isType>true</isType>
+			<tsFeature>
+				<tsFullClassName>
+					de.julielab.jcore.types.Journal
+				</tsFullClassName>
+				<xPath>
+					/PubmedArticle/MedlineCitation/Article/PublicationTypeList/PublicationType
+				</xPath>
+				<tsFeatureName>Journal</tsFeatureName>
+				<isType>true</isType>
+				<tsFeature>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<tsFeatureName>name</tsFeatureName>
+					<xPath>.</xPath>
+				</tsFeature>
+				<tsFeature>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<tsFeatureName>ISSN</tsFeatureName>
+					<xPath>
+						/PubmedArticle/MedlineCitation/Article/Journal/ISSN
+					</xPath>
+				</tsFeature>
+				<tsFeature>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<tsFeatureName>Volume</tsFeatureName>
+					<xPath>
+						/PubmedArticle/MedlineCitation/Article/Journal/JournalIssue/Volume
+					</xPath>
+				</tsFeature>
+				<tsFeature>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<tsFeatureName>Issue</tsFeatureName>
+					<xPath>
+						/PubmedArticle/MedlineCitation/Article/Journal/JournalIssue/Issue
+					</xPath>
+				</tsFeature>
+				<tsFeature>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<tsFeatureName>Title</tsFeatureName>
+					<xPath>
+						/PubmedArticle/MedlineCitation/Article/Journal/Title
+					</xPath>
+				</tsFeature>
+				<tsFeature>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<tsFeatureName>ShortTitle</tsFeatureName>
+					<xPath>
+						/PubmedArticle/MedlineCitation/MedlineJournalInfo/MedlineTA
+					</xPath>
+				</tsFeature>
+				<tsFeature>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<tsFeatureName>nlmId</tsFeatureName>
+					<xPath>
+						/PubmedArticle/MedlineCitation/MedlineJournalInfo/NlmUniqueID
+					</xPath>
+				</tsFeature>
+				<tsFeature>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<tsFeatureName>Pages</tsFeatureName>
+					<xPath>
+						/PubmedArticle/MedlineCitation/Article/Pagination/MedlinePgn
+					</xPath>
+				</tsFeature>
+				<tsFeature>
+					<isType>true</isType>
+					<tsFullClassName>
+						de.julielab.jcore.types.Date
+					</tsFullClassName>
+					<tsFeatureName>PubDate</tsFeatureName>
+					<externalParser>
+						de.julielab.jcore.reader.xmlmapper.typeParser.PubDateParser
+					</externalParser>
+					<xPath>
+						/PubmedArticle/MedlineCitation/Article/Journal/JournalIssue/PubDate
+					</xPath>
+					<tsFeature>
+						<tsFullClassName>int</tsFullClassName>
+						<tsFeatureName>month</tsFeatureName>
+					</tsFeature>
+					<tsFeature>
+						<tsFullClassName>int</tsFullClassName>
+						<tsFeatureName>year</tsFeatureName>
+					</tsFeature>
+					<tsFeature>
+						<tsFullClassName>int</tsFullClassName>
+						<tsFeatureName>day</tsFeatureName>
+					</tsFeature>
+				</tsFeature>
+			</tsFeature>
+		</tsFeature>
+	</tsType>
+	<tsType>
+		<tsFullClassName>
+			de.julielab.jcore.types.pubmed.ManualDescriptor
+		</tsFullClassName>
+		<tsFeature>
+			<xPath>/PubmedArticle/MedlineCitation/GeneSymbolList</xPath>
+			<tsFeatureName>GeneSymbolList</tsFeatureName>
+			<isType>true</isType>
+			<tsFullClassName>
+				org.apache.uima.jcas.cas.StringArray
+			</tsFullClassName>
+		</tsFeature>
+		<tsFeature>
+			<tsFeatureName>KeywordList</tsFeatureName>
+			<isType>true</isType>
+			<tsFullClassName>
+				org.apache.uima.jcas.cas.FSArray
+			</tsFullClassName>
+			<tsFeature>
+				<tsFeatureName>Keyword</tsFeatureName>
+				<isType>true</isType>
+				<xPath>
+					/PubmedArticle/MedlineCitation/KeywordList/Keyword
+				</xPath>
+				<tsFullClassName>
+					de.julielab.jcore.types.Keyword
+				</tsFullClassName>
+				<tsFeature>
+					<tsFeatureName>Name</tsFeatureName>
+					<xPath>.</xPath>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+				</tsFeature>
+			</tsFeature>
+		</tsFeature>
+		<tsFeature>
+			<tsFeatureName>ChemicalList</tsFeatureName>
+			<isType>true</isType>
+			<tsFullClassName>
+				org.apache.uima.jcas.cas.FSArray
+			</tsFullClassName>
+			<tsFeature>
+				<tsFeatureName>Chemical</tsFeatureName>
+				<isType>true</isType>
+				<xPath>
+					/PubmedArticle/MedlineCitation/ChemicalList/Chemical
+				</xPath>
+				<tsFullClassName>
+					de.julielab.jcore.types.Chemical
+				</tsFullClassName>
+				<tsFeature>
+					<tsFeatureName>RegistryNumber</tsFeatureName>
+					<xPath>RegistryNumber</xPath>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+				</tsFeature>
+				<tsFeature>
+					<tsFeatureName>NameOfSubstance</tsFeatureName>
+					<xPath>NameOfSubstance</xPath>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+				</tsFeature>
+			</tsFeature>
+		</tsFeature>
+		<tsFeature>
+			<tsFeatureName>DBInfoList</tsFeatureName>
+			<isType>true</isType>
+			<tsFullClassName>
+				org.apache.uima.jcas.cas.FSArray
+			</tsFullClassName>
+			<tsFeature>
+				<tsFeatureName>DBInfo</tsFeatureName>
+				<isType>true</isType>
+				<xPath>
+					/PubmedArticle/MedlineCitation/DataBankList/DataBank
+				</xPath>
+				<tsFullClassName>
+					de.julielab.jcore.types.DBInfo
+				</tsFullClassName>
+				<tsFeature>
+					<tsFeatureName>Name</tsFeatureName>
+					<xPath>DataBankName</xPath>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+				</tsFeature>
+				<tsFeature>
+					<tsFeatureName>AcList</tsFeatureName>
+					<xPath>
+						AccessionNumberList
+						<!-- /PubmedArticle/MedlineCitation/DataBankList/DataBank/ -->
+					</xPath>
+					<isType>true</isType>
+					<tsFullClassName>
+						org.apache.uima.jcas.cas.StringArray
+					</tsFullClassName>
+				</tsFeature>
+			</tsFeature>
+		</tsFeature>
+		<tsFeature>
+			<tsFeatureName>MeSHList</tsFeatureName>
+			<isType>true</isType>
+			<tsFullClassName>
+				org.apache.uima.jcas.cas.FSArray
+			</tsFullClassName>
+			<tsFeature>
+				<tsFeatureName>meshHeading</tsFeatureName>
+				<isType>true</isType>
+				<xPath>
+					/PubmedArticle/MedlineCitation/MeshHeadingList/MeshHeading
+				</xPath>
+				<tsFullClassName>
+					de.julielab.jcore.types.MeshHeading
+				</tsFullClassName>
+				<tsFeature>
+					<tsFeatureName>DescriptorName</tsFeatureName>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<xPath>DescriptorName</xPath>
+				</tsFeature>
+				<tsFeature>
+					<tsFeatureName>
+						DescriptorNameMajorTopic
+					</tsFeatureName>
+					<xPath>DescriptorName/@MajorTopicYN</xPath>
+					<tsFullClassName>boolean</tsFullClassName>
+					<valueMapping>
+						<xmlElement>Y</xmlElement>
+						<value>true</value>
+					</valueMapping>
+					<valueMapping>
+						<xmlElement>N</xmlElement>
+						<value>false</value>
+					</valueMapping>
+				</tsFeature>
+				<tsFeature>
+					<tsFeatureName>QualifierName</tsFeatureName>
+					<tsFullClassName>java.lang.String</tsFullClassName>
+					<xPath>QualifierName</xPath>
+				</tsFeature>
+				<tsFeature>
+					<tsFeatureName>
+						QualifierNameMajorTopic
+					</tsFeatureName>
+					<xPath>QualifierName/@MajorTopicYN</xPath>
+					<tsFullClassName>boolean</tsFullClassName>
+					<valueMapping>
+						<xmlElement>Y</xmlElement>
+						<value>true</value>
+					</valueMapping>
+					<valueMapping>
+						<xmlElement>N</xmlElement>
+						<value>false</value>
+					</valueMapping>
+				</tsFeature>
+			</tsFeature>
+		</tsFeature>
+	</tsType>
+</mappings>
\ No newline at end of file
diff --git a/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XMIDBWriter.java b/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XMIDBWriter.java
index 004c085d9..3596db300 100644
--- a/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XMIDBWriter.java
+++ b/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XMIDBWriter.java
@@ -115,7 +115,6 @@ public class XMIDBWriter extends JCasAnnotator_ImplBase {
     public static final String PARAM_FEATURES_TO_MAP_DRYRUN = "BinaryFeaturesToMapDryRun";
     public static final String PARAM_BINARY_FEATURES_BLACKLIST = "BinaryFeaturesBlacklist";
     public static final String PARAM_ADD_SHA_HASH = "AddShaHash";
-    public static final String PARAM_SKIP_MATCHING_HASH = "SkipMatchingHash";
     private static final Logger log = LoggerFactory.getLogger(XMIDBWriter.class);
     // The mappings are keyed by the costosys.xml path and the table schema, see 'mappingCacheKey'.
     // The idea is to save costly database connections by sharing updating mapping across threads.
@@ -250,7 +249,6 @@ public class XMIDBWriter extends JCasAnnotator_ImplBase {
     private String[] binaryFeaturesBlacklistParameter;
     @ConfigurationParameter(name = PARAM_ADD_SHA_HASH, mandatory = false, description = "Possible values: document_text. If this parameter is set to a valid value, the SHA256 hash for the given value will be calculated, base64 encoded and added to each document as a new column in the document table. The column will be named after the parameter value, suffixed by '_sha256'.")
     private String documentItemToHash;
-    @ConfigurationParameter(name =PARAM_SKIP_MATCHING_HASH, mandatory = false, description = "Only in effect, if: " + PARAM_ADD_SHA_HASH + " is active; if the target XMI table has also been read from by the XMI DB reader and the reader has been configured to read the document's current hash value. Then, compares the hash value retrieved and relied by the XMI DB reader to the  ")
     private Map<DocumentId, String> shaMap;
     private String mappingCacheKey;
     private DocumentReleaseCheckpoint docReleaseCheckpoint;
@@ -322,7 +320,7 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
         }
 
         if (xmiMetaSchema.isBlank())
-            throw new ResourceInitializationException(new IllegalArgumentException("The XMI meta table Postgres schema must either be omitted at all or non-empty but was."));
+            throw new ResourceInitializationException(new IllegalArgumentException("The XMI meta table Postgres schema must either be omitted at all or non-empty but was '" + xmiMetaSchema + "'."));
 
         unqualifiedAnnotationNames = Collections.emptyList();
 
@@ -852,11 +850,15 @@ private DocumentId getDocumentId(JCas aJCas) {
             AnnotationIndex<Annotation> headerIndex = aJCas.getAnnotationIndex(Header.type);
             FSIterator<Annotation> headerIt = headerIndex.iterator();
             if (!headerIt.hasNext()) {
-                int min = Math.min(100, aJCas.getDocumentText().length());
+                String docText = "<no text>";
+                if (aJCas.getDocumentText() != null) {
+                    int min = Math.min(100, aJCas.getDocumentText().length());
+                    docText = aJCas.getDocumentText().substring(0, min);
+                }
                 log.warn(
                         "Got document without a header and without DBProcessingMetaData; cannot obtain document ID." +
                                 " This document will not be written into the database. Document text begins with: {}",
-                        aJCas.getDocumentText().substring(0, min));
+                        docText);
                 ++headerlessDocuments;
                 return null;
             }
@@ -1044,8 +1046,9 @@ public void collectionProcessComplete() throws AnalysisEngineProcessException {
         } catch (XmiDataInsertionException e) {
             throw new AnalysisEngineProcessException(e);
         }
-        log.info("{} documents without a head occured overall. Those could not be written into the database.",
-                headerlessDocuments);
+        if (headerlessDocuments > 0)
+            log.info("{} documents without a head occured overall. Those could not be written into the database.",
+                    headerlessDocuments);
         dbc.close();
     }
 
diff --git a/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java b/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java
index 8cd4ce9b4..03c2b1160 100644
--- a/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java
+++ b/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java
@@ -138,7 +138,7 @@ public AbstractCas next() throws AnalysisEngineProcessException {
      * @param jCas The newly read JCas.
      */
     private void setToVisitAnnotation(JCas jCas) {
-        if (xmiStorageDataTable != null) {
+        if (xmiStorageDataTable != null && dbc.tableExists(xmiStorageDataTable)) {
             DBProcessingMetaData dbProcessingMetaData = JCasUtil.selectSingle(jCas, DBProcessingMetaData.class);
             StringArray pkArray = dbProcessingMetaData.getPrimaryKey();
             String pkString = String.join(",", pkArray.toArray());
@@ -146,6 +146,8 @@ private void setToVisitAnnotation(JCas jCas) {
             if (existingHash != null) {
                 String newHash = getHash(jCas);
                 if (existingHash.equals(newHash)) {
+                    if (log.isTraceEnabled())
+                    log.trace("Document {} has a document text hash that equals the one present in the database. Creating a ToVisit annotation routing it only to the components with delegate keys {}.", pkString, toVisitKeys);
                     ToVisit toVisit = new ToVisit(jCas);
                     if (toVisitKeys != null && toVisitKeys.length != 0) {
                         StringArray keysArray = new StringArray(jCas, toVisitKeys.length);
@@ -186,7 +188,7 @@ protected List<Map<String, Object>> getAllRetrievedColumns() {
      * @throws AnalysisEngineProcessException If the SQL request fails.
      */
     private Map<String, String> fetchCurrentHashesFromDatabase(RowBatch rowBatch) throws AnalysisEngineProcessException {
-        if (xmiStorageDataTable != null) {
+        if (xmiStorageDataTable != null && dbc.tableExists(xmiStorageDataTable) && rowBatch.getIdentifiers() != null && rowBatch.getIdentifiers().size() > 0) {
             String hashColumn = documentItemToHash + "_sha256";
             // Extract the document IDs in this RowBatch. The IDs could be composite keys.
             List<String[]> documentIds = new ArrayList<>(rowBatch.getIdentifiers().size());
@@ -217,7 +219,7 @@ private Map<String, String> fetchCurrentHashesFromDatabase(RowBatch rowBatch) th
                     id2hash.put(pkSb.toString(), hash);
                 }
             } catch (SQLException e) {
-                log.error("Could not retrieve hashes from the database. SQL query was {}:", sql, e);
+                log.error("Could not retrieve hashes from the database. SQL query was '{}':", sql, e);
                 throw new AnalysisEngineProcessException(e);
             }
             return id2hash;
diff --git a/jcore-xml-db-reader/src/test/java/de/julielab/jcore/reader/xml/XMLDBMultiplierTest.java b/jcore-xml-db-reader/src/test/java/de/julielab/jcore/reader/xml/XMLDBMultiplierTest.java
index a56950c00..f14839236 100644
--- a/jcore-xml-db-reader/src/test/java/de/julielab/jcore/reader/xml/XMLDBMultiplierTest.java
+++ b/jcore-xml-db-reader/src/test/java/de/julielab/jcore/reader/xml/XMLDBMultiplierTest.java
@@ -49,7 +49,7 @@ public class XMLDBMultiplierTest {
     private static final String MAX_XMI_ID_FIELD_NAME = "max_xmi_id";
     private static final String SOFA_MAPPING_FIELD_NAME = "sofa_mapping";
     private static final String SUBSET_TABLE = "test_subset";
-    public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer("postgres:11.12");
+    public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:11.12");
     private static String costosysConfig;
 
     @BeforeAll
diff --git a/pom.xml b/pom.xml
index 87deb6229..6db724ae5 100644
--- a/pom.xml
+++ b/pom.xml
@@ -210,7 +210,8 @@
     <module>jcore-xmi-writer</module>
             
     <module>jedis-parent</module>
-            
+      <module>jcore-jedis-integration-tests</module>
+
   </modules>
       
   <scm>

From 639d1a3f8b739c8cf1eb1805fae3cc4faf842459 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 15 Jun 2021 14:12:40 +0200
Subject: [PATCH 068/269] Excluding JUnit 3 from a subdependency.

---
 jcore-banner-ae/pom.xml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/jcore-banner-ae/pom.xml b/jcore-banner-ae/pom.xml
index d50f90b07..6235ec58d 100644
--- a/jcore-banner-ae/pom.xml
+++ b/jcore-banner-ae/pom.xml
@@ -37,6 +37,10 @@
                     <artifactId>log4j</artifactId>
                     <groupId>log4j</groupId>
                 </exclusion>
+                <exclusion>
+                    <groupId>junit</groupId>
+                    <artifactId>junit</artifactId>
+                </exclusion>
             </exclusions>
         </dependency>
         <dependency>

From 0c267ef7c44c6a93730a789f324146dd316f1857 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 15 Jun 2021 14:21:28 +0200
Subject: [PATCH 069/269] More JUnit 3 and 4 exclusions.

---
 jcore-jnet-ae/pom.xml | 9 ++++++++-
 jcore-jsbd-ae/pom.xml | 9 ++++++++-
 jcore-jtbd-ae/pom.xml | 9 ++++++++-
 3 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/jcore-jnet-ae/pom.xml b/jcore-jnet-ae/pom.xml
index 31f7e544b..7805ab9f6 100644
--- a/jcore-jnet-ae/pom.xml
+++ b/jcore-jnet-ae/pom.xml
@@ -17,8 +17,9 @@
     <build>
         <plugins>
             <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-assembly-plugin</artifactId>
-                <version>2.4</version>
+                <version>3.3.0</version>
                 <configuration>
                     <descriptorRefs>
                         <descriptorRef>jar-with-dependencies</descriptorRef>
@@ -106,6 +107,12 @@
             <groupId>de.julielab</groupId>
             <artifactId>uea-stemmer</artifactId>
             <version>0.1</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>junit</groupId>
+                    <artifactId>junit</artifactId>
+                </exclusion>
+            </exclusions>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
diff --git a/jcore-jsbd-ae/pom.xml b/jcore-jsbd-ae/pom.xml
index e21b02e2b..c23dc7e7c 100644
--- a/jcore-jsbd-ae/pom.xml
+++ b/jcore-jsbd-ae/pom.xml
@@ -17,8 +17,9 @@
     <build>
         <plugins>
             <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-assembly-plugin</artifactId>
-                <version>2.4</version>
+                <version>3.3.0</version>
                 <configuration>
                     <descriptorRefs>
                         <descriptorRef>jar-with-dependencies</descriptorRef>
@@ -101,6 +102,12 @@
             <groupId>cc.mallet</groupId>
             <artifactId>mallet</artifactId>
             <version>2.0.8</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>junit</groupId>
+                    <artifactId>junit</artifactId>
+                </exclusion>
+            </exclusions>
         </dependency>
         <dependency>
             <groupId>org.apache.commons</groupId>
diff --git a/jcore-jtbd-ae/pom.xml b/jcore-jtbd-ae/pom.xml
index 54671bfc1..c773cf55d 100644
--- a/jcore-jtbd-ae/pom.xml
+++ b/jcore-jtbd-ae/pom.xml
@@ -16,8 +16,9 @@
     <build>
         <plugins>
             <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
                 <artifactId>maven-assembly-plugin</artifactId>
-                <version>2.4</version>
+                <version>3.3.0</version>
                 <configuration>
                     <descriptorRefs>
                         <descriptorRef>jar-with-dependencies</descriptorRef>
@@ -89,6 +90,12 @@
             <groupId>cc.mallet</groupId>
             <artifactId>mallet</artifactId>
             <version>2.0.8</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>junit</groupId>
+                    <artifactId>junit</artifactId>
+                </exclusion>
+            </exclusions>
         </dependency>
         <dependency>
             <groupId>org.junit.jupiter</groupId>

From e152e95fda814f5bc79d0129a714f2672f6b4cb2 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 15 Jun 2021 14:44:55 +0200
Subject: [PATCH 070/269] More test fixes.

---
 .../reader/db/DBMultiplierReaderTest.java      |  8 +++++---
 .../jcore/reader/db/DBMultiplierTest.java      |  8 +++++---
 .../julielab/jcore/reader/db/DBReaderTest.java |  9 +++++----
 jcore-elasticsearch-consumer/pom.xml           | 18 ++++++++++++------
 .../consumer/es/ElasticSearchConsumerIT.java   | 10 ++++++----
 .../jcore/ae/jnet/cli/JNETApplicationTest.java |  4 ++--
 .../jnet/uima/ConsistencyPreservationTest.java | 16 +++++++++++++---
 .../ae/jnet/uima/EntityAnnotatorTest.java      | 16 +++++++++-------
 .../jcore/ae/jtbd/Sentence2TokenPipeTest.java  |  8 ++++++--
 .../jcore/ae/jtbd/main/TokenAnnotatorTest.java |  5 +++--
 10 files changed, 66 insertions(+), 36 deletions(-)

diff --git a/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBMultiplierReaderTest.java b/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBMultiplierReaderTest.java
index 11aa0d9ab..33f73c0eb 100644
--- a/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBMultiplierReaderTest.java
+++ b/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBMultiplierReaderTest.java
@@ -12,10 +12,11 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
-import org.junit.ClassRule;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
 import org.testcontainers.containers.PostgreSQLContainer;
+import org.testcontainers.junit.jupiter.Container;
+import org.testcontainers.junit.jupiter.Testcontainers;
 
 import java.io.IOException;
 import java.sql.SQLException;
@@ -23,9 +24,10 @@
 import static de.julielab.jcore.reader.db.TableReaderConstants.*;
 import static org.junit.jupiter.api.Assertions.*;
 
+@Testcontainers
 public class DBMultiplierReaderTest {
-    @ClassRule
-    public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer();
+    @Container
+    public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:11.12");
 
     @BeforeAll
     public static void setup() throws SQLException {
diff --git a/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBMultiplierTest.java b/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBMultiplierTest.java
index fa378c49e..7a90917ad 100644
--- a/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBMultiplierTest.java
+++ b/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBMultiplierTest.java
@@ -19,12 +19,13 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
-import org.junit.ClassRule;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.testcontainers.containers.PostgreSQLContainer;
+import org.testcontainers.junit.jupiter.Container;
+import org.testcontainers.junit.jupiter.Testcontainers;
 
 import java.io.File;
 import java.io.FileInputStream;
@@ -34,10 +35,11 @@
 import static de.julielab.jcore.reader.db.TableReaderConstants.*;
 import static org.junit.jupiter.api.Assertions.*;
 
+@Testcontainers
 public class DBMultiplierTest {
     private final static Logger log = LoggerFactory.getLogger(DBMultiplierTest.class);
-    @ClassRule
-    public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer();
+    @Container
+    public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer("postgres:11.12");
 
     @BeforeAll
     public static void setup() throws SQLException, IOException {
diff --git a/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBReaderTest.java b/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBReaderTest.java
index 015d3e3f5..6cb6f3fcf 100644
--- a/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBReaderTest.java
+++ b/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBReaderTest.java
@@ -12,12 +12,13 @@
 import org.apache.uima.fit.factory.CollectionReaderFactory;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
-import org.junit.ClassRule;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.testcontainers.containers.PostgreSQLContainer;
+import org.testcontainers.junit.jupiter.Container;
+import org.testcontainers.junit.jupiter.Testcontainers;
 
 import java.io.File;
 import java.io.FileInputStream;
@@ -27,10 +28,10 @@
 import static de.julielab.jcore.reader.db.TableReaderConstants.*;
 import static org.junit.jupiter.api.Assertions.*;
 
-
+@Testcontainers
 public class DBReaderTest {
-    @ClassRule
-    public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer();
+    @Container
+    public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:11.12");
 
     @BeforeAll
     public static void setup() throws SQLException {
diff --git a/jcore-elasticsearch-consumer/pom.xml b/jcore-elasticsearch-consumer/pom.xml
index a4fed0dc9..57f9452c2 100644
--- a/jcore-elasticsearch-consumer/pom.xml
+++ b/jcore-elasticsearch-consumer/pom.xml
@@ -82,12 +82,6 @@
             <groupId>org.testng</groupId>
             <artifactId>testng</artifactId>
         </dependency>
-        <dependency>
-            <groupId>org.testcontainers</groupId>
-            <artifactId>testcontainers</artifactId>
-            <version>1.12.0</version>
-            <scope>test</scope>
-        </dependency>
         <dependency>
             <groupId>ch.qos.logback</groupId>
             <artifactId>logback-classic</artifactId>
@@ -97,6 +91,18 @@
             <groupId>org.junit.jupiter</groupId>
             <artifactId>junit-jupiter</artifactId>
         </dependency>
+        <dependency>
+            <groupId>org.testcontainers</groupId>
+            <artifactId>testcontainers</artifactId>
+            <version>1.15.3</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.testcontainers</groupId>
+            <artifactId>junit-jupiter</artifactId>
+            <version>1.15.3</version>
+            <scope>test</scope>
+        </dependency>
     </dependencies>
     <organization>
         <name>JULIE Lab Jena, Germany</name>
diff --git a/jcore-elasticsearch-consumer/src/test/java/de/julielab/jcore/consumer/es/ElasticSearchConsumerIT.java b/jcore-elasticsearch-consumer/src/test/java/de/julielab/jcore/consumer/es/ElasticSearchConsumerIT.java
index 36a71fbe0..c780ee2f9 100644
--- a/jcore-elasticsearch-consumer/src/test/java/de/julielab/jcore/consumer/es/ElasticSearchConsumerIT.java
+++ b/jcore-elasticsearch-consumer/src/test/java/de/julielab/jcore/consumer/es/ElasticSearchConsumerIT.java
@@ -7,7 +7,6 @@
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
-import org.junit.ClassRule;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
@@ -15,6 +14,8 @@
 import org.testcontainers.containers.GenericContainer;
 import org.testcontainers.containers.output.OutputFrame;
 import org.testcontainers.containers.output.Slf4jLogConsumer;
+import org.testcontainers.junit.jupiter.Container;
+import org.testcontainers.junit.jupiter.Testcontainers;
 import org.testcontainers.shaded.com.fasterxml.jackson.databind.ObjectMapper;
 
 import java.net.URL;
@@ -23,12 +24,13 @@
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 
+@Testcontainers
 public class ElasticSearchConsumerIT {
     public static final String TEST_INDEX = "testindex";
     public static final String TEST_CLUSTER = "testcluster";
     private final static Logger log = LoggerFactory.getLogger(ElasticSearchConsumerIT.class);
     // in case we need to disable X-shield: https://stackoverflow.com/a/51172136/1314955
-    @ClassRule
+    @Container
     public static GenericContainer es = new GenericContainer("docker.elastic.co/elasticsearch/elasticsearch:7.0.1")
             .withEnv("xpack.security.enabled", "false")
             .withEnv("discovery.type", "single-node")
@@ -57,8 +59,8 @@ public void testMinimal() throws Exception {
         consumer.collectionProcessComplete();
         final URL url = new URL("http://localhost:" + es.getMappedPort(9200) + "/" + TEST_INDEX + "/_doc/987");
         final ObjectMapper om = new ObjectMapper();
-        final Map<?,?> map = om.readValue(url.openStream(), Map.class);
-        assertEquals(jCas.getDocumentText(), ((Map)map.get("_source")).get("text"));
+        final Map<?, ?> map = om.readValue(url.openStream(), Map.class);
+        assertEquals(jCas.getDocumentText(), ((Map) map.get("_source")).get("text"));
     }
 
     /**
diff --git a/jcore-jnet-ae/src/test/java/de/julielab/jcore/ae/jnet/cli/JNETApplicationTest.java b/jcore-jnet-ae/src/test/java/de/julielab/jcore/ae/jnet/cli/JNETApplicationTest.java
index 4cc449a62..153d2714c 100644
--- a/jcore-jnet-ae/src/test/java/de/julielab/jcore/ae/jnet/cli/JNETApplicationTest.java
+++ b/jcore-jnet-ae/src/test/java/de/julielab/jcore/ae/jnet/cli/JNETApplicationTest.java
@@ -6,7 +6,7 @@
 
 package de.julielab.jcore.ae.jnet.cli;
 
-import org.junit.After;
+import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.Test;
 
 import java.io.File;
@@ -32,7 +32,7 @@ public class JNETApplicationTest {
 	
 
-    @After 
+    @AfterEach
     public void deleteModel() {
     	File modelFile = new File(UNITTEST_MODEL_GZ);
 		if (modelFile.exists())
diff --git a/jcore-jnet-ae/src/test/java/de/julielab/jcore/ae/jnet/uima/ConsistencyPreservationTest.java b/jcore-jnet-ae/src/test/java/de/julielab/jcore/ae/jnet/uima/ConsistencyPreservationTest.java
index 3031116d3..f551411fd 100644
--- a/jcore-jnet-ae/src/test/java/de/julielab/jcore/ae/jnet/uima/ConsistencyPreservationTest.java
+++ b/jcore-jnet-ae/src/test/java/de/julielab/jcore/ae/jnet/uima/ConsistencyPreservationTest.java
@@ -18,7 +18,6 @@
 package de.julielab.jcore.ae.jnet.uima;
 
 import de.julielab.jcore.types.*;
-import junit.framework.TestCase;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.cas.CAS;
 import org.apache.uima.cas.FSIterator;
@@ -28,6 +27,7 @@
 import org.apache.uima.jcas.cas.FSArray;
 import org.apache.uima.util.CasCreationUtils;
 import org.apache.uima.util.XMLInputSource;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -35,6 +35,8 @@
 import java.util.Iterator;
 import java.util.TreeSet;
 
+import static org.junit.jupiter.api.Assertions.*;
+
 /**
  * Please note that in the original test there were "GoodEntityMentions" and
  * "BadEntityMentions". Both types were only used for this test which caused
@@ -45,7 +47,7 @@
  * @author faessler
  * 
  */
-public class ConsistencyPreservationTest extends TestCase {
+public class ConsistencyPreservationTest  {
 
 	private static final Logger LOGGER = LoggerFactory.getLogger(ConsistencyPreservationTest.class);
 
@@ -133,12 +135,14 @@ private void initJCas4DoAbbreviationBased(final JCas jcas) throws Exception {
 		e5.addToIndexes();
 	}
 
+	@Test
 	public void testConsistencyPreservation() throws Exception {
 		final String modeString = ConsistencyPreservation.MODE_STRING + "," + ConsistencyPreservation.MODE_ACRO2FULL
 				+ "," + ConsistencyPreservation.MODE_FULL2ACRO;
 		new ConsistencyPreservation(modeString);
 	}
 
+	@Test
 	public void testAcroMatch() throws Exception {
 		final String modeString = ConsistencyPreservation.MODE_FULL2ACRO + "," + ConsistencyPreservation.MODE_ACRO2FULL;
 
@@ -186,6 +190,7 @@ public void testAcroMatch() throws Exception {
 
 	}
 
+	@Test
 	public void testStringMatch() throws Exception {
 		LOGGER.info("testStringMatch() -  starting...");
 		final CAS cas = CasCreationUtils.createCas(
@@ -229,6 +234,7 @@ public void testStringMatch() throws Exception {
 		assertTrue(allOK);
 	}
 
+	@Test
 	public void testStringMatch2() throws Exception {
 		// This test checks whether the consistence preservation algorithm
 		// correctly detects already existing annotations even when there are
@@ -269,6 +275,7 @@ public void testStringMatch2() throws Exception {
 		assertEquals(3, count);
 	}
 
+	@Test
 	public void testStringMatch3() throws Exception {
 		// This test checks whether the consistence preservation algorithm
 		// correctly detects already existing annotations even when there are
@@ -309,6 +316,7 @@ public void testStringMatch3() throws Exception {
 		assertEquals(5, count);
 	}
 
+	@Test
 	public void testStringMatchTokenBoundaries() throws Exception {
 		// This test checks whether the consistency preservation algorithm
 		// sticks to token boundaries if the respective mode is on
@@ -350,6 +358,7 @@ public void testStringMatchTokenBoundaries() throws Exception {
 		assertEquals(1, count);
 	}
 
+	@Test
 	public void testStringMatchTokenBoundaries2() throws Exception {
 		// Test for multi token entities
 		String text = "This is BCA alpha. But we haven't annotated BCA alpha in all cases. Also not some other BCA.";
@@ -430,7 +439,8 @@ else if (g.getSpecificType().equals("type2"))
 		}
 		assertEquals(2, oCount);
 	}
-	
+
+	@Test
 	public void testStringMatchTokenBoundaries3() throws Exception {
 		// Test for multi token entities with correct prefix but wrong ending
 		String text = "Group 1. And Group B.";
diff --git a/jcore-jnet-ae/src/test/java/de/julielab/jcore/ae/jnet/uima/EntityAnnotatorTest.java b/jcore-jnet-ae/src/test/java/de/julielab/jcore/ae/jnet/uima/EntityAnnotatorTest.java
index 44dd4e90d..e2143f3e9 100644
--- a/jcore-jnet-ae/src/test/java/de/julielab/jcore/ae/jnet/uima/EntityAnnotatorTest.java
+++ b/jcore-jnet-ae/src/test/java/de/julielab/jcore/ae/jnet/uima/EntityAnnotatorTest.java
@@ -20,7 +20,6 @@
 import de.julielab.jcore.types.*;
 import de.julielab.jcore.utility.index.JCoReCoverIndex;
 import de.julielab.jnet.tagger.Unit;
-import junit.framework.TestCase;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -37,6 +36,7 @@
 import org.apache.uima.util.InvalidXMLException;
 import org.apache.uima.util.XMLInputSource;
 import org.apache.uima.util.XMLParser;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.xml.sax.SAXException;
@@ -52,7 +52,9 @@
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 
-public class EntityAnnotatorTest extends TestCase {
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class EntityAnnotatorTest  {
 
 	/**
 	 * Logger for this class
@@ -66,12 +68,8 @@ public class EntityAnnotatorTest extends TestCase {
 	private static final String ENTITY_ANNOTATOR_DESC = PREFIX+"EntityAnnotatorTest.xml";
 	private static final String NEGATIVE_LIST = PREFIX+"negativeList";
 
-	@Override
-	protected void setUp() throws Exception {
-		super.setUp();
-		// PropertyConfigurator.configure("src/test/java/log4j.properties");
-	}
 
+	@Test
 	public void testIgnoreLabel() throws ResourceInitializationException {
 
 		// load AE
@@ -124,6 +122,7 @@ public void testIgnoreLabel() throws ResourceInitializationException {
 	/**
 	 * test whether Annotator can be initialized properly from given descriptor
 	 */
+	@Test
 	public void testInitialize() {
 		LOGGER.debug("testInitialize()");
 		AnalysisEngine entityAnnotator = null;
@@ -150,6 +149,7 @@ public void testInitialize() {
 	 * test whether process method runs successfully. Output must be checked by
 	 * a human manually
 	 */
+	@Test
 	public void testProcess() throws InvalidXMLException, ResourceInitializationException, IOException, SAXException,
 			CASException, AnalysisEngineProcessException {
 		LOGGER.debug("testProcess()");
@@ -176,6 +176,7 @@ public void testProcess() throws InvalidXMLException, ResourceInitializationExce
 	 * unit sentence and removing duplicates. Prediction is "simulated" (labels
 	 * are set).
 	 */
+	@Test
 	public void testSimulatedProcess() throws IllegalAccessException, NoSuchFieldException,
 			ResourceInitializationException, InvalidXMLException, IOException, CASException, SAXException {
 		LOGGER.debug("testCreateUnitSentence() - starting");
@@ -280,6 +281,7 @@ else if (unit.getRep().equals("ceta"))
 	 * @throws IllegalAccessException
 	 * @throws IllegalArgumentException
 	 */
+	@Test
 	public void testWriteToCAS() throws SecurityException, NoSuchFieldException, ResourceInitializationException,
 			InvalidXMLException, IOException, CASException, IllegalArgumentException, IllegalAccessException {
 		LOGGER.debug("testWriteToCAS()");
diff --git a/jcore-jtbd-ae/src/test/java/de/julielab/jcore/ae/jtbd/Sentence2TokenPipeTest.java b/jcore-jtbd-ae/src/test/java/de/julielab/jcore/ae/jtbd/Sentence2TokenPipeTest.java
index 46d4826c1..140945584 100644
--- a/jcore-jtbd-ae/src/test/java/de/julielab/jcore/ae/jtbd/Sentence2TokenPipeTest.java
+++ b/jcore-jtbd-ae/src/test/java/de/julielab/jcore/ae/jtbd/Sentence2TokenPipeTest.java
@@ -17,19 +17,22 @@
 
 package de.julielab.jcore.ae.jtbd;
 
-import junit.framework.TestCase;
+import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.util.ArrayList;
 
-public class Sentence2TokenPipeTest extends TestCase {
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class Sentence2TokenPipeTest  {
 
 	private static final Logger LOGGER = LoggerFactory
 			.getLogger(Sentence2TokenPipeTest.class);
 
 	private static final String TEST_SENTENCE = "this is   a \t junit -test";
 
+	@Test
 	public void testMakeLabel() {
 		final ArrayList<String> expectedLabels = new ArrayList<String>();
 		expectedLabels.add("P");
@@ -55,6 +58,7 @@ public void testMakeLabel() {
 		assertTrue(allOK);
 	}
 
+	@Test
 	public void testMakeUnits() {
 		final ArrayList<String> expectedUnits = new ArrayList<String>();
 		expectedUnits.add("this");
diff --git a/jcore-jtbd-ae/src/test/java/de/julielab/jcore/ae/jtbd/main/TokenAnnotatorTest.java b/jcore-jtbd-ae/src/test/java/de/julielab/jcore/ae/jtbd/main/TokenAnnotatorTest.java
index 37d8571f9..543abf443 100644
--- a/jcore-jtbd-ae/src/test/java/de/julielab/jcore/ae/jtbd/main/TokenAnnotatorTest.java
+++ b/jcore-jtbd-ae/src/test/java/de/julielab/jcore/ae/jtbd/main/TokenAnnotatorTest.java
@@ -18,7 +18,6 @@
 
 import de.julielab.jcore.types.Sentence;
 import de.julielab.jcore.types.Token;
-import junit.framework.TestCase;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.jcas.JCas;
@@ -32,7 +31,9 @@
 
 import java.util.Iterator;
 
-public class TokenAnnotatorTest extends TestCase {
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+public class TokenAnnotatorTest {
 
 	/**
 	 * Logger for this class

From daaca4c0d6ac468847527d36fd88ea98407d4028 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 15 Jun 2021 14:50:34 +0200
Subject: [PATCH 071/269] Updated the test XMI of the JNET mini app to the new
 output.

Why the output changed is unknown. It seems to be semantically equal, though. The XMI IDs have changed for some reason.
---
 .../de/julielab/jcore/ae/jnet/uima/miniapp_out_template.xmi      | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 jcore-jnet-ae/src/test/resources/de/julielab/jcore/ae/jnet/uima/miniapp_out_template.xmi

diff --git a/jcore-jnet-ae/src/test/resources/de/julielab/jcore/ae/jnet/uima/miniapp_out_template.xmi b/jcore-jnet-ae/src/test/resources/de/julielab/jcore/ae/jnet/uima/miniapp_out_template.xmi
deleted file mode 100644
index 029dc8db3..000000000
--- a/jcore-jnet-ae/src/test/resources/de/julielab/jcore/ae/jnet/uima/miniapp_out_template.xmi
+++ /dev/null
@@ -1 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?><xmi:XMI xmlns:xmi="http://www.omg.org/XMI" xmlns:tcas="http:///uima/tcas.ecore" xmlns:cas="http:///uima/cas.ecore" xmlns:types="http:///de/julielab/jcore/types.ecore" xmi:version="2.0"><cas:NULL xmi:id="0"/><cas:FSArray xmi:id="8" elements="7465"/><cas:FSArray xmi:id="11" elements="7474"/><cas:FSArray xmi:id="14" elements="7483"/><cas:FSArray xmi:id="17" elements="7492"/><cas:FSArray xmi:id="20" elements="7501"/><cas:FSArray xmi:id="23" elements="7510"/><cas:FSArray xmi:id="26" elements="7519"/><cas:FSArray xmi:id="29" elements="7528"/><cas:FSArray xmi:id="32" elements="7537"/><cas:FSArray xmi:id="35" elements="7546"/><cas:FSArray xmi:id="38" elements="7555"/><cas:FSArray xmi:id="41" elements="7564"/><cas:FSArray xmi:id="44" elements="7573"/><cas:FSArray xmi:id="47" elements="7582"/><cas:FSArray xmi:id="50" elements="7591"/><cas:FSArray xmi:id="53" elements="7600"/><cas:FSArray xmi:id="56" elements="7609"/><cas:FSArray xmi:id="59" elements="7618"/><cas:FSArray xmi:id="62" elements="7627"/><cas:FSArray xmi:id="65" elements="7636"/><cas:FSArray xmi:id="68" elements="7645"/><cas:FSArray xmi:id="71" elements="7654"/><cas:FSArray xmi:id="74" elements="7663"/><cas:FSArray xmi:id="77" elements="7672"/><cas:FSArray xmi:id="80" elements="7681"/><cas:FSArray xmi:id="83" elements="7690"/><cas:FSArray xmi:id="86" elements="7699"/><cas:FSArray xmi:id="89" elements="7708"/><cas:FSArray xmi:id="92" elements="7717"/><cas:FSArray xmi:id="95" elements="7726"/><cas:FSArray xmi:id="98" elements="7735"/><cas:FSArray xmi:id="101" elements="7744"/><cas:FSArray xmi:id="104" elements="7753"/><cas:FSArray xmi:id="107" elements="7762"/><cas:FSArray xmi:id="110" elements="7771"/><cas:FSArray xmi:id="113" elements="7780"/><cas:FSArray xmi:id="116" elements="7789"/><cas:FSArray xmi:id="119" elements="7798"/><cas:FSArray xmi:id="122" elements="7807"/><cas:FSArray xmi:id="125" elements="7816"/><cas:FSArray xmi:id="128" elements="7825"/><cas:FSArray 
xmi:id="131" elements="7834"/><cas:FSArray xmi:id="134" elements="7843"/><cas:FSArray xmi:id="137" elements="7852"/><cas:FSArray xmi:id="140" elements="7861"/><cas:FSArray xmi:id="143" elements="7870"/><cas:FSArray xmi:id="146" elements="7879"/><cas:FSArray xmi:id="149" elements="7888"/><cas:FSArray xmi:id="152" elements="7897"/><cas:FSArray xmi:id="155" elements="7906"/><cas:FSArray xmi:id="158" elements="7915"/><cas:FSArray xmi:id="161" elements="7924"/><cas:FSArray xmi:id="164" elements="7933"/><cas:FSArray xmi:id="167" elements="7942"/><cas:FSArray xmi:id="170" elements="7951"/><cas:FSArray xmi:id="173" elements="7960"/><cas:FSArray xmi:id="176" elements="7969"/><cas:FSArray xmi:id="179" elements="7978"/><cas:FSArray xmi:id="182" elements="7987"/><cas:FSArray xmi:id="185" elements="7996"/><cas:FSArray xmi:id="188" elements="8005"/><cas:FSArray xmi:id="191" elements="8014"/><cas:FSArray xmi:id="194" elements="8023"/><cas:FSArray xmi:id="197" elements="8032"/><cas:FSArray xmi:id="200" elements="8041"/><cas:FSArray xmi:id="203" elements="8050"/><cas:FSArray xmi:id="206" elements="8059"/><cas:FSArray xmi:id="209" elements="8068"/><cas:FSArray xmi:id="212" elements="8077"/><cas:FSArray xmi:id="215" elements="8086"/><cas:FSArray xmi:id="218" elements="8095"/><cas:FSArray xmi:id="221" elements="8104"/><cas:FSArray xmi:id="224" elements="8113"/><cas:FSArray xmi:id="227" elements="8122"/><cas:FSArray xmi:id="230" elements="8131"/><cas:FSArray xmi:id="233" elements="8140"/><cas:FSArray xmi:id="236" elements="8149"/><cas:FSArray xmi:id="239" elements="8158"/><cas:FSArray xmi:id="242" elements="8167"/><cas:FSArray xmi:id="245" elements="8176"/><cas:FSArray xmi:id="248" elements="8185"/><cas:FSArray xmi:id="251" elements="8194"/><cas:FSArray xmi:id="254" elements="8203"/><cas:FSArray xmi:id="257" elements="8212"/><cas:FSArray xmi:id="260" elements="8221"/><cas:FSArray xmi:id="263" elements="8230"/><cas:FSArray xmi:id="266" elements="8239"/><cas:FSArray xmi:id="269" 
elements="8248"/><cas:FSArray xmi:id="272" elements="8257"/><cas:FSArray xmi:id="275" elements="8266"/><cas:FSArray xmi:id="278" elements="8275"/><cas:FSArray xmi:id="281" elements="8284"/><cas:FSArray xmi:id="284" elements="8293"/><cas:FSArray xmi:id="287" elements="8302"/><cas:FSArray xmi:id="290" elements="8311"/><cas:FSArray xmi:id="293" elements="8320"/><cas:FSArray xmi:id="296" elements="8329"/><cas:FSArray xmi:id="299" elements="8338"/><cas:FSArray xmi:id="302" elements="8347"/><cas:FSArray xmi:id="305" elements="8356"/><cas:FSArray xmi:id="308" elements="8365"/><cas:FSArray xmi:id="311" elements="8374"/><cas:FSArray xmi:id="314" elements="8383"/><cas:FSArray xmi:id="317" elements="8392"/><cas:FSArray xmi:id="320" elements="8401"/><cas:FSArray xmi:id="323" elements="8410"/><cas:FSArray xmi:id="326" elements="8419"/><cas:FSArray xmi:id="329" elements="8428"/><cas:FSArray xmi:id="332" elements="8437"/><cas:FSArray xmi:id="335" elements="8446"/><cas:FSArray xmi:id="338" elements="8455"/><cas:FSArray xmi:id="341" elements="8464"/><cas:FSArray xmi:id="344" elements="8473"/><cas:FSArray xmi:id="347" elements="8482"/><cas:FSArray xmi:id="350" elements="8491"/><cas:FSArray xmi:id="353" elements="8500"/><cas:FSArray xmi:id="356" elements="8509"/><cas:FSArray xmi:id="359" elements="8518"/><cas:FSArray xmi:id="362" elements="8527"/><cas:FSArray xmi:id="365" elements="8536"/><cas:FSArray xmi:id="368" elements="8545"/><cas:FSArray xmi:id="371" elements="8554"/><cas:FSArray xmi:id="374" elements="8563"/><cas:FSArray xmi:id="377" elements="8572"/><cas:FSArray xmi:id="380" elements="8581"/><cas:FSArray xmi:id="383" elements="8590"/><cas:FSArray xmi:id="386" elements="8599"/><cas:FSArray xmi:id="389" elements="8608"/><cas:FSArray xmi:id="392" elements="8617"/><cas:FSArray xmi:id="395" elements="8626"/><cas:FSArray xmi:id="398" elements="8635"/><cas:FSArray xmi:id="401" elements="8644"/><cas:FSArray xmi:id="404" elements="8653"/><cas:FSArray xmi:id="407" 
elements="8662"/><cas:FSArray xmi:id="410" elements="8671"/><cas:FSArray xmi:id="413" elements="8680"/><cas:FSArray xmi:id="416" elements="8689"/><cas:FSArray xmi:id="419" elements="8698"/><cas:FSArray xmi:id="422" elements="8707"/><cas:FSArray xmi:id="425" elements="8716"/><cas:FSArray xmi:id="428" elements="8725"/><cas:FSArray xmi:id="431" elements="8734"/><cas:FSArray xmi:id="434" elements="8743"/><cas:FSArray xmi:id="437" elements="8752"/><cas:FSArray xmi:id="440" elements="8761"/><cas:FSArray xmi:id="443" elements="8770"/><cas:FSArray xmi:id="446" elements="8779"/><cas:FSArray xmi:id="449" elements="8788"/><cas:FSArray xmi:id="452" elements="8797"/><cas:FSArray xmi:id="455" elements="8806"/><cas:FSArray xmi:id="458" elements="8815"/><cas:FSArray xmi:id="461" elements="8824"/><cas:FSArray xmi:id="464" elements="8833"/><cas:FSArray xmi:id="467" elements="8842"/><cas:FSArray xmi:id="470" elements="8851"/><cas:FSArray xmi:id="473" elements="8860"/><cas:FSArray xmi:id="476" elements="8869"/><cas:FSArray xmi:id="479" elements="8878"/><cas:FSArray xmi:id="482" elements="8887"/><cas:FSArray xmi:id="485" elements="8896"/><cas:FSArray xmi:id="488" elements="8905"/><cas:FSArray xmi:id="491" elements="8914"/><cas:FSArray xmi:id="494" elements="8923"/><cas:FSArray xmi:id="497" elements="8932"/><cas:FSArray xmi:id="500" elements="8941"/><cas:FSArray xmi:id="503" elements="8950"/><cas:FSArray xmi:id="506" elements="8959"/><cas:FSArray xmi:id="509" elements="8968"/><cas:FSArray xmi:id="512" elements="8977"/><cas:FSArray xmi:id="515" elements="8986"/><cas:FSArray xmi:id="518" elements="8995"/><cas:FSArray xmi:id="521" elements="9004"/><cas:FSArray xmi:id="524" elements="9013"/><cas:FSArray xmi:id="527" elements="9022"/><cas:FSArray xmi:id="530" elements="9031"/><cas:FSArray xmi:id="533" elements="9040"/><cas:FSArray xmi:id="536" elements="9049"/><cas:FSArray xmi:id="539" elements="9058"/><cas:FSArray xmi:id="542" elements="9067"/><cas:FSArray xmi:id="545" 
elements="9076"/><cas:FSArray xmi:id="548" elements="9085"/><cas:FSArray xmi:id="551" elements="9094"/><cas:FSArray xmi:id="554" elements="9103"/><cas:FSArray xmi:id="557" elements="9112"/><cas:FSArray xmi:id="560" elements="9121"/><cas:FSArray xmi:id="563" elements="9130"/><cas:FSArray xmi:id="566" elements="9139"/><cas:FSArray xmi:id="569" elements="9148"/><cas:FSArray xmi:id="572" elements="9157"/><cas:FSArray xmi:id="575" elements="9166"/><cas:FSArray xmi:id="578" elements="9175"/><cas:FSArray xmi:id="581" elements="9184"/><cas:FSArray xmi:id="584" elements="9193"/><cas:FSArray xmi:id="587" elements="9202"/><cas:FSArray xmi:id="590" elements="9211"/><cas:FSArray xmi:id="593" elements="9220"/><cas:FSArray xmi:id="596" elements="9229"/><cas:FSArray xmi:id="599" elements="9238"/><cas:FSArray xmi:id="602" elements="9247"/><cas:FSArray xmi:id="605" elements="9256"/><cas:FSArray xmi:id="608" elements="9265"/><cas:FSArray xmi:id="611" elements="9274"/><cas:FSArray xmi:id="614" elements="9283"/><cas:FSArray xmi:id="617" elements="9292"/><cas:FSArray xmi:id="620" elements="9301"/><cas:FSArray xmi:id="623" elements="9310"/><cas:FSArray xmi:id="626" elements="9319"/><cas:FSArray xmi:id="629" elements="9328"/><cas:FSArray xmi:id="632" elements="9337"/><cas:FSArray xmi:id="635" elements="9346"/><cas:FSArray xmi:id="638" elements="9355"/><cas:FSArray xmi:id="641" elements="9364"/><cas:FSArray xmi:id="644" elements="9373"/><cas:FSArray xmi:id="647" elements="9382"/><cas:FSArray xmi:id="650" elements="9391"/><cas:FSArray xmi:id="653" elements="9400"/><cas:FSArray xmi:id="656" elements="9409"/><cas:FSArray xmi:id="659" elements="9418"/><cas:FSArray xmi:id="662" elements="9427"/><cas:FSArray xmi:id="665" elements="9436"/><cas:FSArray xmi:id="668" elements="9445"/><cas:FSArray xmi:id="671" elements="9454"/><cas:FSArray xmi:id="674" elements="9463"/><cas:FSArray xmi:id="677" elements="9472"/><cas:FSArray xmi:id="680" elements="9481"/><cas:FSArray xmi:id="683" 
elements="9490"/><cas:FSArray xmi:id="686" elements="9499"/><cas:FSArray xmi:id="689" elements="9508"/><cas:FSArray xmi:id="692" elements="9517"/><cas:FSArray xmi:id="695" elements="9526"/><cas:FSArray xmi:id="698" elements="9535"/><cas:FSArray xmi:id="701" elements="9544"/><cas:FSArray xmi:id="704" elements="9553"/><cas:FSArray xmi:id="707" elements="9562"/><cas:FSArray xmi:id="710" elements="9571"/><cas:FSArray xmi:id="713" elements="9580"/><cas:FSArray xmi:id="716" elements="9589"/><cas:FSArray xmi:id="719" elements="9598"/><cas:FSArray xmi:id="722" elements="9607"/><cas:FSArray xmi:id="725" elements="9616"/><cas:FSArray xmi:id="728" elements="9625"/><cas:FSArray xmi:id="731" elements="9634"/><cas:FSArray xmi:id="734" elements="9643"/><cas:FSArray xmi:id="737" elements="9652"/><cas:FSArray xmi:id="740" elements="9661"/><cas:FSArray xmi:id="743" elements="9670"/><cas:FSArray xmi:id="746" elements="9679"/><cas:FSArray xmi:id="749" elements="9688"/><cas:FSArray xmi:id="752" elements="9697"/><cas:FSArray xmi:id="755" elements="9706"/><cas:FSArray xmi:id="758" elements="9715"/><cas:FSArray xmi:id="761" elements="9724"/><cas:FSArray xmi:id="764" elements="9733"/><cas:FSArray xmi:id="767" elements="9742"/><cas:FSArray xmi:id="770" elements="9751"/><cas:FSArray xmi:id="773" elements="9760"/><cas:FSArray xmi:id="776" elements="9769"/><cas:FSArray xmi:id="779" elements="9778"/><cas:FSArray xmi:id="782" elements="9787"/><cas:FSArray xmi:id="785" elements="9796"/><cas:FSArray xmi:id="788" elements="9805"/><cas:FSArray xmi:id="791" elements="9814"/><cas:FSArray xmi:id="794" elements="9823"/><cas:FSArray xmi:id="797" elements="9832"/><cas:FSArray xmi:id="800" elements="9841"/><cas:FSArray xmi:id="803" elements="9850"/><cas:FSArray xmi:id="806" elements="9859"/><cas:FSArray xmi:id="809" elements="9868"/><cas:FSArray xmi:id="812" elements="9877"/><cas:FSArray xmi:id="815" elements="9886"/><cas:FSArray xmi:id="818" elements="9895"/><cas:FSArray xmi:id="821" 
elements="9904"/><cas:FSArray xmi:id="824" elements="9913"/><cas:FSArray xmi:id="827" elements="9922"/><cas:FSArray xmi:id="830" elements="9931"/><cas:FSArray xmi:id="833" elements="9940"/><cas:FSArray xmi:id="836" elements="9949"/><cas:FSArray xmi:id="839" elements="9958"/><cas:FSArray xmi:id="842" elements="9967"/><cas:FSArray xmi:id="845" elements="9976"/><cas:FSArray xmi:id="848" elements="9985"/><cas:FSArray xmi:id="851" elements="9994"/><cas:FSArray xmi:id="854" elements="10003"/><cas:FSArray xmi:id="857" elements="10012"/><cas:FSArray xmi:id="860" elements="10021"/><cas:FSArray xmi:id="863" elements="10030"/><cas:FSArray xmi:id="866" elements="10039"/><cas:FSArray xmi:id="869" elements="10048"/><cas:FSArray xmi:id="872" elements="10057"/><cas:FSArray xmi:id="875" elements="10066"/><cas:FSArray xmi:id="878" elements="10075"/><cas:FSArray xmi:id="881" elements="10084"/><cas:FSArray xmi:id="884" elements="10093"/><cas:FSArray xmi:id="887" elements="10102"/><cas:FSArray xmi:id="890" elements="10111"/><cas:FSArray xmi:id="893" elements="10120"/><cas:FSArray xmi:id="896" elements="10129"/><cas:FSArray xmi:id="899" elements="10138"/><cas:FSArray xmi:id="902" elements="10147"/><cas:FSArray xmi:id="905" elements="10156"/><cas:FSArray xmi:id="908" elements="10165"/><cas:FSArray xmi:id="911" elements="10174"/><cas:FSArray xmi:id="914" elements="10183"/><cas:FSArray xmi:id="917" elements="10192"/><cas:FSArray xmi:id="920" elements="10201"/><cas:FSArray xmi:id="923" elements="10210"/><cas:FSArray xmi:id="926" elements="10219"/><cas:FSArray xmi:id="929" elements="10228"/><cas:FSArray xmi:id="932" elements="10237"/><cas:FSArray xmi:id="935" elements="10246"/><cas:FSArray xmi:id="938" elements="10255"/><cas:FSArray xmi:id="941" elements="10264"/><cas:FSArray xmi:id="944" elements="10273"/><cas:FSArray xmi:id="947" elements="10282"/><cas:FSArray xmi:id="950" elements="10291"/><cas:FSArray xmi:id="953" elements="10300"/><cas:FSArray xmi:id="956" elements="10309"/><cas:FSArray 
xmi:id="959" elements="10318"/><cas:FSArray xmi:id="962" elements="10327"/><cas:FSArray xmi:id="965" elements="10336"/><cas:FSArray xmi:id="968" elements="10345"/><cas:FSArray xmi:id="971" elements="10354"/><cas:FSArray xmi:id="974" elements="10363"/><cas:FSArray xmi:id="977" elements="10372"/><cas:FSArray xmi:id="980" elements="10381"/><cas:FSArray xmi:id="983" elements="10390"/><cas:FSArray xmi:id="986" elements="10399"/><cas:FSArray xmi:id="989" elements="10408"/><cas:FSArray xmi:id="992" elements="10417"/><cas:FSArray xmi:id="995" elements="10426"/><cas:FSArray xmi:id="998" elements="10435"/><cas:FSArray xmi:id="1001" elements="10444"/><cas:FSArray xmi:id="1004" elements="10453"/><cas:FSArray xmi:id="1007" elements="10462"/><cas:FSArray xmi:id="1010" elements="10471"/><cas:FSArray xmi:id="1013" elements="10480"/><cas:FSArray xmi:id="1016" elements="10489"/><cas:FSArray xmi:id="1019" elements="10498"/><cas:FSArray xmi:id="1022" elements="10507"/><cas:FSArray xmi:id="1025" elements="10516"/><cas:FSArray xmi:id="1028" elements="10525"/><cas:FSArray xmi:id="1031" elements="10534"/><cas:FSArray xmi:id="1034" elements="10543"/><cas:FSArray xmi:id="1037" elements="10552"/><cas:FSArray xmi:id="1040" elements="10561"/><cas:FSArray xmi:id="1043" elements="10570"/><cas:FSArray xmi:id="1046" elements="10579"/><cas:FSArray xmi:id="1049" elements="10588"/><cas:FSArray xmi:id="1052" elements="10597"/><cas:FSArray xmi:id="1055" elements="10606"/><cas:FSArray xmi:id="1058" elements="10615"/><cas:FSArray xmi:id="1061" elements="10624"/><cas:FSArray xmi:id="1064" elements="10633"/><cas:FSArray xmi:id="1067" elements="10642"/><cas:FSArray xmi:id="1070" elements="10651"/><cas:FSArray xmi:id="1073" elements="10660"/><cas:FSArray xmi:id="1076" elements="10669"/><cas:FSArray xmi:id="1079" elements="10678"/><cas:FSArray xmi:id="1082" elements="10687"/><cas:FSArray xmi:id="1085" elements="10696"/><cas:FSArray xmi:id="1088" elements="10705"/><cas:FSArray xmi:id="1091" 
elements="10714"/><cas:FSArray xmi:id="1094" elements="10723"/><cas:FSArray xmi:id="1097" elements="10732"/><cas:FSArray xmi:id="1100" elements="10741"/><cas:FSArray xmi:id="1103" elements="10750"/><cas:FSArray xmi:id="1106" elements="10759"/><cas:FSArray xmi:id="1109" elements="10768"/><cas:FSArray xmi:id="1112" elements="10777"/><cas:FSArray xmi:id="1115" elements="10786"/><cas:FSArray xmi:id="1118" elements="10795"/><cas:FSArray xmi:id="1121" elements="10804"/><cas:FSArray xmi:id="1124" elements="10813"/><cas:FSArray xmi:id="1127" elements="10822"/><cas:FSArray xmi:id="1130" elements="10831"/><cas:FSArray xmi:id="1133" elements="10840"/><cas:FSArray xmi:id="1136" elements="10849"/><cas:FSArray xmi:id="1139" elements="10858"/><cas:FSArray xmi:id="1142" elements="10867"/><cas:FSArray xmi:id="1145" elements="10876"/><cas:FSArray xmi:id="1148" elements="10885"/><cas:FSArray xmi:id="1151" elements="10894"/><cas:FSArray xmi:id="1154" elements="10903"/><cas:FSArray xmi:id="1157" elements="10912"/><cas:FSArray xmi:id="1160" elements="10921"/><cas:FSArray xmi:id="1163" elements="10930"/><cas:FSArray xmi:id="1166" elements="10939"/><cas:FSArray xmi:id="1169" elements="10948"/><cas:FSArray xmi:id="1172" elements="10957"/><cas:FSArray xmi:id="1175" elements="10966"/><tcas:DocumentAnnotation xmi:id="1178" sofa="1" begin="0" end="1837" language="x-unspecified"/><types:AbbreviationLongform xmi:id="1183" sofa="1" begin="16" end="29"/><types:AbbreviationLongform xmi:id="1190" sofa="1" begin="70" end="79"/><types:AbbreviationLongform xmi:id="1197" sofa="1" begin="294" end="304"/><types:AbbreviationLongform xmi:id="1204" sofa="1" begin="613" end="635"/><types:AbbreviationLongform xmi:id="1211" sofa="1" begin="677" end="697"/><types:AbbreviationLongform xmi:id="1218" sofa="1" begin="776" end="845"/><types:Token xmi:id="1225" sofa="1" begin="0" end="4" componentId="JULIE Token Boundary Detector" posTag="8"/><types:Token xmi:id="1241" sofa="1" begin="4" end="5" componentId="JULIE 
Token Boundary Detector" posTag="11"/><types:Token xmi:id="1257" sofa="1" begin="5" end="10" componentId="JULIE Token Boundary Detector" posTag="14"/><types:Token xmi:id="1273" sofa="1" begin="11" end="12" componentId="JULIE Token Boundary Detector" posTag="17"/><types:Token xmi:id="1289" sofa="1" begin="12" end="14" componentId="JULIE Token Boundary Detector" posTag="20"/><types:Token xmi:id="1305" sofa="1" begin="14" end="15" componentId="JULIE Token Boundary Detector" posTag="23"/><types:Token xmi:id="1321" sofa="1" begin="16" end="29" componentId="JULIE Token Boundary Detector" posTag="26"/><types:Token xmi:id="1337" sofa="1" begin="30" end="31" componentId="JULIE Token Boundary Detector" posTag="29"/><types:Token xmi:id="1353" sofa="1" begin="31" end="34" componentId="JULIE Token Boundary Detector" posTag="32"/><types:Token xmi:id="1369" sofa="1" begin="34" end="35" componentId="JULIE Token Boundary Detector" posTag="35"/><types:Token xmi:id="1385" sofa="1" begin="36" end="38" componentId="JULIE Token Boundary Detector" posTag="38"/><types:Token xmi:id="1401" sofa="1" begin="39" end="44" componentId="JULIE Token Boundary Detector" posTag="41"/><types:Token xmi:id="1417" sofa="1" begin="45" end="47" componentId="JULIE Token Boundary Detector" posTag="44"/><types:Token xmi:id="1433" sofa="1" begin="48" end="55" componentId="JULIE Token Boundary Detector" posTag="47"/><types:Token xmi:id="1449" sofa="1" begin="56" end="60" componentId="JULIE Token Boundary Detector" posTag="50"/><types:Token xmi:id="1465" sofa="1" begin="61" end="64" componentId="JULIE Token Boundary Detector" posTag="53"/><types:Token xmi:id="1481" sofa="1" begin="64" end="65" componentId="JULIE Token Boundary Detector" posTag="56"/><types:Token xmi:id="1497" sofa="1" begin="65" end="69" componentId="JULIE Token Boundary Detector" posTag="59"/><types:Token xmi:id="1513" sofa="1" begin="70" end="79" componentId="JULIE Token Boundary Detector" posTag="62"/><types:Token xmi:id="1529" sofa="1" 
begin="80" end="81" componentId="JULIE Token Boundary Detector" posTag="65"/><types:Token xmi:id="1545" sofa="1" begin="81" end="84" componentId="JULIE Token Boundary Detector" posTag="68"/><types:Token xmi:id="1561" sofa="1" begin="84" end="85" componentId="JULIE Token Boundary Detector" posTag="71"/><types:Token xmi:id="1577" sofa="1" begin="86" end="88" componentId="JULIE Token Boundary Detector" posTag="74"/><types:Token xmi:id="1593" sofa="1" begin="89" end="92" componentId="JULIE Token Boundary Detector" posTag="77"/><types:Token xmi:id="1609" sofa="1" begin="93" end="95" componentId="JULIE Token Boundary Detector" posTag="80"/><types:Token xmi:id="1625" sofa="1" begin="96" end="98" componentId="JULIE Token Boundary Detector" posTag="83"/><types:Token xmi:id="1641" sofa="1" begin="99" end="108" componentId="JULIE Token Boundary Detector" posTag="86"/><types:Token xmi:id="1657" sofa="1" begin="109" end="113" componentId="JULIE Token Boundary Detector" posTag="89"/><types:Token xmi:id="1673" sofa="1" begin="114" end="116" componentId="JULIE Token Boundary Detector" posTag="92"/><types:Token xmi:id="1689" sofa="1" begin="117" end="119" componentId="JULIE Token Boundary Detector" posTag="95"/><types:Token xmi:id="1705" sofa="1" begin="120" end="124" componentId="JULIE Token Boundary Detector" posTag="98"/><types:Token xmi:id="1721" sofa="1" begin="125" end="127" componentId="JULIE Token Boundary Detector" posTag="101"/><types:Token xmi:id="1737" sofa="1" begin="128" end="139" componentId="JULIE Token Boundary Detector" posTag="104"/><types:Token xmi:id="1753" sofa="1" begin="140" end="148" componentId="JULIE Token Boundary Detector" posTag="107"/><types:Token xmi:id="1769" sofa="1" begin="149" end="152" componentId="JULIE Token Boundary Detector" posTag="110"/><types:Token xmi:id="1785" sofa="1" begin="153" end="156" componentId="JULIE Token Boundary Detector" posTag="113"/><types:Token xmi:id="1801" sofa="1" begin="157" end="164" componentId="JULIE Token 
Boundary Detector" posTag="116"/><types:Token xmi:id="1817" sofa="1" begin="165" end="174" componentId="JULIE Token Boundary Detector" posTag="119"/><types:Token xmi:id="1833" sofa="1" begin="175" end="177" componentId="JULIE Token Boundary Detector" posTag="122"/><types:Token xmi:id="1849" sofa="1" begin="178" end="183" componentId="JULIE Token Boundary Detector" posTag="125"/><types:Token xmi:id="1865" sofa="1" begin="184" end="200" componentId="JULIE Token Boundary Detector" posTag="128"/><types:Token xmi:id="1881" sofa="1" begin="201" end="206" componentId="JULIE Token Boundary Detector" posTag="131"/><types:Token xmi:id="1897" sofa="1" begin="207" end="216" componentId="JULIE Token Boundary Detector" posTag="134"/><types:Token xmi:id="1913" sofa="1" begin="216" end="217" componentId="JULIE Token Boundary Detector" posTag="137"/><types:Token xmi:id="1929" sofa="1" begin="218" end="225" componentId="JULIE Token Boundary Detector" posTag="140"/><types:Token xmi:id="1945" sofa="1" begin="226" end="227" componentId="JULIE Token Boundary Detector" posTag="143"/><types:Token xmi:id="1961" sofa="1" begin="228" end="233" componentId="JULIE Token Boundary Detector" posTag="146"/><types:Token xmi:id="1977" sofa="1" begin="234" end="237" componentId="JULIE Token Boundary Detector" posTag="149"/><types:Token xmi:id="1993" sofa="1" begin="238" end="246" componentId="JULIE Token Boundary Detector" posTag="152"/><types:Token xmi:id="2009" sofa="1" begin="247" end="251" componentId="JULIE Token Boundary Detector" posTag="155"/><types:Token xmi:id="2025" sofa="1" begin="252" end="253" componentId="JULIE Token Boundary Detector" posTag="158"/><types:Token xmi:id="2041" sofa="1" begin="253" end="257" componentId="JULIE Token Boundary Detector" posTag="161"/><types:Token xmi:id="2057" sofa="1" begin="257" end="258" componentId="JULIE Token Boundary Detector" posTag="164"/><types:Token xmi:id="2073" sofa="1" begin="259" end="262" componentId="JULIE Token Boundary Detector" 
posTag="167"/><types:Token xmi:id="2089" sofa="1" begin="263" end="265" componentId="JULIE Token Boundary Detector" posTag="170"/><types:Token xmi:id="2105" sofa="1" begin="266" end="268" componentId="JULIE Token Boundary Detector" posTag="173"/><types:Token xmi:id="2121" sofa="1" begin="268" end="269" componentId="JULIE Token Boundary Detector" posTag="176"/><types:Token xmi:id="2137" sofa="1" begin="270" end="275" componentId="JULIE Token Boundary Detector" posTag="179"/><types:Token xmi:id="2153" sofa="1" begin="276" end="282" componentId="JULIE Token Boundary Detector" posTag="182"/><types:Token xmi:id="2169" sofa="1" begin="283" end="293" componentId="JULIE Token Boundary Detector" posTag="185"/><types:Token xmi:id="2185" sofa="1" begin="294" end="304" componentId="JULIE Token Boundary Detector" posTag="188"/><types:Token xmi:id="2201" sofa="1" begin="305" end="306" componentId="JULIE Token Boundary Detector" posTag="191"/><types:Token xmi:id="2217" sofa="1" begin="306" end="309" componentId="JULIE Token Boundary Detector" posTag="194"/><types:Token xmi:id="2233" sofa="1" begin="309" end="310" componentId="JULIE Token Boundary Detector" posTag="197"/><types:Token xmi:id="2249" sofa="1" begin="311" end="325" componentId="JULIE Token Boundary Detector" posTag="200"/><types:Token xmi:id="2265" sofa="1" begin="326" end="330" componentId="JULIE Token Boundary Detector" posTag="203"/><types:Token xmi:id="2281" sofa="1" begin="331" end="339" componentId="JULIE Token Boundary Detector" posTag="206"/><types:Token xmi:id="2297" sofa="1" begin="340" end="346" componentId="JULIE Token Boundary Detector" posTag="209"/><types:Token xmi:id="2313" sofa="1" begin="346" end="347" componentId="JULIE Token Boundary Detector" posTag="212"/><types:Token xmi:id="2329" sofa="1" begin="347" end="359" componentId="JULIE Token Boundary Detector" posTag="215"/><types:Token xmi:id="2345" sofa="1" begin="360" end="362" componentId="JULIE Token Boundary Detector" posTag="218"/><types:Token 
xmi:id="2361" sofa="1" begin="363" end="370" componentId="JULIE Token Boundary Detector" posTag="221"/><types:Token xmi:id="2377" sofa="1" begin="370" end="371" componentId="JULIE Token Boundary Detector" posTag="224"/><types:Token xmi:id="2393" sofa="1" begin="372" end="376" componentId="JULIE Token Boundary Detector" posTag="227"/><types:Token xmi:id="2409" sofa="1" begin="377" end="387" componentId="JULIE Token Boundary Detector" posTag="230"/><types:Token xmi:id="2425" sofa="1" begin="387" end="388" componentId="JULIE Token Boundary Detector" posTag="233"/><types:Token xmi:id="2441" sofa="1" begin="389" end="392" componentId="JULIE Token Boundary Detector" posTag="236"/><types:Token xmi:id="2457" sofa="1" begin="393" end="395" componentId="JULIE Token Boundary Detector" posTag="239"/><types:Token xmi:id="2473" sofa="1" begin="395" end="396" componentId="JULIE Token Boundary Detector" posTag="242"/><types:Token xmi:id="2489" sofa="1" begin="396" end="399" componentId="JULIE Token Boundary Detector" posTag="245"/><types:Token xmi:id="2505" sofa="1" begin="399" end="400" componentId="JULIE Token Boundary Detector" posTag="248"/><types:Token xmi:id="2521" sofa="1" begin="400" end="406" componentId="JULIE Token Boundary Detector" posTag="251"/><types:Token xmi:id="2537" sofa="1" begin="406" end="407" componentId="JULIE Token Boundary Detector" posTag="254"/><types:Token xmi:id="2553" sofa="1" begin="408" end="417" componentId="JULIE Token Boundary Detector" posTag="257"/><types:Token xmi:id="2569" sofa="1" begin="418" end="433" componentId="JULIE Token Boundary Detector" posTag="260"/><types:Token xmi:id="2585" sofa="1" begin="434" end="439" componentId="JULIE Token Boundary Detector" posTag="263"/><types:Token xmi:id="2601" sofa="1" begin="440" end="448" componentId="JULIE Token Boundary Detector" posTag="266"/><types:Token xmi:id="2617" sofa="1" begin="449" end="452" componentId="JULIE Token Boundary Detector" posTag="269"/><types:Token xmi:id="2633" sofa="1" 
begin="453" end="459" componentId="JULIE Token Boundary Detector" posTag="272"/><types:Token xmi:id="2649" sofa="1" begin="459" end="460" componentId="JULIE Token Boundary Detector" posTag="275"/><types:Token xmi:id="2665" sofa="1" begin="460" end="465" componentId="JULIE Token Boundary Detector" posTag="278"/><types:Token xmi:id="2681" sofa="1" begin="466" end="472" componentId="JULIE Token Boundary Detector" posTag="281"/><types:Token xmi:id="2697" sofa="1" begin="473" end="476" componentId="JULIE Token Boundary Detector" posTag="284"/><types:Token xmi:id="2713" sofa="1" begin="477" end="491" componentId="JULIE Token Boundary Detector" posTag="287"/><types:Token xmi:id="2729" sofa="1" begin="491" end="492" componentId="JULIE Token Boundary Detector" posTag="290"/><types:Token xmi:id="2745" sofa="1" begin="493" end="499" componentId="JULIE Token Boundary Detector" posTag="293"/><types:Token xmi:id="2761" sofa="1" begin="499" end="500" componentId="JULIE Token Boundary Detector" posTag="296"/><types:Token xmi:id="2777" sofa="1" begin="501" end="504" componentId="JULIE Token Boundary Detector" posTag="299"/><types:Token xmi:id="2793" sofa="1" begin="505" end="517" componentId="JULIE Token Boundary Detector" posTag="302"/><types:Token xmi:id="2809" sofa="1" begin="518" end="520" componentId="JULIE Token Boundary Detector" posTag="305"/><types:Token xmi:id="2825" sofa="1" begin="521" end="524" componentId="JULIE Token Boundary Detector" posTag="308"/><types:Token xmi:id="2841" sofa="1" begin="525" end="527" componentId="JULIE Token Boundary Detector" posTag="311"/><types:Token xmi:id="2857" sofa="1" begin="528" end="529" componentId="JULIE Token Boundary Detector" posTag="314"/><types:Token xmi:id="2873" sofa="1" begin="529" end="530" componentId="JULIE Token Boundary Detector" posTag="317"/><types:Token xmi:id="2889" sofa="1" begin="530" end="533" componentId="JULIE Token Boundary Detector" posTag="320"/><types:Token xmi:id="2905" sofa="1" begin="534" end="536" 
componentId="JULIE Token Boundary Detector" posTag="323"/><types:Token xmi:id="2921" sofa="1" begin="537" end="539" componentId="JULIE Token Boundary Detector" posTag="326"/><types:Token xmi:id="2937" sofa="1" begin="540" end="547" componentId="JULIE Token Boundary Detector" posTag="329"/><types:Token xmi:id="2953" sofa="1" begin="548" end="552" componentId="JULIE Token Boundary Detector" posTag="332"/><types:Token xmi:id="2969" sofa="1" begin="553" end="559" componentId="JULIE Token Boundary Detector" posTag="335"/><types:Token xmi:id="2985" sofa="1" begin="560" end="563" componentId="JULIE Token Boundary Detector" posTag="338"/><types:Token xmi:id="3001" sofa="1" begin="564" end="566" componentId="JULIE Token Boundary Detector" posTag="341"/><types:Token xmi:id="3017" sofa="1" begin="567" end="569" componentId="JULIE Token Boundary Detector" posTag="344"/><types:Token xmi:id="3033" sofa="1" begin="570" end="573" componentId="JULIE Token Boundary Detector" posTag="347"/><types:Token xmi:id="3049" sofa="1" begin="574" end="576" componentId="JULIE Token Boundary Detector" posTag="350"/><types:Token xmi:id="3065" sofa="1" begin="577" end="579" componentId="JULIE Token Boundary Detector" posTag="353"/><types:Token xmi:id="3081" sofa="1" begin="580" end="583" componentId="JULIE Token Boundary Detector" posTag="356"/><types:Token xmi:id="3097" sofa="1" begin="584" end="586" componentId="JULIE Token Boundary Detector" posTag="359"/><types:Token xmi:id="3113" sofa="1" begin="587" end="589" componentId="JULIE Token Boundary Detector" posTag="362"/><types:Token xmi:id="3129" sofa="1" begin="590" end="592" componentId="JULIE Token Boundary Detector" posTag="365"/><types:Token xmi:id="3145" sofa="1" begin="593" end="600" componentId="JULIE Token Boundary Detector" posTag="368"/><types:Token xmi:id="3161" sofa="1" begin="601" end="611" componentId="JULIE Token Boundary Detector" posTag="371"/><types:Token xmi:id="3177" sofa="1" begin="611" end="612" componentId="JULIE Token 
Boundary Detector" posTag="374"/><types:Token xmi:id="3193" sofa="1" begin="613" end="622" componentId="JULIE Token Boundary Detector" posTag="377"/><types:Token xmi:id="3209" sofa="1" begin="623" end="628" componentId="JULIE Token Boundary Detector" posTag="380"/><types:Token xmi:id="3225" sofa="1" begin="628" end="629" componentId="JULIE Token Boundary Detector" posTag="383"/><types:Token xmi:id="3241" sofa="1" begin="629" end="635" componentId="JULIE Token Boundary Detector" posTag="386"/><types:Token xmi:id="3257" sofa="1" begin="636" end="637" componentId="JULIE Token Boundary Detector" posTag="389"/><types:Token xmi:id="3273" sofa="1" begin="637" end="640" componentId="JULIE Token Boundary Detector" posTag="392"/><types:Token xmi:id="3289" sofa="1" begin="640" end="641" componentId="JULIE Token Boundary Detector" posTag="395"/><types:Token xmi:id="3305" sofa="1" begin="642" end="646" componentId="JULIE Token Boundary Detector" posTag="398"/><types:Token xmi:id="3321" sofa="1" begin="647" end="653" componentId="JULIE Token Boundary Detector" posTag="401"/><types:Token xmi:id="3337" sofa="1" begin="654" end="657" componentId="JULIE Token Boundary Detector" posTag="404"/><types:Token xmi:id="3353" sofa="1" begin="658" end="661" componentId="JULIE Token Boundary Detector" posTag="407"/><types:Token xmi:id="3369" sofa="1" begin="662" end="672" componentId="JULIE Token Boundary Detector" posTag="410"/><types:Token xmi:id="3385" sofa="1" begin="673" end="675" componentId="JULIE Token Boundary Detector" posTag="413"/><types:Token xmi:id="3401" sofa="1" begin="675" end="676" componentId="JULIE Token Boundary Detector" posTag="416"/><types:Token xmi:id="3417" sofa="1" begin="677" end="687" componentId="JULIE Token Boundary Detector" posTag="419"/><types:Token xmi:id="3433" sofa="1" begin="688" end="697" componentId="JULIE Token Boundary Detector" posTag="422"/><types:Token xmi:id="3449" sofa="1" begin="698" end="699" componentId="JULIE Token Boundary Detector" 
posTag="425"/><types:Token xmi:id="3465" sofa="1" begin="699" end="702" componentId="JULIE Token Boundary Detector" posTag="428"/><types:Token xmi:id="3481" sofa="1" begin="702" end="703" componentId="JULIE Token Boundary Detector" posTag="431"/><types:Token xmi:id="3497" sofa="1" begin="704" end="708" componentId="JULIE Token Boundary Detector" posTag="434"/><types:Token xmi:id="3513" sofa="1" begin="709" end="718" componentId="JULIE Token Boundary Detector" posTag="437"/><types:Token xmi:id="3529" sofa="1" begin="719" end="722" componentId="JULIE Token Boundary Detector" posTag="440"/><types:Token xmi:id="3545" sofa="1" begin="723" end="729" componentId="JULIE Token Boundary Detector" posTag="443"/><types:Token xmi:id="3561" sofa="1" begin="730" end="733" componentId="JULIE Token Boundary Detector" posTag="446"/><types:Token xmi:id="3577" sofa="1" begin="734" end="741" componentId="JULIE Token Boundary Detector" posTag="449"/><types:Token xmi:id="3593" sofa="1" begin="742" end="748" componentId="JULIE Token Boundary Detector" posTag="452"/><types:Token xmi:id="3609" sofa="1" begin="749" end="763" componentId="JULIE Token Boundary Detector" posTag="455"/><types:Token xmi:id="3625" sofa="1" begin="764" end="765" componentId="JULIE Token Boundary Detector" posTag="458"/><types:Token xmi:id="3641" sofa="1" begin="765" end="769" componentId="JULIE Token Boundary Detector" posTag="461"/><types:Token xmi:id="3657" sofa="1" begin="769" end="770" componentId="JULIE Token Boundary Detector" posTag="464"/><types:Token xmi:id="3673" sofa="1" begin="770" end="771" componentId="JULIE Token Boundary Detector" posTag="467"/><types:Token xmi:id="3689" sofa="1" begin="772" end="775" componentId="JULIE Token Boundary Detector" posTag="470"/><types:Token xmi:id="3705" sofa="1" begin="776" end="780" componentId="JULIE Token Boundary Detector" posTag="473"/><types:Token xmi:id="3721" sofa="1" begin="781" end="786" componentId="JULIE Token Boundary Detector" posTag="476"/><types:Token 
xmi:id="3737" sofa="1" begin="787" end="790" componentId="JULIE Token Boundary Detector" posTag="479"/><types:Token xmi:id="3753" sofa="1" begin="791" end="797" componentId="JULIE Token Boundary Detector" posTag="482"/><types:Token xmi:id="3769" sofa="1" begin="798" end="811" componentId="JULIE Token Boundary Detector" posTag="485"/><types:Token xmi:id="3785" sofa="1" begin="811" end="812" componentId="JULIE Token Boundary Detector" posTag="488"/><types:Token xmi:id="3801" sofa="1" begin="812" end="816" componentId="JULIE Token Boundary Detector" posTag="491"/><types:Token xmi:id="3817" sofa="1" begin="817" end="822" componentId="JULIE Token Boundary Detector" posTag="494"/><types:Token xmi:id="3833" sofa="1" begin="823" end="827" componentId="JULIE Token Boundary Detector" posTag="497"/><types:Token xmi:id="3849" sofa="1" begin="828" end="831" componentId="JULIE Token Boundary Detector" posTag="500"/><types:Token xmi:id="3865" sofa="1" begin="832" end="838" componentId="JULIE Token Boundary Detector" posTag="503"/><types:Token xmi:id="3881" sofa="1" begin="839" end="845" componentId="JULIE Token Boundary Detector" posTag="506"/><types:Token xmi:id="3897" sofa="1" begin="846" end="847" componentId="JULIE Token Boundary Detector" posTag="509"/><types:Token xmi:id="3913" sofa="1" begin="847" end="851" componentId="JULIE Token Boundary Detector" posTag="512"/><types:Token xmi:id="3929" sofa="1" begin="851" end="852" componentId="JULIE Token Boundary Detector" posTag="515"/><types:Token xmi:id="3945" sofa="1" begin="852" end="855" componentId="JULIE Token Boundary Detector" posTag="518"/><types:Token xmi:id="3961" sofa="1" begin="855" end="856" componentId="JULIE Token Boundary Detector" posTag="521"/><types:Token xmi:id="3977" sofa="1" begin="856" end="857" componentId="JULIE Token Boundary Detector" posTag="524"/><types:Token xmi:id="3993" sofa="1" begin="858" end="861" componentId="JULIE Token Boundary Detector" posTag="527"/><types:Token xmi:id="4009" sofa="1" 
begin="862" end="868" componentId="JULIE Token Boundary Detector" posTag="530"/><types:Token xmi:id="4025" sofa="1" begin="869" end="883" componentId="JULIE Token Boundary Detector" posTag="533"/><types:Token xmi:id="4041" sofa="1" begin="884" end="885" componentId="JULIE Token Boundary Detector" posTag="536"/><types:Token xmi:id="4057" sofa="1" begin="885" end="889" componentId="JULIE Token Boundary Detector" posTag="539"/><types:Token xmi:id="4073" sofa="1" begin="889" end="890" componentId="JULIE Token Boundary Detector" posTag="542"/><types:Token xmi:id="4089" sofa="1" begin="891" end="897" componentId="JULIE Token Boundary Detector" posTag="545"/><types:Token xmi:id="4105" sofa="1" begin="898" end="902" componentId="JULIE Token Boundary Detector" posTag="548"/><types:Token xmi:id="4121" sofa="1" begin="903" end="909" componentId="JULIE Token Boundary Detector" posTag="551"/><types:Token xmi:id="4137" sofa="1" begin="910" end="916" componentId="JULIE Token Boundary Detector" posTag="554"/><types:Token xmi:id="4153" sofa="1" begin="916" end="917" componentId="JULIE Token Boundary Detector" posTag="557"/><types:Token xmi:id="4169" sofa="1" begin="918" end="929" componentId="JULIE Token Boundary Detector" posTag="560"/><types:Token xmi:id="4185" sofa="1" begin="930" end="937" componentId="JULIE Token Boundary Detector" posTag="563"/><types:Token xmi:id="4201" sofa="1" begin="938" end="946" componentId="JULIE Token Boundary Detector" posTag="566"/><types:Token xmi:id="4217" sofa="1" begin="947" end="948" componentId="JULIE Token Boundary Detector" posTag="569"/><types:Token xmi:id="4233" sofa="1" begin="948" end="950" componentId="JULIE Token Boundary Detector" posTag="572"/><types:Token xmi:id="4249" sofa="1" begin="950" end="951" componentId="JULIE Token Boundary Detector" posTag="575"/><types:Token xmi:id="4265" sofa="1" begin="952" end="955" componentId="JULIE Token Boundary Detector" posTag="578"/><types:Token xmi:id="4281" sofa="1" begin="956" end="958" 
componentId="JULIE Token Boundary Detector" posTag="581"/><types:Token xmi:id="4297" sofa="1" begin="959" end="962" componentId="JULIE Token Boundary Detector" posTag="584"/><types:Token xmi:id="4313" sofa="1" begin="963" end="967" componentId="JULIE Token Boundary Detector" posTag="587"/><types:Token xmi:id="4329" sofa="1" begin="968" end="974" componentId="JULIE Token Boundary Detector" posTag="590"/><types:Token xmi:id="4345" sofa="1" begin="974" end="975" componentId="JULIE Token Boundary Detector" posTag="593"/><types:Token xmi:id="4361" sofa="1" begin="976" end="980" componentId="JULIE Token Boundary Detector" posTag="596"/><types:Token xmi:id="4377" sofa="1" begin="981" end="983" componentId="JULIE Token Boundary Detector" posTag="599"/><types:Token xmi:id="4393" sofa="1" begin="984" end="988" componentId="JULIE Token Boundary Detector" posTag="602"/><types:Token xmi:id="4409" sofa="1" begin="988" end="989" componentId="JULIE Token Boundary Detector" posTag="605"/><types:Token xmi:id="4425" sofa="1" begin="990" end="993" componentId="JULIE Token Boundary Detector" posTag="608"/><types:Token xmi:id="4441" sofa="1" begin="994" end="1002" componentId="JULIE Token Boundary Detector" posTag="611"/><types:Token xmi:id="4457" sofa="1" begin="1003" end="1006" componentId="JULIE Token Boundary Detector" posTag="614"/><types:Token xmi:id="4473" sofa="1" begin="1007" end="1010" componentId="JULIE Token Boundary Detector" posTag="617"/><types:Token xmi:id="4489" sofa="1" begin="1011" end="1017" componentId="JULIE Token Boundary Detector" posTag="620"/><types:Token xmi:id="4505" sofa="1" begin="1018" end="1021" componentId="JULIE Token Boundary Detector" posTag="623"/><types:Token xmi:id="4521" sofa="1" begin="1022" end="1026" componentId="JULIE Token Boundary Detector" posTag="626"/><types:Token xmi:id="4537" sofa="1" begin="1026" end="1027" componentId="JULIE Token Boundary Detector" posTag="629"/><types:Token xmi:id="4553" sofa="1" begin="1027" end="1030" 
componentId="JULIE Token Boundary Detector" posTag="632"/><types:Token xmi:id="4569" sofa="1" begin="1031" end="1032" componentId="JULIE Token Boundary Detector" posTag="635"/><types:Token xmi:id="4585" sofa="1" begin="1032" end="1035" componentId="JULIE Token Boundary Detector" posTag="638"/><types:Token xmi:id="4601" sofa="1" begin="1036" end="1040" componentId="JULIE Token Boundary Detector" posTag="641"/><types:Token xmi:id="4617" sofa="1" begin="1041" end="1046" componentId="JULIE Token Boundary Detector" posTag="644"/><types:Token xmi:id="4633" sofa="1" begin="1046" end="1047" componentId="JULIE Token Boundary Detector" posTag="647"/><types:Token xmi:id="4649" sofa="1" begin="1048" end="1052" componentId="JULIE Token Boundary Detector" posTag="650"/><types:Token xmi:id="4665" sofa="1" begin="1053" end="1054" componentId="JULIE Token Boundary Detector" posTag="653"/><types:Token xmi:id="4681" sofa="1" begin="1054" end="1056" componentId="JULIE Token Boundary Detector" posTag="656"/><types:Token xmi:id="4697" sofa="1" begin="1056" end="1057" componentId="JULIE Token Boundary Detector" posTag="659"/><types:Token xmi:id="4713" sofa="1" begin="1058" end="1060" componentId="JULIE Token Boundary Detector" posTag="662"/><types:Token xmi:id="4729" sofa="1" begin="1060" end="1061" componentId="JULIE Token Boundary Detector" posTag="665"/><types:Token xmi:id="4745" sofa="1" begin="1062" end="1066" componentId="JULIE Token Boundary Detector" posTag="668"/><types:Token xmi:id="4761" sofa="1" begin="1067" end="1069" componentId="JULIE Token Boundary Detector" posTag="671"/><types:Token xmi:id="4777" sofa="1" begin="1070" end="1074" componentId="JULIE Token Boundary Detector" posTag="674"/><types:Token xmi:id="4793" sofa="1" begin="1074" end="1075" componentId="JULIE Token Boundary Detector" posTag="677"/><types:Token xmi:id="4809" sofa="1" begin="1075" end="1076" componentId="JULIE Token Boundary Detector" posTag="680"/><types:Token xmi:id="4825" sofa="1" begin="1077" 
end="1080" componentId="JULIE Token Boundary Detector" posTag="683"/><types:Token xmi:id="4841" sofa="1" begin="1081" end="1085" componentId="JULIE Token Boundary Detector" posTag="686"/><types:Token xmi:id="4857" sofa="1" begin="1086" end="1087" componentId="JULIE Token Boundary Detector" posTag="689"/><types:Token xmi:id="4873" sofa="1" begin="1087" end="1091" componentId="JULIE Token Boundary Detector" posTag="692"/><types:Token xmi:id="4889" sofa="1" begin="1092" end="1093" componentId="JULIE Token Boundary Detector" posTag="695"/><types:Token xmi:id="4905" sofa="1" begin="1093" end="1095" componentId="JULIE Token Boundary Detector" posTag="698"/><types:Token xmi:id="4921" sofa="1" begin="1095" end="1096" componentId="JULIE Token Boundary Detector" posTag="701"/><types:Token xmi:id="4937" sofa="1" begin="1097" end="1099" componentId="JULIE Token Boundary Detector" posTag="704"/><types:Token xmi:id="4953" sofa="1" begin="1099" end="1100" componentId="JULIE Token Boundary Detector" posTag="707"/><types:Token xmi:id="4969" sofa="1" begin="1101" end="1105" componentId="JULIE Token Boundary Detector" posTag="710"/><types:Token xmi:id="4985" sofa="1" begin="1106" end="1108" componentId="JULIE Token Boundary Detector" posTag="713"/><types:Token xmi:id="5001" sofa="1" begin="1109" end="1113" componentId="JULIE Token Boundary Detector" posTag="716"/><types:Token xmi:id="5017" sofa="1" begin="1113" end="1114" componentId="JULIE Token Boundary Detector" posTag="719"/><types:Token xmi:id="5033" sofa="1" begin="1114" end="1115" componentId="JULIE Token Boundary Detector" posTag="722"/><types:Token xmi:id="5049" sofa="1" begin="1115" end="1116" componentId="JULIE Token Boundary Detector" posTag="725"/><types:Token xmi:id="5065" sofa="1" begin="1117" end="1120" componentId="JULIE Token Boundary Detector" posTag="728"/><types:Token xmi:id="5081" sofa="1" begin="1121" end="1124" componentId="JULIE Token Boundary Detector" posTag="731"/><types:Token xmi:id="5097" sofa="1" 
begin="1125" end="1129" componentId="JULIE Token Boundary Detector" posTag="734"/><types:Token xmi:id="5113" sofa="1" begin="1130" end="1133" componentId="JULIE Token Boundary Detector" posTag="737"/><types:Token xmi:id="5129" sofa="1" begin="1134" end="1136" componentId="JULIE Token Boundary Detector" posTag="740"/><types:Token xmi:id="5145" sofa="1" begin="1136" end="1137" componentId="JULIE Token Boundary Detector" posTag="743"/><types:Token xmi:id="5161" sofa="1" begin="1138" end="1143" componentId="JULIE Token Boundary Detector" posTag="746"/><types:Token xmi:id="5177" sofa="1" begin="1144" end="1148" componentId="JULIE Token Boundary Detector" posTag="749"/><types:Token xmi:id="5193" sofa="1" begin="1149" end="1152" componentId="JULIE Token Boundary Detector" posTag="752"/><types:Token xmi:id="5209" sofa="1" begin="1153" end="1155" componentId="JULIE Token Boundary Detector" posTag="755"/><types:Token xmi:id="5225" sofa="1" begin="1156" end="1159" componentId="JULIE Token Boundary Detector" posTag="758"/><types:Token xmi:id="5241" sofa="1" begin="1160" end="1162" componentId="JULIE Token Boundary Detector" posTag="761"/><types:Token xmi:id="5257" sofa="1" begin="1163" end="1165" componentId="JULIE Token Boundary Detector" posTag="764"/><types:Token xmi:id="5273" sofa="1" begin="1166" end="1170" componentId="JULIE Token Boundary Detector" posTag="767"/><types:Token xmi:id="5289" sofa="1" begin="1171" end="1175" componentId="JULIE Token Boundary Detector" posTag="770"/><types:Token xmi:id="5305" sofa="1" begin="1176" end="1179" componentId="JULIE Token Boundary Detector" posTag="773"/><types:Token xmi:id="5321" sofa="1" begin="1180" end="1182" componentId="JULIE Token Boundary Detector" posTag="776"/><types:Token xmi:id="5337" sofa="1" begin="1183" end="1186" componentId="JULIE Token Boundary Detector" posTag="779"/><types:Token xmi:id="5353" sofa="1" begin="1187" end="1189" componentId="JULIE Token Boundary Detector" posTag="782"/><types:Token xmi:id="5369" 
sofa="1" begin="1190" end="1192" componentId="JULIE Token Boundary Detector" posTag="785"/><types:Token xmi:id="5385" sofa="1" begin="1193" end="1194" componentId="JULIE Token Boundary Detector" posTag="788"/><types:Token xmi:id="5401" sofa="1" begin="1194" end="1197" componentId="JULIE Token Boundary Detector" posTag="791"/><types:Token xmi:id="5417" sofa="1" begin="1198" end="1202" componentId="JULIE Token Boundary Detector" posTag="794"/><types:Token xmi:id="5433" sofa="1" begin="1203" end="1208" componentId="JULIE Token Boundary Detector" posTag="797"/><types:Token xmi:id="5449" sofa="1" begin="1208" end="1209" componentId="JULIE Token Boundary Detector" posTag="800"/><types:Token xmi:id="5465" sofa="1" begin="1210" end="1214" componentId="JULIE Token Boundary Detector" posTag="803"/><types:Token xmi:id="5481" sofa="1" begin="1215" end="1216" componentId="JULIE Token Boundary Detector" posTag="806"/><types:Token xmi:id="5497" sofa="1" begin="1216" end="1218" componentId="JULIE Token Boundary Detector" posTag="809"/><types:Token xmi:id="5513" sofa="1" begin="1218" end="1219" componentId="JULIE Token Boundary Detector" posTag="812"/><types:Token xmi:id="5529" sofa="1" begin="1220" end="1222" componentId="JULIE Token Boundary Detector" posTag="815"/><types:Token xmi:id="5545" sofa="1" begin="1222" end="1223" componentId="JULIE Token Boundary Detector" posTag="818"/><types:Token xmi:id="5561" sofa="1" begin="1224" end="1228" componentId="JULIE Token Boundary Detector" posTag="821"/><types:Token xmi:id="5577" sofa="1" begin="1229" end="1231" componentId="JULIE Token Boundary Detector" posTag="824"/><types:Token xmi:id="5593" sofa="1" begin="1232" end="1236" componentId="JULIE Token Boundary Detector" posTag="827"/><types:Token xmi:id="5609" sofa="1" begin="1236" end="1237" componentId="JULIE Token Boundary Detector" posTag="830"/><types:Token xmi:id="5625" sofa="1" begin="1237" end="1238" componentId="JULIE Token Boundary Detector" posTag="833"/><types:Token 
xmi:id="5641" sofa="1" begin="1239" end="1242" componentId="JULIE Token Boundary Detector" posTag="836"/><types:Token xmi:id="5657" sofa="1" begin="1243" end="1251" componentId="JULIE Token Boundary Detector" posTag="839"/><types:Token xmi:id="5673" sofa="1" begin="1252" end="1259" componentId="JULIE Token Boundary Detector" posTag="842"/><types:Token xmi:id="5689" sofa="1" begin="1260" end="1266" componentId="JULIE Token Boundary Detector" posTag="845"/><types:Token xmi:id="5705" sofa="1" begin="1267" end="1271" componentId="JULIE Token Boundary Detector" posTag="848"/><types:Token xmi:id="5721" sofa="1" begin="1272" end="1275" componentId="JULIE Token Boundary Detector" posTag="851"/><types:Token xmi:id="5737" sofa="1" begin="1276" end="1283" componentId="JULIE Token Boundary Detector" posTag="854"/><types:Token xmi:id="5753" sofa="1" begin="1283" end="1284" componentId="JULIE Token Boundary Detector" posTag="857"/><types:Token xmi:id="5769" sofa="1" begin="1284" end="1293" componentId="JULIE Token Boundary Detector" posTag="860"/><types:Token xmi:id="5785" sofa="1" begin="1294" end="1296" componentId="JULIE Token Boundary Detector" posTag="863"/><types:Token xmi:id="5801" sofa="1" begin="1296" end="1297" componentId="JULIE Token Boundary Detector" posTag="866"/><types:Token xmi:id="5817" sofa="1" begin="1298" end="1308" componentId="JULIE Token Boundary Detector" posTag="869"/><types:Token xmi:id="5833" sofa="1" begin="1309" end="1322" componentId="JULIE Token Boundary Detector" posTag="872"/><types:Token xmi:id="5849" sofa="1" begin="1323" end="1326" componentId="JULIE Token Boundary Detector" posTag="875"/><types:Token xmi:id="5865" sofa="1" begin="1327" end="1331" componentId="JULIE Token Boundary Detector" posTag="878"/><types:Token xmi:id="5881" sofa="1" begin="1331" end="1332" componentId="JULIE Token Boundary Detector" posTag="881"/><types:Token xmi:id="5897" sofa="1" begin="1332" end="1336" componentId="JULIE Token Boundary Detector" 
posTag="884"/><types:Token xmi:id="5913" sofa="1" begin="1337" end="1342" componentId="JULIE Token Boundary Detector" posTag="887"/><types:Token xmi:id="5929" sofa="1" begin="1342" end="1343" componentId="JULIE Token Boundary Detector" posTag="890"/><types:Token xmi:id="5945" sofa="1" begin="1344" end="1348" componentId="JULIE Token Boundary Detector" posTag="893"/><types:Token xmi:id="5961" sofa="1" begin="1349" end="1352" componentId="JULIE Token Boundary Detector" posTag="896"/><types:Token xmi:id="5977" sofa="1" begin="1353" end="1359" componentId="JULIE Token Boundary Detector" posTag="899"/><types:Token xmi:id="5993" sofa="1" begin="1360" end="1363" componentId="JULIE Token Boundary Detector" posTag="902"/><types:Token xmi:id="6009" sofa="1" begin="1364" end="1368" componentId="JULIE Token Boundary Detector" posTag="905"/><types:Token xmi:id="6025" sofa="1" begin="1369" end="1377" componentId="JULIE Token Boundary Detector" posTag="908"/><types:Token xmi:id="6041" sofa="1" begin="1378" end="1384" componentId="JULIE Token Boundary Detector" posTag="911"/><types:Token xmi:id="6057" sofa="1" begin="1385" end="1388" componentId="JULIE Token Boundary Detector" posTag="914"/><types:Token xmi:id="6073" sofa="1" begin="1389" end="1392" componentId="JULIE Token Boundary Detector" posTag="917"/><types:Token xmi:id="6089" sofa="1" begin="1392" end="1393" componentId="JULIE Token Boundary Detector" posTag="920"/><types:Token xmi:id="6105" sofa="1" begin="1393" end="1395" componentId="JULIE Token Boundary Detector" posTag="923"/><types:Token xmi:id="6121" sofa="1" begin="1396" end="1398" componentId="JULIE Token Boundary Detector" posTag="926"/><types:Token xmi:id="6137" sofa="1" begin="1399" end="1405" componentId="JULIE Token Boundary Detector" posTag="929"/><types:Token xmi:id="6153" sofa="1" begin="1406" end="1411" componentId="JULIE Token Boundary Detector" posTag="932"/><types:Token xmi:id="6169" sofa="1" begin="1412" end="1418" componentId="JULIE Token Boundary 
Detector" posTag="935"/><types:Token xmi:id="6185" sofa="1" begin="1419" end="1428" componentId="JULIE Token Boundary Detector" posTag="938"/><types:Token xmi:id="6201" sofa="1" begin="1429" end="1435" componentId="JULIE Token Boundary Detector" posTag="941"/><types:Token xmi:id="6217" sofa="1" begin="1435" end="1436" componentId="JULIE Token Boundary Detector" posTag="944"/><types:Token xmi:id="6233" sofa="1" begin="1437" end="1440" componentId="JULIE Token Boundary Detector" posTag="947"/><types:Token xmi:id="6249" sofa="1" begin="1441" end="1444" componentId="JULIE Token Boundary Detector" posTag="950"/><types:Token xmi:id="6265" sofa="1" begin="1445" end="1449" componentId="JULIE Token Boundary Detector" posTag="953"/><types:Token xmi:id="6281" sofa="1" begin="1450" end="1453" componentId="JULIE Token Boundary Detector" posTag="956"/><types:Token xmi:id="6297" sofa="1" begin="1454" end="1458" componentId="JULIE Token Boundary Detector" posTag="959"/><types:Token xmi:id="6313" sofa="1" begin="1459" end="1462" componentId="JULIE Token Boundary Detector" posTag="962"/><types:Token xmi:id="6329" sofa="1" begin="1463" end="1466" componentId="JULIE Token Boundary Detector" posTag="965"/><types:Token xmi:id="6345" sofa="1" begin="1467" end="1472" componentId="JULIE Token Boundary Detector" posTag="968"/><types:Token xmi:id="6361" sofa="1" begin="1473" end="1479" componentId="JULIE Token Boundary Detector" posTag="971"/><types:Token xmi:id="6377" sofa="1" begin="1480" end="1484" componentId="JULIE Token Boundary Detector" posTag="974"/><types:Token xmi:id="6393" sofa="1" begin="1485" end="1488" componentId="JULIE Token Boundary Detector" posTag="977"/><types:Token xmi:id="6409" sofa="1" begin="1489" end="1499" componentId="JULIE Token Boundary Detector" posTag="980"/><types:Token xmi:id="6425" sofa="1" begin="1500" end="1504" componentId="JULIE Token Boundary Detector" posTag="983"/><types:Token xmi:id="6441" sofa="1" begin="1505" end="1508" componentId="JULIE Token 
Boundary Detector" posTag="986"/><types:Token xmi:id="6457" sofa="1" begin="1509" end="1518" componentId="JULIE Token Boundary Detector" posTag="989"/><types:Token xmi:id="6473" sofa="1" begin="1519" end="1522" componentId="JULIE Token Boundary Detector" posTag="992"/><types:Token xmi:id="6489" sofa="1" begin="1523" end="1525" componentId="JULIE Token Boundary Detector" posTag="995"/><types:Token xmi:id="6505" sofa="1" begin="1526" end="1531" componentId="JULIE Token Boundary Detector" posTag="998"/><types:Token xmi:id="6521" sofa="1" begin="1532" end="1534" componentId="JULIE Token Boundary Detector" posTag="1001"/><types:Token xmi:id="6537" sofa="1" begin="1535" end="1540" componentId="JULIE Token Boundary Detector" posTag="1004"/><types:Token xmi:id="6553" sofa="1" begin="1541" end="1546" componentId="JULIE Token Boundary Detector" posTag="1007"/><types:Token xmi:id="6569" sofa="1" begin="1546" end="1547" componentId="JULIE Token Boundary Detector" posTag="1010"/><types:Token xmi:id="6585" sofa="1" begin="1548" end="1553" componentId="JULIE Token Boundary Detector" posTag="1013"/><types:Token xmi:id="6601" sofa="1" begin="1554" end="1562" componentId="JULIE Token Boundary Detector" posTag="1016"/><types:Token xmi:id="6617" sofa="1" begin="1563" end="1570" componentId="JULIE Token Boundary Detector" posTag="1019"/><types:Token xmi:id="6633" sofa="1" begin="1571" end="1575" componentId="JULIE Token Boundary Detector" posTag="1022"/><types:Token xmi:id="6649" sofa="1" begin="1576" end="1582" componentId="JULIE Token Boundary Detector" posTag="1025"/><types:Token xmi:id="6665" sofa="1" begin="1583" end="1586" componentId="JULIE Token Boundary Detector" posTag="1028"/><types:Token xmi:id="6681" sofa="1" begin="1587" end="1594" componentId="JULIE Token Boundary Detector" posTag="1031"/><types:Token xmi:id="6697" sofa="1" begin="1595" end="1604" componentId="JULIE Token Boundary Detector" posTag="1034"/><types:Token xmi:id="6713" sofa="1" begin="1605" end="1607" 
componentId="JULIE Token Boundary Detector" posTag="1037"/><types:Token xmi:id="6729" sofa="1" begin="1608" end="1620" componentId="JULIE Token Boundary Detector" posTag="1040"/><types:Token xmi:id="6745" sofa="1" begin="1621" end="1627" componentId="JULIE Token Boundary Detector" posTag="1043"/><types:Token xmi:id="6761" sofa="1" begin="1628" end="1632" componentId="JULIE Token Boundary Detector" posTag="1046"/><types:Token xmi:id="6777" sofa="1" begin="1633" end="1641" componentId="JULIE Token Boundary Detector" posTag="1049"/><types:Token xmi:id="6793" sofa="1" begin="1642" end="1646" componentId="JULIE Token Boundary Detector" posTag="1052"/><types:Token xmi:id="6809" sofa="1" begin="1647" end="1650" componentId="JULIE Token Boundary Detector" posTag="1055"/><types:Token xmi:id="6825" sofa="1" begin="1651" end="1654" componentId="JULIE Token Boundary Detector" posTag="1058"/><types:Token xmi:id="6841" sofa="1" begin="1655" end="1658" componentId="JULIE Token Boundary Detector" posTag="1061"/><types:Token xmi:id="6857" sofa="1" begin="1658" end="1659" componentId="JULIE Token Boundary Detector" posTag="1064"/><types:Token xmi:id="6873" sofa="1" begin="1659" end="1661" componentId="JULIE Token Boundary Detector" posTag="1067"/><types:Token xmi:id="6889" sofa="1" begin="1662" end="1664" componentId="JULIE Token Boundary Detector" posTag="1070"/><types:Token xmi:id="6905" sofa="1" begin="1665" end="1672" componentId="JULIE Token Boundary Detector" posTag="1073"/><types:Token xmi:id="6921" sofa="1" begin="1672" end="1673" componentId="JULIE Token Boundary Detector" posTag="1076"/><types:Token xmi:id="6937" sofa="1" begin="1674" end="1682" componentId="JULIE Token Boundary Detector" posTag="1079"/><types:Token xmi:id="6953" sofa="1" begin="1683" end="1689" componentId="JULIE Token Boundary Detector" posTag="1082"/><types:Token xmi:id="6969" sofa="1" begin="1690" end="1700" componentId="JULIE Token Boundary Detector" posTag="1085"/><types:Token xmi:id="6985" sofa="1" 
begin="1701" end="1704" componentId="JULIE Token Boundary Detector" posTag="1088"/><types:Token xmi:id="7001" sofa="1" begin="1705" end="1713" componentId="JULIE Token Boundary Detector" posTag="1091"/><types:Token xmi:id="7017" sofa="1" begin="1714" end="1717" componentId="JULIE Token Boundary Detector" posTag="1094"/><types:Token xmi:id="7033" sofa="1" begin="1718" end="1724" componentId="JULIE Token Boundary Detector" posTag="1097"/><types:Token xmi:id="7049" sofa="1" begin="1725" end="1727" componentId="JULIE Token Boundary Detector" posTag="1100"/><types:Token xmi:id="7065" sofa="1" begin="1728" end="1731" componentId="JULIE Token Boundary Detector" posTag="1103"/><types:Token xmi:id="7081" sofa="1" begin="1732" end="1734" componentId="JULIE Token Boundary Detector" posTag="1106"/><types:Token xmi:id="7097" sofa="1" begin="1735" end="1736" componentId="JULIE Token Boundary Detector" posTag="1109"/><types:Token xmi:id="7113" sofa="1" begin="1736" end="1737" componentId="JULIE Token Boundary Detector" posTag="1112"/><types:Token xmi:id="7129" sofa="1" begin="1737" end="1740" componentId="JULIE Token Boundary Detector" posTag="1115"/><types:Token xmi:id="7145" sofa="1" begin="1741" end="1743" componentId="JULIE Token Boundary Detector" posTag="1118"/><types:Token xmi:id="7161" sofa="1" begin="1744" end="1746" componentId="JULIE Token Boundary Detector" posTag="1121"/><types:Token xmi:id="7177" sofa="1" begin="1747" end="1754" componentId="JULIE Token Boundary Detector" posTag="1124"/><types:Token xmi:id="7193" sofa="1" begin="1755" end="1757" componentId="JULIE Token Boundary Detector" posTag="1127"/><types:Token xmi:id="7209" sofa="1" begin="1758" end="1761" componentId="JULIE Token Boundary Detector" posTag="1130"/><types:Token xmi:id="7225" sofa="1" begin="1762" end="1764" componentId="JULIE Token Boundary Detector" posTag="1133"/><types:Token xmi:id="7241" sofa="1" begin="1765" end="1768" componentId="JULIE Token Boundary Detector" posTag="1136"/><types:Token 
xmi:id="7257" sofa="1" begin="1769" end="1771" componentId="JULIE Token Boundary Detector" posTag="1139"/><types:Token xmi:id="7273" sofa="1" begin="1772" end="1774" componentId="JULIE Token Boundary Detector" posTag="1142"/><types:Token xmi:id="7289" sofa="1" begin="1775" end="1778" componentId="JULIE Token Boundary Detector" posTag="1145"/><types:Token xmi:id="7305" sofa="1" begin="1779" end="1782" componentId="JULIE Token Boundary Detector" posTag="1148"/><types:Token xmi:id="7321" sofa="1" begin="1783" end="1788" componentId="JULIE Token Boundary Detector" posTag="1151"/><types:Token xmi:id="7337" sofa="1" begin="1789" end="1792" componentId="JULIE Token Boundary Detector" posTag="1154"/><types:Token xmi:id="7353" sofa="1" begin="1793" end="1797" componentId="JULIE Token Boundary Detector" posTag="1157"/><types:Token xmi:id="7369" sofa="1" begin="1798" end="1812" componentId="JULIE Token Boundary Detector" posTag="1160"/><types:Token xmi:id="7385" sofa="1" begin="1813" end="1820" componentId="JULIE Token Boundary Detector" posTag="1163"/><types:Token xmi:id="7401" sofa="1" begin="1820" end="1821" componentId="JULIE Token Boundary Detector" posTag="1166"/><types:Token xmi:id="7417" sofa="1" begin="1821" end="1826" componentId="JULIE Token Boundary Detector" posTag="1169"/><types:Token xmi:id="7433" sofa="1" begin="1827" end="1835" componentId="JULIE Token Boundary Detector" posTag="1172"/><types:Token xmi:id="7449" sofa="1" begin="1835" end="1836" componentId="JULIE Token Boundary Detector" posTag="1175"/><types:PennBioIEPOSTag xmi:id="7465" sofa="1" begin="0" end="4" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="RB"/><types:PennBioIEPOSTag xmi:id="7474" sofa="1" begin="4" end="5" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="HYPH"/><types:PennBioIEPOSTag xmi:id="7483" sofa="1" begin="5" end="10" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJ"/><types:PennBioIEPOSTag xmi:id="7492" sofa="1" begin="11" end="12" 
componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="7501" sofa="1" begin="12" end="14" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="7510" sofa="1" begin="14" end="15" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="7519" sofa="1" begin="16" end="29" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="7528" sofa="1" begin="30" end="31" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="7537" sofa="1" begin="31" end="34" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="7546" sofa="1" begin="34" end="35" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="7555" sofa="1" begin="36" end="38" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="7564" sofa="1" begin="39" end="44" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="7573" sofa="1" begin="45" end="47" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="7582" sofa="1" begin="48" end="55" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="VBD"/><types:PennBioIEPOSTag xmi:id="7591" sofa="1" begin="56" end="60" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="7600" sofa="1" begin="61" end="64" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJ"/><types:PennBioIEPOSTag xmi:id="7609" sofa="1" begin="64" end="65" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="HYPH"/><types:PennBioIEPOSTag xmi:id="7618" sofa="1" begin="65" end="69" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="7627" sofa="1" begin="70" end="79" 
componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="7636" sofa="1" begin="80" end="81" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="7645" sofa="1" begin="81" end="84" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="7654" sofa="1" begin="84" end="85" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="7663" sofa="1" begin="86" end="88" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="7672" sofa="1" begin="89" end="92" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="7681" sofa="1" begin="93" end="95" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="7690" sofa="1" begin="96" end="98" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="VBZ"/><types:PennBioIEPOSTag xmi:id="7699" sofa="1" begin="99" end="108" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJ"/><types:PennBioIEPOSTag xmi:id="7708" sofa="1" begin="109" end="113" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="WRB"/><types:PennBioIEPOSTag xmi:id="7717" sofa="1" begin="114" end="116" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="PRP"/><types:PennBioIEPOSTag xmi:id="7726" sofa="1" begin="117" end="119" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="VBZ"/><types:PennBioIEPOSTag xmi:id="7735" sofa="1" begin="120" end="124" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="VBN"/><types:PennBioIEPOSTag xmi:id="7744" sofa="1" begin="125" end="127" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="7753" sofa="1" begin="128" end="139" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="7762" sofa="1" begin="140" end="148" 
componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NNS"/><types:PennBioIEPOSTag xmi:id="7771" sofa="1" begin="149" end="152" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="7780" sofa="1" begin="153" end="156" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="DT"/><types:PennBioIEPOSTag xmi:id="7789" sofa="1" begin="157" end="164" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJ"/><types:PennBioIEPOSTag xmi:id="7798" sofa="1" begin="165" end="174" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="7807" sofa="1" begin="175" end="177" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="7816" sofa="1" begin="178" end="183" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJ"/><types:PennBioIEPOSTag xmi:id="7825" sofa="1" begin="184" end="200" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="7834" sofa="1" begin="201" end="206" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="7843" sofa="1" begin="207" end="216" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="7852" sofa="1" begin="216" end="217" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="."/><types:PennBioIEPOSTag xmi:id="7861" sofa="1" begin="218" end="225" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="7870" sofa="1" begin="226" end="227" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="DT"/><types:PennBioIEPOSTag xmi:id="7879" sofa="1" begin="228" end="233" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJR"/><types:PennBioIEPOSTag xmi:id="7888" sofa="1" begin="234" end="237" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="7897" sofa="1" begin="238" end="246" 
componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="VBG"/><types:PennBioIEPOSTag xmi:id="7906" sofa="1" begin="247" end="251" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="7915" sofa="1" begin="252" end="253" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="7924" sofa="1" begin="253" end="257" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="FW"/><types:PennBioIEPOSTag xmi:id="7933" sofa="1" begin="257" end="258" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value=","/><types:PennBioIEPOSTag xmi:id="7942" sofa="1" begin="259" end="262" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="7951" sofa="1" begin="263" end="265" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="7960" sofa="1" begin="266" end="268" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="7969" sofa="1" begin="268" end="269" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="7978" sofa="1" begin="270" end="275" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="MD"/><types:PennBioIEPOSTag xmi:id="7987" sofa="1" begin="276" end="282" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="VB"/><types:PennBioIEPOSTag xmi:id="7996" sofa="1" begin="283" end="293" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJ"/><types:PennBioIEPOSTag xmi:id="8005" sofa="1" begin="294" end="304" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8014" sofa="1" begin="305" end="306" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="8023" sofa="1" begin="306" end="309" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8032" sofa="1" begin="309" end="310" 
componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="8041" sofa="1" begin="311" end="325" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NNS"/><types:PennBioIEPOSTag xmi:id="8050" sofa="1" begin="326" end="330" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="8059" sofa="1" begin="331" end="339" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="VBN"/><types:PennBioIEPOSTag xmi:id="8068" sofa="1" begin="340" end="346" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8077" sofa="1" begin="346" end="347" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="SYM"/><types:PennBioIEPOSTag xmi:id="8086" sofa="1" begin="347" end="359" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8095" sofa="1" begin="360" end="362" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="VBZ"/><types:PennBioIEPOSTag xmi:id="8104" sofa="1" begin="363" end="370" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJ"/><types:PennBioIEPOSTag xmi:id="8113" sofa="1" begin="370" end="371" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="."/><types:PennBioIEPOSTag xmi:id="8122" sofa="1" begin="372" end="376" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="DT"/><types:PennBioIEPOSTag xmi:id="8131" sofa="1" begin="377" end="387" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="VBN"/><types:PennBioIEPOSTag xmi:id="8140" sofa="1" begin="387" end="388" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value=","/><types:PennBioIEPOSTag xmi:id="8149" sofa="1" begin="389" end="392" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="8158" sofa="1" begin="393" end="395" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="8167" sofa="1" begin="395" end="396" 
componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="HYPH"/><types:PennBioIEPOSTag xmi:id="8176" sofa="1" begin="396" end="399" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8185" sofa="1" begin="399" end="400" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="HYPH"/><types:PennBioIEPOSTag xmi:id="8194" sofa="1" begin="400" end="406" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8203" sofa="1" begin="406" end="407" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value=","/><types:PennBioIEPOSTag xmi:id="8212" sofa="1" begin="408" end="417" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8221" sofa="1" begin="418" end="433" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJ"/><types:PennBioIEPOSTag xmi:id="8230" sofa="1" begin="434" end="439" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8239" sofa="1" begin="440" end="448" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="VBD"/><types:PennBioIEPOSTag xmi:id="8248" sofa="1" begin="449" end="452" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="DT"/><types:PennBioIEPOSTag xmi:id="8257" sofa="1" begin="453" end="459" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJ"/><types:PennBioIEPOSTag xmi:id="8266" sofa="1" begin="459" end="460" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="HYPH"/><types:PennBioIEPOSTag xmi:id="8275" sofa="1" begin="460" end="465" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8284" sofa="1" begin="466" end="472" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8293" sofa="1" begin="473" end="476" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8302" sofa="1" begin="477" end="491" 
componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NNS"/><types:PennBioIEPOSTag xmi:id="8311" sofa="1" begin="491" end="492" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value=","/><types:PennBioIEPOSTag xmi:id="8320" sofa="1" begin="493" end="499" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8329" sofa="1" begin="499" end="500" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value=","/><types:PennBioIEPOSTag xmi:id="8338" sofa="1" begin="501" end="504" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CC"/><types:PennBioIEPOSTag xmi:id="8347" sofa="1" begin="505" end="517" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8356" sofa="1" begin="518" end="520" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="8365" sofa="1" begin="521" end="524" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8374" sofa="1" begin="525" end="527" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="8383" sofa="1" begin="528" end="529" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="8392" sofa="1" begin="529" end="530" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value=","/><types:PennBioIEPOSTag xmi:id="8401" sofa="1" begin="530" end="533" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="8410" sofa="1" begin="534" end="536" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8419" sofa="1" begin="537" end="539" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8428" sofa="1" begin="540" end="547" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="VBD"/><types:PennBioIEPOSTag xmi:id="8437" sofa="1" begin="548" end="552" 
componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="8446" sofa="1" begin="553" end="559" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CC"/><types:PennBioIEPOSTag xmi:id="8455" sofa="1" begin="560" end="563" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="8464" sofa="1" begin="564" end="566" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8473" sofa="1" begin="567" end="569" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CC"/><types:PennBioIEPOSTag xmi:id="8482" sofa="1" begin="570" end="573" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="8491" sofa="1" begin="574" end="576" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8500" sofa="1" begin="577" end="579" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="8509" sofa="1" begin="580" end="583" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8518" sofa="1" begin="584" end="586" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8527" sofa="1" begin="587" end="589" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="8536" sofa="1" begin="590" end="592" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="8545" sofa="1" begin="593" end="600" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJ"/><types:PennBioIEPOSTag xmi:id="8554" sofa="1" begin="601" end="611" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NNS"/><types:PennBioIEPOSTag xmi:id="8563" sofa="1" begin="611" end="612" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="."/><types:PennBioIEPOSTag xmi:id="8572" sofa="1" begin="613" end="622" 
componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJ"/><types:PennBioIEPOSTag xmi:id="8581" sofa="1" begin="623" end="628" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="RBS"/><types:PennBioIEPOSTag xmi:id="8590" sofa="1" begin="628" end="629" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="HYPH"/><types:PennBioIEPOSTag xmi:id="8599" sofa="1" begin="629" end="635" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8608" sofa="1" begin="636" end="637" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="8617" sofa="1" begin="637" end="640" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8626" sofa="1" begin="640" end="641" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="8635" sofa="1" begin="642" end="646" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJ"/><types:PennBioIEPOSTag xmi:id="8644" sofa="1" begin="647" end="653" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NNS"/><types:PennBioIEPOSTag xmi:id="8653" sofa="1" begin="654" end="657" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CC"/><types:PennBioIEPOSTag xmi:id="8662" sofa="1" begin="658" end="661" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="DT"/><types:PennBioIEPOSTag xmi:id="8671" sofa="1" begin="662" end="672" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="VBN"/><types:PennBioIEPOSTag xmi:id="8680" sofa="1" begin="673" end="675" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="8689" sofa="1" begin="675" end="676" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8698" sofa="1" begin="677" end="687" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8707" sofa="1" begin="688" end="697" 
componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NNS"/><types:PennBioIEPOSTag xmi:id="8716" sofa="1" begin="698" end="699" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="8725" sofa="1" begin="699" end="702" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8734" sofa="1" begin="702" end="703" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="8743" sofa="1" begin="704" end="708" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="VBD"/><types:PennBioIEPOSTag xmi:id="8752" sofa="1" begin="709" end="718" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="VBN"/><types:PennBioIEPOSTag xmi:id="8761" sofa="1" begin="719" end="722" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="8770" sofa="1" begin="723" end="729" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8779" sofa="1" begin="730" end="733" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8788" sofa="1" begin="734" end="741" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJ"/><types:PennBioIEPOSTag xmi:id="8797" sofa="1" begin="742" end="748" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8806" sofa="1" begin="749" end="763" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NNS"/><types:PennBioIEPOSTag xmi:id="8815" sofa="1" begin="764" end="765" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="8824" sofa="1" begin="765" end="769" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8833" sofa="1" begin="769" end="770" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="8842" sofa="1" begin="770" end="771" 
componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value=","/><types:PennBioIEPOSTag xmi:id="8851" sofa="1" begin="772" end="775" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="DT"/><types:PennBioIEPOSTag xmi:id="8860" sofa="1" begin="776" end="780" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8869" sofa="1" begin="781" end="786" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="8878" sofa="1" begin="787" end="790" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="DT"/><types:PennBioIEPOSTag xmi:id="8887" sofa="1" begin="791" end="797" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8896" sofa="1" begin="798" end="811" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8905" sofa="1" begin="811" end="812" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="HYPH"/><types:PennBioIEPOSTag xmi:id="8914" sofa="1" begin="812" end="816" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8923" sofa="1" begin="817" end="822" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8932" sofa="1" begin="823" end="827" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="8941" sofa="1" begin="828" end="831" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="DT"/><types:PennBioIEPOSTag xmi:id="8950" sofa="1" begin="832" end="838" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8959" sofa="1" begin="839" end="845" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="8968" sofa="1" begin="846" end="847" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="8977" sofa="1" begin="847" end="851" 
componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="quantitative-name"/><types:PennBioIEPOSTag xmi:id="8986" sofa="1" begin="851" end="852" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="HYPH"/><types:PennBioIEPOSTag xmi:id="8995" sofa="1" begin="852" end="855" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="SYM"/><types:PennBioIEPOSTag xmi:id="9004" sofa="1" begin="855" end="856" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="9013" sofa="1" begin="856" end="857" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value=","/><types:PennBioIEPOSTag xmi:id="9022" sofa="1" begin="858" end="861" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CC"/><types:PennBioIEPOSTag xmi:id="9031" sofa="1" begin="862" end="868" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9040" sofa="1" begin="869" end="883" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NNS"/><types:PennBioIEPOSTag xmi:id="9049" sofa="1" begin="884" end="885" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="9058" sofa="1" begin="885" end="889" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9067" sofa="1" begin="889" end="890" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="9076" sofa="1" begin="891" end="897" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="9085" sofa="1" begin="898" end="902" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="DT"/><types:PennBioIEPOSTag xmi:id="9094" sofa="1" begin="903" end="909" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9103" sofa="1" begin="910" end="916" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9112" sofa="1" begin="916" 
end="917" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="."/><types:PennBioIEPOSTag xmi:id="9121" sofa="1" begin="918" end="929" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9130" sofa="1" begin="930" end="937" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="9139" sofa="1" begin="938" end="946" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NNS"/><types:PennBioIEPOSTag xmi:id="9148" sofa="1" begin="947" end="948" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="9157" sofa="1" begin="948" end="950" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="9166" sofa="1" begin="950" end="951" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9175" sofa="1" begin="952" end="955" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9184" sofa="1" begin="956" end="958" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="9193" sofa="1" begin="959" end="962" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9202" sofa="1" begin="963" end="967" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9211" sofa="1" begin="968" end="974" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NNS"/><types:PennBioIEPOSTag xmi:id="9220" sofa="1" begin="974" end="975" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value=","/><types:PennBioIEPOSTag xmi:id="9229" sofa="1" begin="976" end="980" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="9238" sofa="1" begin="981" end="983" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="TO"/><types:PennBioIEPOSTag xmi:id="9247" sofa="1" begin="984" end="988" 
componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="9256" sofa="1" begin="988" end="989" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="9265" sofa="1" begin="990" end="993" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="VBD"/><types:PennBioIEPOSTag xmi:id="9274" sofa="1" begin="994" end="1002" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="VBN"/><types:PennBioIEPOSTag xmi:id="9283" sofa="1" begin="1003" end="1006" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="9292" sofa="1" begin="1007" end="1010" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="DT"/><types:PennBioIEPOSTag xmi:id="9301" sofa="1" begin="1011" end="1017" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9310" sofa="1" begin="1018" end="1021" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9319" sofa="1" begin="1022" end="1026" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="quantitative-name"/><types:PennBioIEPOSTag xmi:id="9328" sofa="1" begin="1026" end="1027" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="HYPH"/><types:PennBioIEPOSTag xmi:id="9337" sofa="1" begin="1027" end="1030" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="SYM"/><types:PennBioIEPOSTag xmi:id="9346" sofa="1" begin="1031" end="1032" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="9355" sofa="1" begin="1032" end="1035" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9364" sofa="1" begin="1036" end="1040" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9373" sofa="1" begin="1041" end="1046" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9382" 
sofa="1" begin="1046" end="1047" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value=","/><types:PennBioIEPOSTag xmi:id="9391" sofa="1" begin="1048" end="1052" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="9400" sofa="1" begin="1053" end="1054" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="9409" sofa="1" begin="1054" end="1056" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="9418" sofa="1" begin="1056" end="1057" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9427" sofa="1" begin="1058" end="1060" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9436" sofa="1" begin="1060" end="1061" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value=","/><types:PennBioIEPOSTag xmi:id="9445" sofa="1" begin="1062" end="1066" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="9454" sofa="1" begin="1067" end="1069" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="TO"/><types:PennBioIEPOSTag xmi:id="9463" sofa="1" begin="1070" end="1074" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="9472" sofa="1" begin="1074" end="1075" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="9481" sofa="1" begin="1075" end="1076" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="9490" sofa="1" begin="1077" end="1080" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CC"/><types:PennBioIEPOSTag xmi:id="9499" sofa="1" begin="1081" end="1085" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9508" sofa="1" begin="1086" end="1087" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" 
value="-LRB-"/><types:PennBioIEPOSTag xmi:id="9517" sofa="1" begin="1087" end="1091" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="9526" sofa="1" begin="1092" end="1093" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="9535" sofa="1" begin="1093" end="1095" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="9544" sofa="1" begin="1095" end="1096" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9553" sofa="1" begin="1097" end="1099" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9562" sofa="1" begin="1099" end="1100" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value=","/><types:PennBioIEPOSTag xmi:id="9571" sofa="1" begin="1101" end="1105" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="9580" sofa="1" begin="1106" end="1108" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="TO"/><types:PennBioIEPOSTag xmi:id="9589" sofa="1" begin="1109" end="1113" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="9598" sofa="1" begin="1113" end="1114" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="9607" sofa="1" begin="1114" end="1115" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="9616" sofa="1" begin="1115" end="1116" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="."/><types:PennBioIEPOSTag xmi:id="9625" sofa="1" begin="1117" end="1120" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="DT"/><types:PennBioIEPOSTag xmi:id="9634" sofa="1" begin="1121" end="1124" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9643" sofa="1" begin="1125" end="1129" 
componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9652" sofa="1" begin="1130" end="1133" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="VBD"/><types:PennBioIEPOSTag xmi:id="9661" sofa="1" begin="1134" end="1136" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="9670" sofa="1" begin="1136" end="1137" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9679" sofa="1" begin="1138" end="1143" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJR"/><types:PennBioIEPOSTag xmi:id="9688" sofa="1" begin="1144" end="1148" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="9697" sofa="1" begin="1149" end="1152" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9706" sofa="1" begin="1153" end="1155" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="9715" sofa="1" begin="1156" end="1159" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="9724" sofa="1" begin="1160" end="1162" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9733" sofa="1" begin="1163" end="1165" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9742" sofa="1" begin="1166" end="1170" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="9751" sofa="1" begin="1171" end="1175" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="9760" sofa="1" begin="1176" end="1179" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9769" sofa="1" begin="1180" end="1182" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="9778" sofa="1" 
begin="1183" end="1186" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="9787" sofa="1" begin="1187" end="1189" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9796" sofa="1" begin="1190" end="1192" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9805" sofa="1" begin="1193" end="1194" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="9814" sofa="1" begin="1194" end="1197" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9823" sofa="1" begin="1198" end="1202" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9832" sofa="1" begin="1203" end="1208" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9841" sofa="1" begin="1208" end="1209" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value=","/><types:PennBioIEPOSTag xmi:id="9850" sofa="1" begin="1210" end="1214" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="9859" sofa="1" begin="1215" end="1216" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-LRB-"/><types:PennBioIEPOSTag xmi:id="9868" sofa="1" begin="1216" end="1218" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="9877" sofa="1" begin="1218" end="1219" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9886" sofa="1" begin="1220" end="1222" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="9895" sofa="1" begin="1222" end="1223" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value=","/><types:PennBioIEPOSTag xmi:id="9904" sofa="1" begin="1224" end="1228" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag 
xmi:id="9913" sofa="1" begin="1229" end="1231" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="TO"/><types:PennBioIEPOSTag xmi:id="9922" sofa="1" begin="1232" end="1236" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="9931" sofa="1" begin="1236" end="1237" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="9940" sofa="1" begin="1237" end="1238" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="-RRB-"/><types:PennBioIEPOSTag xmi:id="9949" sofa="1" begin="1239" end="1242" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CC"/><types:PennBioIEPOSTag xmi:id="9958" sofa="1" begin="1243" end="1251" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="VBD"/><types:PennBioIEPOSTag xmi:id="9967" sofa="1" begin="1252" end="1259" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="RB"/><types:PennBioIEPOSTag xmi:id="9976" sofa="1" begin="1260" end="1266" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJR"/><types:PennBioIEPOSTag xmi:id="9985" sofa="1" begin="1267" end="1271" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="9994" sofa="1" begin="1272" end="1275" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="DT"/><types:PennBioIEPOSTag xmi:id="10003" sofa="1" begin="1276" end="1283" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10012" sofa="1" begin="1283" end="1284" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="HYPH"/><types:PennBioIEPOSTag xmi:id="10021" sofa="1" begin="1284" end="1293" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="VBN"/><types:PennBioIEPOSTag xmi:id="10030" sofa="1" begin="1294" end="1296" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="10039" sofa="1" begin="1296" end="1297" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" 
value="NN"/><types:PennBioIEPOSTag xmi:id="10048" sofa="1" begin="1298" end="1308" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJ"/><types:PennBioIEPOSTag xmi:id="10057" sofa="1" begin="1309" end="1322" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10066" sofa="1" begin="1323" end="1326" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="10075" sofa="1" begin="1327" end="1331" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJ"/><types:PennBioIEPOSTag xmi:id="10084" sofa="1" begin="1331" end="1332" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="HYPH"/><types:PennBioIEPOSTag xmi:id="10093" sofa="1" begin="1332" end="1336" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10102" sofa="1" begin="1337" end="1342" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10111" sofa="1" begin="1342" end="1343" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value=","/><types:PennBioIEPOSTag xmi:id="10120" sofa="1" begin="1344" end="1348" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="10129" sofa="1" begin="1349" end="1352" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="DT"/><types:PennBioIEPOSTag xmi:id="10138" sofa="1" begin="1353" end="1359" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJS"/><types:PennBioIEPOSTag xmi:id="10147" sofa="1" begin="1360" end="1363" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10156" sofa="1" begin="1364" end="1368" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10165" sofa="1" begin="1369" end="1377" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="VBN"/><types:PennBioIEPOSTag xmi:id="10174" sofa="1" begin="1378" end="1384" 
componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="10183" sofa="1" begin="1385" end="1388" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="DT"/><types:PennBioIEPOSTag xmi:id="10192" sofa="1" begin="1389" end="1392" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="10201" sofa="1" begin="1392" end="1393" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="HYPH"/><types:PennBioIEPOSTag xmi:id="10210" sofa="1" begin="1393" end="1395" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10219" sofa="1" begin="1396" end="1398" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10228" sofa="1" begin="1399" end="1405" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10237" sofa="1" begin="1406" end="1411" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="VBG"/><types:PennBioIEPOSTag xmi:id="10246" sofa="1" begin="1412" end="1418" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="RB"/><types:PennBioIEPOSTag xmi:id="10255" sofa="1" begin="1419" end="1428" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="RB"/><types:PennBioIEPOSTag xmi:id="10264" sofa="1" begin="1429" end="1435" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJR"/><types:PennBioIEPOSTag xmi:id="10273" sofa="1" begin="1435" end="1436" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="."/><types:PennBioIEPOSTag xmi:id="10282" sofa="1" begin="1437" end="1440" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="DT"/><types:PennBioIEPOSTag xmi:id="10291" sofa="1" begin="1441" end="1444" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10300" sofa="1" begin="1445" end="1449" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10309" 
sofa="1" begin="1450" end="1453" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10318" sofa="1" begin="1454" end="1458" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10327" sofa="1" begin="1459" end="1462" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="VBD"/><types:PennBioIEPOSTag xmi:id="10336" sofa="1" begin="1463" end="1466" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="10345" sofa="1" begin="1467" end="1472" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NNS"/><types:PennBioIEPOSTag xmi:id="10354" sofa="1" begin="1473" end="1479" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJR"/><types:PennBioIEPOSTag xmi:id="10363" sofa="1" begin="1480" end="1484" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="10372" sofa="1" begin="1485" end="1488" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="DT"/><types:PennBioIEPOSTag xmi:id="10381" sofa="1" begin="1489" end="1499" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJ"/><types:PennBioIEPOSTag xmi:id="10390" sofa="1" begin="1500" end="1504" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10399" sofa="1" begin="1505" end="1508" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="10408" sofa="1" begin="1509" end="1518" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJ"/><types:PennBioIEPOSTag xmi:id="10417" sofa="1" begin="1519" end="1522" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10426" sofa="1" begin="1523" end="1525" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="10435" sofa="1" begin="1526" end="1531" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" 
value="CD"/><types:PennBioIEPOSTag xmi:id="10444" sofa="1" begin="1532" end="1534" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10453" sofa="1" begin="1535" end="1540" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="RB"/><types:PennBioIEPOSTag xmi:id="10462" sofa="1" begin="1541" end="1546" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="RB"/><types:PennBioIEPOSTag xmi:id="10471" sofa="1" begin="1546" end="1547" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="."/><types:PennBioIEPOSTag xmi:id="10480" sofa="1" begin="1548" end="1553" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJR"/><types:PennBioIEPOSTag xmi:id="10489" sofa="1" begin="1554" end="1562" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJ"/><types:PennBioIEPOSTag xmi:id="10498" sofa="1" begin="1563" end="1570" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJ"/><types:PennBioIEPOSTag xmi:id="10507" sofa="1" begin="1571" end="1575" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10516" sofa="1" begin="1576" end="1582" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NNS"/><types:PennBioIEPOSTag xmi:id="10525" sofa="1" begin="1583" end="1586" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CC"/><types:PennBioIEPOSTag xmi:id="10534" sofa="1" begin="1587" end="1594" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJR"/><types:PennBioIEPOSTag xmi:id="10543" sofa="1" begin="1595" end="1604" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NNS"/><types:PennBioIEPOSTag xmi:id="10552" sofa="1" begin="1605" end="1607" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="10561" sofa="1" begin="1608" end="1620" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10570" sofa="1" begin="1621" end="1627" 
componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NNS"/><types:PennBioIEPOSTag xmi:id="10579" sofa="1" begin="1628" end="1632" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="VBD"/><types:PennBioIEPOSTag xmi:id="10588" sofa="1" begin="1633" end="1641" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="VBN"/><types:PennBioIEPOSTag xmi:id="10597" sofa="1" begin="1642" end="1646" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="10606" sofa="1" begin="1647" end="1650" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="DT"/><types:PennBioIEPOSTag xmi:id="10615" sofa="1" begin="1651" end="1654" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10624" sofa="1" begin="1655" end="1658" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="10633" sofa="1" begin="1658" end="1659" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="HYPH"/><types:PennBioIEPOSTag xmi:id="10642" sofa="1" begin="1659" end="1661" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10651" sofa="1" begin="1662" end="1664" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10660" sofa="1" begin="1665" end="1672" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10669" sofa="1" begin="1672" end="1673" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="."/><types:PennBioIEPOSTag xmi:id="10678" sofa="1" begin="1674" end="1682" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJ"/><types:PennBioIEPOSTag xmi:id="10687" sofa="1" begin="1683" end="1689" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NNS"/><types:PennBioIEPOSTag xmi:id="10696" sofa="1" begin="1690" end="1700" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="VBG"/><types:PennBioIEPOSTag xmi:id="10705" 
sofa="1" begin="1701" end="1704" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="DT"/><types:PennBioIEPOSTag xmi:id="10714" sofa="1" begin="1705" end="1713" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10723" sofa="1" begin="1714" end="1717" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CC"/><types:PennBioIEPOSTag xmi:id="10732" sofa="1" begin="1718" end="1724" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10741" sofa="1" begin="1725" end="1727" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="10750" sofa="1" begin="1728" end="1731" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10759" sofa="1" begin="1732" end="1734" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="10768" sofa="1" begin="1735" end="1736" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="10777" sofa="1" begin="1736" end="1737" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value=","/><types:PennBioIEPOSTag xmi:id="10786" sofa="1" begin="1737" end="1740" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="10795" sofa="1" begin="1741" end="1743" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10804" sofa="1" begin="1744" end="1746" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10813" sofa="1" begin="1747" end="1754" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="VBN"/><types:PennBioIEPOSTag xmi:id="10822" sofa="1" begin="1755" end="1757" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="10831" sofa="1" begin="1758" end="1761" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" 
value="NN"/><types:PennBioIEPOSTag xmi:id="10840" sofa="1" begin="1762" end="1764" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="10849" sofa="1" begin="1765" end="1768" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="CD"/><types:PennBioIEPOSTag xmi:id="10858" sofa="1" begin="1769" end="1771" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10867" sofa="1" begin="1772" end="1774" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10876" sofa="1" begin="1775" end="1778" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="VBP"/><types:PennBioIEPOSTag xmi:id="10885" sofa="1" begin="1779" end="1782" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="RB"/><types:PennBioIEPOSTag xmi:id="10894" sofa="1" begin="1783" end="1788" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="10903" sofa="1" begin="1789" end="1792" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10912" sofa="1" begin="1793" end="1797" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="IN"/><types:PennBioIEPOSTag xmi:id="10921" sofa="1" begin="1798" end="1812" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJ"/><types:PennBioIEPOSTag xmi:id="10930" sofa="1" begin="1813" end="1820" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NN"/><types:PennBioIEPOSTag xmi:id="10939" sofa="1" begin="1820" end="1821" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="HYPH"/><types:PennBioIEPOSTag xmi:id="10948" sofa="1" begin="1821" end="1826" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="JJ"/><types:PennBioIEPOSTag xmi:id="10957" sofa="1" begin="1827" end="1835" componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="NNS"/><types:PennBioIEPOSTag xmi:id="10966" sofa="1" begin="1835" end="1836" 
componentId="de.julielab.jcore.ae.OpenNLPPosTagger" value="."/><types:Abbreviation xmi:id="10975" sofa="1" begin="31" end="34" componentId="de.julielab.jcore.ae.acronymtagger.AcronymAnnotator" expan="fosamprenavir" textReference="1183" definedHere="true"/><types:Abbreviation xmi:id="10985" sofa="1" begin="81" end="84" componentId="de.julielab.jcore.ae.acronymtagger.AcronymAnnotator" expan="ritonavir" textReference="1190" definedHere="true"/><types:Abbreviation xmi:id="11065" sofa="1" begin="234" end="237" expan="ritonavir" textReference="1190" definedHere="false"/><types:Abbreviation xmi:id="10995" sofa="1" begin="306" end="309" componentId="de.julielab.jcore.ae.acronymtagger.AcronymAnnotator" expan="amprenavir" textReference="1197" definedHere="true"/><types:Abbreviation xmi:id="11125" sofa="1" begin="473" end="476" expan="amprenavir" textReference="1197" definedHere="false"/><types:Abbreviation xmi:id="11035" sofa="1" begin="521" end="524" expan="fosamprenavir" textReference="1183" definedHere="false"/><types:Abbreviation xmi:id="11075" sofa="1" begin="580" end="583" expan="ritonavir" textReference="1190" definedHere="false"/><types:Abbreviation xmi:id="11005" sofa="1" begin="637" end="640" componentId="de.julielab.jcore.ae.acronymtagger.AcronymAnnotator" expan="Geometric least-square" textReference="1204" definedHere="true"/><types:Abbreviation xmi:id="11015" sofa="1" begin="699" end="702" componentId="de.julielab.jcore.ae.acronymtagger.AcronymAnnotator" expan="confidence intervals" textReference="1211" definedHere="true"/><types:Abbreviation xmi:id="11135" sofa="1" begin="730" end="733" expan="amprenavir" textReference="1197" definedHere="false"/><types:Abbreviation xmi:id="11025" sofa="1" begin="847" end="855" componentId="de.julielab.jcore.ae.acronymtagger.AcronymAnnotator" expan="area under the plasma concentration-time curve over the dosing period" textReference="1218" definedHere="true"/><types:Abbreviation xmi:id="11225" sofa="1" begin="952" end="955" 
expan="confidence intervals" textReference="1211" definedHere="false"/><types:Abbreviation xmi:id="11185" sofa="1" begin="959" end="962" expan="Geometric least-square" textReference="1204" definedHere="false"/><types:Abbreviation xmi:id="11145" sofa="1" begin="1018" end="1021" expan="amprenavir" textReference="1197" definedHere="false"/><types:Abbreviation xmi:id="11235" sofa="1" begin="1022" end="1030" expan="area under the plasma concentration-time curve over the dosing period" textReference="1218" definedHere="false"/><types:Abbreviation xmi:id="11195" sofa="1" begin="1032" end="1035" expan="Geometric least-square" textReference="1204" definedHere="false"/><types:Abbreviation xmi:id="11155" sofa="1" begin="1121" end="1124" expan="amprenavir" textReference="1197" definedHere="false"/><types:Abbreviation xmi:id="11085" sofa="1" begin="1149" end="1152" expan="ritonavir" textReference="1190" definedHere="false"/><types:Abbreviation xmi:id="11095" sofa="1" begin="1176" end="1179" expan="ritonavir" textReference="1190" definedHere="false"/><types:Abbreviation xmi:id="11205" sofa="1" begin="1194" end="1197" expan="Geometric least-square" textReference="1204" definedHere="false"/><types:Abbreviation xmi:id="11165" sofa="1" begin="1360" end="1363" expan="amprenavir" textReference="1197" definedHere="false"/><types:Abbreviation xmi:id="11215" sofa="1" begin="1441" end="1444" expan="Geometric least-square" textReference="1204" definedHere="false"/><types:Abbreviation xmi:id="11175" sofa="1" begin="1450" end="1453" expan="amprenavir" textReference="1197" definedHere="false"/><types:Abbreviation xmi:id="11045" sofa="1" begin="1519" end="1522" expan="fosamprenavir" textReference="1183" definedHere="false"/><types:Abbreviation xmi:id="11105" sofa="1" begin="1651" end="1654" expan="ritonavir" textReference="1190" definedHere="false"/><types:Abbreviation xmi:id="11055" sofa="1" begin="1728" end="1731" expan="fosamprenavir" textReference="1183" 
definedHere="false"/><types:Abbreviation xmi:id="11115" sofa="1" begin="1758" end="1761" expan="ritonavir" textReference="1190" definedHere="false"/><types:Sentence xmi:id="11245" sofa="1" begin="0" end="217" componentId="de.julielab.jcore.ae.SentenceAnnotator"/><types:Sentence xmi:id="11253" sofa="1" begin="218" end="371" componentId="de.julielab.jcore.ae.SentenceAnnotator"/><types:Sentence xmi:id="11261" sofa="1" begin="372" end="612" componentId="de.julielab.jcore.ae.SentenceAnnotator"/><types:Sentence xmi:id="11269" sofa="1" begin="613" end="917" componentId="de.julielab.jcore.ae.SentenceAnnotator"/><types:Sentence xmi:id="11277" sofa="1" begin="918" end="1116" componentId="de.julielab.jcore.ae.SentenceAnnotator"/><types:Sentence xmi:id="11285" sofa="1" begin="1117" end="1436" componentId="de.julielab.jcore.ae.SentenceAnnotator"/><types:Sentence xmi:id="11293" sofa="1" begin="1437" end="1547" componentId="de.julielab.jcore.ae.SentenceAnnotator"/><types:Sentence xmi:id="11301" sofa="1" begin="1548" end="1673" componentId="de.julielab.jcore.ae.SentenceAnnotator"/><types:Sentence xmi:id="11309" sofa="1" begin="1674" end="1836" componentId="de.julielab.jcore.ae.SentenceAnnotator"/><cas:Sofa xmi:id="1" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="Once-daily (QD) fosamprenavir (FPV) at 1,400 mg boosted with low-dose ritonavir (RTV) at 200 mg is effective when it is used in combination regimens for the initial treatment of human immunodeficiency virus infection. Whether a lower RTV boosting dose (i.e., 100 mg QD) could ensure sufficient amprenavir (APV) concentrations with improved safety/tolerability is unknown. This randomized, two 14-day-period, crossover pharmacokinetic study compared the steady-state plasma APV concentrations, safety, and tolerability of FPV at 1,400 mg QD boosted with either 100 mg or 200 mg of RTV QD in 36 healthy volunteers. 
Geometric least-square (GLS) mean ratios and the associated 90% confidence intervals (CIs) were estimated for plasma APV maximum plasma concentrations (Cmax), the area under the plasma concentration-time curve over the dosing period (AUC0-tau), and trough concentrations (Ctau) during each dosing period. Equivalence between regimens (90% CIs of GLS mean ratios, 0.80 to 1.25) was observed for the plasma APV AUC0-tau (GLS mean ratio, 0.90 [90% CI, 0.84 to 0.96]) and Cmax (0.97 [90% CI, 0.91 to 1.04]). The APV Ctau was 38% lower with RTV at 100 mg QD than with RTV at 200 mg QD (GLS mean ratio, 0.62 [90% CI, 0.55 to 0.69]) but remained sixfold higher than the protein-corrected 50% inhibitory concentration for wild-type virus, with the lowest APV Ctau observed during the 100-mg QD period being nearly threefold higher. The GLS mean APV Ctau was 2.5 times higher than the historical Ctau for unboosted FPV at 1,400 mg twice daily. Fewer clinical adverse drug events and smaller increases in triglyceride levels were observed with the RTV 100-mg QD regimen. 
Clinical trials evaluating the efficacy and safety of FPV at 1,400 mg QD boosted by RTV at 100 mg QD are now under way with antiretroviral therapy-naïve patients.&#10;"/><cas:View sofa="1" members="8 11 14 17 20 23 26 29 32 35 38 41 44 47 50 53 56 59 62 65 68 71 74 77 80 83 86 89 92 95 98 101 104 107 110 113 116 119 122 125 128 131 134 137 140 143 146 149 152 155 158 161 164 167 170 173 176 179 182 185 188 191 194 197 200 203 206 209 212 215 218 221 224 227 230 233 236 239 242 245 248 251 254 257 260 263 266 269 272 275 278 281 284 287 290 293 296 299 302 305 308 311 314 317 320 323 326 329 332 335 338 341 344 347 350 353 356 359 362 365 368 371 374 377 380 383 386 389 392 395 398 401 404 407 410 413 416 419 422 425 428 431 434 437 440 443 446 449 452 455 458 461 464 467 470 473 476 479 482 485 488 491 494 497 500 503 506 509 512 515 518 521 524 527 530 533 536 539 542 545 548 551 554 557 560 563 566 569 572 575 578 581 584 587 590 593 596 599 602 605 608 611 614 617 620 623 626 629 632 635 638 641 644 647 650 653 656 659 662 665 668 671 674 677 680 683 686 689 692 695 698 701 704 707 710 713 716 719 722 725 728 731 734 737 740 743 746 749 752 755 758 761 764 767 770 773 776 779 782 785 788 791 794 797 800 803 806 809 812 815 818 821 824 827 830 833 836 839 842 845 848 851 854 857 860 863 866 869 872 875 878 881 884 887 890 893 896 899 902 905 908 911 914 917 920 923 926 929 932 935 938 941 944 947 950 953 956 959 962 965 968 971 974 977 980 983 986 989 992 995 998 1001 1004 1007 1010 1013 1016 1019 1022 1025 1028 1031 1034 1037 1040 1043 1046 1049 1052 1055 1058 1061 1064 1067 1070 1073 1076 1079 1082 1085 1088 1091 1094 1097 1100 1103 1106 1109 1112 1115 1118 1121 1124 1127 1130 1133 1136 1139 1142 1145 1148 1151 1154 1157 1160 1163 1166 1169 1172 1175 1178 1183 1190 1197 1204 1211 1218 1225 1241 1257 1273 1289 1305 1321 1337 1353 1369 1385 1401 1417 1433 1449 1465 1481 1497 1513 1529 1545 1561 1577 1593 1609 1625 1641 1657 1673 1689 1705 1721 1737 1753 1769 1785 
1801 1817 1833 1849 1865 1881 1897 1913 1929 1945 1961 1977 1993 2009 2025 2041 2057 2073 2089 2105 2121 2137 2153 2169 2185 2201 2217 2233 2249 2265 2281 2297 2313 2329 2345 2361 2377 2393 2409 2425 2441 2457 2473 2489 2505 2521 2537 2553 2569 2585 2601 2617 2633 2649 2665 2681 2697 2713 2729 2745 2761 2777 2793 2809 2825 2841 2857 2873 2889 2905 2921 2937 2953 2969 2985 3001 3017 3033 3049 3065 3081 3097 3113 3129 3145 3161 3177 3193 3209 3225 3241 3257 3273 3289 3305 3321 3337 3353 3369 3385 3401 3417 3433 3449 3465 3481 3497 3513 3529 3545 3561 3577 3593 3609 3625 3641 3657 3673 3689 3705 3721 3737 3753 3769 3785 3801 3817 3833 3849 3865 3881 3897 3913 3929 3945 3961 3977 3993 4009 4025 4041 4057 4073 4089 4105 4121 4137 4153 4169 4185 4201 4217 4233 4249 4265 4281 4297 4313 4329 4345 4361 4377 4393 4409 4425 4441 4457 4473 4489 4505 4521 4537 4553 4569 4585 4601 4617 4633 4649 4665 4681 4697 4713 4729 4745 4761 4777 4793 4809 4825 4841 4857 4873 4889 4905 4921 4937 4953 4969 4985 5001 5017 5033 5049 5065 5081 5097 5113 5129 5145 5161 5177 5193 5209 5225 5241 5257 5273 5289 5305 5321 5337 5353 5369 5385 5401 5417 5433 5449 5465 5481 5497 5513 5529 5545 5561 5577 5593 5609 5625 5641 5657 5673 5689 5705 5721 5737 5753 5769 5785 5801 5817 5833 5849 5865 5881 5897 5913 5929 5945 5961 5977 5993 6009 6025 6041 6057 6073 6089 6105 6121 6137 6153 6169 6185 6201 6217 6233 6249 6265 6281 6297 6313 6329 6345 6361 6377 6393 6409 6425 6441 6457 6473 6489 6505 6521 6537 6553 6569 6585 6601 6617 6633 6649 6665 6681 6697 6713 6729 6745 6761 6777 6793 6809 6825 6841 6857 6873 6889 6905 6921 6937 6953 6969 6985 7001 7017 7033 7049 7065 7081 7097 7113 7129 7145 7161 7177 7193 7209 7225 7241 7257 7273 7289 7305 7321 7337 7353 7369 7385 7401 7417 7433 7449 7465 7474 7483 7492 7501 7510 7519 7528 7537 7546 7555 7564 7573 7582 7591 7600 7609 7618 7627 7636 7645 7654 7663 7672 7681 7690 7699 7708 7717 7726 7735 7744 7753 7762 7771 7780 7789 7798 7807 7816 7825 7834 7843 7852 7861 7870 
7879 7888 7897 7906 7915 7924 7933 7942 7951 7960 7969 7978 7987 7996 8005 8014 8023 8032 8041 8050 8059 8068 8077 8086 8095 8104 8113 8122 8131 8140 8149 8158 8167 8176 8185 8194 8203 8212 8221 8230 8239 8248 8257 8266 8275 8284 8293 8302 8311 8320 8329 8338 8347 8356 8365 8374 8383 8392 8401 8410 8419 8428 8437 8446 8455 8464 8473 8482 8491 8500 8509 8518 8527 8536 8545 8554 8563 8572 8581 8590 8599 8608 8617 8626 8635 8644 8653 8662 8671 8680 8689 8698 8707 8716 8725 8734 8743 8752 8761 8770 8779 8788 8797 8806 8815 8824 8833 8842 8851 8860 8869 8878 8887 8896 8905 8914 8923 8932 8941 8950 8959 8968 8977 8986 8995 9004 9013 9022 9031 9040 9049 9058 9067 9076 9085 9094 9103 9112 9121 9130 9139 9148 9157 9166 9175 9184 9193 9202 9211 9220 9229 9238 9247 9256 9265 9274 9283 9292 9301 9310 9319 9328 9337 9346 9355 9364 9373 9382 9391 9400 9409 9418 9427 9436 9445 9454 9463 9472 9481 9490 9499 9508 9517 9526 9535 9544 9553 9562 9571 9580 9589 9598 9607 9616 9625 9634 9643 9652 9661 9670 9679 9688 9697 9706 9715 9724 9733 9742 9751 9760 9769 9778 9787 9796 9805 9814 9823 9832 9841 9850 9859 9868 9877 9886 9895 9904 9913 9922 9931 9940 9949 9958 9967 9976 9985 9994 10003 10012 10021 10030 10039 10048 10057 10066 10075 10084 10093 10102 10111 10120 10129 10138 10147 10156 10165 10174 10183 10192 10201 10210 10219 10228 10237 10246 10255 10264 10273 10282 10291 10300 10309 10318 10327 10336 10345 10354 10363 10372 10381 10390 10399 10408 10417 10426 10435 10444 10453 10462 10471 10480 10489 10498 10507 10516 10525 10534 10543 10552 10561 10570 10579 10588 10597 10606 10615 10624 10633 10642 10651 10660 10669 10678 10687 10696 10705 10714 10723 10732 10741 10750 10759 10768 10777 10786 10795 10804 10813 10822 10831 10840 10849 10858 10867 10876 10885 10894 10903 10912 10921 10930 10939 10948 10957 10966 10975 10985 11065 10995 11125 11035 11075 11005 11015 11135 11025 11225 11185 11145 11235 11195 11155 11085 11095 11205 11165 11215 11175 11045 11105 11055 11115 11245 
11253 11261 11269 11277 11285 11293 11301 11309"/></xmi:XMI>
\ No newline at end of file

From 06ea3af94dc5263f3a26237f1cd56586fc73bab7 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 16 Jun 2021 08:13:51 +0200
Subject: [PATCH 072/269] Setting the UIMA type capabilities for the
 BioLemmatizer.

---
 .../biolemmatizer/desc/jcore-biolemmatizer-ae.xml   | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/jcore-biolemmatizer-ae/src/main/resources/de/julielab/jcore/ae/biolemmatizer/desc/jcore-biolemmatizer-ae.xml b/jcore-biolemmatizer-ae/src/main/resources/de/julielab/jcore/ae/biolemmatizer/desc/jcore-biolemmatizer-ae.xml
index 137eb219c..9fe2de8b8 100644
--- a/jcore-biolemmatizer-ae/src/main/resources/de/julielab/jcore/ae/biolemmatizer/desc/jcore-biolemmatizer-ae.xml
+++ b/jcore-biolemmatizer-ae/src/main/resources/de/julielab/jcore/ae/biolemmatizer/desc/jcore-biolemmatizer-ae.xml
@@ -13,7 +13,18 @@
     <typeSystemDescription />
     <typePriorities />
     <fsIndexCollection />
-    <capabilities />
+    <capabilities>
+      <capability>
+        <inputs>
+          <type>de.julielab.jcore.types.Token</type>
+          <type>de.julielab.jcore.types.PennBioIEPOSTag</type>
+        </inputs>
+        <outputs>
+          <type>de.julielab.jcore.types.Lemma</type>
+        </outputs>
+        <languagesSupported />
+      </capability>
+    </capabilities>
     <operationalProperties>
       <modifiesCas>true</modifiesCas>
       <multipleDeploymentAllowed>true</multipleDeploymentAllowed>

From 0e2a9e620fef59a06a0b27f52927c305c47a6169 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 16 Jun 2021 08:19:02 +0200
Subject: [PATCH 073/269] Setting the UIMA type capabilities for the
 BioSem event annotator and fixing them for the LINNAEUS species annotator.

---
 .../de/julielab/jcore/ae/biosem/BioSemEventAnnotator.java     | 4 +++-
 .../julielab/jcore/ae/linnaeus/LinnaeusSpeciesAnnotator.java  | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/jcore-biosem-ae/src/main/java/de/julielab/jcore/ae/biosem/BioSemEventAnnotator.java b/jcore-biosem-ae/src/main/java/de/julielab/jcore/ae/biosem/BioSemEventAnnotator.java
index 2b4011ff0..12720ec9d 100644
--- a/jcore-biosem-ae/src/main/java/de/julielab/jcore/ae/biosem/BioSemEventAnnotator.java
+++ b/jcore-biosem-ae/src/main/java/de/julielab/jcore/ae/biosem/BioSemEventAnnotator.java
@@ -18,6 +18,7 @@
 import org.apache.uima.analysis_engine.annotator.AnnotatorProcessException;
 import org.apache.uima.cas.FSIterator;
 import org.apache.uima.fit.descriptor.ExternalResource;
+import org.apache.uima.fit.descriptor.TypeCapability;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.FSArray;
 import org.apache.uima.jcas.tcas.Annotation;
@@ -35,6 +36,7 @@
 import java.util.*;
 import java.util.Map.Entry;
 
+@TypeCapability(inputs = {"de.julielab.jcore.types.Gene"}, outputs = {"de.julielab.jcore.types.EventTrigger", "de.julielab.jcore.types.EventMention"})
 public class BioSemEventAnnotator extends JCasAnnotator_ImplBase {
 
 	private final static Logger log = LoggerFactory.getLogger(BioSemEventAnnotator.class);
@@ -45,7 +47,7 @@ public class BioSemEventAnnotator extends JCasAnnotator_ImplBase {
 
 	private DBUtils trainedDb;
 
-	@ExternalResource(key = RESOURCE_TRAINED_DB, mandatory = true)
+	@ExternalResource(key = RESOURCE_TRAINED_DB)
 	private DBUtilsProvider dbUtilsProvider;
 
 	private EventExtraction xtr;
diff --git a/jcore-linnaeus-species-ae/src/main/java/de/julielab/jcore/ae/linnaeus/LinnaeusSpeciesAnnotator.java b/jcore-linnaeus-species-ae/src/main/java/de/julielab/jcore/ae/linnaeus/LinnaeusSpeciesAnnotator.java
index cbab4f7e9..1853e3f50 100644
--- a/jcore-linnaeus-species-ae/src/main/java/de/julielab/jcore/ae/linnaeus/LinnaeusSpeciesAnnotator.java
+++ b/jcore-linnaeus-species-ae/src/main/java/de/julielab/jcore/ae/linnaeus/LinnaeusSpeciesAnnotator.java
@@ -35,7 +35,7 @@
  *
  */
 @ResourceMetaData(name="JCore LINNAEUS Species AE")
-@TypeCapability(inputs = {"de.julielab.jcore.types.Organism", "de.julielab.jcore.types.ResourceEntry"})
+@TypeCapability(outputs = {"de.julielab.jcore.types.Organism", "de.julielab.jcore.types.ResourceEntry"})
 public class LinnaeusSpeciesAnnotator extends JCasAnnotator_ImplBase {
     public static final String RES_KEY_LINNAEUS_MATCHER = "LinnaeusMatcher";
     public static final String PARAM_CONFIG_FILE = "ConfigFile";

From 0e7a6cfc0671de41d37268f3fd7345148a6fce0c Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 17 Jun 2021 16:45:46 +0200
Subject: [PATCH 074/269] Bumping CoStoSys to 1.6.0-SNAPSHOT.

---
 .../jcore/ae/flairner/FlairNerAnnotator.java        |  8 ++++++++
 .../integrationtests/UpdateWithHashComparison.java  | 13 ++++++++++---
 .../de/julielab/jcore/reader/xmi/CasPopulator.java  | 10 +++-------
 .../reader/xmi/desc/jcore-xmi-db-multiplier.xml     |  1 -
 .../jcore/reader/xmi/XmiDBMultiplierTest.java       |  2 +-
 .../src/test/resources/logback-test.xml             |  4 ++--
 jedis-parent/pom.xml                                |  2 +-
 7 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java b/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java
index 8ce44a6f5..215b07718 100644
--- a/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java
+++ b/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java
@@ -162,11 +162,19 @@ public void process(final JCas aJCas) throws AnalysisEngineProcessException {
                 sentence.setId("s" + i++);
             sentenceMap.put(sentence.getId(), sentence);
         }
+        if ( log.isDebugEnabled()) {
+            if (sentenceMap.isEmpty())
+                log.debug("Document {} does not have any sentences.", JCoReTools.getDocId(aJCas));
+            if (!aJCas.getAnnotationIndex(Token.class).iterator().hasNext())
+                log.debug("Document {} does not have any tokens", JCoReTools.getDocId(aJCas));
+        }
         try {
             final AnnotationAdderHelper helper = new AnnotationAdderHelper();
+            log.trace("Sending document sentences to flair for entity tagging.");
             final NerTaggingResponse taggingResponse = connector.tagSentences(StreamSupport.stream(sentIndex.spliterator(), false));
             final List<TaggedEntity> taggedEntities = taggingResponse.getTaggedEntities();
             for (TaggedEntity entity : taggedEntities) {
+                log.trace("Adding flair-tagged entity to the CAS: {}", entity);
                 final Sentence sentence = sentenceMap.get(entity.getDocumentId());
                 EntityMention em = (EntityMention) JCoReAnnotationTools.getAnnotationByClassName(aJCas, entityClass);
                 helper.setAnnotationOffsetsRelativeToSentence(sentence, em, entity, adderConfig);
diff --git a/jcore-jedis-integration-tests/src/test/java/de/julielab/jcore/jedis/integrationtests/UpdateWithHashComparison.java b/jcore-jedis-integration-tests/src/test/java/de/julielab/jcore/jedis/integrationtests/UpdateWithHashComparison.java
index 52754055b..63e967924 100644
--- a/jcore-jedis-integration-tests/src/test/java/de/julielab/jcore/jedis/integrationtests/UpdateWithHashComparison.java
+++ b/jcore-jedis-integration-tests/src/test/java/de/julielab/jcore/jedis/integrationtests/UpdateWithHashComparison.java
@@ -9,6 +9,7 @@
 import de.julielab.jcore.reader.db.DBMultiplierReader;
 import de.julielab.jcore.reader.xml.XMLDBMultiplier;
 import de.julielab.jcore.types.Annotation;
+import de.julielab.jcore.utility.JCoReTools;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
 import org.apache.uima.analysis_engine.AnalysisEngine;
@@ -30,9 +31,7 @@
 
 import java.io.File;
 import java.nio.file.Path;
-import java.util.ArrayList;
-import java.util.EnumSet;
-import java.util.List;
+import java.util.*;
 
 import static org.assertj.core.api.Assertions.assertThat;
 
@@ -58,6 +57,7 @@ public class UpdateWithHashComparison {
     private static JCas cas;
     private static DataBaseConnector dbc;
     private static List<String> namesOfRunComponents = new ArrayList<>();
+    private static Set<String> idsOfProcessedDocuments = new LinkedHashSet<>();
 
     @BeforeAll
     public static void setup() throws Exception {
@@ -182,6 +182,11 @@ public void testInitialProcessingProcessing() throws Exception {
         // Check that all rows have been processed in the XML source subset table.
         assertThat(status.isProcessed).isEqualTo(3);
         assertThat(status.inProcess).isEqualTo(0);
+
+        assertThat(idsOfProcessedDocuments).hasSize(3);
+        // Check that these are actual IDs, not a null string or something like that
+        for (String id : idsOfProcessedDocuments)
+            assertThat(id).matches("[0-9]+");
     }
 
     /**
@@ -199,7 +204,9 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
 
         @Override
         public void process(JCas jCas) {
+            assertThat(jCas.getDocumentText()).isNotBlank();
             namesOfRunComponents.add(name);
+            idsOfProcessedDocuments.add(JCoReTools.getDocId(jCas));
             new Annotation(jCas).addToIndexes();
         }
     }
diff --git a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/CasPopulator.java b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/CasPopulator.java
index fd631e58f..e5d3bf36d 100644
--- a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/CasPopulator.java
+++ b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/CasPopulator.java
@@ -40,10 +40,8 @@ public class CasPopulator {
     private final static Logger log = LoggerFactory.getLogger(CasPopulator.class);
     private final DataBaseConnector dbc;
     private final boolean readsBaseDocument;
-    private final int numAdditionalTables;
     private final int numDataRetrievedDataFields;
-    private final String dataTable;
-    private final String[] additionalTableNames;
+    private final String[] unqualifiedAnnotationModuleNames;
     private final XmiBuilder builder;
     private final Boolean logFinalXmi;
     private final int xercesAttributeBufferSize;
@@ -72,10 +70,8 @@ public CasPopulator(String dataTable, Initializer initializer, Boolean readDataT
         this.tableName = tableName;
         this.readsBaseDocument = initializer.getReadsBaseDocument();
         this.joinTables = initializer.isJoinTables();
-        this.numAdditionalTables = initializer.getNumAdditionalTables();
         this.numDataRetrievedDataFields = initializer.getNumDataRetrievedDataFields();
-        this.dataTable = dataTable;
-        this.additionalTableNames = initializer.getUnqualifiedAnnotationModuleNames();
+        this.unqualifiedAnnotationModuleNames = initializer.getUnqualifiedAnnotationModuleNames();
         this.builder = initializer.getXmiBuilder();
         binaryBuilder = initializer.getBinaryBuilder();
         useBinaryFormat = initializer.isUseBinaryFormat();
@@ -85,7 +81,7 @@ public CasPopulator(String dataTable, Initializer initializer, Boolean readDataT
         reverseBinaryMapping = initializer.getReverseBinaryMapping();
         featuresToMapBinary = initializer.getFeaturesToMapBinary();
         if (useBinaryFormat) {
-            binaryJeDISNodeDecoder = new BinaryJeDISNodeDecoder(Stream.of(additionalTableNames).collect(Collectors.toSet()), true);
+            binaryJeDISNodeDecoder = new BinaryJeDISNodeDecoder(Stream.of(unqualifiedAnnotationModuleNames).collect(Collectors.toSet()), true);
         } else
             binaryJeDISNodeDecoder = null;
     }
diff --git a/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml b/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml
index 992ed962a..c124b4804 100644
--- a/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml
+++ b/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml
@@ -29,7 +29,6 @@
         <typeSystemDescription>
             <imports>
                 <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types" />
-<!--                <import name="de.julielab.jcore.types.jcore-all-types" />-->
             </imports>
         </typeSystemDescription>
         <fsIndexCollection />
diff --git a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierTest.java b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierTest.java
index 2af097f43..cde2d026f 100644
--- a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierTest.java
+++ b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierTest.java
@@ -31,7 +31,7 @@
 
 
 public class XmiDBMultiplierTest {
-    public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer();
+    public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:11.12");
     private static String costosysConfig;
     private static int subsetCounter;
 
diff --git a/jcore-xmi-db-reader/src/test/resources/logback-test.xml b/jcore-xmi-db-reader/src/test/resources/logback-test.xml
index b8337ca9b..edc553153 100644
--- a/jcore-xmi-db-reader/src/test/resources/logback-test.xml
+++ b/jcore-xmi-db-reader/src/test/resources/logback-test.xml
@@ -9,8 +9,8 @@
         </encoder>
     </appender>
     <logger name="de.julielab.jcore.reader.xmi" level="INFO"/>
-    <logger name="de.julielab.xml.binary.BinaryJeDISNodeDecoder" level="DEBUG"/>
-    <logger name="de.julielab.jcore.reader.xmi.flowcontroller" level="TRACE"/>
+    <logger name="de.julielab.xml.binary.BinaryJeDISNodeDecoder" level="INFO"/>
+    <logger name="de.julielab.jcore.reader.xmi.CasPopulator" level="TRACE"/>
     <root level="INFO">
         <appender-ref ref="STDOUT" />
     </root>
diff --git a/jedis-parent/pom.xml b/jedis-parent/pom.xml
index 226e35c36..b66c3be70 100644
--- a/jedis-parent/pom.xml
+++ b/jedis-parent/pom.xml
@@ -17,7 +17,7 @@
             <dependency>
                 <groupId>de.julielab</groupId>
                 <artifactId>costosys</artifactId>
-                <version>1.5.2-SNAPSHOT</version>
+                <version>1.6.0-SNAPSHOT</version>
             </dependency>
             <dependency>
                 <groupId>de.julielab</groupId>

From bdc31c92ef8beda950908172f8314b3923d2ce7a Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 18 Jun 2021 09:19:57 +0200
Subject: [PATCH 075/269] Fixed JeDIS tests that got stuck due to too tight
 restrictions on the number of available database connections.

With the possibility to reserve non-shared connections in CoStoSys, the old connection pool size limits no longer sufficed in a few cases.
---
 .../jcore/reader/db/DBMultiplierReaderTest.java      |  4 ++--
 .../julielab/jcore/reader/db/DBMultiplierTest.java   |  2 +-
 .../de/julielab/jcore/reader/db/DBReaderTest.java    |  4 ++--
 .../jcore/reader/xmi/XmiDBMultiplierReader.java      | 12 +++++++-----
 .../xmi/XmiDBMultiplierDifferentNsSchemaTest.java    |  2 +-
 .../jcore/reader/xmi/XmiDBMultiplierTest.java        |  4 +++-
 .../reader/xmi/XmiDBReaderBinaryFormatTest.java      |  2 +-
 .../reader/xmi/XmiDBReaderDifferentNsSchemaTest.java |  2 +-
 .../jcore/reader/xmi/XmiDBReaderGzippedDataTest.java |  4 ++--
 .../xmi/XmiDBReaderMonolithicDocumentsTest.java      |  4 ++--
 .../julielab/jcore/reader/xmi/XmiDBReaderTest.java   |  2 +-
 .../src/test/resources/logback-test.xml              |  3 ++-
 .../consumer/xmi/XmiDBWriterBinaryFormatTest.java    |  2 +-
 .../xmi/XmiDBWriterMonolithicDocumentTest.java       |  2 +-
 .../julielab/jcore/consumer/xmi/XmiDBWriterTest.java |  2 +-
 .../jcore/reader/xml/XMLDBMultiplierTest.java        |  4 ++--
 16 files changed, 30 insertions(+), 25 deletions(-)

diff --git a/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBMultiplierReaderTest.java b/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBMultiplierReaderTest.java
index 33f73c0eb..c10ff9670 100644
--- a/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBMultiplierReaderTest.java
+++ b/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBMultiplierReaderTest.java
@@ -27,7 +27,7 @@
 @Testcontainers
 public class DBMultiplierReaderTest {
     @Container
-    public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:11.12");
+    public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:"+DataBaseConnector.POSTGRES_VERSION);
 
     @BeforeAll
     public static void setup() throws SQLException {
@@ -41,7 +41,7 @@ public static void setup() throws SQLException {
     @Test
     public void testDBMultiplierReader() throws UIMAException, IOException, ConfigurationException {
 
-        String costosysConfig = DBTestUtils.createTestCostosysConfig("medline_2017", 1, postgres);
+        String costosysConfig = DBTestUtils.createTestCostosysConfig("medline_2017", 2, postgres);
         CollectionReader reader = CollectionReaderFactory.createReader(DBMultiplierReader.class,
                 PARAM_BATCH_SIZE, 5,
                 PARAM_TABLE, "testsubset",
diff --git a/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBMultiplierTest.java b/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBMultiplierTest.java
index 7a90917ad..350f610fb 100644
--- a/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBMultiplierTest.java
+++ b/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBMultiplierTest.java
@@ -39,7 +39,7 @@
 public class DBMultiplierTest {
     private final static Logger log = LoggerFactory.getLogger(DBMultiplierTest.class);
     @Container
-    public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer("postgres:11.12");
+    public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:"+DataBaseConnector.POSTGRES_VERSION);
 
     @BeforeAll
     public static void setup() throws SQLException, IOException {
diff --git a/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBReaderTest.java b/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBReaderTest.java
index 6cb6f3fcf..46b8ac436 100644
--- a/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBReaderTest.java
+++ b/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBReaderTest.java
@@ -31,7 +31,7 @@
 @Testcontainers
 public class DBReaderTest {
     @Container
-    public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:11.12");
+    public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:"+DataBaseConnector.POSTGRES_VERSION);
 
     @BeforeAll
     public static void setup() throws SQLException {
@@ -43,7 +43,7 @@ public static void setup() throws SQLException {
 
     @Test
     public void testDBReader() throws UIMAException, IOException, ConfigurationException {
-        String costosysConfig = DBTestUtils.createTestCostosysConfig("medline_2017", 1, postgres);
+        String costosysConfig = DBTestUtils.createTestCostosysConfig("medline_2017", 2, postgres);
         CollectionReader reader = CollectionReaderFactory.createReader(DBReaderTestImpl.class,
                 PARAM_BATCH_SIZE, 5,
                 PARAM_TABLE, "testsubset",
diff --git a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierReader.java b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierReader.java
index 22cadadcc..185bdd1d4 100644
--- a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierReader.java
+++ b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierReader.java
@@ -42,14 +42,14 @@ public class XmiDBMultiplierReader extends DBMultiplierReader {
     public static final String PARAM_ANNOTATIONS_TO_LOAD = Initializer.PARAM_ANNOTATIONS_TO_LOAD;
     public static final String PARAM_XMI_META_SCHEMA = "XmiMetaTablesSchema";
     private final static Logger log = LoggerFactory.getLogger(XmiDBMultiplierReader.class);
+    @ConfigurationParameter(name = PARAM_ANNOTATIONS_TO_LOAD, mandatory = false, description = "An array of qualified UIMA type names. The provided names will be converted to database table column names in an equivalent manner as the XMIDBWriter does when storing the annotations. Thus, by default the columns of the XMI table holding annotation module information are named by lowercased UIMA type name where dots are replaced by underscores.. This can be overwritten by appending '<schema>:' to a table name. The given type names will be converted to valid Postgres columns names by replacing dots with underscores and the colon will be converted to the dollar character. From the resolved columns, annotation modules in segmented XMI format are read where an annotation module contains all annotation instances of a specific type in a specific document. All annotation modules read this way are merged with the base document, resulting in valid XMI data which is then deserialized into the CAS.")
+    protected String[] qualifiedAnnotationColumnNames;
     @ConfigurationParameter(name = PARAM_READS_BASE_DOCUMENT, description = "Indicates if this reader reads segmented " +
             "annotation data. If set to false, the XMI data is expected to represent complete annotated documents. " +
             "If it is set to true, a segmented annotation graph is expected and the table given with the 'Table' parameter " +
             "will contain the document text together with some basic annotations. What exactly is stored in which manner " +
             "is determined by the jcore-xmi-db-consumer used to write the data into the database.")
     private Boolean readsBaseDocument;
-    @ConfigurationParameter(name = PARAM_ANNOTATIONS_TO_LOAD, mandatory = false, description = "An array of qualified UIMA type names. The provided names will be converted to database table column names in an equivalent manner as the XMIDBWriter does when storing the annotations. Thus, by default the columns of the XMI table holding annotation module information are named by lowercased UIMA type name where dots are replaced by underscores.. This can be overwritten by appending '<schema>:' to a table name. The given type names will be converted to valid Postgres columns names by replacing dots with underscores and the colon will be converted to the dollar character. From the resolved columns, annotation modules in segmented XMI format are read where an annotation module contains all annotation instances of a specific type in a specific document. All annotation modules read this way are merged with the base document, resulting in valid XMI data which is then deserialized into the CAS.")
-    protected String[] qualifiedAnnotationColumnNames;
     @ConfigurationParameter(name = PARAM_STORE_XMI_ID, mandatory = false, description = "This parameter is required " +
             "to be set to true, if this reader is contained in a pipeline that also contains a jcore-xmi-db-writer and" +
             "the writer will segment the CAS annotation graph and store only parts of it. Then, it is important to " +
@@ -68,7 +68,7 @@ public class XmiDBMultiplierReader extends DBMultiplierReader {
             "(j)visualvm, the hot spots of work can be identified. If one of those is the XML attribute buffer " +
             "resizing, this parameter should be set to a size that makes buffer resizing unnecessary.")
     private int xercesAttributeBufferSize;
-    @ConfigurationParameter(name = PARAM_XMI_META_SCHEMA, mandatory = false, defaultValue = "public", description = "Each XMI file defines a number of XML namespaces according to the types used in the document. Those namespaces are stored in a table named '" +XmiSplitConstants.XMI_NS_TABLE + "' when splitting annotations in annotation modules by the XMI DB writer. This parameter allows to specify in which Postgres schema this table should be looked for. Also, the table listing the annotation tables is stored in this Postgres schema. Defaults to 'public'.")
+    @ConfigurationParameter(name = PARAM_XMI_META_SCHEMA, mandatory = false, defaultValue = "public", description = "Each XMI file defines a number of XML namespaces according to the types used in the document. Those namespaces are stored in a table named '" + XmiSplitConstants.XMI_NS_TABLE + "' when splitting annotations in annotation modules by the XMI DB writer. This parameter allows to specify in which Postgres schema this table should be looked for. Also, the table listing the annotation tables is stored in this Postgres schema. Defaults to 'public'.")
     private String xmiMetaSchema;
     private boolean doGzip;
     private String[] additionalTableNames;
@@ -107,7 +107,7 @@ public void getNext(JCas jCas) throws CollectionException {
             rowBatch.setXercesAttributeBufferSize(xercesAttributeBufferSize);
             rowBatch.setXmiMetaTablesPostgresSchema(xmiMetaSchema);
         } catch (Throwable throwable) {
-            log.error("Exception ocurred while trying to get the next document", throwable);
+            log.error("Exception occurred while trying to get the next document", throwable);
             throw throwable;
         }
     }
@@ -122,6 +122,8 @@ private void adaptReaderConfigurationForXmiData() throws ResourceInitializationE
         costosysConfig = (String) getConfigParameterValue(PARAM_COSTOSYS_CONFIG_NAME);
         try {
             dbc = new DataBaseConnector(costosysConfig);
+            if (dbc.getMaxConnections() < 3)
+                dbc.setMaxConnections(3);
         } catch (FileNotFoundException e) {
             throw new ResourceInitializationException(e);
         }
@@ -185,7 +187,7 @@ private void determineDataFormat(String table) throws ResourceInitializationExce
     }
 
     private void checkForJeDISBinaryFormat(byte[] firstTwoBytes) {
-        short header = (short) ((firstTwoBytes[0]<<8) | (0xff & firstTwoBytes[1]));
+        short header = (short) ((firstTwoBytes[0] << 8) | (0xff & firstTwoBytes[1]));
         if (header != BinaryJeDISNodeEncoder.JEDIS_BINARY_MAGIC) {
             useBinaryFormat = false;
             log.debug("Is data encoded in JeDIS binary format: false");
diff --git a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierDifferentNsSchemaTest.java b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierDifferentNsSchemaTest.java
index ff60e41a0..73dcdc055 100644
--- a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierDifferentNsSchemaTest.java
+++ b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierDifferentNsSchemaTest.java
@@ -31,7 +31,7 @@
 
 
 public class XmiDBMultiplierDifferentNsSchemaTest {
-    public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer();
+    public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:"+DataBaseConnector.POSTGRES_VERSION);
     private static String costosysConfig;
     private static int subsetCounter;
 
diff --git a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierTest.java b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierTest.java
index cde2d026f..fabc558aa 100644
--- a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierTest.java
+++ b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierTest.java
@@ -31,7 +31,7 @@
 
 
 public class XmiDBMultiplierTest {
-    public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:11.12");
+    public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:"+DataBaseConnector.POSTGRES_VERSION);
     private static String costosysConfig;
     private static int subsetCounter;
 
@@ -40,6 +40,7 @@ public static void setup() throws UIMAException, IOException, ConfigurationExcep
         postgres.start();
         XmiDBSetupHelper.createDbcConfig(postgres);
         DataBaseConnector dbc = DBTestUtils.getDataBaseConnector(postgres);
+        dbc.setMaxConnections(3);
         costosysConfig = DBTestUtils.createTestCostosysConfig("xmi_text", 10, postgres);
         new File(costosysConfig).deleteOnExit();
         XmiDBSetupHelper.processAndSplitData(costosysConfig, false, false,"public");
@@ -57,6 +58,7 @@ public static void shutdown() {
     @Test(threadPoolSize = 3, invocationCount = 10, timeOut = 500000)
     public void testXmiDBMultiplierReader() throws Exception {
         DataBaseConnector dbc = DBTestUtils.getDataBaseConnector(postgres);
+        dbc.setMaxConnections(5);
         String xmisubset;
         synchronized (XmiDBMultiplierDifferentNsSchemaTest.class) {
             xmisubset = "xmisubset" + subsetCounter++;
diff --git a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderBinaryFormatTest.java b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderBinaryFormatTest.java
index d2fc88444..5af87e804 100644
--- a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderBinaryFormatTest.java
+++ b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderBinaryFormatTest.java
@@ -26,7 +26,7 @@
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class XmiDBReaderBinaryFormatTest {
-    public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer();
+    public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:"+DataBaseConnector.POSTGRES_VERSION);
     private static String costosysConfig;
     private static String xmisubset;
 
diff --git a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderDifferentNsSchemaTest.java b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderDifferentNsSchemaTest.java
index d592bec9e..8ae996691 100644
--- a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderDifferentNsSchemaTest.java
+++ b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderDifferentNsSchemaTest.java
@@ -26,7 +26,7 @@
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class XmiDBReaderDifferentNsSchemaTest {
-    public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer();
+    public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:"+DataBaseConnector.POSTGRES_VERSION);
     private static String costosysConfig;
     private static String xmisubset;
 
diff --git a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderGzippedDataTest.java b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderGzippedDataTest.java
index 9a7fea0b3..e25808419 100644
--- a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderGzippedDataTest.java
+++ b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderGzippedDataTest.java
@@ -31,7 +31,7 @@
  * The exact same test as {@link XmiDBReaderTest} but here, the data is gzipped.
  */
 public class XmiDBReaderGzippedDataTest {
-    public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer();
+    public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:"+DataBaseConnector.POSTGRES_VERSION);
     private static String costosysConfig;
     private static String xmisubset;
 
@@ -41,7 +41,7 @@ public static void setup() throws SQLException, UIMAException, IOException, Conf
         XmiDBSetupHelper.createDbcConfig(postgres);
 
         DataBaseConnector dbc = DBTestUtils.getDataBaseConnector(postgres);
-        costosysConfig = DBTestUtils.createTestCostosysConfig("xmi_text", 1, postgres);
+        costosysConfig = DBTestUtils.createTestCostosysConfig("xmi_text", 2, postgres);
         new File(costosysConfig).deleteOnExit();
         XmiDBSetupHelper.processAndSplitData(costosysConfig, true, false,"public");
         assertTrue(dbc.withConnectionQueryBoolean( c -> c.tableExists("_data.documents")), "The data document table exists");
diff --git a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderMonolithicDocumentsTest.java b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderMonolithicDocumentsTest.java
index e0ae7f3ed..8b0dab1d2 100644
--- a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderMonolithicDocumentsTest.java
+++ b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderMonolithicDocumentsTest.java
@@ -28,7 +28,7 @@
 
 
 public class XmiDBReaderMonolithicDocumentsTest {
-    public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer();
+    public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:"+DataBaseConnector.POSTGRES_VERSION);
     private static String costosysConfig;
     private static String xmisubset;
 
@@ -38,7 +38,7 @@ public static void setup() throws SQLException, UIMAException, IOException, Conf
         XmiDBSetupHelper.createDbcConfig(postgres);
 
         DataBaseConnector dbc = DBTestUtils.getDataBaseConnector(postgres);
-        costosysConfig = DBTestUtils.createTestCostosysConfig("xmi_complete_cas", 1, postgres);
+        costosysConfig = DBTestUtils.createTestCostosysConfig("xmi_complete_cas", 2, postgres);
         new File(costosysConfig).deleteOnExit();
         XmiDBSetupHelper.processAndStoreCompleteXMIData(costosysConfig, true);
         dbc.reserveConnection();
diff --git a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderTest.java b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderTest.java
index cf1d089ef..36ca9601a 100644
--- a/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderTest.java
+++ b/jcore-xmi-db-reader/src/test/java/de/julielab/jcore/reader/xmi/XmiDBReaderTest.java
@@ -28,7 +28,7 @@
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
 public class XmiDBReaderTest {
-    public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer();
+    public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:"+DataBaseConnector.POSTGRES_VERSION);
     private static String costosysConfig;
     private static String xmisubset;
 
diff --git a/jcore-xmi-db-reader/src/test/resources/logback-test.xml b/jcore-xmi-db-reader/src/test/resources/logback-test.xml
index edc553153..6a4a567cd 100644
--- a/jcore-xmi-db-reader/src/test/resources/logback-test.xml
+++ b/jcore-xmi-db-reader/src/test/resources/logback-test.xml
@@ -10,7 +10,8 @@
     </appender>
     <logger name="de.julielab.jcore.reader.xmi" level="INFO"/>
     <logger name="de.julielab.xml.binary.BinaryJeDISNodeDecoder" level="INFO"/>
-    <logger name="de.julielab.jcore.reader.xmi.CasPopulator" level="TRACE"/>
+    <logger name="de.julielab.jcore.reader.xmi.CasPopulator" level="INFO"/>
+    <logger name="de.julielab.costosys.dbconnection.DataBaseConnector" level="INFO"/>
     <root level="INFO">
         <appender-ref ref="STDOUT" />
     </root>
diff --git a/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterBinaryFormatTest.java b/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterBinaryFormatTest.java
index 15b5fc5c9..135affc2d 100644
--- a/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterBinaryFormatTest.java
+++ b/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterBinaryFormatTest.java
@@ -42,7 +42,7 @@
 @Testcontainers
 public class XmiDBWriterBinaryFormatTest {
     @Container
-    public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer();
+    public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:"+DataBaseConnector.POSTGRES_VERSION);
     private static String costosysConfig;
     private static String xmlSubsetTable;
     private static DataBaseConnector dbc;
diff --git a/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterMonolithicDocumentTest.java b/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterMonolithicDocumentTest.java
index 6af2d578d..6f8611d29 100644
--- a/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterMonolithicDocumentTest.java
+++ b/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterMonolithicDocumentTest.java
@@ -31,7 +31,7 @@
 @Testcontainers
 public class XmiDBWriterMonolithicDocumentTest {
     @Container
-    public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer();
+    public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:"+DataBaseConnector.POSTGRES_VERSION);
     private static String costosysConfig;
     private static DataBaseConnector dbc;
 
diff --git a/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterTest.java b/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterTest.java
index 5f3a979bb..fbcb62164 100644
--- a/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterTest.java
+++ b/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterTest.java
@@ -31,7 +31,7 @@
 @Testcontainers
 public class XmiDBWriterTest {
     @Container
-    public static PostgreSQLContainer postgres = (PostgreSQLContainer) new PostgreSQLContainer();
+    public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:"+DataBaseConnector.POSTGRES_VERSION);
     private static String costosysConfig;
     private static String xmlSubsetTable;
     private static DataBaseConnector dbc;
diff --git a/jcore-xml-db-reader/src/test/java/de/julielab/jcore/reader/xml/XMLDBMultiplierTest.java b/jcore-xml-db-reader/src/test/java/de/julielab/jcore/reader/xml/XMLDBMultiplierTest.java
index f14839236..3e2cd9f79 100644
--- a/jcore-xml-db-reader/src/test/java/de/julielab/jcore/reader/xml/XMLDBMultiplierTest.java
+++ b/jcore-xml-db-reader/src/test/java/de/julielab/jcore/reader/xml/XMLDBMultiplierTest.java
@@ -49,7 +49,7 @@ public class XMLDBMultiplierTest {
     private static final String MAX_XMI_ID_FIELD_NAME = "max_xmi_id";
     private static final String SOFA_MAPPING_FIELD_NAME = "sofa_mapping";
     private static final String SUBSET_TABLE = "test_subset";
-    public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:11.12");
+    public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:"+DataBaseConnector.POSTGRES_VERSION);
     private static String costosysConfig;
 
     @BeforeAll
@@ -59,7 +59,7 @@ public static void setup() throws SQLException, UIMAException, IOException, Conf
 
         DataBaseConnector dbc = DBTestUtils.getDataBaseConnector(postgres);
         dbc.setActiveTableSchema("medline_2016_nozip");
-        costosysConfig = DBTestUtils.createTestCostosysConfig("medline_2016_nozip", 1, postgres);
+        costosysConfig = DBTestUtils.createTestCostosysConfig("medline_2016_nozip", 2, postgres);
         new File(costosysConfig).deleteOnExit();
         try (CoStoSysConnection conn = dbc.obtainOrReserveConnection()) {
             // We create two tables. One is the XML table the multiplier reads from and maps the contents to the JCas.

From f1f84d55b4dc08cf52475d42bfd3675a9378d8bb Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 1 Jul 2021 13:38:05 +0200
Subject: [PATCH 076/269] Added "cut away characters" to
 JCoReCondensedDocumentText.

---
 .../java/banner/tagging/pipe/LemmaPOS.java    |  14 +-
 .../jcore/ae/jsbd/main/SentenceAnnotator.java | 136 ++++++++++--------
 .../ae/jsbd/main/SentenceAnnotatorTest.java   |  48 ++++---
 .../src/test/resources/errordocs/README.md    |   4 +
 .../utility/JCoReCondensedDocumentText.java   |  41 ++++--
 .../JCoReCondensedDocumentTextTest.java       |  56 ++++++++
 .../src/test/resources/PMC5478802.xmi         |   5 +
 .../jcore/consumer/xmi/XmiDBWriterTest.java   |  40 +++++-
 8 files changed, 251 insertions(+), 93 deletions(-)
 create mode 100644 jcore-jsbd-ae/src/test/resources/errordocs/README.md
 create mode 100644 jcore-utilities/src/test/resources/PMC5478802.xmi

diff --git a/jcore-banner-ae/src/main/java/banner/tagging/pipe/LemmaPOS.java b/jcore-banner-ae/src/main/java/banner/tagging/pipe/LemmaPOS.java
index 1c28c28b0..8068cfa1b 100644
--- a/jcore-banner-ae/src/main/java/banner/tagging/pipe/LemmaPOS.java
+++ b/jcore-banner-ae/src/main/java/banner/tagging/pipe/LemmaPOS.java
@@ -43,14 +43,16 @@ public LemmaPOS(Lemmatiser lemmatiser, Tagger posTagger) {
     public void setLemmatiser(Lemmatiser lemmatiser) {
         initResourcesMap();
         getResources().lemmatiser = lemmatiser;
+        System.out.println("Setting lemmatiser to " + Thread.currentThread());
     }
 
     public void setPosTagger(Tagger posTagger) {
         initResourcesMap();
         getResources().posTagger = posTagger;
+        System.out.println("Setting PoS Tagger to " + Thread.currentThread());
     }
 
-    private void initResourcesMap() {
+    synchronized private void initResourcesMap() {
         if (resourcesByThread == null)
             resourcesByThread = new HashMap<>();
     }
@@ -67,7 +69,7 @@ private Resources getResources() {
     @Override
     public Instance pipe(Instance carrier) {
         if (expectLemmatiser != (getResources().lemmatiser != null))
-            throw new IllegalStateException("Model was trained with lemmatiser; not present in current config");
+            throw new IllegalStateException("Model was trained with lemmatiser; not present in current config; resource map: " + resourcesByThread + ", current thread: " + Thread.currentThread());
         if (expectPOSTagger != (getResources().posTagger != null))
             throw new IllegalStateException("Model was trained with POS tagger; not present in current config");
         // TODO Add prefix ability
@@ -112,5 +114,13 @@ public Instance pipe(Instance carrier) {
     private class Resources {
         public Lemmatiser lemmatiser;
         public Tagger posTagger;
+
+        @Override
+        public String toString() {
+            return "Resources{" +
+                    "lemmatiser=" + lemmatiser +
+                    ", posTagger=" + posTagger +
+                    '}';
+        }
     }
 }
diff --git a/jcore-jsbd-ae/src/main/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotator.java b/jcore-jsbd-ae/src/main/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotator.java
index a27107477..c91869654 100644
--- a/jcore-jsbd-ae/src/main/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotator.java
+++ b/jcore-jsbd-ae/src/main/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotator.java
@@ -146,77 +146,91 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
      * @throws AnalysisEngineProcessException
      */
     public void process(JCas aJCas) throws AnalysisEngineProcessException {
-        if (StringUtils.isBlank(aJCas.getDocumentText())) {
-            final String docId = JCoReTools.getDocId(aJCas);
-            LOGGER.warn("The document text of document {} is empty.", docId);
-            return;
-        }
-        JCoReCondensedDocumentText documentText;
         try {
-            // If there are no cut-away types, the document text will remain unchanged.
-            documentText = new JCoReCondensedDocumentText(aJCas, cutAwayTypes);
-        } catch (ClassNotFoundException e1) {
-            throw new AnalysisEngineProcessException(e1);
-        }
-
-        if (sentenceDelimiterTypes != null) {
+            if (StringUtils.isBlank(aJCas.getDocumentText())) {
+                final String docId = JCoReTools.getDocId(aJCas);
+                LOGGER.warn("The document text of document {} is empty.", docId);
+                return;
+            }
+            JCoReCondensedDocumentText documentText;
             try {
-                // the index merger gives us access to all delimiter type
-                // indexes in one
-                JCoReAnnotationIndexMerger indexMerger = new JCoReAnnotationIndexMerger(sentenceDelimiterTypes, false,
-                        null, aJCas);
+                // If there are no cut-away types, the document text will remain unchanged.
+                documentText = new JCoReCondensedDocumentText(aJCas, cutAwayTypes);
+            } catch (ClassNotFoundException e1) {
+                LOGGER.error("Could not create the text without annotations to be cut away in document {}", JCoReTools.getDocId(aJCas), e1);
+                throw new AnalysisEngineProcessException(e1);
+            }
 
-                // the idea: collect all start and end offsets of sentence
-                // delimiter annotations (sections, titles, captions, ...) in a
-                // list and sort ascending; then, perform sentence segmentation
-                // between every two adjacent offsets. This way, no sentence can
-                // cross any delimiter annotation border
-                List<Integer> borders = new ArrayList<>();
-                borders.add(0);
-                borders.add(aJCas.getDocumentText().length());
-                while (indexMerger.incrementAnnotation()) {
-                    Annotation a = (Annotation) indexMerger.getAnnotation();
-                    // Here we convert the original offsets to the condensed offsets. If there are
-                    // no cut-away types, the offsets will just remain unchanged. Otherwise we now
-                    // have the borders of the condensed text passages associated with the sentence
-                    // delimiter annotation.
-                    borders.add(documentText.getCondensedOffsetForOriginalOffset(a.getBegin()));
-                    borders.add(documentText.getCondensedOffsetForOriginalOffset(a.getEnd()));
-                }
-                borders.sort(null);
+            if (sentenceDelimiterTypes != null) {
+                try {
+                    // the index merger gives us access to all delimiter type
+                    // indexes in one
+                    JCoReAnnotationIndexMerger indexMerger = new JCoReAnnotationIndexMerger(sentenceDelimiterTypes, false,
+                            null, aJCas);
 
-                // now do sentence segmentation between annotation borders
-                for (int i = 1; i < borders.size(); ++i) {
-                    int start = borders.get(i - 1);
-                    int end = borders.get(i);
+                    // the idea: collect all start and end offsets of sentence
+                    // delimiter annotations (sections, titles, captions, ...) in a
+                    // list and sort ascending; then, perform sentence segmentation
+                    // between every two adjacent offsets. This way, no sentence can
+                    // cross any delimiter annotation border
+                    List<Integer> borders = new ArrayList<>();
+                    borders.add(0);
+                    borders.add(aJCas.getDocumentText().length());
+                    while (indexMerger.incrementAnnotation()) {
+                        Annotation a = (Annotation) indexMerger.getAnnotation();
+                        System.out.println(a.getCoveredText());
+                        System.out.println("--");
+                        System.out.println(documentText.getCodensedText().substring(documentText.getCondensedOffsetForOriginalOffset(a.getBegin()), documentText.getOriginalOffsetForCondensedOffset(a.getEnd())));
+                        System.out.println(a.getBegin() + " - " + a.getEnd() + ", " + documentText.getCondensedOffsetForOriginalOffset(a.getBegin()) + " - " + documentText.getOriginalOffsetForCondensedOffset(a.getEnd()));
+                        System.out.println();
+                        // Here we convert the original offsets to the condensed offsets. If there are
+                        // no cut-away types, the offsets will just remain unchanged. Otherwise we now
+                        // have the borders of the condensed text passages associated with the sentence
+                        // delimiter annotation.
+                        borders.add(documentText.getCondensedOffsetForOriginalOffset(a.getBegin()));
+                        assert borders.get(borders.size() - 1) < documentText.getCodensedText().length();
+                        borders.add(documentText.getCondensedOffsetForOriginalOffset(a.getEnd()));
+                        assert borders.get(borders.size() - 1) < documentText.getCodensedText().length() : "Original offset "+a.getEnd()+" is mapped to condensed offset " + documentText.getCondensedOffsetForOriginalOffset(a.getEnd());
+                    }
+                    borders.sort(null);
 
-                    // skip leading whites spaces
-                    while (start < end && Character.isWhitespace(aJCas.getDocumentText().charAt(start)))
-                        ++start;
+                    // now do sentence segmentation between annotation borders
+                    for (int i = 1; i < borders.size(); ++i) {
+                        int start = borders.get(i - 1);
+                        int end = borders.get(i);
 
-                    // get the string between the current annotation borders and recognize sentences
-                    String textSpan = documentText.getCodensedText().substring(start, end);
-                    if (!StringUtils.isBlank(textSpan))
-                        doSegmentation(documentText, textSpan, start);
-                }
+                        // skip leading whitespace
+                        while (start < end && Character.isWhitespace(aJCas.getDocumentText().charAt(start)))
+                            ++start;
 
-            } catch (ClassNotFoundException e) {
-                throw new AnalysisEngineProcessException(e);
-            }
-        } else {
-            // if no processingScope set -> use documentText
-            if (aJCas.getDocumentText() != null && aJCas.getDocumentText().length() > 0) {
-                doSegmentation(documentText, documentText.getCodensedText(), 0);
-            } else {
-                if (numEmptyCases.get() < 10) {
-                    LOGGER.debug("document text empty. Skipping this document.");
-                    numEmptyCases.incrementAndGet();
-                } else if (numEmptyCases.get() == 10) {
-                    LOGGER.warn("Encountered 10 documents with an empty text body. This message will not appear again " +
-                            "to avoid scrolling in cases where this is expected.");
+                        // get the string between the current annotation borders and recognize sentences
+                        String textSpan = documentText.getCodensedText().substring(start, end);
+                        if (!StringUtils.isBlank(textSpan))
+                            doSegmentation(documentText, textSpan, start);
+                    }
+
+                } catch (ClassNotFoundException e) {
+                    throw new AnalysisEngineProcessException(e);
                 }
+            } else {
+                // sentence delimiter types are not given
+                // if no processingScope set -> use documentText
+                if (aJCas.getDocumentText() != null && aJCas.getDocumentText().length() > 0) {
+                    doSegmentation(documentText, documentText.getCodensedText(), 0);
+                } else {
+                    if (numEmptyCases.get() < 10) {
+                        LOGGER.debug("document text empty. Skipping this document.");
+                        numEmptyCases.incrementAndGet();
+                    } else if (numEmptyCases.get() == 10) {
+                        LOGGER.warn("Encountered 10 documents with an empty text body. This message will not appear again " +
+                                "to avoid scrolling in cases where this is expected.");
+                    }
 
+                }
             }
+        } catch (Throwable t) {
+            LOGGER.error("Could not perform sentence splitting of document {}", JCoReTools.getDocId(aJCas), t);
+            throw t;
         }
     }
 
diff --git a/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotatorTest.java b/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotatorTest.java
index 0f0870ae8..1455b9339 100644
--- a/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotatorTest.java
+++ b/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotatorTest.java
@@ -25,6 +25,7 @@
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.cas.FSIterator;
+import org.apache.uima.cas.impl.XmiCasDeserializer;
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.fit.util.JCasUtil;
@@ -39,7 +40,9 @@
 import org.slf4j.LoggerFactory;
 
 import java.io.File;
+import java.io.FileInputStream;
 import java.io.IOException;
+import java.nio.file.Path;
 import java.util.*;
 import java.util.stream.Collectors;
 
@@ -284,26 +287,31 @@ public void testSplitAtNewlines() throws Exception {
 		assertThat(sentences).containsExactly("line1", "line2", "line3");
 	}
 
-//
-//	@Test
-//	public void testmuh() throws Exception {
-//		JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
-//				"de.julielab.jcore.types.jcore-document-structure-types", "de.julielab.jcore.types.jcore-document-meta-pubmed-types",
-//				"de.julielab.jcore.types.extensions.jcore-document-meta-extension-types");
-//
-//		XmiCasDeserializer.deserialize(new FileInputStream("/Users/faessler/uima-pipelines/jedis-doc-to-xmi/data/output-xmi/4768370.xmi"), jCas.getCas());
-//		JCasUtil.select(jCas, Sentence.class).forEach(Annotation::removeFromIndexes);
-//		AnalysisEngine jsbd = AnalysisEngineFactory.createEngine(SentenceAnnotator.class, SentenceAnnotator.PARAM_MODEL_FILE,
-//				"/Users/faessler/Coding/git/jcore-projects/jcore-jsbd-ae-biomedical-english/src/main/resources/de/julielab/jcore/ae/jsbd/model/jsbd-biomed-oversampled-abstracts-split-at-punctuation.mod.gz", SentenceAnnotator.PARAM_MAX_SENTENCE_LENGTH, 1000);
-//
-//		jsbd.process(jCas.getCas());
-//
-//		Set<Integer> set = new TreeSet<>();
-//		for (Sentence s : JCasUtil.select(jCas, Sentence.class)) {
-//			set.add(s.getEnd() - s.getBegin());
-//		}
-//		XmiCasSerializer.serialize(jCas.getCas(), new FileOutputStream("smallSentences.xmi"));
-//	}
+
+	@Test
+	public void testErrordoc() throws Exception {
+		// The XMI document used here is from PMC and is an example of a source of error that previously occurred.
+		JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
+				"de.julielab.jcore.types.jcore-document-structure-pubmed-types", "de.julielab.jcore.types.jcore-document-meta-pubmed-types",
+				"de.julielab.jcore.types.extensions.jcore-document-meta-extension-types");
+
+		XmiCasDeserializer.deserialize(new FileInputStream(Path.of("src", "test", "resources", "errordocs", "PMC5478802.xmi").toFile()), jCas.getCas());
+		JCasUtil.select(jCas, Sentence.class).forEach(Annotation::removeFromIndexes);
+		AnalysisEngine jsbd = AnalysisEngineFactory.createEngine(SentenceAnnotator.class, SentenceAnnotator.PARAM_MODEL_FILE,
+				"/Users/faessler/Coding/git/jcore-projects/jcore-jsbd-ae-biomedical-english/src/main/resources/de/julielab/jcore/ae/jsbd/model/jsbd-biomed-oversampled-abstracts-split-at-punctuation.mod.gz",
+				SentenceAnnotator.PARAM_MAX_SENTENCE_LENGTH, 1000,
+				SentenceAnnotator.PARAM_SENTENCE_DELIMITER_TYPES, new String[]{
+						"de.julielab.jcore.types.Title", "de.julielab.jcore.types.AbstractText", "de.julielab.jcore.types.AbstractSectionHeading", "de.julielab.jcore.types.AbstractSection", "de.julielab.jcore.types.Section", "de.julielab.jcore.types.Paragraph", "de.julielab.jcore.types.Zone", "de.julielab.jcore.types.Caption", "de.julielab.jcore.types.Figure", "de.julielab.jcore.types.Table"},
+				SentenceAnnotator.PARAM_CUT_AWAY_TYPES, new String[]{de.julielab.jcore.types.pubmed.InternalReference.class.getCanonicalName()}
+		);
+
+		jsbd.process(jCas.getCas());
+		for (var s : JCasUtil.select(jCas, Sentence.class)) {
+			System.out.println(s.getCoveredText());
+			System.out.println("--");
+		}
+
+	}
 
 }
 
diff --git a/jcore-jsbd-ae/src/test/resources/errordocs/README.md b/jcore-jsbd-ae/src/test/resources/errordocs/README.md
new file mode 100644
index 000000000..d2278611f
--- /dev/null
+++ b/jcore-jsbd-ae/src/test/resources/errordocs/README.md
@@ -0,0 +1,4 @@
+# Errored Documents for Tests
+
+Documents in this directory were the subject of sentence splitting errors. The errors are fixed
+using the documents in a test.
\ No newline at end of file
diff --git a/jcore-utilities/src/main/java/de/julielab/jcore/utility/JCoReCondensedDocumentText.java b/jcore-utilities/src/main/java/de/julielab/jcore/utility/JCoReCondensedDocumentText.java
index a3e4bd532..76a8c5f45 100644
--- a/jcore-utilities/src/main/java/de/julielab/jcore/utility/JCoReCondensedDocumentText.java
+++ b/jcore-utilities/src/main/java/de/julielab/jcore/utility/JCoReCondensedDocumentText.java
@@ -22,10 +22,28 @@ public class JCoReCondensedDocumentText {
 	private NavigableMap<Integer, Integer> originalPos2SumCutMap;
 	private String condensedText;
 	private JCas cas;
+	private Set<Character> cutAwayFillCharacters;
 
 	public JCas getCas() {
 		return cas;
 	}
+	/**
+	 * <p>
+	 * Cuts away the covered text of annotations of a type in <tt>cutAwayTypes</tt>
+	 * from the <tt>cas</tt> document text. If <tt>cutAwayTypes</tt> is null or
+	 * empty, this class' methods will return the original CAS data.
+	 * </p>
+	 *
+	 * @param cas
+	 *            The CAS for which the document text should be cut.
+	 * @param cutAwayTypes
+	 *            The types for cutting. May be null.
+	 * @throws ClassNotFoundException
+	 *             If <tt>cutAwayTypes</tt> contains non-existing type names.
+	 */
+	public JCoReCondensedDocumentText(JCas cas, Set<String> cutAwayTypes) throws ClassNotFoundException {
+		this(cas, cutAwayTypes, null);
+	}
 
 	/**
 	 * <p>
@@ -33,16 +51,21 @@ public JCas getCas() {
 	 * from the <tt>cas</tt> document text. If <tt>cutAwayTypes</tt> is null or
 	 * empty, this class' methods will return the original CAS data.
 	 * </p>
+	 * <p>The <tt>cutAwayFillCharacters</tt> set may provide characters that, when being the only character between
+	 * two cut-away annotations, will add to the span of text being cut away. This way, enumerations of references
+	 * (e.g. "4,6,8") can be completely removed, for example.</p>
 	 * 
 	 * @param cas
 	 *            The CAS for which the document text should be cut.
 	 * @param cutAwayTypes
 	 *            The types for cutting. May be null.
+	 * @param cutAwayFillCharacters Characters that, when being the only separator between two cut away annotations, are also cut away.
 	 * @throws ClassNotFoundException
 	 *             If <tt>cutAwayTypes</tt> contains non-existing type names.
 	 */
-	public JCoReCondensedDocumentText(JCas cas, Set<String> cutAwayTypes) throws ClassNotFoundException {
+	public JCoReCondensedDocumentText(JCas cas, Set<String> cutAwayTypes, Set<Character> cutAwayFillCharacters) throws ClassNotFoundException {
 		this.cas = cas;
+		this.cutAwayFillCharacters = cutAwayFillCharacters;
 		buildMap(cas, cutAwayTypes);
 	}
 
@@ -80,24 +103,26 @@ public void buildMap(JCas cas, Set<String> cutAwayTypes) throws ClassNotFoundExc
 		int lastBegin = 0;
 		int lastEnd = -1;
 		// For each ignored annotation, there could be following annotations overlapping
-		// with the first, effectively enlargeing the ignored span. Thus, we iterate
-		// until we find an ignored annotation the has a positive (not 0) distance to a
+		// with the first, effectively enlarging the ignored span. Thus, we iterate
+		// until we find an ignored annotation that has a positive (not 0) distance to a
 		// previous one. Then, we store the length of the span of cut-away annotations
 		// for the largest end of the previous annotations.
 		while (merger.incrementAnnotation()) {
 			int end = merger.getCurrentEnd();
 			int begin = merger.getCurrentBegin();
 
-			if (lastEnd > 0 && begin > lastEnd) {
+			boolean moreThanOneCharacterDistance = begin - lastEnd > 2;
+			boolean previousCharacterIsCutAwayDelimiter = cutAwayFillCharacters == null || cutAwayFillCharacters.isEmpty() || (begin - lastEnd == 2 && cutAwayFillCharacters.contains(cas.getDocumentText().charAt(begin - 1)));
+			if (lastEnd > 0 && begin > lastEnd && (previousCharacterIsCutAwayDelimiter || moreThanOneCharacterDistance)) {
 				cutSum += lastEnd - lastBegin;
 				int condensedPosition = lastEnd - cutSum + 1;
 				condensedPos2SumCutMap.put(condensedPosition, cutSum);
 				originalPos2SumCutMap.put(lastEnd, cutSum);
 				lastBegin = begin;
-				sb.append(cas.getDocumentText().substring(lastEnd, begin));
+				sb.append(cas.getDocumentText(), lastEnd, begin);
 			} else if (lastEnd < 0) {
 				lastBegin = begin;
-				sb.append(cas.getDocumentText().substring(0, begin));
+				sb.append(cas.getDocumentText(), 0, begin);
 			}
 			lastEnd = end;
 		}
@@ -110,11 +135,11 @@ public void buildMap(JCas cas, Set<String> cutAwayTypes) throws ClassNotFoundExc
 			condensedPos2SumCutMap.put(condensedPosition, cutSum);
 			originalPos2SumCutMap.put(lastEnd, cutSum);
 		}
-		// If lastEnd is still -1 one, we just did not find any of the cut away annotations. Thus, we just copy the whole text.
+		// If lastEnd is still -1, we just did not find any of the cut away annotations. Thus, we just copy the whole text.
 		if (lastEnd == -1)
 		    lastEnd = 0;
 		if (lastEnd < cas.getDocumentText().length())
-			sb.append(cas.getDocumentText().substring(lastEnd, cas.getDocumentText().length()));
+			sb.append(cas.getDocumentText().substring(lastEnd));
 		condensedText = sb.toString();
 	}
 
diff --git a/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReCondensedDocumentTextTest.java b/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReCondensedDocumentTextTest.java
index 12672e122..58fdcc137 100644
--- a/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReCondensedDocumentTextTest.java
+++ b/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReCondensedDocumentTextTest.java
@@ -1,12 +1,17 @@
 package de.julielab.jcore.utility;
 
 import de.julielab.jcore.types.InternalReference;
+import org.apache.uima.cas.impl.XmiCasDeserializer;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
 import org.junit.jupiter.api.Test;
 
+import java.io.FileInputStream;
+import java.nio.file.Path;
 import java.util.Arrays;
 import java.util.HashSet;
+import java.util.Set;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 
@@ -59,4 +64,55 @@ public void testReduce2() throws Exception {
 		assertEquals(28, condensedText.getCondensedOffsetForOriginalOffset(30));
 		assertEquals(29, condensedText.getCondensedOffsetForOriginalOffset(31));
 	}
+
+	@Test
+	public void testReduce3() throws Exception {
+		// Here we also add commas as cut away characters, offering the possibility to remove enumerations of
+		// references completely.
+		JCas jcas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
+				"de.julielab.jcore.types.jcore-document-structure-types");
+		jcas.setDocumentText("This sentence has multiple references.2,5,42 This is a second sentence.7,8");
+		InternalReference ref1 = new InternalReference(jcas, 38, 39);
+		ref1.addToIndexes();
+		InternalReference ref2 = new InternalReference(jcas, 40, 41);
+		ref2.addToIndexes();
+		InternalReference ref3 = new InternalReference(jcas, 42, 44);
+		ref3.addToIndexes();
+		InternalReference ref4 = new InternalReference(jcas, 71, 72);
+		ref4.addToIndexes();
+		InternalReference ref5 = new InternalReference(jcas, 73, 74);
+		ref5.addToIndexes();
+
+		JCoReCondensedDocumentText condensedText = new JCoReCondensedDocumentText(jcas,
+				new HashSet<>(Arrays.asList(InternalReference.class.getCanonicalName())), Set.of(','));
+		assertEquals("This sentence has multiple references. This is a second sentence.", condensedText.getCodensedText());
+	}
+
+	@Test
+	public void testErrorDoc() throws Exception{
+		// The XMI document used here is from PMC and is an example of a source of error that previously occurred.
+		JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
+				"de.julielab.jcore.types.jcore-document-structure-pubmed-types", "de.julielab.jcore.types.jcore-document-meta-pubmed-types",
+				"de.julielab.jcore.types.extensions.jcore-document-meta-extension-types");
+
+		XmiCasDeserializer.deserialize(new FileInputStream(Path.of("src", "test", "resources", "PMC5478802.xmi").toFile()), jCas.getCas());
+		JCoReCondensedDocumentText text = new JCoReCondensedDocumentText(jCas, Set.of(de.julielab.jcore.types.pubmed.InternalReference.class.getCanonicalName()));
+//		Set<String> sentenceBoundaryTypes = Set.of("de.julielab.jcore.types.Title", "de.julielab.jcore.types.AbstractText", "de.julielab.jcore.types.AbstractSectionHeading", "de.julielab.jcore.types.AbstractSection", "de.julielab.jcore.types.Section", "de.julielab.jcore.types.Paragraph", "de.julielab.jcore.types.Zone", "de.julielab.jcore.types.Caption", "de.julielab.jcore.types.Figure", "de.julielab.jcore.types.Table");
+		Set<String> sentenceBoundaryTypes = Set.of("de.julielab.jcore.types.Section");
+		JCoReAnnotationIndexMerger indexMerger = new JCoReAnnotationIndexMerger(sentenceBoundaryTypes, false,
+				null, jCas);
+
+		while (indexMerger.incrementAnnotation()) {
+			Annotation a = (Annotation) indexMerger.getAnnotation();
+			System.out.println(a.getCoveredText());
+			System.out.println("--");
+			int condensedBegin = text.getCondensedOffsetForOriginalOffset(a.getBegin());
+			int condensedEnd = text.getOriginalOffsetForCondensedOffset(a.getEnd());
+			if (condensedEnd > text.getCodensedText().length())
+				System.out.println();
+			System.out.println(text.getCodensedText().substring(condensedBegin, condensedEnd));
+			System.out.println(a.getBegin() + " - " + a.getEnd() + ", " + condensedBegin + " - " + condensedEnd);
+			System.out.println();
+		}
+	}
 }
diff --git a/jcore-utilities/src/test/resources/PMC5478802.xmi b/jcore-utilities/src/test/resources/PMC5478802.xmi
new file mode 100644
index 000000000..c4d8ca95a
--- /dev/null
+++ b/jcore-utilities/src/test/resources/PMC5478802.xmi
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xmi:XMI xmlns:xmi="http://www.omg.org/XMI" xmlns:tcas="http:///uima/tcas.ecore" xmlns:cas="http:///uima/cas.ecore"
+         xmlns:pubmed="http:///de/julielab/jcore/types/pubmed.ecore"
+         xmlns:ext="http:///de/julielab/jcore/types/ext.ecore" xmlns:types="http:///de/julielab/jcore/types.ecore"
+         xmi:version="2.0"><cas:NULL xmi:id="0"/><types:Table xmi:id="1" sofa="1145" begin="4073" end="4117" componentId="de.julielab.jcore.reader.pmc.PMCReader" objectId="osp499-tbl-0001" objectLabel="Table 1" objectCaption="662" objectTitle="711"/><types:Table xmi:id="946" sofa="1145" begin="6733" end="6772" componentId="de.julielab.jcore.reader.pmc.PMCReader" objectId="osp499-tbl-0002" objectLabel="Table 2" objectCaption="1286" objectTitle="412"/><types:Table xmi:id="1219" sofa="1145" begin="8066" end="8150" componentId="de.julielab.jcore.reader.pmc.PMCReader" objectId="osp499-tbl-0003" objectLabel="Table 3" objectCaption="567" objectTitle="77"/><types:Table xmi:id="301" sofa="1145" begin="8150" end="8236" componentId="de.julielab.jcore.reader.pmc.PMCReader" objectId="osp499-tbl-0004" objectLabel="Table 4" objectCaption="92" objectTitle="118"/><pubmed:InternalReference xmi:id="637" sofa="1145" begin="1584" end="1585" reftype="other" refid="osp499-bib-0001"/><pubmed:InternalReference xmi:id="238" sofa="1145" begin="1587" end="1588" reftype="other" refid="osp499-bib-0002"/><pubmed:InternalReference xmi:id="314" sofa="1145" begin="1590" end="1591" reftype="other" refid="osp499-bib-0003"/><pubmed:InternalReference xmi:id="853" sofa="1145" begin="1709" end="1710" reftype="other" refid="osp499-bib-0004"/><pubmed:InternalReference xmi:id="1330" sofa="1145" begin="1807" end="1808" reftype="other" refid="osp499-bib-0005"/><pubmed:InternalReference xmi:id="1136" sofa="1145" begin="1810" end="1811" reftype="other" refid="osp499-bib-0006"/><pubmed:InternalReference xmi:id="256" sofa="1145" begin="1967" end="1968" reftype="other" refid="osp499-bib-0007"/><pubmed:InternalReference xmi:id="1152" sofa="1145" begin="2165" end="2166" reftype="other" refid="osp499-bib-0008"/><pubmed:InternalReference xmi:id="101" sofa="1145" begin="2168" end="2169" reftype="other" refid="osp499-bib-0009"/><pubmed:InternalReference xmi:id="499" sofa="1145" begin="2171" end="2173" reftype="other" 
refid="osp499-bib-0010"/><pubmed:InternalReference xmi:id="1303" sofa="1145" begin="2235" end="2237" reftype="other" refid="osp499-bib-0011"/><pubmed:InternalReference xmi:id="1312" sofa="1145" begin="2414" end="2416" reftype="other" refid="osp499-bib-0012"/><pubmed:InternalReference xmi:id="844" sofa="1145" begin="2690" end="2692" reftype="other" refid="osp499-bib-0010"/><pubmed:InternalReference xmi:id="1054" sofa="1145" begin="2820" end="2822" reftype="other" refid="osp499-bib-0013"/><pubmed:InternalReference xmi:id="39" sofa="1145" begin="2912" end="2914" reftype="other" refid="osp499-bib-0014"/><pubmed:InternalReference xmi:id="332" sofa="1145" begin="2916" end="2918" reftype="other" refid="osp499-bib-0015"/><pubmed:InternalReference xmi:id="1388" sofa="1145" begin="2920" end="2922" reftype="other" refid="osp499-bib-0016"/><pubmed:InternalReference xmi:id="671" sofa="1145" begin="2924" end="2926" reftype="other" refid="osp499-bib-0017"/><pubmed:InternalReference xmi:id="1404" sofa="1145" begin="3010" end="3012" reftype="other" refid="osp499-bib-0018"/><pubmed:InternalReference xmi:id="379" sofa="1145" begin="3925" end="3926" reftype="other" refid="osp499-tbl-0001"/><pubmed:InternalReference xmi:id="247" sofa="1145" begin="5439" end="5440" reftype="figure" refid="osp499-fig-0001"/><pubmed:InternalReference xmi:id="937" sofa="1145" begin="6617" end="6618" reftype="other" refid="osp499-tbl-0002"/><pubmed:InternalReference xmi:id="1072" sofa="1145" begin="6730" end="6731" reftype="other" refid="osp499-tbl-0002"/><pubmed:InternalReference xmi:id="918" sofa="1145" begin="7713" end="7714" reftype="other" refid="osp499-tbl-0003"/><pubmed:InternalReference xmi:id="323" sofa="1145" begin="7895" end="7896" reftype="other" refid="osp499-tbl-0004"/><pubmed:InternalReference xmi:id="463" sofa="1145" begin="8799" end="8800" reftype="other" refid="osp499-bib-0005"/><pubmed:InternalReference xmi:id="1232" sofa="1145" begin="8802" end="8803" reftype="other" 
refid="osp499-bib-0006"/><pubmed:InternalReference xmi:id="1379" sofa="1145" begin="9044" end="9046" reftype="other" refid="osp499-bib-0014"/><pubmed:InternalReference xmi:id="30" sofa="1145" begin="9048" end="9050" reftype="other" refid="osp499-bib-0015"/><pubmed:InternalReference xmi:id="1355" sofa="1145" begin="9052" end="9054" reftype="other" refid="osp499-bib-0016"/><pubmed:InternalReference xmi:id="14" sofa="1145" begin="9056" end="9058" reftype="other" refid="osp499-bib-0017"/><pubmed:InternalReference xmi:id="274" sofa="1145" begin="9060" end="9062" reftype="other" refid="osp499-bib-0018"/><pubmed:InternalReference xmi:id="283" sofa="1145" begin="9064" end="9066" reftype="other" refid="osp499-bib-0019"/><pubmed:InternalReference xmi:id="795" sofa="1145" begin="9068" end="9070" reftype="other" refid="osp499-bib-0020"/><pubmed:InternalReference xmi:id="420" sofa="1145" begin="9211" end="9213" reftype="other" refid="osp499-bib-0021"/><pubmed:InternalReference xmi:id="1063" sofa="1145" begin="9215" end="9217" reftype="other" refid="osp499-bib-0022"/><pubmed:InternalReference xmi:id="1321" sofa="1145" begin="9337" end="9339" reftype="other" refid="osp499-bib-0023"/><pubmed:InternalReference xmi:id="292" sofa="1145" begin="9341" end="9343" reftype="other" refid="osp499-bib-0024"/><pubmed:InternalReference xmi:id="1011" sofa="1145" begin="9613" end="9615" reftype="other" refid="osp499-bib-0014"/><pubmed:InternalReference xmi:id="156" sofa="1145" begin="9617" end="9619" reftype="other" refid="osp499-bib-0015"/><pubmed:InternalReference xmi:id="588" sofa="1145" begin="9621" end="9623" reftype="other" refid="osp499-bib-0016"/><pubmed:InternalReference xmi:id="702" sofa="1145" begin="9625" end="9627" reftype="other" refid="osp499-bib-0017"/><pubmed:InternalReference xmi:id="1339" sofa="1145" begin="9646" end="9648" reftype="other" refid="osp499-bib-0025"/><pubmed:InternalReference xmi:id="748" sofa="1145" begin="9744" end="9746" reftype="other" 
refid="osp499-bib-0026"/><pubmed:InternalReference xmi:id="1270" sofa="1145" begin="9865" end="9867" reftype="other" refid="osp499-bib-0016"/><pubmed:InternalReference xmi:id="835" sofa="1145" begin="9869" end="9871" reftype="other" refid="osp499-bib-0017"/><pubmed:InternalReference xmi:id="388" sofa="1145" begin="10240" end="10242" reftype="other" refid="osp499-bib-0027"/><pubmed:InternalReference xmi:id="540" sofa="1145" begin="10619" end="10621" reftype="other" refid="osp499-bib-0010"/><pubmed:InternalReference xmi:id="1161" sofa="1145" begin="10623" end="10625" reftype="other" refid="osp499-bib-0028"/><pubmed:InternalReference xmi:id="869" sofa="1145" begin="10627" end="10629" reftype="other" refid="osp499-bib-0029"/><pubmed:InternalReference xmi:id="653" sofa="1145" begin="11228" end="11230" reftype="other" refid="osp499-bib-0030"/><pubmed:InternalReference xmi:id="265" sofa="1145" begin="11722" end="11724" reftype="other" refid="osp499-bib-0031"/><pubmed:InternalReference xmi:id="549" sofa="1145" begin="11726" end="11728" reftype="other" refid="osp499-bib-0032"/><pubmed:InternalReference xmi:id="611" sofa="1145" begin="11888" end="11890" reftype="other" refid="osp499-bib-0033"/><types:Paragraph xmi:id="1397" sofa="1145" begin="111" end="242" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="442" sofa="1145" begin="264" end="751" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="813" sofa="1145" begin="760" end="1247" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="354" sofa="1145" begin="1260" end="1347" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="472" sofa="1145" begin="1361" end="1969" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="885" sofa="1145" begin="1970" end="2417" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1279" sofa="1145" begin="2418" end="3238" 
componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="397" sofa="1145" begin="3239" end="3509" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="604" sofa="1145" begin="3539" end="4072" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="149" sofa="1145" begin="4073" end="4108" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="449" sofa="1145" begin="4117" end="4470" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="133" sofa="1145" begin="4471" end="4698" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="85" sofa="1145" begin="4699" end="5506" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="820" sofa="1145" begin="5507" end="5646" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="165" sofa="1145" begin="5656" end="5736" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="959" sofa="1145" begin="5758" end="6515" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1254" sofa="1145" begin="6524" end="6732" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="597" sofa="1145" begin="6733" end="6763" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1372" sofa="1145" begin="6772" end="7429" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="172" sofa="1145" begin="7430" end="8065" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1129" sofa="1145" begin="8066" end="8141" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="23" sofa="1145" begin="8150" end="8227" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="126" sofa="1145" begin="8247" end="9344" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph 
xmi:id="58" sofa="1145" begin="9345" end="10602" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="492" sofa="1145" begin="10603" end="11348" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="878" sofa="1145" begin="11349" end="12295" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="757" sofa="1145" begin="12307" end="12727" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="862" sofa="1145" begin="12759" end="12793" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="646" sofa="1145" begin="12802" end="12807" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1348" sofa="1145" begin="12819" end="13111" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="966" sofa="1145" begin="13112" end="13162" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="456" sofa="1145" begin="13184" end="13495" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:AbstractSectionHeading xmi:id="927" sofa="1145" begin="101" end="110" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="abstractSection"/><types:AbstractSectionHeading xmi:id="48" sofa="1145" begin="243" end="263" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="abstractSection"/><types:AbstractSectionHeading xmi:id="764" sofa="1145" begin="752" end="759" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="abstractSection"/><types:AbstractSectionHeading xmi:id="197" sofa="1145" begin="1248" end="1259" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="abstractSection"/><types:AuthorInfo xmi:id="65" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="F." 
affiliation="osp499-aff-0001" lastName="Tatsugami"/><types:AuthorInfo xmi:id="226" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="K." affiliation="osp499-aff-0001" lastName="Awai"/><types:AuthorInfo xmi:id="576" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="H." affiliation="osp499-aff-0001" lastName="Sakane"/><types:AuthorInfo xmi:id="690" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="M." affiliation="osp499-aff-0001" lastName="Iida"/><types:AuthorInfo xmi:id="724" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="Y." affiliation="osp499-aff-0001" contact="kaichi@hiroshima-u.ac.jp" lastName="Kaichi"/><types:AuthorInfo xmi:id="736" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="Y." affiliation="osp499-aff-0001" lastName="Honda"/><types:AuthorInfo xmi:id="774" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="H." affiliation="osp499-aff-0001" lastName="Higashibori"/><types:AuthorInfo xmi:id="1029" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="Y." 
affiliation="osp499-aff-0001" lastName="Baba"/><types:Title xmi:id="620" sofa="1145" begin="0" end="92" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="document"/><types:Title xmi:id="110" sofa="1145" begin="93" end="100" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="other"/><types:Title xmi:id="711" sofa="1145" begin="4109" end="4116" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="table"/><types:Title xmi:id="827" sofa="1145" begin="5647" end="5655" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="figure"/><types:Title xmi:id="412" sofa="1145" begin="6764" end="6771" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="table"/><types:Title xmi:id="77" sofa="1145" begin="8142" end="8149" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="table"/><types:Title xmi:id="118" sofa="1145" begin="8228" end="8235" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="table"/><types:Caption xmi:id="662" sofa="1145" begin="4073" end="4109" componentId="de.julielab.jcore.reader.pmc.PMCReader" captionType="table"/><types:Caption xmi:id="786" sofa="1145" begin="5507" end="5647" componentId="de.julielab.jcore.reader.pmc.PMCReader" captionType="table"/><types:Caption xmi:id="1286" sofa="1145" begin="6733" end="6764" componentId="de.julielab.jcore.reader.pmc.PMCReader" captionType="table"/><types:Caption xmi:id="567" sofa="1145" begin="8066" end="8142" componentId="de.julielab.jcore.reader.pmc.PMCReader" captionType="table"/><types:Caption xmi:id="92" sofa="1145" begin="8150" end="8228" componentId="de.julielab.jcore.reader.pmc.PMCReader" captionType="table"/><types:Keyword xmi:id="140" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" name="mortality"/><types:Keyword xmi:id="804" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" name="abdominal obesity"/><types:Keyword xmi:id="1081" sofa="1145" begin="0" end="0" 
componentId="de.julielab.jcore.reader.pmc.PMCReader" name="computed tomography"/><types:SectionTitle xmi:id="628" sofa="1145" begin="1348" end="1360" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="1261" sofa="1145" begin="3510" end="3517" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="179" sofa="1145" begin="3518" end="3538" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="1413" sofa="1145" begin="5737" end="5757" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="1020" sofa="1145" begin="6516" end="6523" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="217" sofa="1145" begin="8236" end="8246" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="361" sofa="1145" begin="12296" end="12306" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="188" sofa="1145" begin="12728" end="12758" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="558" sofa="1145" begin="12794" end="12801" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="523" sofa="1145" begin="12808" end="12818" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="370" sofa="1145" begin="13163" end="13183" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:Date xmi:id="207" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" day="20" month="1" year="2017"/><types:Section xmi:id="1103" sofa="1145" begin="1348" end="3510" 
componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="628" sectionId="osp499-sec-0005" depth="0"/><types:Section xmi:id="1116" sofa="1145" begin="3510" end="6516" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="1261" sectionId="osp499-sec-0006" depth="0"/><types:Section xmi:id="429" sofa="1145" begin="3518" end="5737" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="179" sectionId="osp499-sec-0007" depth="1"/><types:Section xmi:id="341" sofa="1145" begin="5737" end="6516" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="1413" sectionId="osp499-sec-0008" depth="1"/><types:Section xmi:id="1241" sofa="1145" begin="6516" end="8236" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="1020" sectionId="osp499-sec-0009" depth="0"/><types:Section xmi:id="479" sofa="1145" begin="8236" end="12296" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="217" sectionId="osp499-sec-0010" depth="0"/><types:Section xmi:id="1170" sofa="1145" begin="12296" end="12728" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="361" sectionId="osp499-sec-0011" depth="0"/><types:Section xmi:id="1041" sofa="1145" begin="12728" end="12794" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="188" sectionId="osp499-sec-0012" depth="0"/><types:Section xmi:id="998" sofa="1145" begin="12794" end="12808" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="558" sectionId="osp499-sec-0013" depth="0"/><types:Section xmi:id="892" sofa="1145" begin="12808" end="13163" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="523" sectionId="osp499-sec-0014" depth="0"/><types:Section xmi:id="1090" sofa="1145" begin="13163" end="13496" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="370" sectionId="osp499-sec-0015" depth="0"/><pubmed:OtherID xmi:id="404" sofa="1145" begin="0" end="0" 
componentId="de.julielab.jcore.reader.pmc.PMCReader" id="28702215" source="PubMed"/><types:Figure xmi:id="508" sofa="1145" begin="5507" end="5656" componentId="de.julielab.jcore.reader.pmc.PMCReader" objectId="osp499-fig-0001" objectLabel="Figure 1" objectCaption="786" objectTitle="827"/><types:AbstractSection xmi:id="990" sofa="1145" begin="101" end="243" componentId="de.julielab.jcore.reader.pmc.PMCReader" abstractSectionHeading="927"/><types:AbstractSection xmi:id="1364" sofa="1145" begin="243" end="752" componentId="de.julielab.jcore.reader.pmc.PMCReader" abstractSectionHeading="48"/><types:AbstractSection xmi:id="1295" sofa="1145" begin="752" end="1248" componentId="de.julielab.jcore.reader.pmc.PMCReader" abstractSectionHeading="764"/><types:AbstractSection xmi:id="532" sofa="1145" begin="1248" end="1348" componentId="de.julielab.jcore.reader.pmc.PMCReader" abstractSectionHeading="197"/><pubmed:ManualDescriptor xmi:id="905" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" keywordList="719"/><types:Journal xmi:id="973" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" pubDate="207" volume="3" title="Obesity Science &amp; Practice" issue="2" pages="219--223"/><types:AbstractText xmi:id="1183" sofa="1145" begin="93" end="1348" componentId="de.julielab.jcore.reader.pmc.PMCReader" structuredAbstractParts="990 1364 1295 532"/><pubmed:Header xmi:id="1197" sofa="1145" begin="0" end="1348" componentId="de.julielab.jcore.reader.pmc.PMCReader" source="PubMed Central" docId="PMC5478802" copyright="© 2017 The Authors. Obesity Science &amp; Practice published by John Wiley &amp; Sons Ltd, World Obesity and The Obesity Society." 
truncated="false" authors="680" pubTypeList="973" doi="10.1002/osp4.99" otherIDs="520"/><tcas:DocumentAnnotation xmi:id="1422" sofa="1145" begin="0" end="13496" language="x-unspecified"/><ext:DBProcessingMetaData xmi:id="1427" sofa="1145" begin="0" end="0" subsetTable="jsbd.errordoc" doNotMarkAsProcessed="false"><primaryKey>PMC5478802</primaryKey></ext:DBProcessingMetaData><cas:Sofa xmi:id="1145" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="Relationship between sudden natural death and abdominal fat evaluated on postmortem CT scans&#10;Summary&#10;Objective&#10;This study examined the association between sudden natural death and abdominal fat using postmortem computed tomography (CT) scans.&#10;Subjects and methods&#10;Postmortem CT images at the umbilical level of 241 subjects were used to measure abdominal areas of subcutaneous‐ and visceral fat, the rate of visceral fat and the waist circumference. Of the study subjects, 174 died of sudden natural death (130 men and 44 women), and 67 died of different causes (46 men and 21 women). All were between 40 and 75 years of age. Logistic regression analysis was performed to identify independent abdominal parameters associated with sudden natural death.&#10;Results&#10;By univariate analysis, the areas of subcutaneous and visceral fat were significantly larger in sudden natural death than who died of different causes (subcutaneous fat, odds ratio [OR] = 1.004, 95% confidence interval [CI] = 1.000–1.007, p = 0.03; visceral fat, OR = 1.008, 95% CI = 1.003–1.013, p &lt; 0.01). 
Multivariate analysis showed that the area of visceral fat was an independent factor associated with the risk of sudden natural death (OR = 1.008, 95% CI = 1.002–1.015, p = 0.02).&#10;Conclusions&#10;Postmortem CT revealed that sudden natural death was related to abdominal fat deposits.&#10;Introduction&#10;Sudden natural death is defined as death not attributable to a traumatic event or suicide within 24 hours of symptom onset in an apparently healthy individual or in a patient whose disease stage was not predictive of death 1, 2, 3. The frequency of sudden unexpected death due to cardiac or unidentifiable causes in employment age is 11 in 100,000 4. In order, most sudden natural deaths are due to cardiovascular followed by infectious diseases 5, 6. In individuals with acute chest pain, the cause of sudden natural death was pulmonary embolism, aortic dissection and obstructive coronary artery disease 7.&#10;Obesity is a risk factor for cardiovascular, metabolic, neoplastic and musculoskeletal disorders, and abdominal obesity is associated with increased morbidity independent of age, race and gender 8, 9, 10. Abdominal adiposity is a significant predictor of mortality 11 independent of the body mass index. The association between the waist circumference and metabolic risk may be partly explicable by a strong association with visceral adiposity 12.&#10;Computed tomography (CT) studies have shown that visceral fat was a significant predictor of mortality: in a model including three fat measures (subcutaneous, visceral and liver fat), age and length of follow‐up, only visceral fat was a significant predictor of mortality 10. In women, standard deviation increment in visceral fat was associated with a significantly increased all‐cause mortality risk 13. CT also revealed visceral adiposity was associated with incident cardiovascular disease 14, 15, 16, 17. In Japanese Americans, visceral fat was associated with obesity‐related mortality 18. 
Although the association between abdominal adiposity and morbidity is known, there are no CT studies that used direct measurements of abdominal fat to elucidate the association between abdominal fat and sudden natural death.&#10;In Japan, postmortem CT studies are performed to determine the cause of sudden death. Under the hypothesis that it is associated with abdominal fat, we investigated the relationship between sudden natural death and abdominal fat measured directly on postmortem CT scans.&#10;Methods&#10;Subjects and methods&#10;This retrospective study was approved by our institutional review board. We used postmortem CT studies acquired between February 2008 and March 2016 of 241 subjects who were between 40 and 75 years of age at the time of their sudden death. Of these, 174 died of sudden natural death (group 1; 130 men and 44 women), and 67 died of different causes (group 2; 46 men and 21 women) (Table 1). There was no significant difference in the age of the two groups (p = 0.09, two‐sample t‐test) nor in their gender (p = 0.34, chi‐square test).&#10;Clinical data on 241 study subjects&#10;Table 1&#10;The criteria for sudden natural death were death due to natural diseases and abrupt and unexpected death in individuals who appeared well. Individuals who died of non‐natural causes such as trauma and suicide were excluded. Contrast‐enhanced CT studies were performed in six group 1 subjects; two group 1 subjects and one group 2 subject were autopsied.&#10;We perused their medical records to review the demographic data of our study subjects for vascular risk factors (diabetes mellitus, hypertension, dyslipidemia and smoking) and the interval between the time of death and imaging.&#10;In individuals who died of sudden death, we routinely obtain whole‐body postmortem CT scans on a 16‐row CT scanner (Lightspeed16, GE Medical Systems, Milwaukee, WI, USA) or a 320‐row CT scanner (Aquilion ONE, Toshiba Corp. Medical Systems, Otawara, Japan). 
Fat analysis was performed on a workstation (AZE Virtual Place Raijin; AZE Ltd., Tokyo, Japan). The areas of subcutaneous and visceral fat and the waist circumference were measured on 5‐mm‐thick CT images at the umbilical level. Adipose tissue areas were calculated using an attenuation range of −150 to −10 Hounsfield units. The abdominal subcutaneous fat area was defined as the area of adipose tissue between the skin and the outermost aspect of the abdominal muscle wall (Figure 1). We also calculated the rate of visceral fat using the equation:&#10;Screen capture from the fat analysis tool. The blue and red areas show visceral and subcutaneous fat, respectively, at the umbilical level.&#10;Figure 1&#10;rate of visceral fat = area of visceral fat/area of subcutaneous + visceral fat.&#10;Statistical analysis&#10;We compared the interval between the time of death and imaging in groups 1 and 2 using the two‐sample t‐test. To compare abdominal parameters, we performed logistic regression analysis. Factors associated with sudden natural death (dependent variables) were identified by univariate analysis. Independent variables included the age, gender, interval between the time of death and imaging, area of subcutaneous fat, area of visceral fat, rate of visceral fat and waist circumference. All statistical tests were two‐sided; probability values of &lt;0.05 were considered statistically significant. Multivariate analysis adjusted for relevant factors was also performed using logistic regression analysis. All statistical analyses were with IBM SPSS Statistics 21.&#10;Results&#10;Postmortem CT scans revealed the cause of sudden natural death in 54 group 1 subjects (Table 2); in the other 120, it could not be ascertained. 
The causes of sudden death in group 2 are also shown in Table 2.&#10;Cause of death in 241 subjects&#10;Table 2&#10;There was no significant difference in the interval between the time of death and imaging between group 1 (median 70 min and range 10–720 min) and group 2 (median 70 min and range 20–660 min) (p = 0.55 and two‐sample t‐test). In group 1, the mean area of subcutaneous and visceral fat was 152.5 ± 96.4 cm2 and 139.3 ± 72.9 cm2, respectively; the mean rate of visceral fat was 0.50 ± 0.12, and the mean waist circumference was 94.7 ± 26.7 cm. In group 2, the mean area of subcutaneous and visceral fat was 125.4 ± 59.8 cm2 and 108.0 ± 47.3 cm2, respectively; the mean rate of visceral fat was 0.47 ± 0.11, and the mean waist circumference was 97.9 ± 28.0 cm.&#10;By univariate analysis, the areas of subcutaneous and visceral fat were significantly larger in group 1 than in group 2 (subcutaneous fat, odds ratio [OR] = 1.004, 95% confidence interval [CI] = 1.000–1.007, p = 0.03; visceral fat, OR = 1.008, 95% CI = 1.003–1.013, p &lt; 0.01) (Table 3). For multivariate analysis, we selected the area of visceral fat as an independent factor associated with sudden natural death (OR = 1.008, 95% CI = 1.002–1.015, p = 0.02) (Table 4). The difference between the two groups in the rate of visceral fat and the waist circumference was not statistically significant (p = 0.13 and p = 0.42, respectively).&#10;Univariate analysis to explore factors associated with sudden natural death&#10;Table 3&#10;Multivariate analysis to explore factors associated with sudden natural death&#10;Table 4&#10;Discussion&#10;Our study showed that the areas of subcutaneous and visceral fat were significantly larger in group 1 than in group 2 and that the rate of visceral fat and the waist circumference were not significantly different. Multivariate analysis revealed that the area of visceral fat was an independent factor associated with the risk of sudden natural death. 
These findings support the hypothesis that sudden natural death is associated with abdominal fat deposits. Most identified causes of sudden natural death (n = 54) were aortic diseases (n = 30). Others 5, 6 suggested that in individuals without a definite radiologically identified cause of sudden natural death it was due to coronary artery disease and numerous studies found an association between abdominal adiposity and cardiovascular disease 14, 15, 16, 17, 18, 19, 20. The cellular lipid content determines the size of adipocytes; large, mature adipoctyes were filled almost entirely by large lipid droplets 21, 22. The adipocyte volume determines cell functionality and the larger the adipocytes, the higher the cardiometabolic risk 23, 24.&#10;Our multivariate analysis showed that the area of visceral fat is an independent factor associated with the risk of sudden natural death. This is consistent with earlier findings that cardiovascular disease was asssociated with visceral – rather than subcutaneous fat 14, 15, 16, 17. Pickhardt et al. 25 documented that visceral fat in women was correlated with metabolic syndrome. While Fox et al. 26 showed that it was more strongly associated with risk factors for cardiovascular disease in women than in men; others 16, 17 claimed that visceral fat areas were significantly related to cardiovascular disease in both genders. Sex differences are recognized in the distribution of adipose tissue. In men, adipose tissue is primarily found in the central or abdominal region; this raises their risk for metabolic disorders; women, on the other hand, harbour more subcutaneous than visceral fat 27. These gender‐specific differences may explain the lack of significant differences in the rate of visceral fat between our two groups. 
Despite gender‐specific differences in the distribution of abdominal fat distribution, we think that visceral fat is associated with obesity‐related morbidity such as cardiovascular disease resulting in sudden natural death.&#10;Earlier studies 10, 28, 29 reported an association between the waist circumference and mortality. In our series, the waist circumference was not associated with sudden natural death. The waist circumference reflects abdominal fat deposits, and the observation that it was not associated with sudden natural death is inconsistent with our finding that the areas of subcutaneous and visceral fat were significantly larger in group 1 than in group 2. We used postmortem CT scans in our analyses; these scans reflected postmortem‐resuscitation and post‐cardiopulmonary resuscitation changes including gastrointestinal distension 30 that increase the waist circumference. Consequently, our findings may not reflect the pre‐mortem waist circumference.&#10;Our study has some limitations. As only three subjects were autopsied, the true cause of death remains uncertain. We did not have full medical histories on all subjects, and the effect of premortem patient characteristics could not be considered in our analyses. For example, there is a strong association between depression and suicide, and between obesity and depression 31, 32; however, we did not study the history of depression in this investigation. Also, cardiovascular disease is the leading cause of death in postmenopausal women 33. Although the differences of subcutaneous and visceral fat between groups 1 and 2 were statistically significant and the area of visceral fat was an independent factor associated with sudden natural death, they were modest, and the clinical significance is questionable. 
Lastly, our study population was composed of a heterogeneous mixture of men and women because only 65 of the 241 subjects were women.&#10;Conclusion&#10;The areas of subcutaneous and visceral fat were signifiantly larger in individuals who died of sudden natural death than in those who died of other identified causes. Visceral fat was an independent factor associated with the risk of sudden natural death. This raises the possibility that abdominal fat deposits may be associated with sudden natural death due to obesity‐related morbidity such as cardiovascular disease.&#10;Conflict of interest statement&#10;No conflict of interest statement.&#10;Funding&#10;None.&#10;Disclosure&#10;Dr Awai reports grants from Research Grant, Toshiba Medical Systems; grants from Research Grant, Hitachi Medical Corporation; grants from Research Grant, Eizai Co.; grants from Research Grant, Bayer Seiyaku Co.; and grants from Research Grant, Daiichi Sankyo, Co., outside the submitted work.&#10;The other authors declare no conflict of interest.&#10;Author contributions&#10;YK conceptualized the study, performed statistical analyses and wrote the paper; HS, HH, YH, FT, YB and MI contributed to interpreting the CT images and critically reviewed the paper. KA contributed to interpreting the data and critically reviewed the paper. 
All authors approved the final version of the paper.&#10;"/><cas:FSArray xmi:id="719" elements="804 1081 140"/><cas:FSArray xmi:id="680" elements="724 576 774 736 65 1029 690 226"/><cas:FSArray xmi:id="520" elements="404"/><cas:View sofa="1145" members="1 946 1219 301 637 238 314 853 1330 1136 256 1152 101 499 1303 1312 844 1054 39 332 1388 671 1404 379 247 937 1072 918 323 463 1232 1379 30 1355 14 274 283 795 420 1063 1321 292 1011 156 588 702 1339 748 1270 835 388 540 1161 869 653 265 549 611 1397 442 813 354 472 885 1279 397 604 149 449 133 85 820 165 959 1254 597 1372 172 1129 23 126 58 492 878 757 862 646 1348 966 456 927 48 764 197 65 226 576 690 724 736 774 1029 620 110 711 827 412 77 118 662 786 1286 567 92 140 804 1081 628 1261 179 1413 1020 217 361 188 558 523 370 207 1103 1116 429 341 1241 479 1170 1041 998 892 1090 404 508 990 1364 1295 532 905 973 1183 1197 1422 1427"/></xmi:XMI>
\ No newline at end of file
diff --git a/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterTest.java b/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterTest.java
index fbcb62164..68150ad75 100644
--- a/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterTest.java
+++ b/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterTest.java
@@ -19,6 +19,7 @@
 import org.testcontainers.junit.jupiter.Testcontainers;
 
 import java.io.IOException;
+import java.sql.ResultSet;
 import java.sql.SQLException;
 import java.util.List;
 import java.util.Map;
@@ -33,7 +34,6 @@ public class XmiDBWriterTest {
     @Container
     public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:"+DataBaseConnector.POSTGRES_VERSION);
     private static String costosysConfig;
-    private static String xmlSubsetTable;
     private static DataBaseConnector dbc;
 
     @BeforeAll
@@ -41,8 +41,8 @@ public static void setup() throws SQLException, UIMAException, IOException, Conf
         dbc = DBTestUtils.getDataBaseConnector(postgres);
         dbc.reserveConnection();
         costosysConfig = DBTestUtils.createTestCostosysConfig("medline_2017", 1, postgres);
-        xmlSubsetTable = DBTestUtils.setupDatabase(dbc, "src/test/resources/pubmedsample18n0001.xml.gz", "medline_2017", 177, postgres);
         dbc.releaseConnections();
+        DBTestUtils.createAndSetHiddenConfig("src/test/resources/hiddenConfig.txt", postgres);
     }
 
     @AfterAll
@@ -185,4 +185,40 @@ public void testXmiDBWriterSplitAnnotationsDefaultAnnotationSchemas() throws Exc
             assertThat(columnNames).contains(tokenColumn, sentenceColumn);
         }
     }
+
+    /** Runs the XMI DB writer on a document with two pubmed InternalReference annotations and checks that a base document is stored. */
+    @Test
+    public void testXmiSubtypeStorage() throws Exception {
+        AnalysisEngine xmiWriter = AnalysisEngineFactory.createEngine("de.julielab.jcore.consumer.xmi.desc.jcore-xmi-db-writer",
+                XMIDBWriter.PARAM_ANNOS_TO_STORE, new String[]{Token.class.getCanonicalName(), Sentence.class.getCanonicalName()},
+                XMIDBWriter.PARAM_COSTOSYS_CONFIG, costosysConfig,
+                XMIDBWriter.PARAM_STORE_ALL, false,
+                XMIDBWriter.PARAM_STORE_BASE_DOCUMENT, true,
+                XMIDBWriter.PARAM_TABLE_DOCUMENT, "_data.documents3",
+                XMIDBWriter.PARAM_DO_GZIP, false,
+                XMIDBWriter.PARAM_STORE_RECURSIVELY, true,
+                XMIDBWriter.PARAM_UPDATE_MODE, true,
+                XMIDBWriter.PARAM_BASE_DOCUMENT_ANNOTATION_TYPES, new String[]{InternalReference.class.getCanonicalName()}
+        );
+        JCas jCas = getJCasWithRequiredTypes();
+        final Header header = new Header(jCas);
+        header.setDocId("789");
+        header.addToIndexes();
+        jCas.setDocumentText("This is a sentence.1,2");
+        new de.julielab.jcore.types.pubmed.InternalReference(jCas, 19, 20).addToIndexes();
+        new de.julielab.jcore.types.pubmed.InternalReference(jCas, 21, 22).addToIndexes();
+        assertThatCode(() -> xmiWriter.process(jCas)).doesNotThrowAnyException();
+        jCas.reset();
+        xmiWriter.collectionProcessComplete();
+        // Read the stored row back through a fresh connector to verify what the writer persisted.
+        dbc = DBTestUtils.getDataBaseConnector(postgres);
+        try (CoStoSysConnection conn = dbc.obtainOrReserveConnection()) {
+            assertThat(dbc.tableExists("_data.documents3")).isTrue();
+            // Statement and result set are closed explicitly; assert on the content instead of printing it.
+            try (java.sql.Statement stmt = conn.createStatement(); ResultSet rs = stmt.executeQuery("SELECT " + XmiSplitConstants.BASE_DOC_COLUMN + " FROM " + "_data.documents3")) {
+                assertThat(rs.next()).isTrue();
+                assertThat(rs.getString(1)).isNotBlank();
+            }
+        }
+    }
 }

From 7557e934527b16c0704d3551a4cd6303933e5aa7 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 1 Jul 2021 14:49:07 +0200
Subject: [PATCH 077/269] JSBD: Fixed a bug where the document length offset
 was not condensation-adjusted. Fixed #121

Also adding a comma to the cut away characters.
---
 .../jcore/ae/jsbd/main/SentenceAnnotator.java |  11 +-
 .../ae/jsbd/main/SentenceAnnotatorTest.java   | 497 +++++++++---------
 .../test/resources/errordocs/PMC5478802.xmi   |   5 +
 .../JCoReCondensedDocumentTextTest.java       |  18 +-
 4 files changed, 264 insertions(+), 267 deletions(-)
 create mode 100644 jcore-jsbd-ae/src/test/resources/errordocs/PMC5478802.xmi

diff --git a/jcore-jsbd-ae/src/main/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotator.java b/jcore-jsbd-ae/src/main/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotator.java
index c91869654..fe5cbd833 100644
--- a/jcore-jsbd-ae/src/main/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotator.java
+++ b/jcore-jsbd-ae/src/main/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotator.java
@@ -155,7 +155,7 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException {
             JCoReCondensedDocumentText documentText;
             try {
                 // If there are no cut-away types, the document text will remain unchanged.
-                documentText = new JCoReCondensedDocumentText(aJCas, cutAwayTypes);
+                documentText = new JCoReCondensedDocumentText(aJCas, cutAwayTypes, Set.of(','));
             } catch (ClassNotFoundException e1) {
                 LOGGER.error("Could not create the text without annotations to be cut away in document {}", JCoReTools.getDocId(aJCas), e1);
                 throw new AnalysisEngineProcessException(e1);
@@ -175,22 +175,15 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException {
                     // cross any delimiter annotation border
                     List<Integer> borders = new ArrayList<>();
                     borders.add(0);
-                    borders.add(aJCas.getDocumentText().length());
+                    borders.add(documentText.getCondensedOffsetForOriginalOffset(aJCas.getDocumentText().length()));
                     while (indexMerger.incrementAnnotation()) {
                         Annotation a = (Annotation) indexMerger.getAnnotation();
-                        System.out.println(a.getCoveredText());
-                        System.out.println("--");
-                        System.out.println(documentText.getCodensedText().substring(documentText.getCondensedOffsetForOriginalOffset(a.getBegin()), documentText.getOriginalOffsetForCondensedOffset(a.getEnd())));
-                        System.out.println(a.getBegin() + " - " + a.getEnd() + ", " + documentText.getCondensedOffsetForOriginalOffset(a.getBegin()) + " - " + documentText.getOriginalOffsetForCondensedOffset(a.getEnd()));
-                        System.out.println();
                         // Here we convert the original offsets to the condensed offsets. If there are
                         // no cut-away types, the offsets will just remain unchanged. Otherwise we now
                         // have the borders of the condensed text passages associated with the sentence
                         // delimiter annotation.
                         borders.add(documentText.getCondensedOffsetForOriginalOffset(a.getBegin()));
-                        assert borders.get(borders.size() - 1) < documentText.getCodensedText().length();
                         borders.add(documentText.getCondensedOffsetForOriginalOffset(a.getEnd()));
-                        assert borders.get(borders.size() - 1) < documentText.getCodensedText().length() : "Original offset "+a.getEnd()+" is mapped to condensed offset " + documentText.getCondensedOffsetForOriginalOffset(a.getEnd());
                     }
                     borders.sort(null);
 
diff --git a/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotatorTest.java b/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotatorTest.java
index 1455b9339..22edbe983 100644
--- a/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotatorTest.java
+++ b/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotatorTest.java
@@ -1,17 +1,17 @@
-/** 
+/**
  * SentenceAnnotatorTest.java
- * 
+ * <p>
  * Copyright (c) 2015, JULIE Lab.
- * All rights reserved. This program and the accompanying materials 
+ * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the BSD-2-Clause License
- *
+ * <p>
  * Author: tomanek
- * 
+ * <p>
  * Current version: 2.2
  * Since version:   1.0
- *
- * Creation date: Nov 29, 2006 
- * 
+ * <p>
+ * Creation date: Nov 29, 2006
+ * <p>
  * This is a JUnit test for the SentenceAnnotator.
  **/
 
@@ -47,271 +47,268 @@
 import java.util.stream.Collectors;
 
 import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatCode;
 import static org.junit.jupiter.api.Assertions.*;
+
 public class SentenceAnnotatorTest {
 
-	/**
-	 * Logger for this class
-	 */
-	private static final Logger LOGGER = LoggerFactory.getLogger(SentenceAnnotatorTest.class);
-
-	private static final String LOGGER_PROPERTIES = "src/test/java/log4j.properties";
-
-	// uncomment to test with/without scope
-	// private static final String DESCRIPTOR =
-	// "src/test/resources/de/julielab/jcore/ae/jsbd/desc/SentenceAnnotatorTest.xml";
-	private static final String DESCRIPTOR = "src/test/resources/de/julielab/jcore/ae/jsbd/desc/SentenceAnnotator_with-scope_Test.xml";
-
-	// last sentence has no EOS symbol to test that also this is handled
-	// correctly
-	private static final String[] TEST_TEXT = { "First sentence. Second \t sentence! \n    Last sentence?",
-			"Hallo, jemand da? Nein, niemand.", "A test. It can't be just one sentence. Testing the test.", "" };
-
-	private static final String[] TEST_TEXT_OFFSETS = { "0-15;16-34;40-54", "0-17;18-32", "0-7;8-38;39-56", "" };
-
-	private static final int[] endOffsets = { 54, 32, 27, 0 };
-
-	/**
-	 * Use the model in resources, split the text in TEST_TEXT and compare the
-	 * split result against TEST_TEXT_OFFSETS
-	 */
-	@Test
-	public void testProcess() {
-
-		boolean annotationsOK = true;
-
-		XMLInputSource sentenceXML = null;
-		ResourceSpecifier sentenceSpec = null;
-		AnalysisEngine sentenceAnnotator = null;
-
-		try {
-			sentenceXML = new XMLInputSource(DESCRIPTOR);
-			sentenceSpec = UIMAFramework.getXMLParser().parseResourceSpecifier(sentenceXML);
-			sentenceAnnotator = UIMAFramework.produceAnalysisEngine(sentenceSpec);
-		} catch (Exception e) {
-			LOGGER.error("testProcess()", e);
-		}
-
-		for (int i = 0; i < TEST_TEXT.length; i++) {
-
-			JCas jcas = null;
-			try {
-				jcas = sentenceAnnotator.newJCas();
-			} catch (ResourceInitializationException e) {
-				LOGGER.error("testProcess()", e);
-			}
-
-			if (LOGGER.isDebugEnabled()) {
-				LOGGER.debug("testProcess() - testing text: " + TEST_TEXT[i]);
-			}
-			jcas.setDocumentText(TEST_TEXT[i]);
-
-			// make one test scope ranging over complete document text
-			// annotations for the processing scope
-			TestScope scope1 = new TestScope(jcas, 0, endOffsets[i]);
-			scope1.addToIndexes();
-			// TestScope scope2 = new TestScope(jcas,37,54);
-
-			
-			try {
-				sentenceAnnotator.process(jcas, null);
-			} catch (Exception e) {
-				LOGGER.error("testProcess()", e);
-			}
-
-			// get the offsets of the sentences
-			JFSIndexRepository indexes = jcas.getJFSIndexRepository();
-			Iterator sentIter = indexes.getAnnotationIndex(Sentence.type).iterator();
-
-			String predictedOffsets = getPredictedOffsets(i, sentIter);
-			
-			// compare offsets
-			if (!predictedOffsets.equals(TEST_TEXT_OFFSETS[i])) {
-				annotationsOK = false;
-				continue;
-			}
-		}
-		assertTrue(annotationsOK);
-	}
-
-
-	private String getPredictedOffsets(int i, Iterator sentIter) {
-		String predictedOffsets = "";
-		while (sentIter.hasNext()) {
-			Sentence s = (Sentence) sentIter.next();
-			LOGGER.debug("sentence: " + s.getCoveredText() + ": " + s.getBegin() + " - " + s.getEnd());
-			predictedOffsets += (predictedOffsets.length() > 0) ? ";" : "";
-			predictedOffsets += s.getBegin() + "-" + s.getEnd();
-		}
-
-		if (LOGGER.isDebugEnabled()) {
-			LOGGER.debug("testProcess() - predicted: " + predictedOffsets);
-		}
-		if (LOGGER.isDebugEnabled()) {
-			LOGGER.debug("testProcess() - wanted: " + TEST_TEXT_OFFSETS[i]);
-		}
-		return predictedOffsets;
-	}
-
-	@Test
-	public void testUimaFitIntegration() throws UIMAException, IOException {
-		AnalysisEngine sentenceAE = AnalysisEngineFactory.createEngine(SentenceAnnotator.class,
-				SentenceAnnotator.PARAM_MODEL_FILE, "de/julielab/jcore/ae/jsbd/model/test-model.gz",
-				SentenceAnnotator.PARAM_POSTPROCESSING, "biomed");
-		JCas cas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types");
-		String abstractText = FileUtils.readFileToString(new File("src/test/resources/test-abstract.txt"), "UTF-8");
-		cas.setDocumentText(abstractText);
-		sentenceAE.process(cas);
-		Collection<Sentence> sentences = JCasUtil.select(cas, Sentence.class);
-		for (Sentence sentence : sentences) {
-			System.out.println(sentence.getCoveredText());
-		}
-		assertEquals(14, sentences.size());
-	}
-
-	@Test
-	public void testModelClassPathResource() throws Exception {
-		AnalysisEngine sentenceAE = AnalysisEngineFactory.createEngine(SentenceAnnotator.class,
-				SentenceAnnotator.PARAM_MODEL_FILE, "de/julielab/jcore/ae/jsbd/model/test-model.gz",
-				SentenceAnnotator.PARAM_POSTPROCESSING, "biomed");
-		JCas cas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types");
-		String abstractText = FileUtils.readFileToString(new File("src/test/resources/test-abstract.txt"), "UTF-8");
-		cas.setDocumentText(abstractText);
-		sentenceAE.process(cas);
-		Collection<Sentence> sentences = JCasUtil.select(cas, Sentence.class);
-		System.out.println(sentences.size());
-		for (Sentence sentence : sentences) {
-			System.out.println(sentence.getCoveredText());
-		}
-		assertEquals(14, sentences.size());
-	}
-
-	@Test
-	public void testSentenceDelimiterTypes() throws Exception {
-		JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
-				"de.julielab.jcore.types.jcore-document-structure-types");
-		
-		jCas.setDocumentText("Introduction " + "We here show good results. This is a figure caption "
-				+ "And this is a paragraph without a fullstop for some reason " + "Conclusion "
-				+ "We are the greatest.");
-		Title t1 = new Title(jCas, 0, 12);
-		Caption c = new Caption(jCas, 40, 64);
-		Paragraph p = new Paragraph(jCas, 65, 123);
-		Title t2 = new Title(jCas, 124, 134);
-		t1.addToIndexes();
-		c.addToIndexes();
-		p.addToIndexes();
-		t2.addToIndexes();
-		assertEquals("Introduction", t1.getCoveredText());
-		assertEquals("This is a figure caption", c.getCoveredText());
-		assertEquals("And this is a paragraph without a fullstop for some reason", p.getCoveredText());
-		assertEquals("Conclusion", t2.getCoveredText());
-
-		AnalysisEngine jsbd = AnalysisEngineFactory.createEngine(SentenceAnnotator.class, SentenceAnnotator.PARAM_MODEL_FILE,
-				"de/julielab/jcore/ae/jsbd/model/test-model.gz", SentenceAnnotator.PARAM_SENTENCE_DELIMITER_TYPES,
-				new LinkedHashSet<Object>(
-						Arrays.asList(Title.class.getName(), Caption.class.getName(), Paragraph.class.getName())));
-		
-		jsbd.process(jCas.getCas());
-		
-		Set<Range<Integer>> expectedSpans = new HashSet<>();
-		expectedSpans.add(Range.between(0,  12));
-		expectedSpans.add(Range.between(13, 39));
-		expectedSpans.add(Range.between(40, 64));
-		expectedSpans.add(Range.between(65, 123));
-		expectedSpans.add(Range.between(124, 134));
-		expectedSpans.add(Range.between(135, 155));
-		
-		FSIterator<Annotation> it = jCas.getAnnotationIndex(Sentence.type).iterator();
-		assertTrue(it.hasNext());
-		while (it.hasNext()) {
-			Annotation sentence = it.next();
-			Range<Integer> sentenceRange = Range.between(sentence.getBegin(), sentence.getEnd());
-			assertTrue(expectedSpans.remove(sentenceRange), "Range " + sentenceRange + " was not expected");
-		}
-		assertTrue(expectedSpans.isEmpty());
-	}
-
-	@Test
-	public void testSentenceWhitespaces() throws Exception {
-		JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
-				"de.julielab.jcore.types.jcore-document-structure-types");
-
-		// This text is taken from pmid 23092121
-		jCas.setDocumentText("  : We present a theoretical study of the electronic subband structure and collective electronic excitation associated with plasmon and surface plasmon modes in metal-based hollow nanosphere. The dependence of the electronic subband energy on the sample parameters of the hollow nanosphere is examined.");
-
-		AnalysisEngine jsbd = AnalysisEngineFactory.createEngine(SentenceAnnotator.class, SentenceAnnotator.PARAM_MODEL_FILE,
-				"de/julielab/jcore/ae/jsbd/model/test-model.gz");
-
-		jsbd.process(jCas.getCas());
+    /**
+     * Logger for this class
+     */
+    private static final Logger LOGGER = LoggerFactory.getLogger(SentenceAnnotatorTest.class);
+
+    private static final String LOGGER_PROPERTIES = "src/test/java/log4j.properties";
+
+    // uncomment to test with/without scope
+    // private static final String DESCRIPTOR =
+    // "src/test/resources/de/julielab/jcore/ae/jsbd/desc/SentenceAnnotatorTest.xml";
+    private static final String DESCRIPTOR = "src/test/resources/de/julielab/jcore/ae/jsbd/desc/SentenceAnnotator_with-scope_Test.xml";
+
+    // last sentence has no EOS symbol to test that also this is handled
+    // correctly
+    private static final String[] TEST_TEXT = {"First sentence. Second \t sentence! \n    Last sentence?",
+            "Hallo, jemand da? Nein, niemand.", "A test. It can't be just one sentence. Testing the test.", ""};
+
+    private static final String[] TEST_TEXT_OFFSETS = {"0-15;16-34;40-54", "0-17;18-32", "0-7;8-38;39-56", ""};
+
+    private static final int[] endOffsets = {54, 32, 27, 0};
+
+    /**
+     * Use the model in resources, split the text in TEST_TEXT and compare the
+     * split result against TEST_TEXT_OFFSETS
+     */
+    @Test
+    public void testProcess() {
+
+        boolean annotationsOK = true;
+
+        XMLInputSource sentenceXML = null;
+        ResourceSpecifier sentenceSpec = null;
+        AnalysisEngine sentenceAnnotator = null;
+
+        try {
+            sentenceXML = new XMLInputSource(DESCRIPTOR);
+            sentenceSpec = UIMAFramework.getXMLParser().parseResourceSpecifier(sentenceXML);
+            sentenceAnnotator = UIMAFramework.produceAnalysisEngine(sentenceSpec);
+        } catch (Exception e) {
+            LOGGER.error("testProcess()", e);
+        }
+
+        for (int i = 0; i < TEST_TEXT.length; i++) {
+
+            JCas jcas = null;
+            try {
+                jcas = sentenceAnnotator.newJCas();
+            } catch (ResourceInitializationException e) {
+                LOGGER.error("testProcess()", e);
+            }
+
+            if (LOGGER.isDebugEnabled()) {
+                LOGGER.debug("testProcess() - testing text: " + TEST_TEXT[i]);
+            }
+            jcas.setDocumentText(TEST_TEXT[i]);
+
+            // make one test scope ranging over complete document text
+            // annotations for the processing scope
+            TestScope scope1 = new TestScope(jcas, 0, endOffsets[i]);
+            scope1.addToIndexes();
+            // TestScope scope2 = new TestScope(jcas,37,54);
+
+
+            try {
+                sentenceAnnotator.process(jcas, null);
+            } catch (Exception e) {
+                LOGGER.error("testProcess()", e);
+            }
+
+            // get the offsets of the sentences
+            JFSIndexRepository indexes = jcas.getJFSIndexRepository();
+            Iterator sentIter = indexes.getAnnotationIndex(Sentence.type).iterator();
+
+            String predictedOffsets = getPredictedOffsets(i, sentIter);
+
+            // compare offsets
+            if (!predictedOffsets.equals(TEST_TEXT_OFFSETS[i])) {
+                annotationsOK = false;
+                continue;
+            }
+        }
+        assertTrue(annotationsOK);
+    }
 
 
-        Sentence sentence = JCasUtil.select(jCas, Sentence.class).iterator().next();
-        assertFalse(sentence.getCoveredText().startsWith(" "));
+    private String getPredictedOffsets(int i, Iterator sentIter) {
+        String predictedOffsets = "";
+        while (sentIter.hasNext()) {
+            Sentence s = (Sentence) sentIter.next();
+            LOGGER.debug("sentence: " + s.getCoveredText() + ": " + s.getBegin() + " - " + s.getEnd());
+            predictedOffsets += (predictedOffsets.length() > 0) ? ";" : "";
+            predictedOffsets += s.getBegin() + "-" + s.getEnd();
+        }
+
+        if (LOGGER.isDebugEnabled()) {
+            LOGGER.debug("testProcess() - predicted: " + predictedOffsets);
+        }
+        if (LOGGER.isDebugEnabled()) {
+            LOGGER.debug("testProcess() - wanted: " + TEST_TEXT_OFFSETS[i]);
+        }
+        return predictedOffsets;
     }
 
-	@Test
-	public void testTrailingNewline() throws Exception {
-		JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
-				"de.julielab.jcore.types.jcore-document-structure-types");
+    @Test
+    public void testUimaFitIntegration() throws UIMAException, IOException {
+        AnalysisEngine sentenceAE = AnalysisEngineFactory.createEngine(SentenceAnnotator.class,
+                SentenceAnnotator.PARAM_MODEL_FILE, "de/julielab/jcore/ae/jsbd/model/test-model.gz",
+                SentenceAnnotator.PARAM_POSTPROCESSING, "biomed");
+        JCas cas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types");
+        String abstractText = FileUtils.readFileToString(new File("src/test/resources/test-abstract.txt"), "UTF-8");
+        cas.setDocumentText(abstractText);
+        sentenceAE.process(cas);
+        Collection<Sentence> sentences = JCasUtil.select(cas, Sentence.class);
+        for (Sentence sentence : sentences) {
+            System.out.println(sentence.getCoveredText());
+        }
+        assertEquals(14, sentences.size());
+    }
 
-		// This text is taken from PMC3408706. Note the "paragraph separator" at the end
-		jCas.setDocumentText("In1 the next step, we plan to use higher level QM/MM methods to calculate the energy barrier of the reaction catalyzed by endonuclease APE1, in compliance with the mechanism proposed, and to screen for effective inhibitors with the use of the constructed mechanistic full-atomic model of the enzyme.    \u2029");
-        new InternalReference(jCas, 2, 3).addToIndexes();
+    @Test
+    public void testModelClassPathResource() throws Exception {
+        AnalysisEngine sentenceAE = AnalysisEngineFactory.createEngine(SentenceAnnotator.class,
+                SentenceAnnotator.PARAM_MODEL_FILE, "de/julielab/jcore/ae/jsbd/model/test-model.gz",
+                SentenceAnnotator.PARAM_POSTPROCESSING, "biomed");
+        JCas cas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types");
+        String abstractText = FileUtils.readFileToString(new File("src/test/resources/test-abstract.txt"), "UTF-8");
+        cas.setDocumentText(abstractText);
+        sentenceAE.process(cas);
+        Collection<Sentence> sentences = JCasUtil.select(cas, Sentence.class);
+        System.out.println(sentences.size());
+        for (Sentence sentence : sentences) {
+            System.out.println(sentence.getCoveredText());
+        }
+        assertEquals(14, sentences.size());
+    }
+
+    @Test
+    public void testSentenceDelimiterTypes() throws Exception {
+        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
+                "de.julielab.jcore.types.jcore-document-structure-types");
+
+        jCas.setDocumentText("Introduction " + "We here show good results. This is a figure caption "
+                + "And this is a paragraph without a fullstop for some reason " + "Conclusion "
+                + "We are the greatest.");
+        Title t1 = new Title(jCas, 0, 12);
+        Caption c = new Caption(jCas, 40, 64);
+        Paragraph p = new Paragraph(jCas, 65, 123);
+        Title t2 = new Title(jCas, 124, 134);
+        t1.addToIndexes();
+        c.addToIndexes();
+        p.addToIndexes();
+        t2.addToIndexes();
+        assertEquals("Introduction", t1.getCoveredText());
+        assertEquals("This is a figure caption", c.getCoveredText());
+        assertEquals("And this is a paragraph without a fullstop for some reason", p.getCoveredText());
+        assertEquals("Conclusion", t2.getCoveredText());
+
+        AnalysisEngine jsbd = AnalysisEngineFactory.createEngine(SentenceAnnotator.class, SentenceAnnotator.PARAM_MODEL_FILE,
+                "de/julielab/jcore/ae/jsbd/model/test-model.gz", SentenceAnnotator.PARAM_SENTENCE_DELIMITER_TYPES,
+                new LinkedHashSet<Object>(
+                        Arrays.asList(Title.class.getName(), Caption.class.getName(), Paragraph.class.getName())));
+
+        jsbd.process(jCas.getCas());
+
+        Set<Range<Integer>> expectedSpans = new HashSet<>();
+        expectedSpans.add(Range.between(0, 12));
+        expectedSpans.add(Range.between(13, 39));
+        expectedSpans.add(Range.between(40, 64));
+        expectedSpans.add(Range.between(65, 123));
+        expectedSpans.add(Range.between(124, 134));
+        expectedSpans.add(Range.between(135, 155));
+
+        FSIterator<Annotation> it = jCas.getAnnotationIndex(Sentence.type).iterator();
+        assertTrue(it.hasNext());
+        while (it.hasNext()) {
+            Annotation sentence = it.next();
+            Range<Integer> sentenceRange = Range.between(sentence.getBegin(), sentence.getEnd());
+            assertTrue(expectedSpans.remove(sentenceRange), "Range " + sentenceRange + " was not expected");
+        }
+        assertTrue(expectedSpans.isEmpty());
+    }
+
+    @Test
+    public void testSentenceWhitespaces() throws Exception {
+        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
+                "de.julielab.jcore.types.jcore-document-structure-types");
+
+        // This text is taken from pmid 23092121
+        jCas.setDocumentText("  : We present a theoretical study of the electronic subband structure and collective electronic excitation associated with plasmon and surface plasmon modes in metal-based hollow nanosphere. The dependence of the electronic subband energy on the sample parameters of the hollow nanosphere is examined.");
 
-		AnalysisEngine jsbd = AnalysisEngineFactory.createEngine(SentenceAnnotator.class, SentenceAnnotator.PARAM_MODEL_FILE,
-				"de/julielab/jcore/ae/jsbd/model/test-model.gz", SentenceAnnotator.PARAM_CUT_AWAY_TYPES, new String[]{InternalReference.class.getCanonicalName()});
+        AnalysisEngine jsbd = AnalysisEngineFactory.createEngine(SentenceAnnotator.class, SentenceAnnotator.PARAM_MODEL_FILE,
+                "de/julielab/jcore/ae/jsbd/model/test-model.gz");
 
-		jsbd.process(jCas.getCas());
+        jsbd.process(jCas.getCas());
 
 
-		Sentence sentence = JCasUtil.select(jCas, Sentence.class).iterator().next();
-		assertFalse(sentence.getCoveredText().endsWith("\u2029"));
-	}
+        Sentence sentence = JCasUtil.select(jCas, Sentence.class).iterator().next();
+        assertFalse(sentence.getCoveredText().startsWith(" "));
+    }
 
-	@Test
-	public void testSplitAtNewlines() throws Exception {
-		JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
-				"de.julielab.jcore.types.jcore-document-structure-types");
+    @Test
+    public void testTrailingNewline() throws Exception {
+        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
+                "de.julielab.jcore.types.jcore-document-structure-types");
 
-		String ls = System.getProperty("line.separator");
-		jCas.setDocumentText("line1"+ls+"line2"+ls+"line3");
+        // This text is taken from PMC3408706. Note the "paragraph separator" at the end
+        jCas.setDocumentText("In1 the next step, we plan to use higher level QM/MM methods to calculate the energy barrier of the reaction catalyzed by endonuclease APE1, in compliance with the mechanism proposed, and to screen for effective inhibitors with the use of the constructed mechanistic full-atomic model of the enzyme.    \u2029");
+        new InternalReference(jCas, 2, 3).addToIndexes();
 
-		AnalysisEngine jsbd = AnalysisEngineFactory.createEngine(SentenceAnnotator.class, SentenceAnnotator.PARAM_MODEL_FILE,
-				"de/julielab/jcore/ae/jsbd/model/test-model.gz", SentenceAnnotator.PARAM_ALWAYS_SPLIT_NEWLINE, true);
+        AnalysisEngine jsbd = AnalysisEngineFactory.createEngine(SentenceAnnotator.class, SentenceAnnotator.PARAM_MODEL_FILE,
+                "de/julielab/jcore/ae/jsbd/model/test-model.gz", SentenceAnnotator.PARAM_CUT_AWAY_TYPES, new String[]{InternalReference.class.getCanonicalName()});
 
-		jsbd.process(jCas.getCas());
+        jsbd.process(jCas.getCas());
 
 
-		Collection<String> sentences = JCasUtil.select(jCas, Sentence.class).stream().map(Annotation::getCoveredText).collect(Collectors.toList());
-		assertThat(sentences).containsExactly("line1", "line2", "line3");
-	}
+        Sentence sentence = JCasUtil.select(jCas, Sentence.class).iterator().next();
+        assertFalse(sentence.getCoveredText().endsWith("\u2029"));
+    }
 
+    @Test
+    public void testSplitAtNewlines() throws Exception {
+        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
+                "de.julielab.jcore.types.jcore-document-structure-types");
 
-	@Test
-	public void testErrordoc() throws Exception {
-		// The XMI document uses here is from PMC and is an example of a source of error the previously occurred.
-		JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
-				"de.julielab.jcore.types.jcore-document-structure-pubmed-types", "de.julielab.jcore.types.jcore-document-meta-pubmed-types",
-				"de.julielab.jcore.types.extensions.jcore-document-meta-extension-types");
+        String ls = System.getProperty("line.separator");
+        jCas.setDocumentText("line1" + ls + "line2" + ls + "line3");
 
-		XmiCasDeserializer.deserialize(new FileInputStream(Path.of("src", "test", "resources", "errordocs", "PMC5478802.xmi").toFile()), jCas.getCas());
-		JCasUtil.select(jCas, Sentence.class).forEach(Annotation::removeFromIndexes);
-		AnalysisEngine jsbd = AnalysisEngineFactory.createEngine(SentenceAnnotator.class, SentenceAnnotator.PARAM_MODEL_FILE,
-				"/Users/faessler/Coding/git/jcore-projects/jcore-jsbd-ae-biomedical-english/src/main/resources/de/julielab/jcore/ae/jsbd/model/jsbd-biomed-oversampled-abstracts-split-at-punctuation.mod.gz",
-				SentenceAnnotator.PARAM_MAX_SENTENCE_LENGTH, 1000,
-				SentenceAnnotator.PARAM_SENTENCE_DELIMITER_TYPES, new String[]{
-						"de.julielab.jcore.types.Title", "de.julielab.jcore.types.AbstractText", "de.julielab.jcore.types.AbstractSectionHeading", "de.julielab.jcore.types.AbstractSection", "de.julielab.jcore.types.Section", "de.julielab.jcore.types.Paragraph", "de.julielab.jcore.types.Zone", "de.julielab.jcore.types.Caption", "de.julielab.jcore.types.Figure", "de.julielab.jcore.types.Table"},
-				SentenceAnnotator.PARAM_CUT_AWAY_TYPES, new String[]{de.julielab.jcore.types.pubmed.InternalReference.class.getCanonicalName()}
-		);
+        AnalysisEngine jsbd = AnalysisEngineFactory.createEngine(SentenceAnnotator.class, SentenceAnnotator.PARAM_MODEL_FILE,
+                "de/julielab/jcore/ae/jsbd/model/test-model.gz", SentenceAnnotator.PARAM_ALWAYS_SPLIT_NEWLINE, true);
 
-		jsbd.process(jCas.getCas());
-		for (var s : JCasUtil.select(jCas, Sentence.class)) {
-			System.out.println(s.getCoveredText());
-			System.out.println("--");
-		}
+        jsbd.process(jCas.getCas());
 
-	}
+
+        Collection<String> sentences = JCasUtil.select(jCas, Sentence.class).stream().map(Annotation::getCoveredText).collect(Collectors.toList());
+        assertThat(sentences).containsExactly("line1", "line2", "line3");
+    }
+
+
+    @Test
+    public void testErrordoc() throws Exception {
+        // The XMI document used here is from PMC and is an example of a source of error that previously occurred.
+        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
+                "de.julielab.jcore.types.jcore-document-structure-pubmed-types", "de.julielab.jcore.types.jcore-document-meta-pubmed-types",
+                "de.julielab.jcore.types.extensions.jcore-document-meta-extension-types");
+
+        XmiCasDeserializer.deserialize(new FileInputStream(Path.of("src", "test", "resources", "errordocs", "PMC5478802.xmi").toFile()), jCas.getCas());
+        JCasUtil.select(jCas, Sentence.class).forEach(Annotation::removeFromIndexes);
+        AnalysisEngine jsbd = AnalysisEngineFactory.createEngine(SentenceAnnotator.class, SentenceAnnotator.PARAM_MODEL_FILE,
+                "/Users/faessler/Coding/git/jcore-projects/jcore-jsbd-ae-biomedical-english/src/main/resources/de/julielab/jcore/ae/jsbd/model/jsbd-biomed-oversampled-abstracts-split-at-punctuation.mod.gz",
+                SentenceAnnotator.PARAM_MAX_SENTENCE_LENGTH, 1000,
+                SentenceAnnotator.PARAM_SENTENCE_DELIMITER_TYPES, new String[]{
+                        "de.julielab.jcore.types.Title", "de.julielab.jcore.types.AbstractText", "de.julielab.jcore.types.AbstractSectionHeading", "de.julielab.jcore.types.AbstractSection", "de.julielab.jcore.types.Section", "de.julielab.jcore.types.Paragraph", "de.julielab.jcore.types.Zone", "de.julielab.jcore.types.Caption", "de.julielab.jcore.types.Figure", "de.julielab.jcore.types.Table"},
+                SentenceAnnotator.PARAM_CUT_AWAY_TYPES, new String[]{de.julielab.jcore.types.pubmed.InternalReference.class.getCanonicalName()}
+        );
+
+        assertThatCode(() -> jsbd.process(jCas.getCas())).doesNotThrowAnyException();
+    }
 
 }
 
diff --git a/jcore-jsbd-ae/src/test/resources/errordocs/PMC5478802.xmi b/jcore-jsbd-ae/src/test/resources/errordocs/PMC5478802.xmi
new file mode 100644
index 000000000..c4d8ca95a
--- /dev/null
+++ b/jcore-jsbd-ae/src/test/resources/errordocs/PMC5478802.xmi
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xmi:XMI xmlns:xmi="http://www.omg.org/XMI" xmlns:tcas="http:///uima/tcas.ecore" xmlns:cas="http:///uima/cas.ecore"
+         xmlns:pubmed="http:///de/julielab/jcore/types/pubmed.ecore"
+         xmlns:ext="http:///de/julielab/jcore/types/ext.ecore" xmlns:types="http:///de/julielab/jcore/types.ecore"
+         xmi:version="2.0"><cas:NULL xmi:id="0"/><types:Table xmi:id="1" sofa="1145" begin="4073" end="4117" componentId="de.julielab.jcore.reader.pmc.PMCReader" objectId="osp499-tbl-0001" objectLabel="Table 1" objectCaption="662" objectTitle="711"/><types:Table xmi:id="946" sofa="1145" begin="6733" end="6772" componentId="de.julielab.jcore.reader.pmc.PMCReader" objectId="osp499-tbl-0002" objectLabel="Table 2" objectCaption="1286" objectTitle="412"/><types:Table xmi:id="1219" sofa="1145" begin="8066" end="8150" componentId="de.julielab.jcore.reader.pmc.PMCReader" objectId="osp499-tbl-0003" objectLabel="Table 3" objectCaption="567" objectTitle="77"/><types:Table xmi:id="301" sofa="1145" begin="8150" end="8236" componentId="de.julielab.jcore.reader.pmc.PMCReader" objectId="osp499-tbl-0004" objectLabel="Table 4" objectCaption="92" objectTitle="118"/><pubmed:InternalReference xmi:id="637" sofa="1145" begin="1584" end="1585" reftype="other" refid="osp499-bib-0001"/><pubmed:InternalReference xmi:id="238" sofa="1145" begin="1587" end="1588" reftype="other" refid="osp499-bib-0002"/><pubmed:InternalReference xmi:id="314" sofa="1145" begin="1590" end="1591" reftype="other" refid="osp499-bib-0003"/><pubmed:InternalReference xmi:id="853" sofa="1145" begin="1709" end="1710" reftype="other" refid="osp499-bib-0004"/><pubmed:InternalReference xmi:id="1330" sofa="1145" begin="1807" end="1808" reftype="other" refid="osp499-bib-0005"/><pubmed:InternalReference xmi:id="1136" sofa="1145" begin="1810" end="1811" reftype="other" refid="osp499-bib-0006"/><pubmed:InternalReference xmi:id="256" sofa="1145" begin="1967" end="1968" reftype="other" refid="osp499-bib-0007"/><pubmed:InternalReference xmi:id="1152" sofa="1145" begin="2165" end="2166" reftype="other" refid="osp499-bib-0008"/><pubmed:InternalReference xmi:id="101" sofa="1145" begin="2168" end="2169" reftype="other" refid="osp499-bib-0009"/><pubmed:InternalReference xmi:id="499" sofa="1145" begin="2171" end="2173" reftype="other" 
refid="osp499-bib-0010"/><pubmed:InternalReference xmi:id="1303" sofa="1145" begin="2235" end="2237" reftype="other" refid="osp499-bib-0011"/><pubmed:InternalReference xmi:id="1312" sofa="1145" begin="2414" end="2416" reftype="other" refid="osp499-bib-0012"/><pubmed:InternalReference xmi:id="844" sofa="1145" begin="2690" end="2692" reftype="other" refid="osp499-bib-0010"/><pubmed:InternalReference xmi:id="1054" sofa="1145" begin="2820" end="2822" reftype="other" refid="osp499-bib-0013"/><pubmed:InternalReference xmi:id="39" sofa="1145" begin="2912" end="2914" reftype="other" refid="osp499-bib-0014"/><pubmed:InternalReference xmi:id="332" sofa="1145" begin="2916" end="2918" reftype="other" refid="osp499-bib-0015"/><pubmed:InternalReference xmi:id="1388" sofa="1145" begin="2920" end="2922" reftype="other" refid="osp499-bib-0016"/><pubmed:InternalReference xmi:id="671" sofa="1145" begin="2924" end="2926" reftype="other" refid="osp499-bib-0017"/><pubmed:InternalReference xmi:id="1404" sofa="1145" begin="3010" end="3012" reftype="other" refid="osp499-bib-0018"/><pubmed:InternalReference xmi:id="379" sofa="1145" begin="3925" end="3926" reftype="other" refid="osp499-tbl-0001"/><pubmed:InternalReference xmi:id="247" sofa="1145" begin="5439" end="5440" reftype="figure" refid="osp499-fig-0001"/><pubmed:InternalReference xmi:id="937" sofa="1145" begin="6617" end="6618" reftype="other" refid="osp499-tbl-0002"/><pubmed:InternalReference xmi:id="1072" sofa="1145" begin="6730" end="6731" reftype="other" refid="osp499-tbl-0002"/><pubmed:InternalReference xmi:id="918" sofa="1145" begin="7713" end="7714" reftype="other" refid="osp499-tbl-0003"/><pubmed:InternalReference xmi:id="323" sofa="1145" begin="7895" end="7896" reftype="other" refid="osp499-tbl-0004"/><pubmed:InternalReference xmi:id="463" sofa="1145" begin="8799" end="8800" reftype="other" refid="osp499-bib-0005"/><pubmed:InternalReference xmi:id="1232" sofa="1145" begin="8802" end="8803" reftype="other" 
refid="osp499-bib-0006"/><pubmed:InternalReference xmi:id="1379" sofa="1145" begin="9044" end="9046" reftype="other" refid="osp499-bib-0014"/><pubmed:InternalReference xmi:id="30" sofa="1145" begin="9048" end="9050" reftype="other" refid="osp499-bib-0015"/><pubmed:InternalReference xmi:id="1355" sofa="1145" begin="9052" end="9054" reftype="other" refid="osp499-bib-0016"/><pubmed:InternalReference xmi:id="14" sofa="1145" begin="9056" end="9058" reftype="other" refid="osp499-bib-0017"/><pubmed:InternalReference xmi:id="274" sofa="1145" begin="9060" end="9062" reftype="other" refid="osp499-bib-0018"/><pubmed:InternalReference xmi:id="283" sofa="1145" begin="9064" end="9066" reftype="other" refid="osp499-bib-0019"/><pubmed:InternalReference xmi:id="795" sofa="1145" begin="9068" end="9070" reftype="other" refid="osp499-bib-0020"/><pubmed:InternalReference xmi:id="420" sofa="1145" begin="9211" end="9213" reftype="other" refid="osp499-bib-0021"/><pubmed:InternalReference xmi:id="1063" sofa="1145" begin="9215" end="9217" reftype="other" refid="osp499-bib-0022"/><pubmed:InternalReference xmi:id="1321" sofa="1145" begin="9337" end="9339" reftype="other" refid="osp499-bib-0023"/><pubmed:InternalReference xmi:id="292" sofa="1145" begin="9341" end="9343" reftype="other" refid="osp499-bib-0024"/><pubmed:InternalReference xmi:id="1011" sofa="1145" begin="9613" end="9615" reftype="other" refid="osp499-bib-0014"/><pubmed:InternalReference xmi:id="156" sofa="1145" begin="9617" end="9619" reftype="other" refid="osp499-bib-0015"/><pubmed:InternalReference xmi:id="588" sofa="1145" begin="9621" end="9623" reftype="other" refid="osp499-bib-0016"/><pubmed:InternalReference xmi:id="702" sofa="1145" begin="9625" end="9627" reftype="other" refid="osp499-bib-0017"/><pubmed:InternalReference xmi:id="1339" sofa="1145" begin="9646" end="9648" reftype="other" refid="osp499-bib-0025"/><pubmed:InternalReference xmi:id="748" sofa="1145" begin="9744" end="9746" reftype="other" 
refid="osp499-bib-0026"/><pubmed:InternalReference xmi:id="1270" sofa="1145" begin="9865" end="9867" reftype="other" refid="osp499-bib-0016"/><pubmed:InternalReference xmi:id="835" sofa="1145" begin="9869" end="9871" reftype="other" refid="osp499-bib-0017"/><pubmed:InternalReference xmi:id="388" sofa="1145" begin="10240" end="10242" reftype="other" refid="osp499-bib-0027"/><pubmed:InternalReference xmi:id="540" sofa="1145" begin="10619" end="10621" reftype="other" refid="osp499-bib-0010"/><pubmed:InternalReference xmi:id="1161" sofa="1145" begin="10623" end="10625" reftype="other" refid="osp499-bib-0028"/><pubmed:InternalReference xmi:id="869" sofa="1145" begin="10627" end="10629" reftype="other" refid="osp499-bib-0029"/><pubmed:InternalReference xmi:id="653" sofa="1145" begin="11228" end="11230" reftype="other" refid="osp499-bib-0030"/><pubmed:InternalReference xmi:id="265" sofa="1145" begin="11722" end="11724" reftype="other" refid="osp499-bib-0031"/><pubmed:InternalReference xmi:id="549" sofa="1145" begin="11726" end="11728" reftype="other" refid="osp499-bib-0032"/><pubmed:InternalReference xmi:id="611" sofa="1145" begin="11888" end="11890" reftype="other" refid="osp499-bib-0033"/><types:Paragraph xmi:id="1397" sofa="1145" begin="111" end="242" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="442" sofa="1145" begin="264" end="751" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="813" sofa="1145" begin="760" end="1247" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="354" sofa="1145" begin="1260" end="1347" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="472" sofa="1145" begin="1361" end="1969" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="885" sofa="1145" begin="1970" end="2417" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1279" sofa="1145" begin="2418" end="3238" 
componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="397" sofa="1145" begin="3239" end="3509" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="604" sofa="1145" begin="3539" end="4072" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="149" sofa="1145" begin="4073" end="4108" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="449" sofa="1145" begin="4117" end="4470" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="133" sofa="1145" begin="4471" end="4698" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="85" sofa="1145" begin="4699" end="5506" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="820" sofa="1145" begin="5507" end="5646" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="165" sofa="1145" begin="5656" end="5736" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="959" sofa="1145" begin="5758" end="6515" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1254" sofa="1145" begin="6524" end="6732" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="597" sofa="1145" begin="6733" end="6763" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1372" sofa="1145" begin="6772" end="7429" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="172" sofa="1145" begin="7430" end="8065" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1129" sofa="1145" begin="8066" end="8141" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="23" sofa="1145" begin="8150" end="8227" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="126" sofa="1145" begin="8247" end="9344" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph 
xmi:id="58" sofa="1145" begin="9345" end="10602" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="492" sofa="1145" begin="10603" end="11348" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="878" sofa="1145" begin="11349" end="12295" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="757" sofa="1145" begin="12307" end="12727" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="862" sofa="1145" begin="12759" end="12793" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="646" sofa="1145" begin="12802" end="12807" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1348" sofa="1145" begin="12819" end="13111" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="966" sofa="1145" begin="13112" end="13162" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="456" sofa="1145" begin="13184" end="13495" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:AbstractSectionHeading xmi:id="927" sofa="1145" begin="101" end="110" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="abstractSection"/><types:AbstractSectionHeading xmi:id="48" sofa="1145" begin="243" end="263" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="abstractSection"/><types:AbstractSectionHeading xmi:id="764" sofa="1145" begin="752" end="759" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="abstractSection"/><types:AbstractSectionHeading xmi:id="197" sofa="1145" begin="1248" end="1259" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="abstractSection"/><types:AuthorInfo xmi:id="65" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="F." 
affiliation="osp499-aff-0001" lastName="Tatsugami"/><types:AuthorInfo xmi:id="226" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="K." affiliation="osp499-aff-0001" lastName="Awai"/><types:AuthorInfo xmi:id="576" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="H." affiliation="osp499-aff-0001" lastName="Sakane"/><types:AuthorInfo xmi:id="690" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="M." affiliation="osp499-aff-0001" lastName="Iida"/><types:AuthorInfo xmi:id="724" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="Y." affiliation="osp499-aff-0001" contact="kaichi@hiroshima-u.ac.jp" lastName="Kaichi"/><types:AuthorInfo xmi:id="736" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="Y." affiliation="osp499-aff-0001" lastName="Honda"/><types:AuthorInfo xmi:id="774" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="H." affiliation="osp499-aff-0001" lastName="Higashibori"/><types:AuthorInfo xmi:id="1029" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="Y." 
affiliation="osp499-aff-0001" lastName="Baba"/><types:Title xmi:id="620" sofa="1145" begin="0" end="92" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="document"/><types:Title xmi:id="110" sofa="1145" begin="93" end="100" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="other"/><types:Title xmi:id="711" sofa="1145" begin="4109" end="4116" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="table"/><types:Title xmi:id="827" sofa="1145" begin="5647" end="5655" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="figure"/><types:Title xmi:id="412" sofa="1145" begin="6764" end="6771" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="table"/><types:Title xmi:id="77" sofa="1145" begin="8142" end="8149" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="table"/><types:Title xmi:id="118" sofa="1145" begin="8228" end="8235" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="table"/><types:Caption xmi:id="662" sofa="1145" begin="4073" end="4109" componentId="de.julielab.jcore.reader.pmc.PMCReader" captionType="table"/><types:Caption xmi:id="786" sofa="1145" begin="5507" end="5647" componentId="de.julielab.jcore.reader.pmc.PMCReader" captionType="table"/><types:Caption xmi:id="1286" sofa="1145" begin="6733" end="6764" componentId="de.julielab.jcore.reader.pmc.PMCReader" captionType="table"/><types:Caption xmi:id="567" sofa="1145" begin="8066" end="8142" componentId="de.julielab.jcore.reader.pmc.PMCReader" captionType="table"/><types:Caption xmi:id="92" sofa="1145" begin="8150" end="8228" componentId="de.julielab.jcore.reader.pmc.PMCReader" captionType="table"/><types:Keyword xmi:id="140" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" name="mortality"/><types:Keyword xmi:id="804" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" name="abdominal obesity"/><types:Keyword xmi:id="1081" sofa="1145" begin="0" end="0" 
componentId="de.julielab.jcore.reader.pmc.PMCReader" name="computed tomography"/><types:SectionTitle xmi:id="628" sofa="1145" begin="1348" end="1360" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="1261" sofa="1145" begin="3510" end="3517" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="179" sofa="1145" begin="3518" end="3538" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="1413" sofa="1145" begin="5737" end="5757" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="1020" sofa="1145" begin="6516" end="6523" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="217" sofa="1145" begin="8236" end="8246" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="361" sofa="1145" begin="12296" end="12306" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="188" sofa="1145" begin="12728" end="12758" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="558" sofa="1145" begin="12794" end="12801" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="523" sofa="1145" begin="12808" end="12818" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="370" sofa="1145" begin="13163" end="13183" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:Date xmi:id="207" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" day="20" month="1" year="2017"/><types:Section xmi:id="1103" sofa="1145" begin="1348" end="3510" 
componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="628" sectionId="osp499-sec-0005" depth="0"/><types:Section xmi:id="1116" sofa="1145" begin="3510" end="6516" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="1261" sectionId="osp499-sec-0006" depth="0"/><types:Section xmi:id="429" sofa="1145" begin="3518" end="5737" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="179" sectionId="osp499-sec-0007" depth="1"/><types:Section xmi:id="341" sofa="1145" begin="5737" end="6516" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="1413" sectionId="osp499-sec-0008" depth="1"/><types:Section xmi:id="1241" sofa="1145" begin="6516" end="8236" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="1020" sectionId="osp499-sec-0009" depth="0"/><types:Section xmi:id="479" sofa="1145" begin="8236" end="12296" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="217" sectionId="osp499-sec-0010" depth="0"/><types:Section xmi:id="1170" sofa="1145" begin="12296" end="12728" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="361" sectionId="osp499-sec-0011" depth="0"/><types:Section xmi:id="1041" sofa="1145" begin="12728" end="12794" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="188" sectionId="osp499-sec-0012" depth="0"/><types:Section xmi:id="998" sofa="1145" begin="12794" end="12808" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="558" sectionId="osp499-sec-0013" depth="0"/><types:Section xmi:id="892" sofa="1145" begin="12808" end="13163" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="523" sectionId="osp499-sec-0014" depth="0"/><types:Section xmi:id="1090" sofa="1145" begin="13163" end="13496" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="370" sectionId="osp499-sec-0015" depth="0"/><pubmed:OtherID xmi:id="404" sofa="1145" begin="0" end="0" 
componentId="de.julielab.jcore.reader.pmc.PMCReader" id="28702215" source="PubMed"/><types:Figure xmi:id="508" sofa="1145" begin="5507" end="5656" componentId="de.julielab.jcore.reader.pmc.PMCReader" objectId="osp499-fig-0001" objectLabel="Figure 1" objectCaption="786" objectTitle="827"/><types:AbstractSection xmi:id="990" sofa="1145" begin="101" end="243" componentId="de.julielab.jcore.reader.pmc.PMCReader" abstractSectionHeading="927"/><types:AbstractSection xmi:id="1364" sofa="1145" begin="243" end="752" componentId="de.julielab.jcore.reader.pmc.PMCReader" abstractSectionHeading="48"/><types:AbstractSection xmi:id="1295" sofa="1145" begin="752" end="1248" componentId="de.julielab.jcore.reader.pmc.PMCReader" abstractSectionHeading="764"/><types:AbstractSection xmi:id="532" sofa="1145" begin="1248" end="1348" componentId="de.julielab.jcore.reader.pmc.PMCReader" abstractSectionHeading="197"/><pubmed:ManualDescriptor xmi:id="905" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" keywordList="719"/><types:Journal xmi:id="973" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" pubDate="207" volume="3" title="Obesity Science &amp; Practice" issue="2" pages="219--223"/><types:AbstractText xmi:id="1183" sofa="1145" begin="93" end="1348" componentId="de.julielab.jcore.reader.pmc.PMCReader" structuredAbstractParts="990 1364 1295 532"/><pubmed:Header xmi:id="1197" sofa="1145" begin="0" end="1348" componentId="de.julielab.jcore.reader.pmc.PMCReader" source="PubMed Central" docId="PMC5478802" copyright="© 2017 The Authors. Obesity Science &amp; Practice published by John Wiley &amp; Sons Ltd, World Obesity and The Obesity Society." 
truncated="false" authors="680" pubTypeList="973" doi="10.1002/osp4.99" otherIDs="520"/><tcas:DocumentAnnotation xmi:id="1422" sofa="1145" begin="0" end="13496" language="x-unspecified"/><ext:DBProcessingMetaData xmi:id="1427" sofa="1145" begin="0" end="0" subsetTable="jsbd.errordoc" doNotMarkAsProcessed="false"><primaryKey>PMC5478802</primaryKey></ext:DBProcessingMetaData><cas:Sofa xmi:id="1145" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="Relationship between sudden natural death and abdominal fat evaluated on postmortem CT scans&#10;Summary&#10;Objective&#10;This study examined the association between sudden natural death and abdominal fat using postmortem computed tomography (CT) scans.&#10;Subjects and methods&#10;Postmortem CT images at the umbilical level of 241 subjects were used to measure abdominal areas of subcutaneous‐ and visceral fat, the rate of visceral fat and the waist circumference. Of the study subjects, 174 died of sudden natural death (130 men and 44 women), and 67 died of different causes (46 men and 21 women). All were between 40 and 75 years of age. Logistic regression analysis was performed to identify independent abdominal parameters associated with sudden natural death.&#10;Results&#10;By univariate analysis, the areas of subcutaneous and visceral fat were significantly larger in sudden natural death than who died of different causes (subcutaneous fat, odds ratio [OR] = 1.004, 95% confidence interval [CI] = 1.000–1.007, p = 0.03; visceral fat, OR = 1.008, 95% CI = 1.003–1.013, p &lt; 0.01). 
Multivariate analysis showed that the area of visceral fat was an independent factor associated with the risk of sudden natural death (OR = 1.008, 95% CI = 1.002–1.015, p = 0.02).&#10;Conclusions&#10;Postmortem CT revealed that sudden natural death was related to abdominal fat deposits.&#10;Introduction&#10;Sudden natural death is defined as death not attributable to a traumatic event or suicide within 24 hours of symptom onset in an apparently healthy individual or in a patient whose disease stage was not predictive of death 1, 2, 3. The frequency of sudden unexpected death due to cardiac or unidentifiable causes in employment age is 11 in 100,000 4. In order, most sudden natural deaths are due to cardiovascular followed by infectious diseases 5, 6. In individuals with acute chest pain, the cause of sudden natural death was pulmonary embolism, aortic dissection and obstructive coronary artery disease 7.&#10;Obesity is a risk factor for cardiovascular, metabolic, neoplastic and musculoskeletal disorders, and abdominal obesity is associated with increased morbidity independent of age, race and gender 8, 9, 10. Abdominal adiposity is a significant predictor of mortality 11 independent of the body mass index. The association between the waist circumference and metabolic risk may be partly explicable by a strong association with visceral adiposity 12.&#10;Computed tomography (CT) studies have shown that visceral fat was a significant predictor of mortality: in a model including three fat measures (subcutaneous, visceral and liver fat), age and length of follow‐up, only visceral fat was a significant predictor of mortality 10. In women, standard deviation increment in visceral fat was associated with a significantly increased all‐cause mortality risk 13. CT also revealed visceral adiposity was associated with incident cardiovascular disease 14, 15, 16, 17. In Japanese Americans, visceral fat was associated with obesity‐related mortality 18. 
Although the association between abdominal adiposity and morbidity is known, there are no CT studies that used direct measurements of abdominal fat to elucidate the association between abdominal fat and sudden natural death.&#10;In Japan, postmortem CT studies are performed to determine the cause of sudden death. Under the hypothesis that it is associated with abdominal fat, we investigated the relationship between sudden natural death and abdominal fat measured directly on postmortem CT scans.&#10;Methods&#10;Subjects and methods&#10;This retrospective study was approved by our institutional review board. We used postmortem CT studies acquired between February 2008 and March 2016 of 241 subjects who were between 40 and 75 years of age at the time of their sudden death. Of these, 174 died of sudden natural death (group 1; 130 men and 44 women), and 67 died of different causes (group 2; 46 men and 21 women) (Table 1). There was no significant difference in the age of the two groups (p = 0.09, two‐sample t‐test) nor in their gender (p = 0.34, chi‐square test).&#10;Clinical data on 241 study subjects&#10;Table 1&#10;The criteria for sudden natural death were death due to natural diseases and abrupt and unexpected death in individuals who appeared well. Individuals who died of non‐natural causes such as trauma and suicide were excluded. Contrast‐enhanced CT studies were performed in six group 1 subjects; two group 1 subjects and one group 2 subject were autopsied.&#10;We perused their medical records to review the demographic data of our study subjects for vascular risk factors (diabetes mellitus, hypertension, dyslipidemia and smoking) and the interval between the time of death and imaging.&#10;In individuals who died of sudden death, we routinely obtain whole‐body postmortem CT scans on a 16‐row CT scanner (Lightspeed16, GE Medical Systems, Milwaukee, WI, USA) or a 320‐row CT scanner (Aquilion ONE, Toshiba Corp. Medical Systems, Otawara, Japan). 
Fat analysis was performed on a workstation (AZE Virtual Place Raijin; AZE Ltd., Tokyo, Japan). The areas of subcutaneous and visceral fat and the waist circumference were measured on 5‐mm‐thick CT images at the umbilical level. Adipose tissue areas were calculated using an attenuation range of −150 to −10 Hounsfield units. The abdominal subcutaneous fat area was defined as the area of adipose tissue between the skin and the outermost aspect of the abdominal muscle wall (Figure 1). We also calculated the rate of visceral fat using the equation:&#10;Screen capture from the fat analysis tool. The blue and red areas show visceral and subcutaneous fat, respectively, at the umbilical level.&#10;Figure 1&#10;rate of visceral fat = area of visceral fat/area of subcutaneous + visceral fat.&#10;Statistical analysis&#10;We compared the interval between the time of death and imaging in groups 1 and 2 using the two‐sample t‐test. To compare abdominal parameters, we performed logistic regression analysis. Factors associated with sudden natural death (dependent variables) were identified by univariate analysis. Independent variables included the age, gender, interval between the time of death and imaging, area of subcutaneous fat, area of visceral fat, rate of visceral fat and waist circumference. All statistical tests were two‐sided; probability values of &lt;0.05 were considered statistically significant. Multivariate analysis adjusted for relevant factors was also performed using logistic regression analysis. All statistical analyses were with IBM SPSS Statistics 21.&#10;Results&#10;Postmortem CT scans revealed the cause of sudden natural death in 54 group 1 subjects (Table 2); in the other 120, it could not be ascertained. 
The causes of sudden death in group 2 are also shown in Table 2.&#10;Cause of death in 241 subjects&#10;Table 2&#10;There was no significant difference in the interval between the time of death and imaging between group 1 (median 70 min and range 10–720 min) and group 2 (median 70 min and range 20–660 min) (p = 0.55 and two‐sample t‐test). In group 1, the mean area of subcutaneous and visceral fat was 152.5 ± 96.4 cm2 and 139.3 ± 72.9 cm2, respectively; the mean rate of visceral fat was 0.50 ± 0.12, and the mean waist circumference was 94.7 ± 26.7 cm. In group 2, the mean area of subcutaneous and visceral fat was 125.4 ± 59.8 cm2 and 108.0 ± 47.3 cm2, respectively; the mean rate of visceral fat was 0.47 ± 0.11, and the mean waist circumference was 97.9 ± 28.0 cm.&#10;By univariate analysis, the areas of subcutaneous and visceral fat were significantly larger in group 1 than in group 2 (subcutaneous fat, odds ratio [OR] = 1.004, 95% confidence interval [CI] = 1.000–1.007, p = 0.03; visceral fat, OR = 1.008, 95% CI = 1.003–1.013, p &lt; 0.01) (Table 3). For multivariate analysis, we selected the area of visceral fat as an independent factor associated with sudden natural death (OR = 1.008, 95% CI = 1.002–1.015, p = 0.02) (Table 4). The difference between the two groups in the rate of visceral fat and the waist circumference was not statistically significant (p = 0.13 and p = 0.42, respectively).&#10;Univariate analysis to explore factors associated with sudden natural death&#10;Table 3&#10;Multivariate analysis to explore factors associated with sudden natural death&#10;Table 4&#10;Discussion&#10;Our study showed that the areas of subcutaneous and visceral fat were significantly larger in group 1 than in group 2 and that the rate of visceral fat and the waist circumference were not significantly different. Multivariate analysis revealed that the area of visceral fat was an independent factor associated with the risk of sudden natural death. 
These findings support the hypothesis that sudden natural death is associated with abdominal fat deposits. Most identified causes of sudden natural death (n = 54) were aortic diseases (n = 30). Others 5, 6 suggested that in individuals without a definite radiologically identified cause of sudden natural death it was due to coronary artery disease and numerous studies found an association between abdominal adiposity and cardiovascular disease 14, 15, 16, 17, 18, 19, 20. The cellular lipid content determines the size of adipocytes; large, mature adipoctyes were filled almost entirely by large lipid droplets 21, 22. The adipocyte volume determines cell functionality and the larger the adipocytes, the higher the cardiometabolic risk 23, 24.&#10;Our multivariate analysis showed that the area of visceral fat is an independent factor associated with the risk of sudden natural death. This is consistent with earlier findings that cardiovascular disease was asssociated with visceral – rather than subcutaneous fat 14, 15, 16, 17. Pickhardt et al. 25 documented that visceral fat in women was correlated with metabolic syndrome. While Fox et al. 26 showed that it was more strongly associated with risk factors for cardiovascular disease in women than in men; others 16, 17 claimed that visceral fat areas were significantly related to cardiovascular disease in both genders. Sex differences are recognized in the distribution of adipose tissue. In men, adipose tissue is primarily found in the central or abdominal region; this raises their risk for metabolic disorders; women, on the other hand, harbour more subcutaneous than visceral fat 27. These gender‐specific differences may explain the lack of significant differences in the rate of visceral fat between our two groups. 
Despite gender‐specific differences in the distribution of abdominal fat distribution, we think that visceral fat is associated with obesity‐related morbidity such as cardiovascular disease resulting in sudden natural death.&#10;Earlier studies 10, 28, 29 reported an association between the waist circumference and mortality. In our series, the waist circumference was not associated with sudden natural death. The waist circumference reflects abdominal fat deposits, and the observation that it was not associated with sudden natural death is inconsistent with our finding that the areas of subcutaneous and visceral fat were significantly larger in group 1 than in group 2. We used postmortem CT scans in our analyses; these scans reflected postmortem‐resuscitation and post‐cardiopulmonary resuscitation changes including gastrointestinal distension 30 that increase the waist circumference. Consequently, our findings may not reflect the pre‐mortem waist circumference.&#10;Our study has some limitations. As only three subjects were autopsied, the true cause of death remains uncertain. We did not have full medical histories on all subjects, and the effect of premortem patient characteristics could not be considered in our analyses. For example, there is a strong association between depression and suicide, and between obesity and depression 31, 32; however, we did not study the history of depression in this investigation. Also, cardiovascular disease is the leading cause of death in postmenopausal women 33. Although the differences of subcutaneous and visceral fat between groups 1 and 2 were statistically significant and the area of visceral fat was an independent factor associated with sudden natural death, they were modest, and the clinical significance is questionable. 
Lastly, our study population was composed of a heterogeneous mixture of men and women because only 65 of the 241 subjects were women.&#10;Conclusion&#10;The areas of subcutaneous and visceral fat were signifiantly larger in individuals who died of sudden natural death than in those who died of other identified causes. Visceral fat was an independent factor associated with the risk of sudden natural death. This raises the possibility that abdominal fat deposits may be associated with sudden natural death due to obesity‐related morbidity such as cardiovascular disease.&#10;Conflict of interest statement&#10;No conflict of interest statement.&#10;Funding&#10;None.&#10;Disclosure&#10;Dr Awai reports grants from Research Grant, Toshiba Medical Systems; grants from Research Grant, Hitachi Medical Corporation; grants from Research Grant, Eizai Co.; grants from Research Grant, Bayer Seiyaku Co.; and grants from Research Grant, Daiichi Sankyo, Co., outside the submitted work.&#10;The other authors declare no conflict of interest.&#10;Author contributions&#10;YK conceptualized the study, performed statistical analyses and wrote the paper; HS, HH, YH, FT, YB and MI contributed to interpreting the CT images and critically reviewed the paper. KA contributed to interpreting the data and critically reviewed the paper. 
All authors approved the final version of the paper.&#10;"/><cas:FSArray xmi:id="719" elements="804 1081 140"/><cas:FSArray xmi:id="680" elements="724 576 774 736 65 1029 690 226"/><cas:FSArray xmi:id="520" elements="404"/><cas:View sofa="1145" members="1 946 1219 301 637 238 314 853 1330 1136 256 1152 101 499 1303 1312 844 1054 39 332 1388 671 1404 379 247 937 1072 918 323 463 1232 1379 30 1355 14 274 283 795 420 1063 1321 292 1011 156 588 702 1339 748 1270 835 388 540 1161 869 653 265 549 611 1397 442 813 354 472 885 1279 397 604 149 449 133 85 820 165 959 1254 597 1372 172 1129 23 126 58 492 878 757 862 646 1348 966 456 927 48 764 197 65 226 576 690 724 736 774 1029 620 110 711 827 412 77 118 662 786 1286 567 92 140 804 1081 628 1261 179 1413 1020 217 361 188 558 523 370 207 1103 1116 429 341 1241 479 1170 1041 998 892 1090 404 508 990 1364 1295 532 905 973 1183 1197 1422 1427"/></xmi:XMI>
\ No newline at end of file
diff --git a/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReCondensedDocumentTextTest.java b/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReCondensedDocumentTextTest.java
index 58fdcc137..22758d549 100644
--- a/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReCondensedDocumentTextTest.java
+++ b/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReCondensedDocumentTextTest.java
@@ -71,7 +71,7 @@ public void testReduce3() throws Exception {
 		// references completely.
 		JCas jcas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
 				"de.julielab.jcore.types.jcore-document-structure-types");
-		jcas.setDocumentText("This sentence has multiple references.2,5,42 This is a second sentence.7,8");
+		jcas.setDocumentText("This sentence has multiple references.2,5;42 This is a second sentence.7,8");
 		InternalReference ref1 = new InternalReference(jcas, 38, 39);
 		ref1.addToIndexes();
 		InternalReference ref2 = new InternalReference(jcas, 40, 41);
@@ -84,7 +84,7 @@ public void testReduce3() throws Exception {
 		ref5.addToIndexes();
 
 		JCoReCondensedDocumentText condensedText = new JCoReCondensedDocumentText(jcas,
-				new HashSet<>(Arrays.asList(InternalReference.class.getCanonicalName())), Set.of(','));
+				new HashSet<>(Arrays.asList(InternalReference.class.getCanonicalName())), Set.of(',', ';'));
 		assertEquals("This sentence has multiple references. This is a second sentence.", condensedText.getCodensedText());
 	}
 
@@ -96,9 +96,9 @@ public void testErrorDoc() throws Exception{
 				"de.julielab.jcore.types.extensions.jcore-document-meta-extension-types");
 
 		XmiCasDeserializer.deserialize(new FileInputStream(Path.of("src", "test", "resources", "PMC5478802.xmi").toFile()), jCas.getCas());
-		JCoReCondensedDocumentText text = new JCoReCondensedDocumentText(jCas, Set.of(de.julielab.jcore.types.pubmed.InternalReference.class.getCanonicalName()));
-//		Set<String> sentenceBoundaryTypes = Set.of("de.julielab.jcore.types.Title", "de.julielab.jcore.types.AbstractText", "de.julielab.jcore.types.AbstractSectionHeading", "de.julielab.jcore.types.AbstractSection", "de.julielab.jcore.types.Section", "de.julielab.jcore.types.Paragraph", "de.julielab.jcore.types.Zone", "de.julielab.jcore.types.Caption", "de.julielab.jcore.types.Figure", "de.julielab.jcore.types.Table");
-		Set<String> sentenceBoundaryTypes = Set.of("de.julielab.jcore.types.Section");
+		JCoReCondensedDocumentText text = new JCoReCondensedDocumentText(jCas, Set.of(de.julielab.jcore.types.pubmed.InternalReference.class.getCanonicalName()), Set.of(','));
+		Set<String> sentenceBoundaryTypes = Set.of("de.julielab.jcore.types.Title", "de.julielab.jcore.types.AbstractText", "de.julielab.jcore.types.AbstractSectionHeading", "de.julielab.jcore.types.AbstractSection", "de.julielab.jcore.types.Section", "de.julielab.jcore.types.Paragraph", "de.julielab.jcore.types.Zone", "de.julielab.jcore.types.Caption", "de.julielab.jcore.types.Figure", "de.julielab.jcore.types.Table");
+//		Set<String> sentenceBoundaryTypes = Set.of("de.julielab.jcore.types.Section");
 		JCoReAnnotationIndexMerger indexMerger = new JCoReAnnotationIndexMerger(sentenceBoundaryTypes, false,
 				null, jCas);
 
@@ -106,12 +106,14 @@ public void testErrorDoc() throws Exception{
 			Annotation a = (Annotation) indexMerger.getAnnotation();
 			System.out.println(a.getCoveredText());
 			System.out.println("--");
-			int condensedBegin = text.getCondensedOffsetForOriginalOffset(a.getBegin());
-			int condensedEnd = text.getOriginalOffsetForCondensedOffset(a.getEnd());
+			int begin = a.getBegin();
+			int condensedBegin = text.getCondensedOffsetForOriginalOffset(begin);
+			int end = a.getEnd();
+			int condensedEnd = text.getCondensedOffsetForOriginalOffset(end);
 			if (condensedEnd > text.getCodensedText().length())
 				System.out.println();
 			System.out.println(text.getCodensedText().substring(condensedBegin, condensedEnd));
-			System.out.println(a.getBegin() + " - " + a.getEnd() + ", " + condensedBegin + " - " + condensedEnd);
+			System.out.println(begin + " - " + end + ", " + condensedBegin + " - " + condensedEnd);
 			System.out.println();
 		}
 	}

From 6e39d19caef183226a0fe55cbdad1e2bedc84748 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 1 Jul 2021 14:51:01 +0200
Subject: [PATCH 078/269] Removed a non-required test.

---
 .../JCoReCondensedDocumentTextTest.java       | 34 -------------------
 .../src/test/resources/PMC5478802.xmi         |  5 ---
 2 files changed, 39 deletions(-)
 delete mode 100644 jcore-utilities/src/test/resources/PMC5478802.xmi

diff --git a/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReCondensedDocumentTextTest.java b/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReCondensedDocumentTextTest.java
index 22758d549..1c5597a3e 100644
--- a/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReCondensedDocumentTextTest.java
+++ b/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReCondensedDocumentTextTest.java
@@ -1,14 +1,10 @@
 package de.julielab.jcore.utility;
 
 import de.julielab.jcore.types.InternalReference;
-import org.apache.uima.cas.impl.XmiCasDeserializer;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
-import org.apache.uima.jcas.tcas.Annotation;
 import org.junit.jupiter.api.Test;
 
-import java.io.FileInputStream;
-import java.nio.file.Path;
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Set;
@@ -87,34 +83,4 @@ public void testReduce3() throws Exception {
 				new HashSet<>(Arrays.asList(InternalReference.class.getCanonicalName())), Set.of(',', ';'));
 		assertEquals("This sentence has multiple references. This is a second sentence.", condensedText.getCodensedText());
 	}
-
-	@Test
-	public void testErrorDoc() throws Exception{
-		// The XMI document uses here is from PMC and is an example of a source of error the previously occurred.
-		JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
-				"de.julielab.jcore.types.jcore-document-structure-pubmed-types", "de.julielab.jcore.types.jcore-document-meta-pubmed-types",
-				"de.julielab.jcore.types.extensions.jcore-document-meta-extension-types");
-
-		XmiCasDeserializer.deserialize(new FileInputStream(Path.of("src", "test", "resources", "PMC5478802.xmi").toFile()), jCas.getCas());
-		JCoReCondensedDocumentText text = new JCoReCondensedDocumentText(jCas, Set.of(de.julielab.jcore.types.pubmed.InternalReference.class.getCanonicalName()), Set.of(','));
-		Set<String> sentenceBoundaryTypes = Set.of("de.julielab.jcore.types.Title", "de.julielab.jcore.types.AbstractText", "de.julielab.jcore.types.AbstractSectionHeading", "de.julielab.jcore.types.AbstractSection", "de.julielab.jcore.types.Section", "de.julielab.jcore.types.Paragraph", "de.julielab.jcore.types.Zone", "de.julielab.jcore.types.Caption", "de.julielab.jcore.types.Figure", "de.julielab.jcore.types.Table");
-//		Set<String> sentenceBoundaryTypes = Set.of("de.julielab.jcore.types.Section");
-		JCoReAnnotationIndexMerger indexMerger = new JCoReAnnotationIndexMerger(sentenceBoundaryTypes, false,
-				null, jCas);
-
-		while (indexMerger.incrementAnnotation()) {
-			Annotation a = (Annotation) indexMerger.getAnnotation();
-			System.out.println(a.getCoveredText());
-			System.out.println("--");
-			int begin = a.getBegin();
-			int condensedBegin = text.getCondensedOffsetForOriginalOffset(begin);
-			int end = a.getEnd();
-			int condensedEnd = text.getCondensedOffsetForOriginalOffset(end);
-			if (condensedEnd > text.getCodensedText().length())
-				System.out.println();
-			System.out.println(text.getCodensedText().substring(condensedBegin, condensedEnd));
-			System.out.println(begin + " - " + end + ", " + condensedBegin + " - " + condensedEnd);
-			System.out.println();
-		}
-	}
 }
diff --git a/jcore-utilities/src/test/resources/PMC5478802.xmi b/jcore-utilities/src/test/resources/PMC5478802.xmi
deleted file mode 100644
index c4d8ca95a..000000000
--- a/jcore-utilities/src/test/resources/PMC5478802.xmi
+++ /dev/null
@@ -1,5 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<xmi:XMI xmlns:xmi="http://www.omg.org/XMI" xmlns:tcas="http:///uima/tcas.ecore" xmlns:cas="http:///uima/cas.ecore"
-         xmlns:pubmed="http:///de/julielab/jcore/types/pubmed.ecore"
-         xmlns:ext="http:///de/julielab/jcore/types/ext.ecore" xmlns:types="http:///de/julielab/jcore/types.ecore"
-         xmi:version="2.0"><cas:NULL xmi:id="0"/><types:Table xmi:id="1" sofa="1145" begin="4073" end="4117" componentId="de.julielab.jcore.reader.pmc.PMCReader" objectId="osp499-tbl-0001" objectLabel="Table 1" objectCaption="662" objectTitle="711"/><types:Table xmi:id="946" sofa="1145" begin="6733" end="6772" componentId="de.julielab.jcore.reader.pmc.PMCReader" objectId="osp499-tbl-0002" objectLabel="Table 2" objectCaption="1286" objectTitle="412"/><types:Table xmi:id="1219" sofa="1145" begin="8066" end="8150" componentId="de.julielab.jcore.reader.pmc.PMCReader" objectId="osp499-tbl-0003" objectLabel="Table 3" objectCaption="567" objectTitle="77"/><types:Table xmi:id="301" sofa="1145" begin="8150" end="8236" componentId="de.julielab.jcore.reader.pmc.PMCReader" objectId="osp499-tbl-0004" objectLabel="Table 4" objectCaption="92" objectTitle="118"/><pubmed:InternalReference xmi:id="637" sofa="1145" begin="1584" end="1585" reftype="other" refid="osp499-bib-0001"/><pubmed:InternalReference xmi:id="238" sofa="1145" begin="1587" end="1588" reftype="other" refid="osp499-bib-0002"/><pubmed:InternalReference xmi:id="314" sofa="1145" begin="1590" end="1591" reftype="other" refid="osp499-bib-0003"/><pubmed:InternalReference xmi:id="853" sofa="1145" begin="1709" end="1710" reftype="other" refid="osp499-bib-0004"/><pubmed:InternalReference xmi:id="1330" sofa="1145" begin="1807" end="1808" reftype="other" refid="osp499-bib-0005"/><pubmed:InternalReference xmi:id="1136" sofa="1145" begin="1810" end="1811" reftype="other" refid="osp499-bib-0006"/><pubmed:InternalReference xmi:id="256" sofa="1145" begin="1967" end="1968" reftype="other" refid="osp499-bib-0007"/><pubmed:InternalReference xmi:id="1152" sofa="1145" begin="2165" end="2166" reftype="other" refid="osp499-bib-0008"/><pubmed:InternalReference xmi:id="101" sofa="1145" begin="2168" end="2169" reftype="other" refid="osp499-bib-0009"/><pubmed:InternalReference xmi:id="499" sofa="1145" begin="2171" end="2173" reftype="other" 
refid="osp499-bib-0010"/><pubmed:InternalReference xmi:id="1303" sofa="1145" begin="2235" end="2237" reftype="other" refid="osp499-bib-0011"/><pubmed:InternalReference xmi:id="1312" sofa="1145" begin="2414" end="2416" reftype="other" refid="osp499-bib-0012"/><pubmed:InternalReference xmi:id="844" sofa="1145" begin="2690" end="2692" reftype="other" refid="osp499-bib-0010"/><pubmed:InternalReference xmi:id="1054" sofa="1145" begin="2820" end="2822" reftype="other" refid="osp499-bib-0013"/><pubmed:InternalReference xmi:id="39" sofa="1145" begin="2912" end="2914" reftype="other" refid="osp499-bib-0014"/><pubmed:InternalReference xmi:id="332" sofa="1145" begin="2916" end="2918" reftype="other" refid="osp499-bib-0015"/><pubmed:InternalReference xmi:id="1388" sofa="1145" begin="2920" end="2922" reftype="other" refid="osp499-bib-0016"/><pubmed:InternalReference xmi:id="671" sofa="1145" begin="2924" end="2926" reftype="other" refid="osp499-bib-0017"/><pubmed:InternalReference xmi:id="1404" sofa="1145" begin="3010" end="3012" reftype="other" refid="osp499-bib-0018"/><pubmed:InternalReference xmi:id="379" sofa="1145" begin="3925" end="3926" reftype="other" refid="osp499-tbl-0001"/><pubmed:InternalReference xmi:id="247" sofa="1145" begin="5439" end="5440" reftype="figure" refid="osp499-fig-0001"/><pubmed:InternalReference xmi:id="937" sofa="1145" begin="6617" end="6618" reftype="other" refid="osp499-tbl-0002"/><pubmed:InternalReference xmi:id="1072" sofa="1145" begin="6730" end="6731" reftype="other" refid="osp499-tbl-0002"/><pubmed:InternalReference xmi:id="918" sofa="1145" begin="7713" end="7714" reftype="other" refid="osp499-tbl-0003"/><pubmed:InternalReference xmi:id="323" sofa="1145" begin="7895" end="7896" reftype="other" refid="osp499-tbl-0004"/><pubmed:InternalReference xmi:id="463" sofa="1145" begin="8799" end="8800" reftype="other" refid="osp499-bib-0005"/><pubmed:InternalReference xmi:id="1232" sofa="1145" begin="8802" end="8803" reftype="other" 
refid="osp499-bib-0006"/><pubmed:InternalReference xmi:id="1379" sofa="1145" begin="9044" end="9046" reftype="other" refid="osp499-bib-0014"/><pubmed:InternalReference xmi:id="30" sofa="1145" begin="9048" end="9050" reftype="other" refid="osp499-bib-0015"/><pubmed:InternalReference xmi:id="1355" sofa="1145" begin="9052" end="9054" reftype="other" refid="osp499-bib-0016"/><pubmed:InternalReference xmi:id="14" sofa="1145" begin="9056" end="9058" reftype="other" refid="osp499-bib-0017"/><pubmed:InternalReference xmi:id="274" sofa="1145" begin="9060" end="9062" reftype="other" refid="osp499-bib-0018"/><pubmed:InternalReference xmi:id="283" sofa="1145" begin="9064" end="9066" reftype="other" refid="osp499-bib-0019"/><pubmed:InternalReference xmi:id="795" sofa="1145" begin="9068" end="9070" reftype="other" refid="osp499-bib-0020"/><pubmed:InternalReference xmi:id="420" sofa="1145" begin="9211" end="9213" reftype="other" refid="osp499-bib-0021"/><pubmed:InternalReference xmi:id="1063" sofa="1145" begin="9215" end="9217" reftype="other" refid="osp499-bib-0022"/><pubmed:InternalReference xmi:id="1321" sofa="1145" begin="9337" end="9339" reftype="other" refid="osp499-bib-0023"/><pubmed:InternalReference xmi:id="292" sofa="1145" begin="9341" end="9343" reftype="other" refid="osp499-bib-0024"/><pubmed:InternalReference xmi:id="1011" sofa="1145" begin="9613" end="9615" reftype="other" refid="osp499-bib-0014"/><pubmed:InternalReference xmi:id="156" sofa="1145" begin="9617" end="9619" reftype="other" refid="osp499-bib-0015"/><pubmed:InternalReference xmi:id="588" sofa="1145" begin="9621" end="9623" reftype="other" refid="osp499-bib-0016"/><pubmed:InternalReference xmi:id="702" sofa="1145" begin="9625" end="9627" reftype="other" refid="osp499-bib-0017"/><pubmed:InternalReference xmi:id="1339" sofa="1145" begin="9646" end="9648" reftype="other" refid="osp499-bib-0025"/><pubmed:InternalReference xmi:id="748" sofa="1145" begin="9744" end="9746" reftype="other" 
refid="osp499-bib-0026"/><pubmed:InternalReference xmi:id="1270" sofa="1145" begin="9865" end="9867" reftype="other" refid="osp499-bib-0016"/><pubmed:InternalReference xmi:id="835" sofa="1145" begin="9869" end="9871" reftype="other" refid="osp499-bib-0017"/><pubmed:InternalReference xmi:id="388" sofa="1145" begin="10240" end="10242" reftype="other" refid="osp499-bib-0027"/><pubmed:InternalReference xmi:id="540" sofa="1145" begin="10619" end="10621" reftype="other" refid="osp499-bib-0010"/><pubmed:InternalReference xmi:id="1161" sofa="1145" begin="10623" end="10625" reftype="other" refid="osp499-bib-0028"/><pubmed:InternalReference xmi:id="869" sofa="1145" begin="10627" end="10629" reftype="other" refid="osp499-bib-0029"/><pubmed:InternalReference xmi:id="653" sofa="1145" begin="11228" end="11230" reftype="other" refid="osp499-bib-0030"/><pubmed:InternalReference xmi:id="265" sofa="1145" begin="11722" end="11724" reftype="other" refid="osp499-bib-0031"/><pubmed:InternalReference xmi:id="549" sofa="1145" begin="11726" end="11728" reftype="other" refid="osp499-bib-0032"/><pubmed:InternalReference xmi:id="611" sofa="1145" begin="11888" end="11890" reftype="other" refid="osp499-bib-0033"/><types:Paragraph xmi:id="1397" sofa="1145" begin="111" end="242" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="442" sofa="1145" begin="264" end="751" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="813" sofa="1145" begin="760" end="1247" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="354" sofa="1145" begin="1260" end="1347" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="472" sofa="1145" begin="1361" end="1969" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="885" sofa="1145" begin="1970" end="2417" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1279" sofa="1145" begin="2418" end="3238" 
componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="397" sofa="1145" begin="3239" end="3509" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="604" sofa="1145" begin="3539" end="4072" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="149" sofa="1145" begin="4073" end="4108" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="449" sofa="1145" begin="4117" end="4470" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="133" sofa="1145" begin="4471" end="4698" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="85" sofa="1145" begin="4699" end="5506" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="820" sofa="1145" begin="5507" end="5646" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="165" sofa="1145" begin="5656" end="5736" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="959" sofa="1145" begin="5758" end="6515" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1254" sofa="1145" begin="6524" end="6732" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="597" sofa="1145" begin="6733" end="6763" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1372" sofa="1145" begin="6772" end="7429" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="172" sofa="1145" begin="7430" end="8065" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1129" sofa="1145" begin="8066" end="8141" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="23" sofa="1145" begin="8150" end="8227" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="126" sofa="1145" begin="8247" end="9344" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph 
xmi:id="58" sofa="1145" begin="9345" end="10602" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="492" sofa="1145" begin="10603" end="11348" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="878" sofa="1145" begin="11349" end="12295" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="757" sofa="1145" begin="12307" end="12727" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="862" sofa="1145" begin="12759" end="12793" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="646" sofa="1145" begin="12802" end="12807" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1348" sofa="1145" begin="12819" end="13111" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="966" sofa="1145" begin="13112" end="13162" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="456" sofa="1145" begin="13184" end="13495" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:AbstractSectionHeading xmi:id="927" sofa="1145" begin="101" end="110" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="abstractSection"/><types:AbstractSectionHeading xmi:id="48" sofa="1145" begin="243" end="263" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="abstractSection"/><types:AbstractSectionHeading xmi:id="764" sofa="1145" begin="752" end="759" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="abstractSection"/><types:AbstractSectionHeading xmi:id="197" sofa="1145" begin="1248" end="1259" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="abstractSection"/><types:AuthorInfo xmi:id="65" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="F." 
affiliation="osp499-aff-0001" lastName="Tatsugami"/><types:AuthorInfo xmi:id="226" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="K." affiliation="osp499-aff-0001" lastName="Awai"/><types:AuthorInfo xmi:id="576" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="H." affiliation="osp499-aff-0001" lastName="Sakane"/><types:AuthorInfo xmi:id="690" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="M." affiliation="osp499-aff-0001" lastName="Iida"/><types:AuthorInfo xmi:id="724" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="Y." affiliation="osp499-aff-0001" contact="kaichi@hiroshima-u.ac.jp" lastName="Kaichi"/><types:AuthorInfo xmi:id="736" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="Y." affiliation="osp499-aff-0001" lastName="Honda"/><types:AuthorInfo xmi:id="774" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="H." affiliation="osp499-aff-0001" lastName="Higashibori"/><types:AuthorInfo xmi:id="1029" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="Y." 
affiliation="osp499-aff-0001" lastName="Baba"/><types:Title xmi:id="620" sofa="1145" begin="0" end="92" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="document"/><types:Title xmi:id="110" sofa="1145" begin="93" end="100" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="other"/><types:Title xmi:id="711" sofa="1145" begin="4109" end="4116" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="table"/><types:Title xmi:id="827" sofa="1145" begin="5647" end="5655" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="figure"/><types:Title xmi:id="412" sofa="1145" begin="6764" end="6771" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="table"/><types:Title xmi:id="77" sofa="1145" begin="8142" end="8149" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="table"/><types:Title xmi:id="118" sofa="1145" begin="8228" end="8235" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="table"/><types:Caption xmi:id="662" sofa="1145" begin="4073" end="4109" componentId="de.julielab.jcore.reader.pmc.PMCReader" captionType="table"/><types:Caption xmi:id="786" sofa="1145" begin="5507" end="5647" componentId="de.julielab.jcore.reader.pmc.PMCReader" captionType="table"/><types:Caption xmi:id="1286" sofa="1145" begin="6733" end="6764" componentId="de.julielab.jcore.reader.pmc.PMCReader" captionType="table"/><types:Caption xmi:id="567" sofa="1145" begin="8066" end="8142" componentId="de.julielab.jcore.reader.pmc.PMCReader" captionType="table"/><types:Caption xmi:id="92" sofa="1145" begin="8150" end="8228" componentId="de.julielab.jcore.reader.pmc.PMCReader" captionType="table"/><types:Keyword xmi:id="140" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" name="mortality"/><types:Keyword xmi:id="804" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" name="abdominal obesity"/><types:Keyword xmi:id="1081" sofa="1145" begin="0" end="0" 
componentId="de.julielab.jcore.reader.pmc.PMCReader" name="computed tomography"/><types:SectionTitle xmi:id="628" sofa="1145" begin="1348" end="1360" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="1261" sofa="1145" begin="3510" end="3517" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="179" sofa="1145" begin="3518" end="3538" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="1413" sofa="1145" begin="5737" end="5757" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="1020" sofa="1145" begin="6516" end="6523" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="217" sofa="1145" begin="8236" end="8246" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="361" sofa="1145" begin="12296" end="12306" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="188" sofa="1145" begin="12728" end="12758" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="558" sofa="1145" begin="12794" end="12801" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="523" sofa="1145" begin="12808" end="12818" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="370" sofa="1145" begin="13163" end="13183" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:Date xmi:id="207" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" day="20" month="1" year="2017"/><types:Section xmi:id="1103" sofa="1145" begin="1348" end="3510" 
componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="628" sectionId="osp499-sec-0005" depth="0"/><types:Section xmi:id="1116" sofa="1145" begin="3510" end="6516" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="1261" sectionId="osp499-sec-0006" depth="0"/><types:Section xmi:id="429" sofa="1145" begin="3518" end="5737" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="179" sectionId="osp499-sec-0007" depth="1"/><types:Section xmi:id="341" sofa="1145" begin="5737" end="6516" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="1413" sectionId="osp499-sec-0008" depth="1"/><types:Section xmi:id="1241" sofa="1145" begin="6516" end="8236" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="1020" sectionId="osp499-sec-0009" depth="0"/><types:Section xmi:id="479" sofa="1145" begin="8236" end="12296" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="217" sectionId="osp499-sec-0010" depth="0"/><types:Section xmi:id="1170" sofa="1145" begin="12296" end="12728" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="361" sectionId="osp499-sec-0011" depth="0"/><types:Section xmi:id="1041" sofa="1145" begin="12728" end="12794" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="188" sectionId="osp499-sec-0012" depth="0"/><types:Section xmi:id="998" sofa="1145" begin="12794" end="12808" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="558" sectionId="osp499-sec-0013" depth="0"/><types:Section xmi:id="892" sofa="1145" begin="12808" end="13163" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="523" sectionId="osp499-sec-0014" depth="0"/><types:Section xmi:id="1090" sofa="1145" begin="13163" end="13496" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="370" sectionId="osp499-sec-0015" depth="0"/><pubmed:OtherID xmi:id="404" sofa="1145" begin="0" end="0" 
componentId="de.julielab.jcore.reader.pmc.PMCReader" id="28702215" source="PubMed"/><types:Figure xmi:id="508" sofa="1145" begin="5507" end="5656" componentId="de.julielab.jcore.reader.pmc.PMCReader" objectId="osp499-fig-0001" objectLabel="Figure 1" objectCaption="786" objectTitle="827"/><types:AbstractSection xmi:id="990" sofa="1145" begin="101" end="243" componentId="de.julielab.jcore.reader.pmc.PMCReader" abstractSectionHeading="927"/><types:AbstractSection xmi:id="1364" sofa="1145" begin="243" end="752" componentId="de.julielab.jcore.reader.pmc.PMCReader" abstractSectionHeading="48"/><types:AbstractSection xmi:id="1295" sofa="1145" begin="752" end="1248" componentId="de.julielab.jcore.reader.pmc.PMCReader" abstractSectionHeading="764"/><types:AbstractSection xmi:id="532" sofa="1145" begin="1248" end="1348" componentId="de.julielab.jcore.reader.pmc.PMCReader" abstractSectionHeading="197"/><pubmed:ManualDescriptor xmi:id="905" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" keywordList="719"/><types:Journal xmi:id="973" sofa="1145" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" pubDate="207" volume="3" title="Obesity Science &amp; Practice" issue="2" pages="219--223"/><types:AbstractText xmi:id="1183" sofa="1145" begin="93" end="1348" componentId="de.julielab.jcore.reader.pmc.PMCReader" structuredAbstractParts="990 1364 1295 532"/><pubmed:Header xmi:id="1197" sofa="1145" begin="0" end="1348" componentId="de.julielab.jcore.reader.pmc.PMCReader" source="PubMed Central" docId="PMC5478802" copyright="© 2017 The Authors. Obesity Science &amp; Practice published by John Wiley &amp; Sons Ltd, World Obesity and The Obesity Society." 
truncated="false" authors="680" pubTypeList="973" doi="10.1002/osp4.99" otherIDs="520"/><tcas:DocumentAnnotation xmi:id="1422" sofa="1145" begin="0" end="13496" language="x-unspecified"/><ext:DBProcessingMetaData xmi:id="1427" sofa="1145" begin="0" end="0" subsetTable="jsbd.errordoc" doNotMarkAsProcessed="false"><primaryKey>PMC5478802</primaryKey></ext:DBProcessingMetaData><cas:Sofa xmi:id="1145" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="Relationship between sudden natural death and abdominal fat evaluated on postmortem CT scans&#10;Summary&#10;Objective&#10;This study examined the association between sudden natural death and abdominal fat using postmortem computed tomography (CT) scans.&#10;Subjects and methods&#10;Postmortem CT images at the umbilical level of 241 subjects were used to measure abdominal areas of subcutaneous‐ and visceral fat, the rate of visceral fat and the waist circumference. Of the study subjects, 174 died of sudden natural death (130 men and 44 women), and 67 died of different causes (46 men and 21 women). All were between 40 and 75 years of age. Logistic regression analysis was performed to identify independent abdominal parameters associated with sudden natural death.&#10;Results&#10;By univariate analysis, the areas of subcutaneous and visceral fat were significantly larger in sudden natural death than who died of different causes (subcutaneous fat, odds ratio [OR] = 1.004, 95% confidence interval [CI] = 1.000–1.007, p = 0.03; visceral fat, OR = 1.008, 95% CI = 1.003–1.013, p &lt; 0.01). 
Multivariate analysis showed that the area of visceral fat was an independent factor associated with the risk of sudden natural death (OR = 1.008, 95% CI = 1.002–1.015, p = 0.02).&#10;Conclusions&#10;Postmortem CT revealed that sudden natural death was related to abdominal fat deposits.&#10;Introduction&#10;Sudden natural death is defined as death not attributable to a traumatic event or suicide within 24 hours of symptom onset in an apparently healthy individual or in a patient whose disease stage was not predictive of death 1, 2, 3. The frequency of sudden unexpected death due to cardiac or unidentifiable causes in employment age is 11 in 100,000 4. In order, most sudden natural deaths are due to cardiovascular followed by infectious diseases 5, 6. In individuals with acute chest pain, the cause of sudden natural death was pulmonary embolism, aortic dissection and obstructive coronary artery disease 7.&#10;Obesity is a risk factor for cardiovascular, metabolic, neoplastic and musculoskeletal disorders, and abdominal obesity is associated with increased morbidity independent of age, race and gender 8, 9, 10. Abdominal adiposity is a significant predictor of mortality 11 independent of the body mass index. The association between the waist circumference and metabolic risk may be partly explicable by a strong association with visceral adiposity 12.&#10;Computed tomography (CT) studies have shown that visceral fat was a significant predictor of mortality: in a model including three fat measures (subcutaneous, visceral and liver fat), age and length of follow‐up, only visceral fat was a significant predictor of mortality 10. In women, standard deviation increment in visceral fat was associated with a significantly increased all‐cause mortality risk 13. CT also revealed visceral adiposity was associated with incident cardiovascular disease 14, 15, 16, 17. In Japanese Americans, visceral fat was associated with obesity‐related mortality 18. 
Although the association between abdominal adiposity and morbidity is known, there are no CT studies that used direct measurements of abdominal fat to elucidate the association between abdominal fat and sudden natural death.&#10;In Japan, postmortem CT studies are performed to determine the cause of sudden death. Under the hypothesis that it is associated with abdominal fat, we investigated the relationship between sudden natural death and abdominal fat measured directly on postmortem CT scans.&#10;Methods&#10;Subjects and methods&#10;This retrospective study was approved by our institutional review board. We used postmortem CT studies acquired between February 2008 and March 2016 of 241 subjects who were between 40 and 75 years of age at the time of their sudden death. Of these, 174 died of sudden natural death (group 1; 130 men and 44 women), and 67 died of different causes (group 2; 46 men and 21 women) (Table 1). There was no significant difference in the age of the two groups (p = 0.09, two‐sample t‐test) nor in their gender (p = 0.34, chi‐square test).&#10;Clinical data on 241 study subjects&#10;Table 1&#10;The criteria for sudden natural death were death due to natural diseases and abrupt and unexpected death in individuals who appeared well. Individuals who died of non‐natural causes such as trauma and suicide were excluded. Contrast‐enhanced CT studies were performed in six group 1 subjects; two group 1 subjects and one group 2 subject were autopsied.&#10;We perused their medical records to review the demographic data of our study subjects for vascular risk factors (diabetes mellitus, hypertension, dyslipidemia and smoking) and the interval between the time of death and imaging.&#10;In individuals who died of sudden death, we routinely obtain whole‐body postmortem CT scans on a 16‐row CT scanner (Lightspeed16, GE Medical Systems, Milwaukee, WI, USA) or a 320‐row CT scanner (Aquilion ONE, Toshiba Corp. Medical Systems, Otawara, Japan). 
Fat analysis was performed on a workstation (AZE Virtual Place Raijin; AZE Ltd., Tokyo, Japan). The areas of subcutaneous and visceral fat and the waist circumference were measured on 5‐mm‐thick CT images at the umbilical level. Adipose tissue areas were calculated using an attenuation range of −150 to −10 Hounsfield units. The abdominal subcutaneous fat area was defined as the area of adipose tissue between the skin and the outermost aspect of the abdominal muscle wall (Figure 1). We also calculated the rate of visceral fat using the equation:&#10;Screen capture from the fat analysis tool. The blue and red areas show visceral and subcutaneous fat, respectively, at the umbilical level.&#10;Figure 1&#10;rate of visceral fat = area of visceral fat/area of subcutaneous + visceral fat.&#10;Statistical analysis&#10;We compared the interval between the time of death and imaging in groups 1 and 2 using the two‐sample t‐test. To compare abdominal parameters, we performed logistic regression analysis. Factors associated with sudden natural death (dependent variables) were identified by univariate analysis. Independent variables included the age, gender, interval between the time of death and imaging, area of subcutaneous fat, area of visceral fat, rate of visceral fat and waist circumference. All statistical tests were two‐sided; probability values of &lt;0.05 were considered statistically significant. Multivariate analysis adjusted for relevant factors was also performed using logistic regression analysis. All statistical analyses were with IBM SPSS Statistics 21.&#10;Results&#10;Postmortem CT scans revealed the cause of sudden natural death in 54 group 1 subjects (Table 2); in the other 120, it could not be ascertained. 
The causes of sudden death in group 2 are also shown in Table 2.&#10;Cause of death in 241 subjects&#10;Table 2&#10;There was no significant difference in the interval between the time of death and imaging between group 1 (median 70 min and range 10–720 min) and group 2 (median 70 min and range 20–660 min) (p = 0.55 and two‐sample t‐test). In group 1, the mean area of subcutaneous and visceral fat was 152.5 ± 96.4 cm2 and 139.3 ± 72.9 cm2, respectively; the mean rate of visceral fat was 0.50 ± 0.12, and the mean waist circumference was 94.7 ± 26.7 cm. In group 2, the mean area of subcutaneous and visceral fat was 125.4 ± 59.8 cm2 and 108.0 ± 47.3 cm2, respectively; the mean rate of visceral fat was 0.47 ± 0.11, and the mean waist circumference was 97.9 ± 28.0 cm.&#10;By univariate analysis, the areas of subcutaneous and visceral fat were significantly larger in group 1 than in group 2 (subcutaneous fat, odds ratio [OR] = 1.004, 95% confidence interval [CI] = 1.000–1.007, p = 0.03; visceral fat, OR = 1.008, 95% CI = 1.003–1.013, p &lt; 0.01) (Table 3). For multivariate analysis, we selected the area of visceral fat as an independent factor associated with sudden natural death (OR = 1.008, 95% CI = 1.002–1.015, p = 0.02) (Table 4). The difference between the two groups in the rate of visceral fat and the waist circumference was not statistically significant (p = 0.13 and p = 0.42, respectively).&#10;Univariate analysis to explore factors associated with sudden natural death&#10;Table 3&#10;Multivariate analysis to explore factors associated with sudden natural death&#10;Table 4&#10;Discussion&#10;Our study showed that the areas of subcutaneous and visceral fat were significantly larger in group 1 than in group 2 and that the rate of visceral fat and the waist circumference were not significantly different. Multivariate analysis revealed that the area of visceral fat was an independent factor associated with the risk of sudden natural death. 
These findings support the hypothesis that sudden natural death is associated with abdominal fat deposits. Most identified causes of sudden natural death (n = 54) were aortic diseases (n = 30). Others 5, 6 suggested that in individuals without a definite radiologically identified cause of sudden natural death it was due to coronary artery disease and numerous studies found an association between abdominal adiposity and cardiovascular disease 14, 15, 16, 17, 18, 19, 20. The cellular lipid content determines the size of adipocytes; large, mature adipoctyes were filled almost entirely by large lipid droplets 21, 22. The adipocyte volume determines cell functionality and the larger the adipocytes, the higher the cardiometabolic risk 23, 24.&#10;Our multivariate analysis showed that the area of visceral fat is an independent factor associated with the risk of sudden natural death. This is consistent with earlier findings that cardiovascular disease was asssociated with visceral – rather than subcutaneous fat 14, 15, 16, 17. Pickhardt et al. 25 documented that visceral fat in women was correlated with metabolic syndrome. While Fox et al. 26 showed that it was more strongly associated with risk factors for cardiovascular disease in women than in men; others 16, 17 claimed that visceral fat areas were significantly related to cardiovascular disease in both genders. Sex differences are recognized in the distribution of adipose tissue. In men, adipose tissue is primarily found in the central or abdominal region; this raises their risk for metabolic disorders; women, on the other hand, harbour more subcutaneous than visceral fat 27. These gender‐specific differences may explain the lack of significant differences in the rate of visceral fat between our two groups. 
Despite gender‐specific differences in the distribution of abdominal fat distribution, we think that visceral fat is associated with obesity‐related morbidity such as cardiovascular disease resulting in sudden natural death.&#10;Earlier studies 10, 28, 29 reported an association between the waist circumference and mortality. In our series, the waist circumference was not associated with sudden natural death. The waist circumference reflects abdominal fat deposits, and the observation that it was not associated with sudden natural death is inconsistent with our finding that the areas of subcutaneous and visceral fat were significantly larger in group 1 than in group 2. We used postmortem CT scans in our analyses; these scans reflected postmortem‐resuscitation and post‐cardiopulmonary resuscitation changes including gastrointestinal distension 30 that increase the waist circumference. Consequently, our findings may not reflect the pre‐mortem waist circumference.&#10;Our study has some limitations. As only three subjects were autopsied, the true cause of death remains uncertain. We did not have full medical histories on all subjects, and the effect of premortem patient characteristics could not be considered in our analyses. For example, there is a strong association between depression and suicide, and between obesity and depression 31, 32; however, we did not study the history of depression in this investigation. Also, cardiovascular disease is the leading cause of death in postmenopausal women 33. Although the differences of subcutaneous and visceral fat between groups 1 and 2 were statistically significant and the area of visceral fat was an independent factor associated with sudden natural death, they were modest, and the clinical significance is questionable. 
Lastly, our study population was composed of a heterogeneous mixture of men and women because only 65 of the 241 subjects were women.&#10;Conclusion&#10;The areas of subcutaneous and visceral fat were signifiantly larger in individuals who died of sudden natural death than in those who died of other identified causes. Visceral fat was an independent factor associated with the risk of sudden natural death. This raises the possibility that abdominal fat deposits may be associated with sudden natural death due to obesity‐related morbidity such as cardiovascular disease.&#10;Conflict of interest statement&#10;No conflict of interest statement.&#10;Funding&#10;None.&#10;Disclosure&#10;Dr Awai reports grants from Research Grant, Toshiba Medical Systems; grants from Research Grant, Hitachi Medical Corporation; grants from Research Grant, Eizai Co.; grants from Research Grant, Bayer Seiyaku Co.; and grants from Research Grant, Daiichi Sankyo, Co., outside the submitted work.&#10;The other authors declare no conflict of interest.&#10;Author contributions&#10;YK conceptualized the study, performed statistical analyses and wrote the paper; HS, HH, YH, FT, YB and MI contributed to interpreting the CT images and critically reviewed the paper. KA contributed to interpreting the data and critically reviewed the paper. 
All authors approved the final version of the paper.&#10;"/><cas:FSArray xmi:id="719" elements="804 1081 140"/><cas:FSArray xmi:id="680" elements="724 576 774 736 65 1029 690 226"/><cas:FSArray xmi:id="520" elements="404"/><cas:View sofa="1145" members="1 946 1219 301 637 238 314 853 1330 1136 256 1152 101 499 1303 1312 844 1054 39 332 1388 671 1404 379 247 937 1072 918 323 463 1232 1379 30 1355 14 274 283 795 420 1063 1321 292 1011 156 588 702 1339 748 1270 835 388 540 1161 869 653 265 549 611 1397 442 813 354 472 885 1279 397 604 149 449 133 85 820 165 959 1254 597 1372 172 1129 23 126 58 492 878 757 862 646 1348 966 456 927 48 764 197 65 226 576 690 724 736 774 1029 620 110 711 827 412 77 118 662 786 1286 567 92 140 804 1081 628 1261 179 1413 1020 217 361 188 558 523 370 207 1103 1116 429 341 1241 479 1170 1041 998 892 1090 404 508 990 1364 1295 532 905 973 1183 1197 1422 1427"/></xmi:XMI>
\ No newline at end of file

From e038a6340a3cb91d4ee06388b94b07d71ba49553 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 7 Jul 2021 16:11:27 +0200
Subject: [PATCH 079/269] Fixing offset issues with JCoReCondensedDocumentText.

Converting original offsets to condensed offsets when the original offset lay inside a cut-away area was not working correctly. In fact, the case was not even covered in the code.
---
 .../utility/JCoReCondensedDocumentText.java   | 339 ++++++++++--------
 .../JCoReCondensedDocumentTextTest.java       |  89 +++++
 jedis-parent/pom.xml                          |   2 +-
 3 files changed, 273 insertions(+), 157 deletions(-)

diff --git a/jcore-utilities/src/main/java/de/julielab/jcore/utility/JCoReCondensedDocumentText.java b/jcore-utilities/src/main/java/de/julielab/jcore/utility/JCoReCondensedDocumentText.java
index 76a8c5f45..7067539ad 100644
--- a/jcore-utilities/src/main/java/de/julielab/jcore/utility/JCoReCondensedDocumentText.java
+++ b/jcore-utilities/src/main/java/de/julielab/jcore/utility/JCoReCondensedDocumentText.java
@@ -13,169 +13,196 @@
  * document text that results from cutting out said text passages. It offers a
  * method to return the actual text string and a method to map the character
  * offsets of the compacted string to the original CAS document text.
- * 
- * @author faessler
  *
+ * @author faessler
  */
 public class JCoReCondensedDocumentText {
-	private NavigableMap<Integer, Integer> condensedPos2SumCutMap;
-	private NavigableMap<Integer, Integer> originalPos2SumCutMap;
-	private String condensedText;
-	private JCas cas;
-	private Set<Character> cutAwayFillCharacters;
+    private NavigableMap<Integer, Integer> condensedPos2SumCutMap;
+    private NavigableMap<Integer, Integer> originalPos2SumCutMap;
+    private String condensedText;
+    private JCas cas;
+    private Set<Character> cutAwayFillCharacters;
+
+    /**
+     * <p>
+     * Cuts away the covered text of annotations of a type in <tt>cutAwayTypes</tt>
+     * from the <tt>cas</tt> document text. If <tt>cutAwayTypes</tt> is null or
+     * empty, this class' methods will return the original CAS data.
+     * </p>
+     *
+     * @param cas          The CAS for which the document text should be cut.
+     * @param cutAwayTypes The types for cutting. May be null.
+     * @throws ClassNotFoundException If <tt>cutAwayTypes</tt> contains non-existing type names.
+     */
+    public JCoReCondensedDocumentText(JCas cas, Set<String> cutAwayTypes) throws ClassNotFoundException {
+        this(cas, cutAwayTypes, null);
+    }
+
+    /**
+     * <p>
+     * Cuts away the covered text of annotations of a type in <tt>cutAwayTypes</tt>
+     * from the <tt>cas</tt> document text. If <tt>cutAwayTypes</tt> is null or
+     * empty, this class' methods will return the original CAS data.
+     * </p>
+     * <p>The <tt>cutAwayFillCharacters</tt> set may provide characters that, when being the only character between
+     * to cut-away annotations, will add to the span of text being cut away. This way, enumerations of references
+     * (e.g. "4,6,8") can be completely removed, for example.</p>
+     *
+     * @param cas                   The CAS for which the document text should be cut.
+     * @param cutAwayTypes          The types for cutting. May be null.
+     * @param cutAwayFillCharacters Characters that, when being the only separator between two cut away annotations, are also cut away.
+     * @throws ClassNotFoundException If <tt>cutAwayTypes</tt> contains non-existing type names.
+     */
+    public JCoReCondensedDocumentText(JCas cas, Set<String> cutAwayTypes, Set<Character> cutAwayFillCharacters) throws ClassNotFoundException {
+        this.cas = cas;
+        this.cutAwayFillCharacters = cutAwayFillCharacters;
+        buildMap(cas, cutAwayTypes);
+    }
+
+    public JCas getCas() {
+        return cas;
+    }
 
-	public JCas getCas() {
-		return cas;
-	}
-	/**
-	 * <p>
-	 * Cuts away the covered text of annotations of a type in <tt>cutAwayTypes</tt>
-	 * from the <tt>cas</tt> document text. If <tt>cutAwayTypes</tt> is null or
-	 * empty, this class' methods will return the original CAS data.
-	 * </p>
-	 *
-	 * @param cas
-	 *            The CAS for which the document text should be cut.
-	 * @param cutAwayTypes
-	 *            The types for cutting. May be null.
-	 * @throws ClassNotFoundException
-	 *             If <tt>cutAwayTypes</tt> contains non-existing type names.
-	 */
-	public JCoReCondensedDocumentText(JCas cas, Set<String> cutAwayTypes) throws ClassNotFoundException {
-		this(cas, cutAwayTypes, null);
-	}
+    /**
+     * <p>
+     * Creates a map that maps those positions of the small-cut text that correspond
+     * to an intermediate next position after a cut-away annotation in the original
+     * text to the sum of ranges covered by cut-away annotations up to the original
+     * offset.
+     * </p>
+     * <p>
+     * If <tt>cutAwayTypes</tt> is empty, no work will be done and the methods of
+     * this class we return the original text and offets of the CAS.
+     * </p>
+     *
+     * @param cas          The CAS for create a cut-away document text for.
+     * @param cutAwayTypes The qualified type names of the annotations whose covered text
+     *                     should be cut away.
+     * @throws ClassNotFoundException If <tt>cutAwayTypes</tt> contains type identifiers to
+     *                                non-existing types.
+     */
+    public void buildMap(JCas cas, Set<String> cutAwayTypes) throws ClassNotFoundException {
+        if (cutAwayTypes == null || cutAwayTypes.isEmpty())
+            return;
+        StringBuilder sb = new StringBuilder();
+        condensedPos2SumCutMap = new TreeMap<>();
+        condensedPos2SumCutMap.put(0, 0);
+        originalPos2SumCutMap = new TreeMap<>();
+        originalPos2SumCutMap.put(0, 0);
+        JCoReAnnotationIndexMerger merger = new JCoReAnnotationIndexMerger(cutAwayTypes, true, null, cas);
+        int cutSum = 0;
+        int lastBegin = 0;
+        int lastEnd = -1;
+        int lastCutSum = 0;
+        // For each ignored annotation, there could be following annotations overlapping
+        // with the first, effectively enlarging the ignored span. Thus, we iterate
+        // until we find an ignored annotation that has a positive (not 0) distance to a
+        // previous one. Then, we store the length of the span of cut-away annotations
+        // for the largest end of the previous annotations.
+        while (merger.incrementAnnotation()) {
+            int begin = merger.getCurrentBegin();
+            int end = merger.getCurrentEnd();
 
-	/**
-	 * <p>
-	 * Cuts away the covered text of annotations of a type in <tt>cutAwayTypes</tt>
-	 * from the <tt>cas</tt> document text. If <tt>cutAwayTypes</tt> is null or
-	 * empty, this class' methods will return the original CAS data.
-	 * </p>
-	 * <p>The <tt>cutAwayFillCharacters</tt> set may provide characters that, when being the only character between
-	 * to cut-away annotations, will add to the span of text being cut away. This way, enumerations of references
-	 * (e.g. "4,6,8") can be completely removed, for example.</p>
-	 * 
-	 * @param cas
-	 *            The CAS for which the document text should be cut.
-	 * @param cutAwayTypes
-	 *            The types for cutting. May be null.
-	 * @param cutAwayFillCharacters Characters that, when being the only separator between two cut away annotations, are also cut away.
-	 * @throws ClassNotFoundException
-	 *             If <tt>cutAwayTypes</tt> contains non-existing type names.
-	 */
-	public JCoReCondensedDocumentText(JCas cas, Set<String> cutAwayTypes, Set<Character> cutAwayFillCharacters) throws ClassNotFoundException {
-		this.cas = cas;
-		this.cutAwayFillCharacters = cutAwayFillCharacters;
-		buildMap(cas, cutAwayTypes);
-	}
+            boolean moreThanOneCharacterDistance = begin - lastEnd > 2;
+            boolean previousCharacterIsCutAwayDelimiter = cutAwayFillCharacters == null || cutAwayFillCharacters.isEmpty() || (begin - lastEnd == 2 && cutAwayFillCharacters.contains(cas.getDocumentText().charAt(begin - 1)));
+            if (lastEnd > 0 && begin > lastEnd && (previousCharacterIsCutAwayDelimiter || moreThanOneCharacterDistance)) {
+                // Adapt offsets to remove superfluous white spaces from the condensed text
+                boolean precedingCharacterIsWS = lastBegin == 0 || Character.isWhitespace(cas.getDocumentText().charAt(lastBegin - 1));
+                boolean succeedingCharacterIsWS = lastEnd < cas.getDocumentText().length() && Character.isWhitespace(cas.getDocumentText().charAt(lastEnd));
+                if (precedingCharacterIsWS && succeedingCharacterIsWS)
+                    ++lastEnd;
+                if (precedingCharacterIsWS && end >= cas.getDocumentText().length())
+                    --begin;
+                // The current cut away annotation begins after the previous cut away annotation, thus there is no
+                // overlap and we can add the current state to the maps.
+                cutSum += lastEnd - lastBegin;
+                int condensedPosition = lastEnd - cutSum + 1;
+                condensedPos2SumCutMap.put(condensedPosition, cutSum);
+                // For original offsets we need to be able to know where the begin and the end of
+                // the cut away annotation was. This is exploited in getCondensedOffsetForOriginalOffset()
+                originalPos2SumCutMap.put(lastBegin, lastCutSum);
+                originalPos2SumCutMap.put(lastEnd, cutSum);
+                lastBegin = begin;
+                lastCutSum = cutSum;
+                sb.append(cas.getDocumentText(), lastEnd, begin);
+            } else if (lastEnd < 0) {
+                // This is the first annotation
+                if (begin > 0 && end >= cas.getDocumentText().length() && Character.isWhitespace(cas.getDocumentText().charAt(begin - 1)))
+                    // Case: A single cut away annotation right at the end of the document text.
+                    // Then we want to extend the cut away area to the leading whitespace to remove that as well.
+                    --begin;
+                lastBegin = begin;
+                sb.append(cas.getDocumentText(), 0, begin);
+            }
+            lastEnd = end;
+        }
+        // Since we iterate one annotation further than the annotation we store the span
+        // for, we need to take care of the very last ignored annotation after the loop
+        // - it has never been handled itself.
+        if (lastEnd > 0) {
+            // Adapt offsets to avoid unnecessary white spaces regarding the tail of the document text.
+            boolean precedingCharacterIsWS = lastBegin < 1 || Character.isWhitespace(cas.getDocumentText().charAt(lastBegin - 1));
+            boolean succeedingCharacterIsWS = lastEnd < cas.getDocumentText().length() && Character.isWhitespace(cas.getDocumentText().charAt(lastEnd));
+            if (precedingCharacterIsWS && (succeedingCharacterIsWS || lastEnd >= cas.getDocumentText().length()))
+                ++lastEnd;
+            cutSum += lastEnd - lastBegin;
+            int condensedPosition = lastEnd - cutSum + 1;
+            condensedPos2SumCutMap.put(condensedPosition, cutSum);
+            originalPos2SumCutMap.put(lastBegin, lastCutSum);
+            originalPos2SumCutMap.put(lastEnd, cutSum);
+        }
+        // If lastEnd is still -1, we just did not find any of the cut away annotations. Thus, we just copy the whole text.
+        if (lastEnd == -1)
+            lastEnd = 0;
+        if (lastEnd < cas.getDocumentText().length())
+            sb.append(cas.getDocumentText().substring(lastEnd));
+        condensedText = sb.toString();
+    }
 
-	/**
-	 * <p>
-	 * Creates a map that maps those positions of the small-cut text that correspond
-	 * to an intermediate next position after a cut-away annotation in the original
-	 * text to the sum of ranges covered by cut-away annotations up to the original
-	 * offset.
-	 * </p>
-	 * <p>
-	 * If <tt>cutAwayTypes</tt> is empty, no work will be done and the methods of
-	 * this class we return the original text and offets of the CAS.
-	 * </p>
-	 * 
-	 * @param cas
-	 *            The CAS for create a cut-away document text for.
-	 * @param cutAwayTypes
-	 *            The qualified type names of the annotations whose covered text
-	 *            should be cut away.
-	 * @throws ClassNotFoundException
-	 *             If <tt>cutAwayTypes</tt> contains type identifiers to
-	 *             non-existing types.
-	 */
-	public void buildMap(JCas cas, Set<String> cutAwayTypes) throws ClassNotFoundException {
-		if (cutAwayTypes == null || cutAwayTypes.isEmpty())
-			return;
-		StringBuilder sb = new StringBuilder();
-		condensedPos2SumCutMap = new TreeMap<>();
-		condensedPos2SumCutMap.put(0, 0);
-		originalPos2SumCutMap = new TreeMap<>();
-		originalPos2SumCutMap.put(0, 0);
-		JCoReAnnotationIndexMerger merger = new JCoReAnnotationIndexMerger(cutAwayTypes, true, null, cas);
-		int cutSum = 0;
-		int lastBegin = 0;
-		int lastEnd = -1;
-		// For each ignored annotation, there could be following annotations overlapping
-		// with the first, effectively enlarging the ignored span. Thus, we iterate
-		// until we find an ignored annotation that has a positive (not 0) distance to a
-		// previous one. Then, we store the length of the span of cut-away annotations
-		// for the largest end of the previous annotations.
-		while (merger.incrementAnnotation()) {
-			int end = merger.getCurrentEnd();
-			int begin = merger.getCurrentBegin();
 
-			boolean moreThanOneCharacterDistance = begin - lastEnd > 2;
-			boolean previousCharacterIsCutAwayDelimiter = cutAwayFillCharacters == null || cutAwayFillCharacters.isEmpty() || (begin - lastEnd == 2 && cutAwayFillCharacters.contains(cas.getDocumentText().charAt(begin - 1)));
-			if (lastEnd > 0 && begin > lastEnd && (previousCharacterIsCutAwayDelimiter || moreThanOneCharacterDistance)) {
-				cutSum += lastEnd - lastBegin;
-				int condensedPosition = lastEnd - cutSum + 1;
-				condensedPos2SumCutMap.put(condensedPosition, cutSum);
-				originalPos2SumCutMap.put(lastEnd, cutSum);
-				lastBegin = begin;
-				sb.append(cas.getDocumentText(), lastEnd, begin);
-			} else if (lastEnd < 0) {
-				lastBegin = begin;
-				sb.append(cas.getDocumentText(), 0, begin);
-			}
-			lastEnd = end;
-		}
-		// Since we iterate one annotation further than the annotation we store the span
-		// for, we need to take care of the very last ignored annotation after the loop
-		// - it has never been handled itself.
-		if (lastEnd > 0) {
-			cutSum += lastEnd - lastBegin;
-			int condensedPosition = lastEnd - cutSum + 1;
-			condensedPos2SumCutMap.put(condensedPosition, cutSum);
-			originalPos2SumCutMap.put(lastEnd, cutSum);
-		}
-		// If lastEnd is still -1, we just did not find any of the cut away annotations. Thus, we just copy the whole text.
-		if (lastEnd == -1)
-		    lastEnd = 0;
-		if (lastEnd < cas.getDocumentText().length())
-			sb.append(cas.getDocumentText().substring(lastEnd));
-		condensedText = sb.toString();
-	}
+    /**
+     * Given a character offset relative to the condensed document text, this method
+     * returns the corresponding offset in the original CAS document text.
+     *
+     * @param condensedOffset The character offset in the condensed document text string.
+     * @return The character offset relative to the original CAS document text
+     * associated with <tt>condensedOffset</tt>.
+     */
+    public int getOriginalOffsetForCondensedOffset(int condensedOffset) {
+        if (condensedPos2SumCutMap == null)
+            return condensedOffset;
+        Entry<Integer, Integer> floorEntry = condensedPos2SumCutMap.floorEntry(condensedOffset);
+        return condensedOffset + floorEntry.getValue();
+    }
 
-	/**
-	 * Given a character offset relative to the condensed document text, this method
-	 * returns the corresponding offset in the original CAS document text.
-	 * 
-	 * @param condensedOffset
-	 *            The character offset in the condensed document text string.
-	 * @return The character offset relative to the original CAS document text
-	 *         associated with <tt>condensedOffset</tt>.
-	 */
-	public int getOriginalOffsetForCondensedOffset(int condensedOffset) {
-		if (condensedPos2SumCutMap == null)
-			return condensedOffset;
-		Entry<Integer, Integer> floorEntry = condensedPos2SumCutMap.floorEntry(condensedOffset);
-		return condensedOffset + floorEntry.getValue();
-	}
-	
-	/**
-	 * Given a character offset relative to the original CAS document text, this method
-	 * returns the corresponding offset in the condensed document text.
-	 * 
-	 * @param originalOffset
-	 *            The character offset in the originalOffset document CAS text string.
-	 * @return The character offset relative to the condensed document text
-	 *         associated with <tt>originalOffset</tt>.
-	 */
-	public int getCondensedOffsetForOriginalOffset(int originalOffset) {
-		if (originalPos2SumCutMap == null)
-			return originalOffset;
-		Entry<Integer, Integer> floorEntry = originalPos2SumCutMap.floorEntry(originalOffset);
-		return originalOffset - floorEntry.getValue();
-	}
+    /**
+     * Given a character offset relative to the original CAS document text, this method
+     * returns the corresponding offset in the condensed document text.
+     *
+     * @param originalOffset The character offset in the originalOffset document CAS text string.
+     * @return The character offset relative to the condensed document text
+     * associated with <tt>originalOffset</tt>.
+     */
+    public int getCondensedOffsetForOriginalOffset(int originalOffset) {
+        if (originalPos2SumCutMap == null)
+            return originalOffset;
+        Entry<Integer, Integer> floorEntry = originalPos2SumCutMap.floorEntry(originalOffset);
+        Entry<Integer, Integer> ceilingEntry = originalPos2SumCutMap.ceilingEntry(originalOffset);
+        // floor entry can never be null because the mapping 0=0 always exists
+        if (floorEntry != null && ceilingEntry != null) {
+            // Determine if the original offset is inside or outside of a cut away annotation.
+            // If the difference of key and value is the same for floor and ceiling, the originalOffset
+            // is within of a cut away annotation. Otherwise, it is outside a cut away annotation
+            int floorDiff = floorEntry.getKey() - floorEntry.getValue();
+            int ceilingDiff = ceilingEntry.getKey() - ceilingEntry.getValue();
+            boolean withinCutAway = floorDiff == ceilingDiff;
+            if (withinCutAway)
+                return originalOffset - ceilingEntry.getValue() + (ceilingEntry.getKey() - originalOffset);
+        }
+        return originalOffset - floorEntry.getValue();
+    }
 
-	public String getCodensedText() {
-		return condensedText != null ? condensedText : cas.getDocumentText();
-	}
+    public String getCodensedText() {
+        return condensedText != null ? condensedText : cas.getDocumentText();
+    }
 }
diff --git a/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReCondensedDocumentTextTest.java b/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReCondensedDocumentTextTest.java
index 1c5597a3e..86ef54bf9 100644
--- a/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReCondensedDocumentTextTest.java
+++ b/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReCondensedDocumentTextTest.java
@@ -1,5 +1,6 @@
 package de.julielab.jcore.utility;
 
+import de.julielab.jcore.types.Annotation;
 import de.julielab.jcore.types.InternalReference;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
@@ -83,4 +84,92 @@ public void testReduce3() throws Exception {
 				new HashSet<>(Arrays.asList(InternalReference.class.getCanonicalName())), Set.of(',', ';'));
 		assertEquals("This sentence has multiple references. This is a second sentence.", condensedText.getCodensedText());
 	}
+
+	@Test
+	public void testCondensedOffsetsWithinCutawayAnnotations() throws Exception {
+		JCas jcas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
+				"de.julielab.jcore.types.jcore-document-structure-types");
+		jcas.setDocumentText("Not cut away 1. Cut away 1. Not cut away 2. Cut away 2. Not cut away 3.");
+		Annotation cutAwayAnnotation = new Annotation(jcas, 16, 27);
+		cutAwayAnnotation.addToIndexes();
+		Annotation cutAwayAnnotation2 = new Annotation(jcas, 44, 55);
+		cutAwayAnnotation2.addToIndexes();
+
+		JCoReCondensedDocumentText condensedText = new JCoReCondensedDocumentText(jcas,
+				new HashSet<>(Arrays.asList(Annotation.class.getCanonicalName())));
+		assertEquals("Not cut away 1. Not cut away 2. Not cut away 3.", condensedText.getCodensedText());
+		assertEquals(10, condensedText.getCondensedOffsetForOriginalOffset(10));
+		assertEquals(15, condensedText.getCondensedOffsetForOriginalOffset(15));
+		assertEquals(16, condensedText.getCondensedOffsetForOriginalOffset(16));
+		assertEquals(16, condensedText.getCondensedOffsetForOriginalOffset(17));
+		assertEquals(16, condensedText.getCondensedOffsetForOriginalOffset(27));
+		assertEquals(19, condensedText.getCondensedOffsetForOriginalOffset(31));
+	}
+
+	@Test
+	public void testCutAwayAtBeginning() throws Exception {
+		JCas jcas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
+				"de.julielab.jcore.types.jcore-document-structure-types");
+		jcas.setDocumentText("Cut away. Not cut away.");
+		Annotation cutAwayAnnotation = new Annotation(jcas, 0, 9);
+		cutAwayAnnotation.addToIndexes();
+
+		JCoReCondensedDocumentText condensedText = new JCoReCondensedDocumentText(jcas,
+				new HashSet<>(Arrays.asList(Annotation.class.getCanonicalName())));
+		assertEquals("Not cut away.", condensedText.getCodensedText());
+		assertEquals(0, condensedText.getCondensedOffsetForOriginalOffset(3));
+		assertEquals(3, condensedText.getCondensedOffsetForOriginalOffset(13));
+	}
+
+	@Test
+	public void testCutAwayAtEnd() throws Exception {
+		JCas jcas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
+				"de.julielab.jcore.types.jcore-document-structure-types");
+		jcas.setDocumentText("Not cut away. Cut away.");
+		Annotation cutAwayAnnotation = new Annotation(jcas, 14, 23);
+		cutAwayAnnotation.addToIndexes();
+
+		JCoReCondensedDocumentText condensedText = new JCoReCondensedDocumentText(jcas,
+				new HashSet<>(Arrays.asList(Annotation.class.getCanonicalName())));
+		assertEquals("Not cut away.", condensedText.getCodensedText());
+		assertEquals(10, condensedText.getCondensedOffsetForOriginalOffset(10));
+		assertEquals(13, condensedText.getCondensedOffsetForOriginalOffset(16));
+		assertEquals(13, condensedText.getCondensedOffsetForOriginalOffset(23));
+	}
+
+	@Test
+	public void testEmbeddedCutAway() throws Exception {
+		JCas jcas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
+				"de.julielab.jcore.types.jcore-document-structure-types");
+		jcas.setDocumentText("Not cut away. Cut away. Not cut away.");
+		Annotation cutAwayAnnotation = new Annotation(jcas, 14, 23);
+		cutAwayAnnotation.addToIndexes();
+
+		JCoReCondensedDocumentText condensedText = new JCoReCondensedDocumentText(jcas,
+				new HashSet<>(Arrays.asList(Annotation.class.getCanonicalName())));
+		assertEquals("Not cut away. Not cut away.", condensedText.getCodensedText());
+		assertEquals(10, condensedText.getCondensedOffsetForOriginalOffset(10));
+		assertEquals(14, condensedText.getCondensedOffsetForOriginalOffset(16));
+		assertEquals(14, condensedText.getCondensedOffsetForOriginalOffset(23));
+		assertEquals(15, condensedText.getCondensedOffsetForOriginalOffset(25));
+	}
+
+	@Test
+	public void testEnclosingCutAway() throws Exception {
+		JCas jcas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
+				"de.julielab.jcore.types.jcore-document-structure-types");
+		jcas.setDocumentText("Cut away. Not cut away. Cut away.");
+		Annotation cutAwayAnnotation = new Annotation(jcas, 0, 9);
+		cutAwayAnnotation.addToIndexes();
+		Annotation cutAwayAnnotation2 = new Annotation(jcas, 24, 33);
+		cutAwayAnnotation2.addToIndexes();
+
+		JCoReCondensedDocumentText condensedText = new JCoReCondensedDocumentText(jcas,
+				new HashSet<>(Arrays.asList(Annotation.class.getCanonicalName())));
+		assertEquals("Not cut away.", condensedText.getCodensedText());
+		assertEquals(0, condensedText.getCondensedOffsetForOriginalOffset(10));
+		assertEquals(3, condensedText.getCondensedOffsetForOriginalOffset(13));
+		assertEquals(13, condensedText.getCondensedOffsetForOriginalOffset(27));
+		assertEquals(13, condensedText.getCondensedOffsetForOriginalOffset(33));
+	}
 }
diff --git a/jedis-parent/pom.xml b/jedis-parent/pom.xml
index b66c3be70..3daef871c 100644
--- a/jedis-parent/pom.xml
+++ b/jedis-parent/pom.xml
@@ -27,7 +27,7 @@
             <dependency>
                 <groupId>de.julielab</groupId>
                 <artifactId>jcore-xmi-splitter</artifactId>
-                <version>2.3.5</version>
+                <version>2.4.0-SNAPSHOT</version>
             </dependency>
         </dependencies>
     </dependencyManagement>

From b711381cbaead72a1c24ad182d4e198b406cc661 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 7 Jul 2021 16:26:55 +0200
Subject: [PATCH 080/269] Adding a test to ensure that the in-cut-away offset
 issue is gone.

---
 .../ae/jsbd/main/SentenceAnnotatorTest.java   | 22 +++++++++++++++++++
 .../test/resources/errordocs/PMC8205280.xmi   |  1 +
 2 files changed, 23 insertions(+)
 create mode 100644 jcore-jsbd-ae/src/test/resources/errordocs/PMC8205280.xmi

diff --git a/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotatorTest.java b/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotatorTest.java
index 22edbe983..5a5b23a47 100644
--- a/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotatorTest.java
+++ b/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotatorTest.java
@@ -310,5 +310,27 @@ public void testErrordoc() throws Exception {
         assertThatCode(() -> jsbd.process(jCas.getCas())).doesNotThrowAnyException();
     }
 
+    @Test
+    public void testErrordoc2() throws Exception {
+        // This XMI file has larger cut away types where an original offset request actually lies inside of a
+        // cut away annotation. This case led to errors prior to a respective bug fix in the
+        // JCoReCondensedDocumentText
+        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
+                "de.julielab.jcore.types.jcore-document-structure-pubmed-types", "de.julielab.jcore.types.jcore-document-meta-pubmed-types",
+                "de.julielab.jcore.types.extensions.jcore-document-meta-extension-types");
+
+        XmiCasDeserializer.deserialize(new FileInputStream(Path.of("src", "test", "resources", "errordocs", "PMC8205280.xmi").toFile()), jCas.getCas());
+        JCasUtil.select(jCas, Sentence.class).forEach(Annotation::removeFromIndexes);
+        AnalysisEngine jsbd = AnalysisEngineFactory.createEngine(SentenceAnnotator.class, SentenceAnnotator.PARAM_MODEL_FILE,
+                "/Users/faessler/Coding/git/jcore-projects/jcore-jsbd-ae-biomedical-english/src/main/resources/de/julielab/jcore/ae/jsbd/model/jsbd-biomed-oversampled-abstracts-split-at-punctuation.mod.gz",
+                SentenceAnnotator.PARAM_MAX_SENTENCE_LENGTH, 1000,
+                SentenceAnnotator.PARAM_SENTENCE_DELIMITER_TYPES, new String[]{
+                        "de.julielab.jcore.types.Title", "de.julielab.jcore.types.AbstractText", "de.julielab.jcore.types.AbstractSectionHeading", "de.julielab.jcore.types.AbstractSection", "de.julielab.jcore.types.Section", "de.julielab.jcore.types.Paragraph", "de.julielab.jcore.types.Zone", "de.julielab.jcore.types.Caption", "de.julielab.jcore.types.Figure", "de.julielab.jcore.types.Table"},
+                SentenceAnnotator.PARAM_CUT_AWAY_TYPES, new String[]{de.julielab.jcore.types.pubmed.InternalReference.class.getCanonicalName()}
+        );
+
+        assertThatCode(() -> jsbd.process(jCas.getCas())).doesNotThrowAnyException();
+    }
+
 }
 
diff --git a/jcore-jsbd-ae/src/test/resources/errordocs/PMC8205280.xmi b/jcore-jsbd-ae/src/test/resources/errordocs/PMC8205280.xmi
new file mode 100644
index 000000000..b2063eca5
--- /dev/null
+++ b/jcore-jsbd-ae/src/test/resources/errordocs/PMC8205280.xmi
@@ -0,0 +1 @@
+<?xml version="1.0" encoding="UTF-8"?><xmi:XMI xmlns:xmi="http://www.omg.org/XMI" xmlns:tcas="http:///uima/tcas.ecore" xmlns:cas="http:///uima/cas.ecore" xmlns:pubmed="http:///de/julielab/jcore/types/pubmed.ecore" xmlns:types="http:///de/julielab/jcore/types.ecore" xmi:version="2.0"><cas:NULL xmi:id="0"/><pubmed:ManualDescriptor xmi:id="337" sofa="9" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" keywordList="285"/><types:Title xmi:id="1" sofa="9" begin="0" end="49" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="document"/><types:Title xmi:id="624" sofa="9" begin="7424" end="7431" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="table"/><types:Title xmi:id="686" sofa="9" begin="8538" end="8545" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="table"/><types:Title xmi:id="748" sofa="9" begin="9808" end="9815" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="table"/><types:Title xmi:id="785" sofa="9" begin="10205" end="10211" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="figure"/><types:AbstractSectionHeading xmi:id="32" sofa="9" begin="50" end="57" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="abstractSection"/><types:AbstractSectionHeading xmi:id="57" sofa="9" begin="411" end="429" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="abstractSection"/><types:AbstractSectionHeading xmi:id="82" sofa="9" begin="984" end="991" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="abstractSection"/><types:AbstractSectionHeading xmi:id="107" sofa="9" begin="1322" end="1343" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="abstractSection"/><types:AbstractSectionHeading xmi:id="132" sofa="9" begin="1539" end="1549" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="abstractSection"/><types:Paragraph xmi:id="42" sofa="9" begin="58" end="410" 
componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="67" sofa="9" begin="430" end="983" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="92" sofa="9" begin="992" end="1321" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="117" sofa="9" begin="1344" end="1538" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="142" sofa="9" begin="1550" end="1820" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="372" sofa="9" begin="1834" end="1920" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="379" sofa="9" begin="1921" end="2721" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="431" sofa="9" begin="2722" end="3656" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="465" sofa="9" begin="3657" end="4572" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="543" sofa="9" begin="4597" end="5617" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="572" sofa="9" begin="5639" end="6244" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="579" sofa="9" begin="6245" end="6627" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="608" sofa="9" begin="6636" end="7432" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="641" sofa="9" begin="7321" end="7423" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="661" sofa="9" begin="7432" end="8546" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="703" sofa="9" begin="8470" end="8537" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="723" sofa="9" begin="8546" end="10212" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="765" sofa="9" begin="9665" end="9807" 
componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="802" sofa="9" begin="9816" end="9885" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="809" sofa="9" begin="9886" end="10145" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="816" sofa="9" begin="10146" end="10204" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="857" sofa="9" begin="10223" end="10935" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="864" sofa="9" begin="10936" end="11918" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="889" sofa="9" begin="11919" end="12987" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="932" sofa="9" begin="12988" end="14157" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="993" sofa="9" begin="14158" end="15659" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1058" sofa="9" begin="15682" end="16520" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1074" sofa="9" begin="16521" end="17700" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1103" sofa="9" begin="17713" end="18568" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1132" sofa="9" begin="18580" end="19417" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1161" sofa="9" begin="19453" end="19460" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1190" sofa="9" begin="19477" end="19934" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1219" sofa="9" begin="19954" end="19975" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1235" sofa="9" begin="19975" end="19998" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:AbstractSection xmi:id="24" 
sofa="9" begin="50" end="411" componentId="de.julielab.jcore.reader.pmc.PMCReader" abstractSectionHeading="32"/><types:AbstractSection xmi:id="49" sofa="9" begin="411" end="984" componentId="de.julielab.jcore.reader.pmc.PMCReader" abstractSectionHeading="57"/><types:AbstractSection xmi:id="74" sofa="9" begin="984" end="1322" componentId="de.julielab.jcore.reader.pmc.PMCReader" abstractSectionHeading="82"/><types:AbstractSection xmi:id="99" sofa="9" begin="1322" end="1539" componentId="de.julielab.jcore.reader.pmc.PMCReader" abstractSectionHeading="107"/><types:AbstractSection xmi:id="124" sofa="9" begin="1539" end="1821" componentId="de.julielab.jcore.reader.pmc.PMCReader" abstractSectionHeading="132"/><types:AbstractText xmi:id="16" sofa="9" begin="50" end="1821" componentId="de.julielab.jcore.reader.pmc.PMCReader" structuredAbstractParts="24 49 74 99 124"/><pubmed:Header xmi:id="156" sofa="9" begin="0" end="1821" componentId="de.julielab.jcore.reader.pmc.PMCReader" source="PubMed Central" docId="PMC8205280" copyright="© 2021 Elsevier Inc. All rights reserved." 
truncated="false" authors="277" pubTypeList="175" doi="10.1016/j.pedn.2021.06.005"/><types:SectionTitle xmi:id="363" sofa="9" begin="1821" end="1833" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="512" sofa="9" begin="4573" end="4580" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="534" sofa="9" begin="4581" end="4596" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="563" sofa="9" begin="5618" end="5638" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="599" sofa="9" begin="6628" end="6635" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="848" sofa="9" begin="10212" end="10222" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="1049" sofa="9" begin="15660" end="15681" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="1094" sofa="9" begin="17701" end="17712" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="1123" sofa="9" begin="18569" end="18579" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="1152" sofa="9" begin="19418" end="19452" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="1181" sofa="9" begin="19460" end="19476" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="1210" sofa="9" begin="19935" end="19953" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><pubmed:InternalReference xmi:id="386" sofa="9" begin="2070" end="2086" reftype="bibliography" 
refid="bb0020"/><pubmed:InternalReference xmi:id="395" sofa="9" begin="2088" end="2107" reftype="bibliography" refid="bb0085"/><pubmed:InternalReference xmi:id="404" sofa="9" begin="2197" end="2217" reftype="bibliography" refid="bb0035"/><pubmed:InternalReference xmi:id="413" sofa="9" begin="2219" end="2239" reftype="bibliography" refid="bb0055"/><pubmed:InternalReference xmi:id="422" sofa="9" begin="2317" end="2334" reftype="bibliography" refid="bb0030"/><pubmed:InternalReference xmi:id="438" sofa="9" begin="2989" end="3007" reftype="bibliography" refid="bb0005"/><pubmed:InternalReference xmi:id="447" sofa="9" begin="3284" end="3302" reftype="bibliography" refid="bb0005"/><pubmed:InternalReference xmi:id="456" sofa="9" begin="3632" end="3654" reftype="bibliography" refid="bb0045"/><pubmed:InternalReference xmi:id="472" sofa="9" begin="3897" end="3915" reftype="bibliography" refid="bb0050"/><pubmed:InternalReference xmi:id="481" sofa="9" begin="4013" end="4032" reftype="bibliography" refid="bb0095"/><pubmed:InternalReference xmi:id="490" sofa="9" begin="4408" end="4428" reftype="bibliography" refid="bb0015"/><pubmed:InternalReference xmi:id="615" sofa="9" begin="7311" end="7318" reftype="table" refid="t0005"/><pubmed:InternalReference xmi:id="668" sofa="9" begin="7744" end="7751" reftype="table" refid="t0010"/><pubmed:InternalReference xmi:id="677" sofa="9" begin="8461" end="8468" reftype="table" refid="t0010"/><pubmed:InternalReference xmi:id="730" sofa="9" begin="8696" end="8703" reftype="table" refid="t0015"/><pubmed:InternalReference xmi:id="739" sofa="9" begin="8708" end="8714" reftype="figure" refid="f0005"/><pubmed:InternalReference xmi:id="871" sofa="9" begin="11259" end="11278" reftype="bibliography" refid="bb0070"/><pubmed:InternalReference xmi:id="880" sofa="9" begin="11397" end="11416" reftype="bibliography" refid="bb0070"/><pubmed:InternalReference xmi:id="896" sofa="9" begin="12244" end="12269" reftype="bibliography" 
refid="bb0065"/><pubmed:InternalReference xmi:id="905" sofa="9" begin="12402" end="12427" reftype="bibliography" refid="bb0065"/><pubmed:InternalReference xmi:id="914" sofa="9" begin="12594" end="12611" reftype="bibliography" refid="bb0075"/><pubmed:InternalReference xmi:id="923" sofa="9" begin="12831" end="12848" reftype="bibliography" refid="bb0075"/><pubmed:InternalReference xmi:id="939" sofa="9" begin="13410" end="13428" reftype="bibliography" refid="bb0025"/><pubmed:InternalReference xmi:id="948" sofa="9" begin="13430" end="13454" reftype="bibliography" refid="bb0080"/><pubmed:InternalReference xmi:id="957" sofa="9" begin="13589" end="13607" reftype="bibliography" refid="bb0025"/><pubmed:InternalReference xmi:id="966" sofa="9" begin="13609" end="13633" reftype="bibliography" refid="bb0080"/><pubmed:InternalReference xmi:id="975" sofa="9" begin="13635" end="13654" reftype="bibliography" refid="bb0095"/><pubmed:InternalReference xmi:id="984" sofa="9" begin="13825" end="13837" reftype="bibliography" refid="bb0090"/><pubmed:InternalReference xmi:id="1000" sofa="9" begin="14400" end="14412" reftype="bibliography" refid="bb0090"/><pubmed:InternalReference xmi:id="1009" sofa="9" begin="14742" end="14761" reftype="bibliography" refid="bb0060"/><pubmed:InternalReference xmi:id="1018" sofa="9" begin="14898" end="14917" reftype="bibliography" refid="bb0060"/><pubmed:InternalReference xmi:id="1027" sofa="9" begin="15120" end="15139" reftype="bibliography" refid="bb0060"/><pubmed:InternalReference xmi:id="1065" sofa="9" begin="15988" end="16006" reftype="bibliography" refid="bb0050"/><pubmed:InternalReference xmi:id="1226" sofa="9" begin="19955" end="19974" reftype="bibliography" refid="bb0010"/><pubmed:InternalReference xmi:id="1242" sofa="9" begin="19976" end="19997" reftype="bibliography" refid="bb0040"/><types:Section xmi:id="350" sofa="9" begin="1821" end="4573" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="363" sectionId="s0005" 
depth="0"/><types:Section xmi:id="499" sofa="9" begin="4573" end="6628" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="512" sectionId="s0010" depth="0"/><types:Section xmi:id="521" sofa="9" begin="4581" end="5618" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="534" sectionId="s0015" depth="1"/><types:Section xmi:id="550" sofa="9" begin="5618" end="6628" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="563" sectionId="s0020" depth="1"/><types:Section xmi:id="586" sofa="9" begin="6628" end="10212" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="599" sectionId="s0025" depth="0"/><types:Section xmi:id="835" sofa="9" begin="10212" end="18569" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="848" sectionId="s0030" depth="0"/><types:Section xmi:id="1036" sofa="9" begin="15660" end="17701" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="1049" sectionId="s0035" depth="1"/><types:Section xmi:id="1081" sofa="9" begin="17701" end="18569" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="1094" sectionId="s0040" depth="1"/><types:Section xmi:id="1110" sofa="9" begin="18569" end="19418" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="1123" sectionId="s0045" depth="0"/><types:Section xmi:id="1139" sofa="9" begin="19418" end="19460" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="1152" sectionId="s0050" depth="0"/><types:Section xmi:id="1168" sofa="9" begin="19460" end="19935" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="1181" sectionId="s0055" depth="0"/><types:Section xmi:id="1197" sofa="9" begin="19935" end="19998" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="1210" sectionId="s0060" depth="0"/><types:Caption xmi:id="632" sofa="9" begin="7321" end="7424" componentId="de.julielab.jcore.reader.pmc.PMCReader" captionType="table"/><types:Caption 
xmi:id="694" sofa="9" begin="8470" end="8538" componentId="de.julielab.jcore.reader.pmc.PMCReader" captionType="table"/><types:Caption xmi:id="756" sofa="9" begin="9665" end="9808" componentId="de.julielab.jcore.reader.pmc.PMCReader" captionType="table"/><types:Caption xmi:id="793" sofa="9" begin="9816" end="10205" componentId="de.julielab.jcore.reader.pmc.PMCReader" captionType="table"/><types:Table xmi:id="648" sofa="9" begin="7320" end="7432" componentId="de.julielab.jcore.reader.pmc.PMCReader" objectId="t0005" objectLabel="Table 1" objectCaption="632" objectTitle="624"/><types:Table xmi:id="710" sofa="9" begin="8469" end="8546" componentId="de.julielab.jcore.reader.pmc.PMCReader" objectId="t0010" objectLabel="Table 2" objectCaption="694" objectTitle="686"/><types:Table xmi:id="772" sofa="9" begin="9664" end="9816" componentId="de.julielab.jcore.reader.pmc.PMCReader" objectId="t0015" objectLabel="Table 3" objectCaption="756" objectTitle="748"/><types:Figure xmi:id="823" sofa="9" begin="9816" end="10212" componentId="de.julielab.jcore.reader.pmc.PMCReader" objectId="f0005" objectLabel="Fig. 1" objectCaption="793" objectTitle="785"/><tcas:DocumentAnnotation xmi:id="1251" sofa="9" begin="0" end="19998" language="x-unspecified"/><cas:Sofa xmi:id="9" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="Impact of COVID-19 on pediatric clinical research&#10;Purpose&#10;Many public institutions and settings have taken action to limit exposure to and slow the spread of the novel coronavirus (COVID-19). 
We sought to characterize the impact of stay-at-home orders on our study of cerebral autoregulation and its association with developmental delays in infants with congenital heart disease compared with healthy controls.&#10;Design and methods&#10;We calculated the number of participants recruited (i.e., not enrolled in the study) and assessed (i.e., currently enrolled) before March 2020 (pre-COVID-19) and the number of participants that we could not recruit or assess between March and July 2020 (missed due to COVID-19), separately for congenital heart disease and healthy control infants, in reference to the impacts of COVID-19. We used negative binomial regressions to determine incidence rate ratios which compared participants recruited and assessed pre-COVID-19 and missed due to COVID-19.&#10;Results&#10;Recruitment and assessments significantly decreased following the pandemic, i.e., participants were more likely to be recruited or be assessed pre-COVID-19 compared to during the pandemic. Study participants were 3.3 times as likely to have assessments performed pre-COVID-19 compared to during the COVID-19 pandemic (p &lt; 0.001).&#10;Clinical implications&#10;Clinical research teams may consider making protocol modifications such as virtual visits or video recordings explaining the study, for example, to adjust to the restrictions caused by COVID-19.&#10;Conclusion&#10;The COVID-19 pandemic drastically reduced recruitment and assessments completed in our study. Study teams will need to continue to modify procedures for recruitment and assessments that align with COVID-19 regulations to facilitate research progress during the pandemic.&#10;Introduction&#10;The first laboratory-confirmed case of severe acute respiratory syndrome coronavirus 2&#10;(SARS-CoV-2) or the coronavirus disease (COVID-19) reported to the Centers for Disease Control and Prevention (CDC) was documented in December 2019 (Fan et al., 2020; Stokes et al., 2020). 
The United States reported its first confirmed case of COVID-19 on January 20th, 2020 (Holshue et al., 2020; Omary &amp; Hassan, 2020). By March 2020, the World Health Organization declared COVID-19 a pandemic (Ghebreyesus, 2020) and the United States declared COVID-19 as a national emergency. Due to the rapid spread of COVID-19, businesses and organizations throughout the world had to close unexpectedly. The reinforcement of physical and social distancing and the implementation of stay-at-home orders put research operations on hold. Research studies were highly affected due to the inability to collect data.&#10;The COVID-19 pandemic has curtailed most clinical research, pausing trials for new enrollment to minimize participants' risk of exposure to COVID-19. From March 1st to April 26th, 2020, 905 clinical trials reported the COVID-19 pandemic as the reason for suspension (Asaad et al., 2020). The suspension of these trials posed significant clinical implications on patients as it slowed potential treatments for non-COVID-19 diseases, e.g., participants with end-stage diseases where drugs offered by clinical trials may be their only potential hope for treatment (Asaad et al., 2020). The pandemic was especially devastating for oncology patients in clinical trials because they could not receive the investigational treatments outside of the clinical research unit, nor could clinical researchers provide investigational treatments to their participants outside of their facilities due to research regulations (de Miguel et al., 2020).&#10;Health care facilities quickly implemented new procedures and restrictions that prevented clinical researchers from obtaining their data. Research, including those that required in-person visits, was deemed nonessential and came to a halt (Omary et al., 2020). 
Researchers could no longer perform experiments, assessments, and recruitment of participants (Weiner et al., 2020), resulting in a pause of data collection, which caused gaps in many research studies. Many medical institutions nationwide encouraged the stay-at-home order, causing most nonessential staff to work remotely at the beginning of March 2020. Although remote research can be performed, many studies cannot function for extended periods without direct contact with participants (Colbert et al., 2020). The purpose of this paper is to evaluate the impact of the COVID-19 pandemic on our research study at Children's Hospital Los Angeles (CHLA).&#10;Methods&#10;Data collection&#10;We collected data on the number of participants that we recruited (i.e., eligible infants not enrolled in the study) and assessed (i.e., infants currently enrolled in the study) pre-COVID-19, missed due to COVID-19, and post-COVID-19. This equaled 6 time points: (1) participant recruitment before March 2020 (pre-COVID-19); (2) participant assessments pre-COVID-19; (3) participant recruitment missed between March 2020 and July 2020 (missed due to COVID-19); (4) participant assessments missed due to COVID-19; (5) participant recruitment after July 2020 (post-COVID-19) when the county lifted stay-at-home orders; and (6) participant assessments post-COVID-19. Though we collected data on participants post-COVID-19, we did not run any statistical analyses using post-COVID-19 data because we could not successfully recruit or assess participants post-COVID-19 (as of August 2020). We then categorized participants into 2 groups: infants diagnosed with congenital heart disease (CHD) and healthy control (HC) infants.&#10;Statistical analyses&#10;Stratified descriptive analyses (based on group and assessment time points) were conducted and summarized as means with standard deviations for continuous variables and frequencies with proportions for categorical variables. 
Pearson's chi-square tests evaluated the difference in categorical variables versus CHD status at each assessment time point. We conducted Fisher's Exact test if any value within the category was less than five. Two-sample independent t-tests determined the difference between the means of 2 independent groups of infants at each time point of assessment for continuous variables.&#10;We used negative binomial regressions to estimate incidence rate ratios with 95% confidence intervals. We conducted separate regression analyses to test the association between pre-COVID-19 and missed due to COVID-19 recruitment and assessment for each group. P-values ≤0.05 were considered statistically significant. We conducted all statistical analyses on IBM SPSS Statistics 27.&#10;Results&#10;We recruited a total of 104 (33%) of the 315 infants that were eligible to participate in our study before March 2020 (pre-COVID-19), which included 57 HC and 47 infants diagnosed with CHD. We could not recruit a total of 127 eligible participants, 105 HC and 22 CHD infants, due to COVID-19. We did not recruit any infants between July to August 2020 (post-COVID-19). We performed a total of 160 assessments pre-COVID-19 (68 on HC and 92 on CHD infants). We had 49 infants (35 HC and 14 CHD infants) miss their study assessments due to COVID-19. From July to August 2020, post-COVID-19, we performed 6 HC assessments and 0 CHD assessments. Detailed findings can be found in Table 1&#10;.&#10;Number of participants recruited and assessed pre-COVID-19, missed due to COVID-19, and post-COVID-19.&#10;Table 1&#10;Pre-COVID-19, we performed assessments on 48 infants (19 HC, 29 CHD infants) at the neonatal age time point, 44 infants (21 HC, 23 CHD infants) at 3-months of age, 40 infants (16 HC, 24 CHD infants) at 6-months of age, and 30 infants (10 HC, 20 CHD infants) at 9-months of age. Detailed findings can be found in Table 2&#10;. 
We found a higher proportion of females in the CHD group throughout most age time points (69.2%, 54.2%, 60.0% for neonatal, 6-, 9-months respectively), which was statistically significant (χ2 = 4.7, p = 0.031). We had a high prevalence of Latino participants throughout all age time points (64.6%, 68.2%, 67.5%, 73.3% for neonatal, 3-, 6-, 9-months respectively), especially in the CHD group. The mean gestational age at the time of the neonatal exam was 40.7 weeks (SD ± 0.5). Additionally, we found a statistically significant difference in mean gestational age between the HC and CHD infants at the neonatal time point (t = 5.8, 95%CI: 0.4, 0.9, p &lt; 0.001). Detailed demographic findings can be found in Table 2.&#10;Pre-COVID-19 demographics with descriptive analyses between groups.&#10;Table 2&#10;A negative binomial regression analysis of recruitment and assessments at the different COVID-19 time points among the study participants is shown in Table 3&#10;and Fig. 1&#10;. CHD infants were 2.1 times as likely to be recruited into our study pre-COVID-19 compared to during COVID-19 (missed due to COVID-19) (β = 0.8 exponentiated, 95%CI: 1.9, 3.5, p = 0.003). HC infants were 1.8 times as likely to be recruited into the study pre-COVID-19 compared to during COVID-19 (missed due to COVID-19) (β = 0.6 exponentiated, 95%CI: 1.3, 2.5, p &lt; 0.001). Study participants were 3.3 times as likely to have assessments performed pre-COVID-19 (β = 1.2 exponentiated, 95%CI: 2.4, 4.5, p &lt; 0.001) compared to during the COVID-19 pandemic (missed due to COVID-19). CHD infants were 6.6 times as likely to have an assessment performed pre-COVID-19 compared to during COVID-19 (missed due to COVID-19) (β = 1.9 exponentiated, 95%CI: 3.7, 11.5, p &lt; 0.001). 
HC infants were 1.9 times as likely to have an assessment performed pre-COVID-19 (β = 0.7 exponentiated, 95%CI: 1.3, 2.9, p = 0.001) than during COVID-19 (missed due to COVID-19).&#10;Incidence rate ratios for the associations of recruiting and assessing infants pre-COVID-19 compared to missed due to COVID-19 between groups.&#10;Table 3&#10;Infant recruitment and assessments at different COVID-19 time points.&#10;Note. This figure depicts the number of infants we recruited and assessed before March 2020 (pre-COVID-19) compared to the number of infants we failed to recruit and assess during March 2020 and July 2020 due to COVID-19 restrictions (missed due to COVID-19).&#10;a CHD = Congenital Heart Disease. b HC = Healthy Controls.&#10;Fig. 1&#10;Discussion&#10;We found significantly decreased participant enrollment and follow-up study assessments due to COVID-19. Participants enrolled pre-COVID-19 were less willing to return for their follow-up appointments in fear of unnecessary COVID-19 exposure in the hospital setting. Many of our study participants received care at our institution's specialty clinics and preferred to schedule study assessments on the same day as their clinic visits. Since our institution now provides telehealth visits, many families chose that platform rather than in-person visits. In addition, we could not reschedule many of our missed study assessments as the participants exceeded the age range for that particular assessment time point.&#10;Our results were similar to other studies that described the impact of COVID-19 on research operations and participation. An online survey of 40 oncologists with ongoing clinical trials reported that 35 of the 40 (87.5%) oncologists stopped screening new participants and 25 (62.5%) oncologists stopped recruiting overall (Parikh et al., 2020). Additionally, 11 (27.5%) oncologists reported participant withdrawal from the study due to concerns about COVID-19 (Parikh et al., 2020). 
We did not experience any participant withdrawals due to COVID-19, but we could not recruit 127 potential participants due to the pandemic. Since we could not recruit 127 potential participants due to COVID-19 and recruited 0 participants within the 4 months of returning to research post-COVID-19, it was clear that the pandemic reduced our enrollment numbers. Furthermore, the number of assessments performed pre-COVID-19 was 27 times higher than the number of assessments performed post-COVID-19.&#10;Other pediatric studies reported similar obstacles to our study due to COVID-19. An online survey asked 91 members of the Pediatric Asthma in Real Life Think Tank and World Allergy Organization Pediatric Asthma Committee around the world to report the challenges they faced in their pediatric asthma clinics due to COVID-19 (Papadopoulos et al., 2020). Seventy-five percent of respondents reported decreased assessments and consultations (half the usual number) during the pandemic (Papadopoulos et al., 2020). A survey asked 34 pediatric oncology centers, in the Middle East, North Africa, and West Asia, about barriers to pediatric oncology management during the pandemic (Saab et al., 2020). Sixty-five percent of respondents indicated that patients canceled their off-therapy visits and 47% of respondents indicated that patients refused to come in for essential visits due to fear of contracting the virus (Saab et al., 2020). Thus, the number of clinical and research visits decreased because parents did not want to expose their vulnerable children to COVID-19.&#10;However, the COVID-19 pandemic did not affect other researchers as they retained enrollment rates similar to the pre-pandemic rates. These investigators modified their protocols for recruitment and data collection to accommodate the new COVID-19 restrictions. 
Some examples of recruitment methods included online recruiting through social media platforms and digital health media, and obtaining informed consent remotely (Gaba &amp; Bhatt, 2020; Singh &amp; Chaturvedi, 2020). New data collection methods can include in-home visits or virtual monitoring instead of the traditional visits to the research lab (Gaba &amp; Bhatt, 2020; Singh &amp; Chaturvedi, 2020; Weiner et al., 2020). Specifically, researchers conducted visits remotely, incorporated home-based monitoring technologies, and provided delivery or curbside pickup for samples or products (Tuttle, 2020). Aside from minimal virtual recruitment, we have not yet incorporated those types of technologies into our study. Many of these changes to protocol require additional financial resources, which are limited in our project. We are in the process of applying for more research funding to implement some of these measures.&#10;Institutions and researchers enforced strict health precautions and protocols to prevent and reduce exposure to COVID-19, giving participants the option of an in-person or remote visit in July 2020 after re-opening in-person research visits (Tuttle, 2020). Many research studies have had success with participation after the implementation of these new safety protocols. An ongoing study at Geriatric Research Education and Clinical Center reported that 40 of the 51 participants scheduled during the COVID-19 pandemic reported feeling safe coming in for their research appointments (Padala et al., 2020). Furthermore, their participants stated the medical center was safe and prepared because of the implementation of COVID-19 screenings (Padala et al., 2020). Although the participants in this study were also considered a high-risk group, they had the opportunity to consult with their caregivers and family before deciding to come in for their appointments (Padala et al., 2020). 
In contrast, when we began to contact families to in July 2020, mothers immediately refused to return for our study assessments, due to their concern of exposing their infant to COVID-19, without consultation with the infant's father or other family members. It is unknown if the mother's decisions would have changed after speaking with other family members, but it seemed to help other researchers. Thus, the target population may play a role in the enrollment and participation rates during and after the pandemic.&#10;Clinical implications&#10;Research studies must modify their protocols and procedures to accommodate COVID-19 precautions and a changing research environment. In order to accommodate physical distancing recommendations, essential research staff are required to work restricted hours with either split schedules or staggered shifts (Omary et al., 2020). We have adjusted the total number of persons allowed in our shared lab at one time and are limited to one study assessment per day. Recruitment activities have been altered to virtual meetings or telephone calls. Other researchers have created recruitment videos or instructional videos for data/specimen collection (Brezing et al.; Izmailova et al.). In order to continue to have successful recruitment and retention, researchers must be flexible and adjust their protocols to accommodate the new COVID-19 era.&#10;Our institution introduced re-entry and recovery plans for researchers in early July 2020. In our study specifically, the suspension of study assessments interrupted our ability to perform neurodevelopmental exams and refer participants to the Regional Centers for possible early intervention services, if applicable. Therefore, those infants affected by CHD may not have received the necessary screening and referrals for interventions to improve their development and future quality of life. 
Pre-COVID-19, families preferred to schedule a research appointment on the same day that they had a clinical visit. However, since most clinics offered telehealth visits, parents were less likely to come in-person for a research appointment or non-essential hospital visit. To help ease their concerns, we emphasized the safety of the research visits with: (1) wearing personal protective equipment; (2) routine hand hygiene; (3) assessment performed in a private room in a building across the street from the hospital; (4) and following hospital regulations, including COVID-19 screenings and using standard precautions. These actions have facilitated our follow-up study assessments.&#10;Limitations&#10;Our study had a few limitations. We could not determine how our recruitment and assessments were affected post-COVID-19 compared to during COVID-19 (missed due to COVID-19) and pre-COVID-19 as we were unsuccessful in recruiting and assessing eligible infants from July to August 2020 when stay-at-home restrictions were loosened. Another limitation is the assumption that our sample size was large enough to use negative binomial regression. We believe we had a sufficient sample size, since we detected significant differences between timeframes. In addition, the precision of our results might be compromised as the use of negative binomial regressions indicated that we had overdispersion in our data; variances were much larger than the means. Nevertheless, our results shed light on how the COVID-19 pandemic has impacted pediatric clinical research.&#10;Conclusion&#10;The COVID-19 pandemic greatly impacted our study with a drastic decrease in participant recruitment and assessment. Alternative or contingency plans, e.g., virtual or telephone recruitment and visits, may be beneficial for researchers (who have the financial and personnel means) to implement these types of changes immediately. 
The restrictions on study progress can negatively impact investigators by limiting their ability to collect data in their projected timeframe or may be harmful to the participant who may need the life-saving investigative treatment. Investigators must be flexible and be able to quickly adapt to the constraints caused by the COVID-19 pandemic. For the foreseeable future, modifying protocols to the changing environment of the COVID-19 pandemic will facilitate the progress of research and patient outcomes.&#10;Declarations of Competing Interest&#10;&#10;None.&#10;Author statement&#10;N.T. conceived of the idea for the manuscript. M.T. performed and wrote the methods, statistical analyses, and results. J.L. wrote the introduction and discussion. J.W. wrote the abstract and highlights. J.N. revised and edited the manuscript. M.B. verified the analytical methods and supervised the findings and revisions of this work. N.T. performed the final revisions and edits. All authors discussed the results and contributed to the final manuscript.&#10;Uncited references&#10;&#10;Brezing et al., n.d&#10;&#10;Izmailova et al., n.d&#10;"/><cas:FSArray xmi:id="285" elements="292 301 310 319 328"/><types:Keyword xmi:id="292" sofa="9" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" name="COVID-19"/><types:Keyword xmi:id="301" sofa="9" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" name="Congenital heart disease"/><types:Keyword xmi:id="310" sofa="9" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" name="Pediatrics"/><types:Keyword xmi:id="319" sofa="9" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" name="Clinical research"/><types:Keyword xmi:id="328" sofa="9" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" name="Stay-at-home order"/><cas:FSArray xmi:id="277" elements="205 217 229 241 253 265"/><types:AuthorInfo xmi:id="205" sofa="9" begin="0" end="0" 
componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="Nhu N." affiliation="af0005" lastName="Tran"/><types:AuthorInfo xmi:id="217" sofa="9" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="Michelle" affiliation="af0010" lastName="Tran"/><types:AuthorInfo xmi:id="229" sofa="9" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="Jeraldine" affiliation="af0015" lastName="Lopez"/><types:AuthorInfo xmi:id="241" sofa="9" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="Jessica" affiliation="af0020" lastName="Woon"/><types:AuthorInfo xmi:id="253" sofa="9" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="Jennifer" affiliation="af0020" lastName="Nguyen"/><types:AuthorInfo xmi:id="265" sofa="9" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="Mary-Lynn" affiliation="af0025" lastName="Brecht"/><types:Journal xmi:id="175" sofa="9" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" pubDate="195" ISSN="0882-5963" title="Journal of Pediatric Nursing"/><types:Date xmi:id="195" sofa="9" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" day="15" month="6" year="2021"/><cas:View sofa="9" members="337 1 624 686 748 785 32 57 82 107 132 42 67 92 117 142 372 379 431 465 543 572 579 608 641 661 703 723 765 802 809 816 857 864 889 932 993 1058 1074 1103 1132 1161 1190 1219 1235 24 49 74 99 124 16 156 363 512 534 563 599 848 1049 1094 1123 1152 1181 1210 386 395 404 413 422 438 447 456 472 481 490 615 668 677 730 739 871 880 896 905 914 923 939 948 957 966 975 984 1000 1009 1018 1027 1065 1226 1242 350 499 521 550 586 835 1036 1081 1110 1139 1168 1197 632 694 756 793 648 710 772 823 1251"/></xmi:XMI>
\ No newline at end of file

From 56fdb2157e1dbcdb1032718b36a3b8e46cbb42fb Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 9 Jul 2021 15:34:18 +0200
Subject: [PATCH 081/269] JTBD: Fixed an issue where a regular expression match
 took days without finishing.

The issue occurred with document PMC7575323. The respective regex, `(.*[\W].*){5,}`, allows an unbounded number of repetitions, which leads to excessive backtracking on long inputs. In a first attempt, I introduced an upper bound of 50 repetitions, but the match still ran for several minutes without finishing. Instead, the expression is now only applied to "superunits" (I'm actually not sure what those consist of) with a length of less than 200 characters. With that change, the document in question was finished within seconds.
---
 .../main/java/de/julielab/jcore/ae/jtbd/Sentence2TokenPipe.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jcore-jtbd-ae/src/main/java/de/julielab/jcore/ae/jtbd/Sentence2TokenPipe.java b/jcore-jtbd-ae/src/main/java/de/julielab/jcore/ae/jtbd/Sentence2TokenPipe.java
index c52e1ad12..833f97e8f 100755
--- a/jcore-jtbd-ae/src/main/java/de/julielab/jcore/ae/jtbd/Sentence2TokenPipe.java
+++ b/jcore-jtbd-ae/src/main/java/de/julielab/jcore/ae/jtbd/Sentence2TokenPipe.java
@@ -524,7 +524,7 @@ else if (superUnitRep.length() <= 8)
 
 			// check whether superunit might be a chemical
 			// therefor we check the number typical special characters contained
-			if ((superUnitRep.length() > 6)
+			if ((superUnitRep.length() > 6 && superUnitRep.length() < 200)
 					&& superUnitRep.matches("(.*[\\W].*){5,}")
 					&& !superUnitRep.contains("-->"))
 				token.setFeatureValue("SU_isChemical", 1);

From ff922166592ffcc5c7155809a742f4d89223ad7c Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 12 Jul 2021 14:04:29 +0200
Subject: [PATCH 082/269] Adding the first GitHub Actions workflow, see #122.

---
 .github/workflows/maven.yml | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 .github/workflows/maven.yml

diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
new file mode 100644
index 000000000..34492a386
--- /dev/null
+++ b/.github/workflows/maven.yml
@@ -0,0 +1,25 @@
+# This workflow will build a Java project with Maven
+# For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-maven
+
+name: Java CI with Maven
+
+on:
+  push:
+    branches: [ master, v2.6 ]
+  pull_request:
+    branches: [ master, v2.6 ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up JDK 11
+        uses: actions/setup-java@v2
+        with:
+          java-version: '11'
+          distribution: 'adopt'
+      - name: Build with Maven
+        run: mvn -B package --file pom.xml

From e5659b0ef63564dcceeda61e5d613e13ae5967a2 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 12 Jul 2021 14:11:16 +0200
Subject: [PATCH 083/269] Enhancing GitHubActions: Python & snapshot
 repository.

---
 .github/maven-settings.xml  | 24 ++++++++++++++++++++++++
 .github/workflows/maven.yml | 10 +++++++++-
 2 files changed, 33 insertions(+), 1 deletion(-)
 create mode 100644 .github/maven-settings.xml

diff --git a/.github/maven-settings.xml b/.github/maven-settings.xml
new file mode 100644
index 000000000..9c8a6c405
--- /dev/null
+++ b/.github/maven-settings.xml
@@ -0,0 +1,24 @@
+<?xml version="1.0" ?>
+<settings>
+    <profiles>
+        <profile>
+            <id>sonatype-snapshots</id>
+            <repositories>
+                <repository>
+                    <id>sonatype-nexus-snapshots</id>
+                    <name>Sonatype Nexus Snapshots</name>
+                    <url>https://oss.sonatype.org/content/repositories/snapshots</url>
+                    <releases>
+                        <enabled>false</enabled>
+                    </releases>
+                    <snapshots>
+                        <enabled>true</enabled>
+                    </snapshots>
+                </repository>
+            </repositories>
+        </profile>
+    </profiles>
+    <activeProfiles>
+        <activeProfile>sonatype-snapshots</activeProfile>
+    </activeProfiles>
+</settings>
\ No newline at end of file
diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
index 34492a386..2c3de94b2 100644
--- a/.github/workflows/maven.yml
+++ b/.github/workflows/maven.yml
@@ -15,6 +15,14 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
+      - name: Set up Python 3.7
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.7
+      - name: Install python dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install flair==0.6.1
       - uses: actions/checkout@v2
       - name: Set up JDK 11
         uses: actions/setup-java@v2
@@ -22,4 +30,4 @@ jobs:
           java-version: '11'
           distribution: 'adopt'
       - name: Build with Maven
-        run: mvn -B package --file pom.xml
+        run: mvn -B package --file pom.xml --settings .github/maven-settings.xml

From dbcc1bfc5adac2fd23659a06858916d7cb04e7b4 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 12 Jul 2021 14:37:42 +0200
Subject: [PATCH 084/269] Using https for the BioNLP Maven repository.

It was just 'http' previously which is blocked in newer Maven versions.
---
 jcore-biolemmatizer-ae/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jcore-biolemmatizer-ae/pom.xml b/jcore-biolemmatizer-ae/pom.xml
index b5e089a8f..51834d192 100644
--- a/jcore-biolemmatizer-ae/pom.xml
+++ b/jcore-biolemmatizer-ae/pom.xml
@@ -47,7 +47,7 @@
 	<repositories>
     	<repository>
       		<id>BioNLP Repository</id>
-      		<url>http://svn.code.sf.net/p/bionlp/code/repo</url>
+      		<url>https://svn.code.sf.net/p/bionlp/code/repo</url>
     	</repository>
   	</repositories>
 	<licenses>

From 27fbc4a34b090953decdc1b81130397103dfda7b Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 12 Jul 2021 14:45:50 +0200
Subject: [PATCH 085/269] Adding another repository with https protocol in
 context of BioLemmatizer.

BioLemmatizer uses MorphAdorner and declares a repository for it in its own pom.xml. However, that repository URL also uses plain http and is therefore blocked by Maven. I am adding an https repository to the jcore-biolemmatizer-ae pom.xml in the hope that it will be used instead of the original one.
---
 jcore-biolemmatizer-ae/pom.xml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/jcore-biolemmatizer-ae/pom.xml b/jcore-biolemmatizer-ae/pom.xml
index 51834d192..cc11be3b1 100644
--- a/jcore-biolemmatizer-ae/pom.xml
+++ b/jcore-biolemmatizer-ae/pom.xml
@@ -49,6 +49,12 @@
       		<id>BioNLP Repository</id>
       		<url>https://svn.code.sf.net/p/bionlp/code/repo</url>
     	</repository>
+		<!-- This repo makes available the MorphAdorner dependency -->
+		<repository>
+			<id>maven.aksw.internal</id>
+			<name>AKSW Internal Release Repository</name>
+			<url>https://maven.aksw.org/repository/internal</url>
+		</repository>
   	</repositories>
 	<licenses>
 		<license>

From 3c6571102dda3398cf9bbfee4c19d565565eb96e Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 27 Jul 2021 11:36:11 +0200
Subject: [PATCH 086/269] ES Consumer: Adding a Lucene index cache for the Map-
 and AddonProviders.

When the mapping files get very large (in my case over 30M lines), loading all of them into memory can cause memory issues. These can often be solved by string interning, where each distinct String is stored only once in the String pool. However, this pool is also of a finite size (which can be set via the -XX:StringTableSize=<size> JVM option). For the case that most probably only a part of the whole map will ever be used, the new abstract `PersistentIndexAddonTermsProvider` and `PersistentStringIndexMapProvider` classes offer an alternative. Each has one concrete subclass which uses a Lucene index to store the file contents in an indexed format. A Guava cache is used to load from this index and keep the actually required elements in memory. This reduced memory requirements a lot when dealing with very large resource files. Resolves #123.
---
 jcore-elasticsearch-consumer/README.md        |   6 +-
 jcore-elasticsearch-consumer/pom.xml          |   1 -
 .../consumer/es/AbstractFieldGenerator.java   |   4 +-
 .../consumer/es/filter/AddonTermsFilter.java  |  53 +++---
 .../es/filter/SingleAddonTermsFilter.java     |  35 ++++
 .../sharedresources/AbstractMapProvider.java  |  34 ++--
 .../sharedresources/AddonTermsProvider.java   |  25 ++-
 .../es/sharedresources/LuceneIndex.java       | 160 +++++++++++++++++
 .../MapDBReversedDoubleMapProvider.java       |  70 --------
 .../es/sharedresources/MapProvider.java       |  14 ++
 .../PersistentIndexAddonTermsProvider.java    | 156 +++++++++++++++++
 ...rsistentLuceneIndexAddonTermsProvider.java |  17 ++
 .../PersistentLuceneStringMapProvider.java    |  17 ++
 .../PersistentStringIndexMapProvider.java     | 163 ++++++++++++++++++
 .../ReversedDoubleMapProvider.java            |  10 ++
 .../es/sharedresources/StringIndex.java       |  25 +++
 16 files changed, 671 insertions(+), 119 deletions(-)
 create mode 100644 jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/filter/SingleAddonTermsFilter.java
 create mode 100644 jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/LuceneIndex.java
 delete mode 100644 jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/MapDBReversedDoubleMapProvider.java
 create mode 100644 jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentIndexAddonTermsProvider.java
 create mode 100644 jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentLuceneIndexAddonTermsProvider.java
 create mode 100644 jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentLuceneStringMapProvider.java
 create mode 100644 jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentStringIndexMapProvider.java
 create mode 100644 jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/StringIndex.java

diff --git a/jcore-elasticsearch-consumer/README.md b/jcore-elasticsearch-consumer/README.md
index a034187c7..c8e69c1da 100644
--- a/jcore-elasticsearch-consumer/README.md
+++ b/jcore-elasticsearch-consumer/README.md
@@ -1,9 +1,9 @@
 # JCoRe ElasticSearchConsumer
 
-**Descriptor Path**:
+**Descriptor Paths**:
 ```
-.jcore-elasticsearch-consumer.src.main.resources.de.julielab.jcore.consumer.es.desc.jcore-elasticsearch-consumer
-.jcore-elasticsearch-consumer.src.main.resources.de.julielab.jcore.consumer.es.desc.jcore-json-consumer
+de.julielab.jcore.consumer.es.desc.jcore-elasticsearch-consumer
+de.julielab.jcore.consumer.es.desc.jcore-json-consumer
 ```
 
 ### Objective
diff --git a/jcore-elasticsearch-consumer/pom.xml b/jcore-elasticsearch-consumer/pom.xml
index 57f9452c2..cd72f8d4f 100644
--- a/jcore-elasticsearch-consumer/pom.xml
+++ b/jcore-elasticsearch-consumer/pom.xml
@@ -76,7 +76,6 @@
             <groupId>org.mapdb</groupId>
             <artifactId>mapdb</artifactId>
             <version>3.0.7</version>
-            <scope>provided</scope>
         </dependency>
         <dependency>
             <groupId>org.testng</groupId>
diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/AbstractFieldGenerator.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/AbstractFieldGenerator.java
index 54e1c91d7..287cd68ea 100644
--- a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/AbstractFieldGenerator.java
+++ b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/AbstractFieldGenerator.java
@@ -499,8 +499,8 @@ else if (fieldValues.size() == 1)
 	 * featurePaths and reset once per featurePath.
 	 * 
 	 * @param a
-	 * @param featurePath
-	 * @param f
+	 * @param featurePaths
+	 * @param filters
 	 * @return
 	 * @throws CASException
 	 */
diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/filter/AddonTermsFilter.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/filter/AddonTermsFilter.java
index b37e52348..9114109be 100644
--- a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/filter/AddonTermsFilter.java
+++ b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/filter/AddonTermsFilter.java
@@ -6,33 +6,36 @@
 
 public class AddonTermsFilter extends AbstractFilter {
 
-	private Map<String, String[]> addonTerms;
+    private Map<String, String[]> addonTerms;
 
-	public AddonTermsFilter(Map<String, String[]> addonTerms) {
-		this.addonTerms = addonTerms;
-	}
+    public AddonTermsFilter(Map<String, String[]> addonTerms) {
+        this.addonTerms = addonTerms;
+    }
 
-	@Override
-	public List<String> filter(String input) {
-		newOutput();
-		if (null != input) {
-			output.add(input);
-			String[] hypernymArray = addonTerms.get(input);
-			if (null != hypernymArray) {
-				output = new ArrayList<>(hypernymArray.length + 1);
-				output.add(input);
-				for (int i = 0; i < hypernymArray.length; i++) {
-					String hypernym = hypernymArray[i];
-					output.add(hypernym);
-				}
-			}
-		}
-		return output;
-	}
+    @Override
+    public List<String> filter(String input) {
+        newOutput();
+        if (null != input) {
+            output.add(input);
+            String[] addonArray = addonTerms.get(input);
+            if (null != addonArray) {
+                // Only create a new output array when the default ArrayList size can't hold all the elements
+                if (addonArray.length >= 10) {
+                    output = new ArrayList<>(addonArray.length + 1);
+                    output.add(input);
+                }
+                for (int i = 0; i < addonArray.length; i++) {
+                    String addonTerm = addonArray[i];
+                    output.add(addonTerm);
+                }
+            }
+        }
+        return output;
+    }
 
-	@Override
-	public Filter copy() {
-		return new AddonTermsFilter(addonTerms);
-	}
+    @Override
+    public Filter copy() {
+        return new AddonTermsFilter(addonTerms);
+    }
 
 }
diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/filter/SingleAddonTermsFilter.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/filter/SingleAddonTermsFilter.java
new file mode 100644
index 000000000..1e83f2b9f
--- /dev/null
+++ b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/filter/SingleAddonTermsFilter.java
@@ -0,0 +1,35 @@
+package de.julielab.jcore.consumer.es.filter;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * <p>Like {@link AddonTermsFilter} but accepts single string values instead of string arrays.</p>
+ */
+public class SingleAddonTermsFilter extends AbstractFilter {
+
+    private Map<String, String> addonTerms;
+
+    public SingleAddonTermsFilter(Map<String, String> addonTerms) {
+        this.addonTerms = addonTerms;
+    }
+
+    @Override
+    public List<String> filter(String input) {
+        newOutput();
+        if (null != input) {
+            output.add(input);
+            String addonTerm = addonTerms.get(input);
+            if (null != addonTerm) {
+                output.add(addonTerm);
+            }
+        }
+        return output;
+    }
+
+    @Override
+    public Filter copy() {
+        return new SingleAddonTermsFilter(addonTerms);
+    }
+
+}
diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/AbstractMapProvider.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/AbstractMapProvider.java
index fdc15aaa1..7a181d55a 100644
--- a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/AbstractMapProvider.java
+++ b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/AbstractMapProvider.java
@@ -4,7 +4,6 @@
 import org.apache.uima.resource.DataResource;
 import org.apache.uima.resource.ResourceInitializationException;
 import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import java.io.BufferedReader;
 import java.io.IOException;
@@ -12,11 +11,25 @@
 import java.util.HashMap;
 import java.util.Map;
 
-
+/**
+ * <p>Base class for resources that map one term to another. Uses a HashMap. The trivial instantiable subclass is {@link MapProvider}.</p>
+ * <p>This class is abstract because it is generic. To work with other data types than strings, the {@link #getKey(String)} and {@link #getValue(String)}
+ * methods are overridden by subclasses to deliver the correct data types from the string input.</p>
+ * <p>Subclasses deal with maps where the keys and/or values are not strings but numbers. Other subclasses deal with
+ * String but use a persistent data structure to deal with very large maps.</p>
+ *
+ * @param <K>
+ * @param <V>
+ */
 public abstract class AbstractMapProvider<K, V> implements IMapProvider<K, V> {
-    private final static Logger log = LoggerFactory.getLogger(AbstractMapProvider.class);
+    protected final Logger log;
     protected boolean reverse = false;
-    private HashMap<K, V> map;
+    protected Map<K, V> map;
+
+    public AbstractMapProvider(Logger log) {
+        this.log = log;
+        map = new HashMap<>();
+    }
 
     @Override
     public void load(DataResource aData) throws ResourceInitializationException {
@@ -44,16 +57,11 @@ public void load(DataResource aData) throws ResourceInitializationException {
                     throw new IllegalArgumentException("Format error in map file: Expected format is 'originalValue<tab>mappedValue' but the input line '" + line
                             + "' has " + split.length + " columns.");
                 if (reverse)
-                    map.put(getKey(split[1]), getValue(split[0]));
+                    put(getKey(split[1]), getValue(split[0]));
                 else
-                    map.put(getKey(split[0]), getValue(split[1]));
+                    put(getKey(split[0]), getValue(split[1]));
             }
-            log.info("Finished reading resource {}", aData.getUri());
-            log.info("Copying {} values into a fresh HashMap of the exact correct size", map.size());
-            HashMap<K, V> tmp = new HashMap<>(map.size(), 1f);
-            tmp.putAll(map);
-            map = tmp;
-            log.info("Done.");
+            log.info("Finished reading resource {} and got {} elements.", aData.getUri(), map.size());
         } catch (IOException e) {
             throw new ResourceInitializationException(e);
         } finally {
@@ -66,6 +74,8 @@ public void load(DataResource aData) throws ResourceInitializationException {
         }
     }
 
+    protected abstract void put(K key, V value);
+
     protected abstract V getValue(String valueString);
 
     protected abstract K getKey(String keyString);
diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/AddonTermsProvider.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/AddonTermsProvider.java
index 7b4adb2d0..5118d8be4 100644
--- a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/AddonTermsProvider.java
+++ b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/AddonTermsProvider.java
@@ -6,24 +6,37 @@
 import org.apache.uima.resource.DataResource;
 import org.apache.uima.resource.ResourceInitializationException;
 import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.net.URI;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
+/**
+ * <p>Base class for addon terms (i.e. terms to be added to some key term, like synonyms or hypernyms) that uses a HashMap.</p>
+ * <p>Subclasses of this class use other data structures to store and retrieve the addon terms. Useful for large numbers of such terms.</p>
+ */
 public class AddonTermsProvider implements IAddonTermsProvider {
-    Logger log = LoggerFactory.getLogger(AddonTermsProvider.class);
+    protected final Logger log;
 
-    private Map<String, String[]> addonTerms;
+    protected Map<String, String[]> addonTerms;
+
+    public AddonTermsProvider(Logger log) {
+        this.log = log;
+        addonTerms = new HashMap<>();
+    }
+
+    protected void put(String term, String[] addonArray) {
+        addonTerms.put(term, addonArray);
+    }
 
     @Override
     public void load(DataResource aData) throws ResourceInitializationException {
         try {
-            addonTerms = new HashMap<>();
-            log.info("Loading addon terms from " + aData.getUri());
+            URI uri = aData.getUri();
+            log.info("Loading addon terms from " + uri);
             int addons = 0;
             InputStream inputStream;
             try {
@@ -56,7 +69,7 @@ public void load(DataResource aData) throws ResourceInitializationException {
                     addonArray[i] = trimmedAddon.intern();
                     addons++;
                 }
-                addonTerms.put(term, addonArray);
+                put(term, addonArray);
             }
             log.info("Loaded {} addons for {} terms.", addons, addonTerms.size());
         } catch (IOException e) {
diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/LuceneIndex.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/LuceneIndex.java
new file mode 100644
index 000000000..204f07abb
--- /dev/null
+++ b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/LuceneIndex.java
@@ -0,0 +1,160 @@
+package de.julielab.jcore.consumer.es.sharedresources;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.*;
+import org.apache.lucene.search.*;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.NIOFSDirectory;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.Arrays;
+
+public class LuceneIndex implements StringIndex {
+    private final static Logger log = LoggerFactory.getLogger(LuceneIndex.class);
+    private  IndexWriter iw;
+    private final FSDirectory directory;
+    private IndexSearcher searcher;
+
+    public LuceneIndex(String indexDirectory) {
+        try {
+            Path lucene = Path.of(indexDirectory);
+            directory = NIOFSDirectory.open(lucene);
+            // Do not open a writer to an existing index. This causes locking issues when starting multiple
+            // pipelines in parallel.
+            // Of course, the first pipeline still needs to create the index, so this must be a one-time effort
+            // that has to be completed before the other pipelines are started.
+            if (!lucene.toFile().exists()) {
+                IndexWriterConfig iwc = new IndexWriterConfig();
+                iw = new IndexWriter(directory, iwc);
+            }
+        } catch (IOException e) {
+            log.error("could not initialize Lucene index", e);
+            throw new IllegalStateException(e);
+        }
+    }
+
+    @Override
+    public String get(String key) {
+        TermQuery tq = new TermQuery(new Term("key", key));
+        BooleanQuery.Builder b = new BooleanQuery.Builder();
+        b.add(tq, BooleanClause.Occur.FILTER);
+        BooleanQuery q = b.build();
+        try {
+            TopDocs topDocs = searcher.search(q, 1);
+            if (topDocs.scoreDocs.length > 0) {
+                Document doc = searcher.getIndexReader().document(topDocs.scoreDocs[0].doc);
+                return doc.getField("value").stringValue();
+            }
+        } catch (IOException e) {
+            log.error("Could not retrieve results for '{}' in Lucene index.", key, e);
+            throw new IllegalStateException(e);
+        }
+        return null;
+    }
+
+    @Override
+    public String[] getArray(String key) {
+        TermQuery tq = new TermQuery(new Term("key", key));
+        BooleanQuery.Builder b = new BooleanQuery.Builder();
+        b.add(tq, BooleanClause.Occur.FILTER);
+        BooleanQuery q = b.build();
+        try {
+            TopDocs topDocs = searcher.search(q, 1);
+            if (topDocs.scoreDocs.length > 0) {
+                Document doc = searcher.getIndexReader().document(topDocs.scoreDocs[0].doc);
+                return Arrays.stream(doc.getFields("value")).map(IndexableField::stringValue).toArray(String[]::new);
+            }
+        } catch (IOException e) {
+            log.error("Could not retrieve results for '{}' in Lucene index.", key, e);
+            throw new IllegalStateException(e);
+        }
+        return null;
+    }
+
+    @Override
+    public void put(String key, String value) {
+        Field keyField = new StringField("key", key, Field.Store.NO);
+        Field valueField = new StoredField("value", value);
+        Document doc = new Document();
+        doc.add(keyField);
+        doc.add(valueField);
+        try {
+            iw.addDocument(doc);
+        } catch (IOException e) {
+            log.error("Could not index key-value pair {}:{} with Lucene", key, value, e);
+            throw new IllegalStateException(e);
+        }
+    }
+
+    @Override
+    public void put(String key, String[] value) {
+        Field keyField = new StringField("key", key, Field.Store.NO);
+        Document doc = new Document();
+        doc.add(keyField);
+        for (var v : value)
+            doc.add(new StoredField("value", v));
+        try {
+            iw.addDocument(doc);
+        } catch (IOException e) {
+            log.error("Could not index key-value pair {}:{} with Lucene", key, value, e);
+            throw new IllegalStateException(e);
+        }
+    }
+
+    @Override
+    public void commit() {
+        try {
+            iw.commit();
+        } catch (IOException e) {
+            log.error("Could not commit Lucene index", e);
+            throw new IllegalStateException(e);
+        }
+    }
+
+    @Override
+    public boolean requiresExplicitCommit() {
+        return true;
+    }
+
+    @Override
+    public void close() {
+        try {
+            if (searcher != null) {
+                searcher.getIndexReader().close();
+                searcher = null;
+            }
+            if (iw != null) {
+                iw.close();
+                iw = null;
+            }
+        } catch (IOException e) {
+            log.error("Could not close Lucene index reader.", e);
+            throw new IllegalStateException(e);
+        }
+    }
+
+    @Override
+    public void open() {
+        try {
+            searcher = new IndexSearcher(DirectoryReader.open(directory));
+        } catch (IOException e) {
+            log.error("Could not open Lucene index searcher.", e);
+            throw new IllegalStateException(e);
+        }
+    }
+
+    @Override
+    public int size() {
+        if (iw != null && iw.isOpen())
+            return iw.numDocs();
+        else if (searcher != null)
+            return searcher.getIndexReader().numDocs();
+        return 0;
+    }
+}
diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/MapDBReversedDoubleMapProvider.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/MapDBReversedDoubleMapProvider.java
deleted file mode 100644
index a12a082a5..000000000
--- a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/MapDBReversedDoubleMapProvider.java
+++ /dev/null
@@ -1,70 +0,0 @@
-package de.julielab.jcore.consumer.es.sharedresources;
-
-import de.julielab.jcore.utility.JCoReTools;
-import org.apache.uima.resource.DataResource;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.mapdb.DB;
-import org.mapdb.DBMaker;
-import org.mapdb.Serializer;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.Map;
-
-public class MapDBReversedDoubleMapProvider implements IMapProvider<String, Double> {
-
-    private Map<String, Double> map;
-
-    @Override
-    public void load(DataResource aData) throws ResourceInitializationException {
-        BufferedReader br = null;
-        try {
-            final DB filedb = DBMaker.tempFileDB().fileMmapEnableIfSupported().cleanerHackEnable().closeOnJvmShutdownWeakReference().make();
-            map = filedb.hashMap("JCoReElasticSearchReverseMapProvider").
-                    keySerializer(Serializer.STRING).valueSerializer(Serializer.DOUBLE).
-                    create();
-            InputStreamReader is;
-            try {
-                is = new InputStreamReader(JCoReTools.resolveExternalResourceGzipInputStream(aData));
-            } catch (Exception e) {
-                throw new IOException("Resource " + aData.getUri() + " not found");
-            }
-            br = new BufferedReader(is);
-            String line;
-            String splitExpression = "\t";
-            while ((line = br.readLine()) != null) {
-                if (line.trim().length() == 0 || line.startsWith("#"))
-                    continue;
-                String[] split = line.split(splitExpression);
-                if (split.length != 2) {
-                    splitExpression = "\\s+";
-                    split = line.split(splitExpression);
-                }
-                if (split.length != 2)
-                    throw new IllegalArgumentException("Format error in map file: Expected format is 'originalValue<tab>mappedValue' but the input line '" + line
-                            + "' has " + split.length + " columns.");
-                map.put(split[1].trim(), Double.parseDouble(split[0].trim()));
-            }
-        } catch (IOException e) {
-            throw new ResourceInitializationException(e);
-        } finally {
-            try {
-                if (null != br)
-                    br.close();
-            } catch (IOException e) {
-                throw new ResourceInitializationException(e);
-            }
-        }
-
-    }
-
-    /**
-     * Returns the loaded map. All strings - keys and values - are internalized.
-     */
-    @Override
-    public Map<String, Double> getMap() {
-        return map;
-    }
-
-}
diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/MapProvider.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/MapProvider.java
index ebd90f8ed..0b8393ed7 100644
--- a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/MapProvider.java
+++ b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/MapProvider.java
@@ -1,6 +1,20 @@
 package de.julielab.jcore.consumer.es.sharedresources;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 public class MapProvider extends AbstractMapProvider<String, String> {
+    private final static Logger log = LoggerFactory.getLogger(MapProvider.class);
+
+    public MapProvider() {
+        super(log);
+    }
+
+    @Override
+    protected void put(String key, String value) {
+        map.put(key, value);
+    }
+
     @Override
     protected String getValue(String valueString) {
         return valueString.trim().intern();
diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentIndexAddonTermsProvider.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentIndexAddonTermsProvider.java
new file mode 100644
index 000000000..b20d466ef
--- /dev/null
+++ b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentIndexAddonTermsProvider.java
@@ -0,0 +1,156 @@
+package de.julielab.jcore.consumer.es.sharedresources;
+
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.CacheLoader;
+import com.google.common.cache.LoadingCache;
+import org.apache.commons.io.FilenameUtils;
+import org.apache.commons.lang3.NotImplementedException;
+import org.apache.uima.resource.DataResource;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.jetbrains.annotations.NotNull;
+import org.jetbrains.annotations.Nullable;
+import org.slf4j.Logger;
+
+import java.io.File;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.time.Duration;
+import java.util.Collection;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.concurrent.ExecutionException;
+
+/**
+ * Reads the original input file and converts it into a persistent index. This index is re-used in subsequent pipeline runs.
+ */
+abstract public class PersistentIndexAddonTermsProvider extends AddonTermsProvider {
+    public static final int MAXIMUM_MEMCACHE_SIZE = 10000;
+    private final LoadingCache<String, Optional<String[]>> cache;
+    private StringIndex index;
+
+    public PersistentIndexAddonTermsProvider(Logger log) {
+        super(log);
+        addonTerms = new Map<>() {
+            @Override
+            public int size() {
+                return index.size();
+            }
+
+            @Override
+            public boolean isEmpty() {
+                throw new NotImplementedException();
+            }
+
+            @Override
+            public boolean containsKey(Object key) {
+                throw new NotImplementedException();
+            }
+
+            @Override
+            public boolean containsValue(Object value) {
+                throw new NotImplementedException();
+            }
+
+            @Override
+            public String[] get(Object key) {
+                try {
+                    return cache.get((String) key).orElse(null);
+                } catch (ExecutionException e) {
+                    log.error("Could not retrieve value from the cache for key '{}'.", key);
+                    throw new IllegalStateException();
+                }
+            }
+
+            @Nullable
+            @Override
+            public String[] put(String key, String[] value) {
+                throw new NotImplementedException();
+            }
+
+            @Override
+            public String[] remove(Object key) {
+                throw new NotImplementedException();
+            }
+
+            @Override
+            public void putAll(@NotNull Map<? extends String, ? extends String[]> m) {
+                throw new NotImplementedException();
+            }
+
+            @Override
+            public void clear() {
+                throw new NotImplementedException();
+            }
+
+            @NotNull
+            @Override
+            public Set<String> keySet() {
+                throw new NotImplementedException();
+            }
+
+            @NotNull
+            @Override
+            public Collection<String[]> values() {
+                throw new NotImplementedException();
+            }
+
+            @NotNull
+            @Override
+            public Set<Entry<String, String[]>> entrySet() {
+                throw new NotImplementedException();
+            }
+        };
+        cache = CacheBuilder.newBuilder().maximumSize(MAXIMUM_MEMCACHE_SIZE).expireAfterAccess(Duration.ofHours(1)).build(new CacheLoader<>() {
+            @Override
+            public Optional<String[]> load(String s) {
+                return Optional.ofNullable(index.getArray(s));
+            }
+        });
+    }
+
+    protected abstract StringIndex initializeIndex(String cachePath);
+
+    @Override
+    public void load(DataResource aData) throws ResourceInitializationException {
+        // prepare the persistent index
+        URI uri = aData.getUri();
+        File indexFile;
+        boolean loadData = true;
+        try {
+            File resourceFile = new File(uri);
+            String resourceFileName = FilenameUtils.getName(uri.toURL().getPath());
+            indexFile = new File("es-consumer-cache", resourceFileName);
+            if (resourceFile.exists() && indexFile.exists() && resourceFile.lastModified() > indexFile.lastModified()) {
+                log.info("Resource file {} is newer than the existing cached index at {}. Creating new index.", resourceFile, indexFile);
+                indexFile.delete();
+            } else {
+                boolean indexFileExisted = indexFile.exists();
+                if (!indexFileExisted) {
+                    log.info("Creating persistent cache for resource {} at {}.", uri, indexFile);
+                }
+                else {
+                    log.info("Using existing persistent cache {} for resource {}.", indexFile, uri);
+                    loadData = false;
+                }
+            }
+            index = initializeIndex(indexFile.getAbsolutePath());
+        } catch (MalformedURLException e) {
+            log.error("Could obtain file name from resource URI '{}'", uri, e);
+            throw new IllegalStateException(e);
+        }
+        if (loadData) {
+            super.load(aData);
+            if (index.requiresExplicitCommit())
+                index.commit();
+        }
+        index.close();
+        index.open();
+        log.info("There are {} entries in the cache at {}.", index.size(), indexFile);
+    }
+
+    @Override
+    protected void put(String term, String[] addonArray) {
+        index.put(term, addonArray);
+    }
+}
diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentLuceneIndexAddonTermsProvider.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentLuceneIndexAddonTermsProvider.java
new file mode 100644
index 000000000..40ac75e83
--- /dev/null
+++ b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentLuceneIndexAddonTermsProvider.java
@@ -0,0 +1,17 @@
+package de.julielab.jcore.consumer.es.sharedresources;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class PersistentLuceneIndexAddonTermsProvider extends PersistentIndexAddonTermsProvider{
+    private final static Logger log = LoggerFactory.getLogger(PersistentLuceneIndexAddonTermsProvider.class);
+
+    public PersistentLuceneIndexAddonTermsProvider() {
+        super(log);
+    }
+
+    @Override
+    protected StringIndex initializeIndex(String cachePath) {
+        return new LuceneIndex(cachePath);
+    }
+}
diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentLuceneStringMapProvider.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentLuceneStringMapProvider.java
new file mode 100644
index 000000000..c49ed7350
--- /dev/null
+++ b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentLuceneStringMapProvider.java
@@ -0,0 +1,17 @@
+package de.julielab.jcore.consumer.es.sharedresources;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class PersistentLuceneStringMapProvider extends PersistentStringIndexMapProvider {
+    private final static Logger log = LoggerFactory.getLogger(PersistentLuceneStringMapProvider.class);
+
+    public PersistentLuceneStringMapProvider() {
+        super(log);
+    }
+
+    @Override
+    protected StringIndex initializeIndex(String cachePath) {
+        return new LuceneIndex(cachePath);
+    }
+}
diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentStringIndexMapProvider.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentStringIndexMapProvider.java
new file mode 100644
index 000000000..93dd296f2
--- /dev/null
+++ b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentStringIndexMapProvider.java
@@ -0,0 +1,163 @@
+package de.julielab.jcore.consumer.es.sharedresources;
+
+import com.google.common.cache.CacheBuilder;
+import com.google.common.cache.CacheLoader;
+import com.google.common.cache.LoadingCache;
+import org.apache.commons.io.FilenameUtils;
+import org.apache.commons.lang3.NotImplementedException;
+import org.apache.uima.resource.DataResource;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.jetbrains.annotations.NotNull;
+import org.jetbrains.annotations.Nullable;
+import org.slf4j.Logger;
+
+import java.io.File;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.time.Duration;
+import java.util.Collection;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Set;
+import java.util.concurrent.ExecutionException;
+
+abstract public class PersistentStringIndexMapProvider extends AbstractMapProvider<String, String> {
+    public static final int MAXIMUM_MEMCACHE_SIZE = 10000;
+    private final LoadingCache<String, Optional<String>> cache;
+    private StringIndex index;
+
+    public PersistentStringIndexMapProvider(Logger log) {
+        super(log);
+        map = new Map<>() {
+            @Override
+            public int size() {
+                return index.size();
+            }
+
+            @Override
+            public boolean isEmpty() {
+                throw new NotImplementedException();
+            }
+
+            @Override
+            public boolean containsKey(Object key) {
+                throw new NotImplementedException();
+            }
+
+            @Override
+            public boolean containsValue(Object value) {
+                throw new NotImplementedException();
+            }
+
+            @Override
+            public String get(Object key) {
+                try {
+                    return cache.get((String) key).orElse(null);
+                } catch (ExecutionException e) {
+                    log.error("Could not retrieve value from the cache for key '{}'.", key);
+                    throw new IllegalStateException();
+                }
+            }
+
+            @Nullable
+            @Override
+            public String put(String key, String value) {
+                throw new NotImplementedException();
+            }
+
+            @Override
+            public String remove(Object key) {
+                throw new NotImplementedException();
+            }
+
+            @Override
+            public void putAll(@NotNull Map<? extends String, ? extends String> m) {
+                throw new NotImplementedException();
+            }
+
+            @Override
+            public void clear() {
+                throw new NotImplementedException();
+            }
+
+            @NotNull
+            @Override
+            public Set<String> keySet() {
+                throw new NotImplementedException();
+            }
+
+            @NotNull
+            @Override
+            public Collection<String> values() {
+                throw new NotImplementedException();
+            }
+
+            @NotNull
+            @Override
+            public Set<Map.Entry<String, String>> entrySet() {
+                throw new NotImplementedException();
+            }
+        };
+        cache = CacheBuilder.newBuilder().maximumSize(MAXIMUM_MEMCACHE_SIZE).expireAfterAccess(Duration.ofHours(1)).build(new CacheLoader<>() {
+            @Override
+            public Optional<String> load(String s) {
+                return Optional.ofNullable(index.get(s));
+            }
+        });
+    }
+
+    @Override
+    protected void put(String key, String value) {
+        index.put(key, value);
+    }
+
+    protected abstract StringIndex initializeIndex(String cachePath);
+
+    @Override
+    public void load(DataResource aData) throws ResourceInitializationException {
+        // prepare the persistent index
+        URI uri = aData.getUri();
+        File indexFile;
+        boolean loadData = true;
+        try {
+            File resourceFile = new File(uri);
+            String resourceFileName = FilenameUtils.getName(uri.toURL().getPath());
+            indexFile = new File("es-consumer-cache", resourceFileName);
+            if (resourceFile.exists() && indexFile.exists() && resourceFile.lastModified() > indexFile.lastModified()) {
+                log.info("Resource file {} is newer than the existing cached index at {}. Creating new index.", resourceFile, indexFile);
+                indexFile.delete();
+            } else {
+                boolean indexFileExisted = indexFile.exists();
+                if (!indexFileExisted) {
+                    log.info("Creating persistent cache for resource {} at {}.", uri, indexFile);
+                }
+                else {
+                    log.info("Using existing persistent cache {} for resource {}.", indexFile, uri);
+                    loadData = false;
+                }
+            }
+            index = initializeIndex(indexFile.getAbsolutePath());
+        } catch (MalformedURLException e) {
+            log.error("Could obtain file name from resource URI '{}'", uri, e);
+            throw new IllegalStateException(e);
+        }
+        if (loadData) {
+            super.load(aData);
+            if (index.requiresExplicitCommit())
+                index.commit();
+        }
+        index.close();
+        index.open();
+        log.info("There are {} entries in the cache at {}.", index.size(), indexFile);
+    }
+
+    @Override
+    protected String getValue(String valueString) {
+        return valueString;
+    }
+
+    @Override
+    protected String getKey(String keyString) {
+        return keyString;
+    }
+}
diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/ReversedDoubleMapProvider.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/ReversedDoubleMapProvider.java
index d9caa600a..fc1184319 100644
--- a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/ReversedDoubleMapProvider.java
+++ b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/ReversedDoubleMapProvider.java
@@ -1,11 +1,21 @@
 package de.julielab.jcore.consumer.es.sharedresources;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
 public class ReversedDoubleMapProvider extends AbstractMapProvider<String, Double> {
+    private final static Logger log = LoggerFactory.getLogger(ReversedDoubleMapProvider.class);
 
     public ReversedDoubleMapProvider() {
+        super(log);
         this.reverse = true;
     }
 
+    @Override
+    protected void put(String key, Double value) {
+        map.put(key, value);
+    }
+
     @Override
     protected Double getValue(String valueString) {
         return Double.parseDouble(valueString.trim());
diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/StringIndex.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/StringIndex.java
new file mode 100644
index 000000000..733dcc213
--- /dev/null
+++ b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/StringIndex.java
@@ -0,0 +1,25 @@
+package de.julielab.jcore.consumer.es.sharedresources;
+
+public interface StringIndex {
+    String get(String key);
+
+    String[] getArray(String key);
+
+    void put(String key, String value);
+
+    void put(String key, String[] value);
+
+    void commit();
+
+    boolean requiresExplicitCommit();
+
+    void close();
+
+    void open();
+
+    int size();
+
+    default String getName() {
+        return getClass().getSimpleName();
+    }
+}

From efaa7e1d7c693a77344af0b797e00515a35cdead Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 28 Jul 2021 14:33:46 +0200
Subject: [PATCH 087/269] DocumentReleaseCheckpoint: Fixed a bug where
 documents were not marked as being processed.

The issue was that we counted how often a document was released and compared that number to the number of registered components. However, when a component de-registered itself after it had already released documents, the release count of those documents was too high relative to the number of remaining registered components. The remedy - without introducing other sources of error - was to not just count the number of releases per document but to explicitly track the componentIds that had released each document.
---
 .../jcore/ae/checkpoint/DBCheckpointAE.java   | 19 +++++++++++---
 .../checkpoint/DocumentReleaseCheckpoint.java | 26 +++++++++++++------
 .../es/sharedresources/LuceneIndex.java       |  8 +++++-
 3 files changed, 41 insertions(+), 12 deletions(-)

diff --git a/jcore-db-checkpoint-ae/src/main/java/de/julielab/jcore/ae/checkpoint/DBCheckpointAE.java b/jcore-db-checkpoint-ae/src/main/java/de/julielab/jcore/ae/checkpoint/DBCheckpointAE.java
index 264c32999..cc9b29c8d 100644
--- a/jcore-db-checkpoint-ae/src/main/java/de/julielab/jcore/ae/checkpoint/DBCheckpointAE.java
+++ b/jcore-db-checkpoint-ae/src/main/java/de/julielab/jcore/ae/checkpoint/DBCheckpointAE.java
@@ -109,7 +109,7 @@ public void batchProcessComplete() throws AnalysisEngineProcessException {
     @Override
     public void collectionProcessComplete() throws AnalysisEngineProcessException {
         super.collectionProcessComplete();
-        log.debug("BatchProcessComplete called, stashing {} documents to be ready for marked as being finished", docIds.size());
+        log.debug("CollectionProcessComplete called, stashing {} documents to be ready for marked as being finished", docIds.size());
         if (indicateFinished)
             docReleaseCheckpoint.release(jedisSyncKey, docIds.stream());
         try (CoStoSysConnection conn = dbc.obtainOrReserveConnection()) {
@@ -121,6 +121,7 @@ public void collectionProcessComplete() throws AnalysisEngineProcessException {
     }
 
     private void customBatchProcessingComplete() throws AnalysisEngineProcessException {
+        log.debug("CustomBatchProcessComplete called, stashing {} documents to be ready for marked as being finished", docIds.size());
         if (indicateFinished)
             docReleaseCheckpoint.release(jedisSyncKey, docIds.stream());
         try (CoStoSysConnection conn = dbc.obtainOrReserveConnection()) {
@@ -199,13 +200,24 @@ private void setLastComponent(CoStoSysConnection conn, String
             sqlMarkIsProcessed = String.format("UPDATE %s SET %s='%s', %s=TRUE, %s=FALSE WHERE %s", subsetTableName, Constants.LAST_COMPONENT, componentDbName, Constants.IS_PROCESSED, Constants.IN_PROCESS, primaryKeyPsString);
 
         if (!documentIdsToSetLastComponent.isEmpty()) {
-            log.debug("Setting the last component to {} for {} documents", componentDbName, documentIdsToSetLastComponent.size());
+            log.debug("Setting the last component to '{}' for {} documents", componentDbName, documentIdsToSetLastComponent.size());
             updateSubsetTable(conn, documentIdsToSetLastComponent, sqlSetLastComponent);
         }
         if (markIsProcessed) {
-            log.debug("Marking {} documents to having been processed by component \"{}\".", documentIdsToSetLastComponent.size(), componentDbName);
+            log.debug("Marking {} documents to having been processed by component \"{}\".", processedDocumentIds.size(), componentDbName);
+            log.debug("SQL: {}", sqlMarkIsProcessed);
             updateSubsetTable(conn, processedDocumentIds, sqlMarkIsProcessed);
         }
+        try {
+            log.debug("Connection is auto commit: {}", conn.getAutoCommit());
+            if (!conn.getAutoCommit()) {
+                log.debug("Committing changes");
+                conn.commit();
+            }
+        } catch (SQLException e) {
+            log.error("Could not commit the document processing status changes.", e);
+            throw new AnalysisEngineProcessException(e);
+        }
     }
 
     private void updateSubsetTable(CoStoSysConnection conn, Collection<DocumentId> documentIdsToMark, String sql) throws AnalysisEngineProcessException {
@@ -222,6 +234,7 @@ private void updateSubsetTable(CoStoSysConnection conn, Collection<DocumentId> d
                     ps.addBatch();
                 }
                 try {
+                    log.debug("Executing SQL command batch for being processed.");
                     ps.executeBatch();
                 } catch (BatchUpdateException e) {
                     if (e.getMessage().contains("deadlock detected")) {
diff --git a/jcore-db-checkpoint-ae/src/main/java/de/julielab/jcore/ae/checkpoint/DocumentReleaseCheckpoint.java b/jcore-db-checkpoint-ae/src/main/java/de/julielab/jcore/ae/checkpoint/DocumentReleaseCheckpoint.java
index cb94a8aa3..fd40fa5e1 100644
--- a/jcore-db-checkpoint-ae/src/main/java/de/julielab/jcore/ae/checkpoint/DocumentReleaseCheckpoint.java
+++ b/jcore-db-checkpoint-ae/src/main/java/de/julielab/jcore/ae/checkpoint/DocumentReleaseCheckpoint.java
@@ -1,11 +1,11 @@
 package de.julielab.jcore.ae.checkpoint;
 
-import com.google.common.collect.HashMultiset;
-import com.google.common.collect.Multiset;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Map;
 import java.util.Set;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
@@ -33,18 +33,18 @@ public class DocumentReleaseCheckpoint {
             "This is useful when document data is sent batchwise to the database by multiple components: In the case of a crash or manual cancellation of a pipeline run without synchronization is might happen " +
             "that some components have sent their data and others haven't at the time of termination. To avoid an inconsistent database state," +
             "a document will only be marked as finished " +
-            "processed in the JeDIS subset table if all synchronied components in the pipeline have released the document. " +
+            "processed in the JeDIS subset table if all synchronized components in the pipeline have released the document. " +
             "This is done by the DBCheckpointAE which must be at the end of the pipeline and have the 'IndicateFinished' parameter set to 'true'. " +
             "Synchronized components are those that disclose this parameter and have a value set to it.";
     public static final String PARAM_JEDIS_SYNCHRONIZATION_KEY = "JedisSynchronizationKey";
     private final static Logger log = LoggerFactory.getLogger(DocumentReleaseCheckpoint.class);
     private static DocumentReleaseCheckpoint checkpoint;
-    private Multiset<DocumentId> releasedDocuments;
+    private Map<DocumentId, Set<String>> releasedDocuments;
     private Set<String> registeredComponents;
     private long lastwarning = 1000;
 
     private DocumentReleaseCheckpoint() {
-        releasedDocuments = HashMultiset.create();
+        releasedDocuments = new HashMap<>();
         registeredComponents = new HashSet<>();
     }
 
@@ -83,7 +83,15 @@ public void release(String componentKey, Stream<DocumentId> releasedDocumentIds)
         if (!registeredComponents.contains(componentKey))
             throw new IllegalArgumentException("No component is registered for key " + componentKey);
         synchronized (releasedDocuments) {
-            releasedDocumentIds.forEach(d -> releasedDocuments.add(d));
+            releasedDocumentIds.forEach(d -> releasedDocuments.compute(d, (k, v) -> {
+                if (v == null) {
+                    Set<String> ret = new HashSet<>();
+                    ret.add(componentKey);
+                    return ret;
+                }
+                v.add(componentKey);
+                return v;
+            }));
         }
     }
 
@@ -100,9 +108,11 @@ public Set<DocumentId> getReleasedDocumentIds() {
         // Get all documents released by all components
         Set<DocumentId> returnedIds;
         synchronized (releasedDocuments) {
-            returnedIds = this.releasedDocuments.elementSet().stream().filter(e -> this.releasedDocuments.count(e) == getNumberOfRegisteredComponents()).collect(Collectors.toSet());
+            log.trace("The following {} components are registered for document release: {}", getNumberOfRegisteredComponents(), registeredComponents);
+            log.trace("Released document counts: {}", this.releasedDocuments);
+            returnedIds = this.releasedDocuments.keySet().stream().filter(k -> this.releasedDocuments.get(k).containsAll(this.registeredComponents)).collect(Collectors.toSet());
             // Remove the completely released documents from the pool of potentially not yet completely released documents.
-            returnedIds.forEach(id -> this.releasedDocuments.remove(id, Integer.MAX_VALUE));
+            returnedIds.forEach(id -> this.releasedDocuments.remove(id));
         }
         log.debug("Returning {} documents released by all registered components. {} document IDs remain that have not yet been released by all registered components.", returnedIds.size(), this.releasedDocuments.size());
         if (this.releasedDocuments.size() > lastwarning) {
diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/LuceneIndex.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/LuceneIndex.java
index 204f07abb..a28c0a5c1 100644
--- a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/LuceneIndex.java
+++ b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/LuceneIndex.java
@@ -11,6 +11,7 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.File;
 import java.io.IOException;
 import java.nio.file.Path;
 import java.util.Arrays;
@@ -24,14 +25,19 @@ public class LuceneIndex implements StringIndex {
     public LuceneIndex(String indexDirectory) {
         try {
             Path lucene = Path.of(indexDirectory);
+            File directoryFile = lucene.toFile();
+            boolean indexExists = directoryFile.exists() && directoryFile.isDirectory() && directoryFile.list().length != 0;
             directory = NIOFSDirectory.open(lucene);
             // Do not open a writer to an existing index. This causes locking issues when starting multiple
             // pipelines in parallel.
             // Of course, the first pipeline still needs to create the index, so this must be a one-time effort
             // that has to be completed before the other pipelines are started.
-            if (!lucene.toFile().exists()) {
+            if (!indexExists) {
+                log.debug("Creating index writer for index directory {}.", indexDirectory);
                 IndexWriterConfig iwc = new IndexWriterConfig();
                 iw = new IndexWriter(directory, iwc);
+            } else {
+                log.debug("Index directory {} already");
             }
         } catch (IOException e) {
             log.error("could not initialize Lucene index", e);

From 273c876671e831f431f83f11d61cf2d15f599b07 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 29 Jul 2021 16:56:17 +0200
Subject: [PATCH 088/269] XMIDbWriter: Fixed a bug where old annotation values
 were not set to `null` in the DB table.

When a former run had created an annotation value, e.g. because some named entities were found, and the new run found no entities, the old value was not removed. This is fixed now.
---
 .../jcore/reader/xmi/XmiDBMultiplier.java         |  5 +++--
 .../julielab/jcore/consumer/xmi/XMIDBWriter.java  |  2 +-
 .../jcore/consumer/xmi/XmiDataInserter.java       | 15 +++++++++------
 3 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/XmiDBMultiplier.java b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/XmiDBMultiplier.java
index a29dcb8dd..cb0306216 100644
--- a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/XmiDBMultiplier.java
+++ b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/XmiDBMultiplier.java
@@ -90,8 +90,9 @@ public AbstractCas next() throws AnalysisEngineProcessException {
                 populateCas(jCas);
             }
         } catch (Throwable throwable) {
-            log.error("Error while reading document from the database: ", throwable);
-            throw throwable;
+            log.error("Error while reading document from the database. Releasing the CAS. ", throwable);
+            jCas.release();
+            throw new AnalysisEngineProcessException(throwable);
         }
         return jCas;
     }
diff --git a/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XMIDBWriter.java b/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XMIDBWriter.java
index 3596db300..b9594dda3 100644
--- a/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XMIDBWriter.java
+++ b/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XMIDBWriter.java
@@ -737,7 +737,7 @@ private void createAnnotationModules() throws AnalysisEngineProcessException {
                 // adapt the map keys to table names (currently, the keys are the
                 // Java type names)
                 splitXmiData = convertModuleLabelsToColumnNames(splitXmiData);
-
+                log.trace("The following columns have XMI data: {}", splitXmiData.keySet());
 
                 for (String columnName : splitXmiData.keySet()) {
                     boolean isBaseDocumentColumn = columnName.equals(XmiSplitConstants.BASE_DOC_COLUMN);
diff --git a/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XmiDataInserter.java b/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XmiDataInserter.java
index 080ffd613..1a75f474e 100644
--- a/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XmiDataInserter.java
+++ b/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XmiDataInserter.java
@@ -19,6 +19,7 @@
 import java.util.*;
 import java.util.function.Function;
 import java.util.stream.Collectors;
+import java.util.stream.Stream;
 
 public class XmiDataInserter {
 
@@ -65,16 +66,16 @@ public XmiDataInserter(Set<String> annotationModuleColumnNames,
      * @throws AnalysisEngineProcessException
      */
     public void sendXmiDataToDatabase(String xmiTableName, List<XmiData> annotationModules, String subsetTableName, Boolean storeBaseDocument, Boolean deleteObsolete, Map<DocumentId, String> shaMap) throws XmiDataInsertionException {
-        if (log.isTraceEnabled()) {
-            log.trace("Sending XMI data for {} tables to the database", annotationModules.size());
-            log.trace("Sending {} XMI data items", annotationModules.size());
-        }
+        log.trace("Sending {} XMI data items", annotationModules.size());
         final Map<DocumentId, List<XmiData>> dataByDoc = annotationModules.stream().collect(Collectors.groupingBy(XmiData::getDocId));
         // Collect all document IDs we want to add something for into the database. This can be annotations or the hash.
-        final Set<DocumentId> documentIdsWithValues = shaMap != null ? Sets.union(dataByDoc.keySet(), shaMap.keySet()) : dataByDoc.keySet();
+         final Set<DocumentId> documentIdsWithData = shaMap != null ? Sets.union(dataByDoc.keySet(), shaMap.keySet()) : dataByDoc.keySet();
+        log.trace("There are {} documents with values to be updated in the database.", documentIdsWithData.size());
         class RowIterator implements Iterator<Map<String, Object>> {
 
-            private Iterator<DocumentId> docIdIterator = documentIdsWithValues.iterator();
+            // Add documents that have been processed but no data. We need to do this to override potentially existing
+            // annotation values with null to remove them.
+            private Iterator<DocumentId> docIdIterator = Stream.concat(documentIdsWithData.stream(), processedDocumentIds.stream()).distinct().iterator();
             private FieldConfig fieldConfig = dbc.getFieldConfiguration(schemaDocument);
             private List<Map<String, String>> fields = fieldConfig.getFields();
 
@@ -141,7 +142,9 @@ public Map<String, Object> next() {
                 // Set columns without a value to null to delete a potentially existing value.
                 if (updateMode) {
                     Set<String> annotationColumnsWithValues = dataList.stream().map(XmiData::getColumnName).collect(Collectors.toSet());
+                    log.trace("Annotation columns with values: {}", annotationColumnsWithValues);
                     final Sets.SetView<String> columnsWithoutValues = Sets.difference(annotationModuleColumnNames, annotationColumnsWithValues);
+                    log.trace("Annotation columns without values: {}", columnsWithoutValues);
                     columnsWithoutValues.forEach(col -> {
                         row.put(col, null);
                         log.trace("{}=null", col);

From d6dd6513cb4e99b727191cc2f74e2dc375876ae4 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 20 Sep 2021 13:30:26 +0200
Subject: [PATCH 089/269] Support feature values for annotation adder. Fixes
 #124

---
 jcore-annotation-adder-ae/README.md           |  2 +-
 .../AnnotationAdderHelper.java                | 33 +++++++++++--
 .../TextAnnotationListAdder.java              |  3 +-
 .../annotationformat/AnnotationFormat.java    |  2 +
 .../DocumentClassAnnotationFormat.java        |  5 ++
 .../SimpleTSVEntityAnnotationFormat.java      | 21 +++++++-
 ...tyWithDocumentTextShaAnnotationFormat.java |  5 ++
 .../ExternalTextAnnotation.java               |  6 +++
 .../FileAnnotationSource.java                 |  3 +-
 .../InMemoryFileTextAnnotationProvider.java   |  3 ++
 .../AnnotationAdderAnnotatorTest.java         | 48 ++++++++++++++++++-
 .../AnnotationAdderHelperTest.java            | 24 ++++++++++
 .../SimpleTSVEntityAnnotationFormatTest.java  | 27 +++++++++++
 ...eannotations_character_offsets_payload.tsv |  4 ++
 14 files changed, 176 insertions(+), 10 deletions(-)
 create mode 100644 jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderHelperTest.java
 create mode 100644 jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityAnnotationFormatTest.java
 create mode 100644 jcore-annotation-adder-ae/src/test/resources/geneannotations_character_offsets_payload.tsv

diff --git a/jcore-annotation-adder-ae/README.md b/jcore-annotation-adder-ae/README.md
index bf3d32b2c..cf0a558ff 100644
--- a/jcore-annotation-adder-ae/README.md
+++ b/jcore-annotation-adder-ae/README.md
@@ -28,7 +28,7 @@ For document class annotations, no offset mode is required, obviously. Whether t
 
 **3. External Resource Dependencies**
 
-This component requires an external resource given with the `AnnotationSource` key. This dependency definition is present in the provided default descriptor.
+This component requires an external resource given with the `AnnotationSource` key. This dependency definition is pre-configured in the provided default descriptor and must be added to point to the correct annotation source.
 
 The external dependency may currently be a file which is read completely into an in-memory map by the `de.julielab.jcore.ae.annotationadder.annotationsources.InMemoryFileTextAnnotationProvider` class for textual annotations with offsets or by the `de.julielab.jcore.ae.annotationadder.annotationsources.InMemoryFileDocumentClassAnnotationProvider` class for document classes. Both provider classes implement the required external resource interface `de.julielab.jcore.ae.annotationadder.annotationsources.AnnotationProvider`.
 
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderHelper.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderHelper.java
index 97a2d8447..a3c87e749 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderHelper.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderHelper.java
@@ -1,8 +1,10 @@
 package de.julielab.jcore.ae.annotationadder;
 
+import de.julielab.jcore.ae.annotationadder.annotationrepresentations.ExternalTextAnnotation;
 import de.julielab.jcore.ae.annotationadder.annotationrepresentations.TextAnnotation;
 import de.julielab.jcore.types.Sentence;
 import de.julielab.jcore.types.Token;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.uima.cas.CASException;
 import org.apache.uima.cas.FSIterator;
 import org.apache.uima.fit.util.JCasUtil;
@@ -11,10 +13,9 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+import java.util.*;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
@@ -29,7 +30,10 @@ public class AnnotationAdderHelper {
     private Map<Sentence, List<Token>> tokensBySentences;
     private Matcher wsFinder = Pattern.compile("\\s").matcher("");
     private Matcher nonWsMatcher = Pattern.compile("[^\\s]+").matcher("");
-
+    /**
+     * Caches methods for feature
+     */
+    private Map<String, Method> featureSetters;
 
     public void setAnnotationOffsetsRelativeToDocument(Annotation annotation, TextAnnotation a, AnnotationAdderConfiguration configuration) throws CASException, AnnotationOffsetException {
         if (configuration.getOffsetMode() == AnnotationAdderAnnotator.OffsetMode.CHARACTER) {
@@ -140,4 +144,23 @@ public List<Token> createTokenList(JCas jCas, AnnotationAdderConfiguration confi
         }
         return tokenList;
     }
+
+    public void setAnnotationPayloadsToFeatures(Annotation annotation, ExternalTextAnnotation a) {
+        Collection<String> keys = a.getPayloadKeys();
+        if (!keys.isEmpty())
+            featureSetters = new HashMap<>();
+        try {
+            for (String key : keys) {
+                Object value = a.getPayload(key);
+                Method setter = featureSetters.get(key);
+                if (setter == null) {
+                    setter = annotation.getClass().getMethod("set" + StringUtils.capitalize(key), value.getClass());
+                    featureSetters.put(key, setter);
+                }
+                setter.invoke(annotation, value);
+            }
+        } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException e) {
+            e.printStackTrace();
+        }
+    }
 }
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/TextAnnotationListAdder.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/TextAnnotationListAdder.java
index d249cf906..40436c2cb 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/TextAnnotationListAdder.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/TextAnnotationListAdder.java
@@ -43,7 +43,7 @@ public boolean addAnnotations(AnnotationData data, AnnotationAdderHelper helper,
                 throw new IllegalArgumentException("The entity annotation type " + uimaType + " does not exist in the type system.");
             try {
                 // The sha check is supposed to compare the document text on which the annotation was made with the
-                // document text the current CAS has. If the differ, the annotations will most likely have
+                // document text the current CAS has. If they differ, the annotations will most likely have
                 // offset discrepancies which is why they won't be added and a warning will be issued.
                 final String shaFromAnnotation = (String) a.getPayload("sha");
                 boolean shaMatches = true;
@@ -60,6 +60,7 @@ public boolean addAnnotations(AnnotationData data, AnnotationAdderHelper helper,
                     if (a.getStart() >= 0) {
                         final Annotation annotation = JCoReAnnotationTools.getAnnotationByClassName(jCas, uimaType);
                         helper.setAnnotationOffsetsRelativeToDocument(annotation, a, configuration);
+                        helper.setAnnotationPayloadsToFeatures(annotation, a);
                         annotation.addToIndexes();
                     } else {
                         log.trace("ExternalAnnotation for document {} has no entity offsets or offsets < 0, not adding anything to the CAS.", a.getDocumentId());
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/AnnotationFormat.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/AnnotationFormat.java
index cb28d7d9f..46d652dcf 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/AnnotationFormat.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/AnnotationFormat.java
@@ -4,4 +4,6 @@
 
 public interface AnnotationFormat<T extends AnnotationData> {
     T parse(String data);
+
+    void withHeader(boolean withHeader);
 }
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/DocumentClassAnnotationFormat.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/DocumentClassAnnotationFormat.java
index 6376e803d..48f03d136 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/DocumentClassAnnotationFormat.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/DocumentClassAnnotationFormat.java
@@ -17,4 +17,9 @@ public ExternalDocumentClassAnnotation parse(String data) {
         String type = null;
         return new ExternalDocumentClassAnnotation(docId, documentClass, confidence, componentId);
     }
+
+    @Override
+    public void withHeader(boolean withHeader) {
+        // does nothing
+    }
 }
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityAnnotationFormat.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityAnnotationFormat.java
index b35e4f26c..1a71edfcc 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityAnnotationFormat.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityAnnotationFormat.java
@@ -3,6 +3,9 @@
 import de.julielab.jcore.ae.annotationadder.annotationrepresentations.ExternalTextAnnotation;
 
 public class SimpleTSVEntityAnnotationFormat implements AnnotationFormat<ExternalTextAnnotation> {
+    private String[] header;
+    private boolean withHeader;
+
     @Override
     public ExternalTextAnnotation parse(String data) {
         if (data == null || data.startsWith("#"))
@@ -10,12 +13,28 @@ public ExternalTextAnnotation parse(String data) {
         final String[] record = data.split("\t");
         if (record.length < 3)
             throw new IllegalArgumentException("Expected a 3 or 4-column format providing document ID, begin, end and UIMA type (optional if the default type is set to the AnnotationAdderAnnotator) for the annotation but got " + record.length + " columns: " + data);
+        if (withHeader && header == null) {
+            header = record;
+            return null;
+        }
         String docId = record[0];
         int begin = Integer.parseInt(record[1]);
         int end = Integer.parseInt(record[2]);
         String type = null;
         if (record.length > 3)
             type = record[3];
-        return new ExternalTextAnnotation(docId, begin, end, type);
+        ExternalTextAnnotation externalTextAnnotation = new ExternalTextAnnotation(docId, begin, end, type);
+        if (record.length > 4) {
+            if (header == null)
+                throw new IllegalStateException("There are columns exceeding the default 4-column format but no header was given to deliver their names.");
+            for (int i = 4; i < record.length; i++)
+                externalTextAnnotation.addPayload(header[i], record[i]);
+        }
+        return externalTextAnnotation;
+    }
+
+    @Override
+    public void withHeader(boolean withHeader) {
+        this.withHeader = withHeader;
     }
 }
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityWithDocumentTextShaAnnotationFormat.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityWithDocumentTextShaAnnotationFormat.java
index f46893595..9332a9d93 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityWithDocumentTextShaAnnotationFormat.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityWithDocumentTextShaAnnotationFormat.java
@@ -21,4 +21,9 @@ public ExternalTextAnnotation parse(String data) {
         externalTextAnnotation.addPayload("sha", sha);
         return externalTextAnnotation;
     }
+
+    @Override
+    public void withHeader(boolean withHeader) {
+        // does nothing
+    }
 }
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationrepresentations/ExternalTextAnnotation.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationrepresentations/ExternalTextAnnotation.java
index bd1408f47..7c1dd7c03 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationrepresentations/ExternalTextAnnotation.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationrepresentations/ExternalTextAnnotation.java
@@ -1,5 +1,7 @@
 package de.julielab.jcore.ae.annotationadder.annotationrepresentations;
 
+import java.util.Collection;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -59,4 +61,8 @@ public void addPayload(String key, Object value) {
     public Object getPayload(String key) {
         return payload != null ? payload.get(key) : null;
     }
+
+    public Collection<String> getPayloadKeys() {
+        return payload != null ? payload.keySet() : Collections.emptySet();
+    }
 }
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/FileAnnotationSource.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/FileAnnotationSource.java
index 4e6ba0a88..c2a4cb586 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/FileAnnotationSource.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/FileAnnotationSource.java
@@ -12,6 +12,7 @@
 import java.io.File;
 import java.io.IOException;
 import java.util.Map;
+import java.util.Objects;
 import java.util.stream.Collectors;
 
 public class FileAnnotationSource<T extends AnnotationData> implements AnnotationSource<AnnotationList<T>> {
@@ -25,7 +26,7 @@ public FileAnnotationSource(AnnotationFormat<T> format) {
 
     public void loadAnnotations(File annotationfile) {
         try (BufferedReader br = FileUtilities.getReaderFromFile(annotationfile)) {
-            entitiesByDocId = br.lines().map(format::parse).collect(Collectors.groupingBy(AnnotationData::getDocumentId, Collectors.toCollection(AnnotationList::new)));
+            entitiesByDocId = br.lines().map(format::parse).filter(Objects::nonNull).collect(Collectors.groupingBy(AnnotationData::getDocumentId, Collectors.toCollection(AnnotationList::new)));
         } catch (IOException e) {
             e.printStackTrace();
         }
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/InMemoryFileTextAnnotationProvider.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/InMemoryFileTextAnnotationProvider.java
index 6de11f4d3..411223e98 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/InMemoryFileTextAnnotationProvider.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/InMemoryFileTextAnnotationProvider.java
@@ -15,6 +15,7 @@
 
 public class InMemoryFileTextAnnotationProvider implements AnnotationProvider<AnnotationList> {
     public static final String PARAM_ANNOTATION_FORMAT = "AnnotationFormatClass";
+    public static final String PARAM_WITH_HEADER = "WithHeader";
     private final static Logger log = LoggerFactory.getLogger(InMemoryFileTextAnnotationProvider.class);
     private AnnotationSource<AnnotationList> annotationSource;
 
@@ -27,9 +28,11 @@ public AnnotationList<ExternalTextAnnotation> getAnnotations(String id) {
     public void load(DataResource dataResource) throws ResourceInitializationException {
         final ConfigurationParameterSettings parameterSettings = dataResource.getMetaData().getConfigurationParameterSettings();
         final String formatClassName = (String) Optional.ofNullable(parameterSettings.getParameterValue(PARAM_ANNOTATION_FORMAT)).orElse(SimpleTSVEntityAnnotationFormat.class.getCanonicalName());
+        final boolean withHeader = (boolean) Optional.ofNullable(parameterSettings.getParameterValue(PARAM_WITH_HEADER)).orElse(false);
         AnnotationFormat<ExternalTextAnnotation> format;
         try {
             format = (AnnotationFormat<ExternalTextAnnotation>) Class.forName(formatClassName).getDeclaredConstructor().newInstance();
+            format.withHeader(withHeader);
         } catch (NoSuchMethodException | InvocationTargetException | InstantiationException | IllegalAccessException | ClassNotFoundException e) {
             log.error("Could not instantiate class {}", formatClassName);
             throw new ResourceInitializationException(e);
diff --git a/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotatorTest.java b/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotatorTest.java
index a7f76f786..6aad5e94a 100644
--- a/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotatorTest.java
+++ b/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotatorTest.java
@@ -47,7 +47,52 @@ public void testCharacterOffsets() throws Exception {
         assertThat(genes.get(1).getBegin()).isEqualTo(5);
         assertThat(genes.get(1).getEnd()).isEqualTo(10);
 
-        // Test doc2 (no gene annotations)
+        // Test doc2 (no gene annotations, there will be a warning on DEBUG level)
+        jCas.reset();
+        jCas.setDocumentText("There are no gene mentions in here");
+        Header h2 = new Header(jCas);
+        h2.setDocId("doc2");
+        h2.addToIndexes();
+        engine.process(jCas);
+        assertThat(JCasUtil.exists(jCas, Gene.class)).isFalse();
+
+        // Test doc3 (one gene annotation)
+        jCas.reset();
+        jCas.setDocumentText("PRKAVI does not exist, I think. But this is just a test so it doesn't matter.");
+        Header h3 = new Header(jCas);
+        h3.setDocId("doc3");
+        h3.addToIndexes();
+        engine.process(jCas);
+        final Gene gene = JCasUtil.selectSingle(jCas, Gene.class);
+        assertThat(gene.getBegin()).isEqualTo(0);
+        assertThat(gene.getEnd()).isEqualTo(6);
+    }
+
+    @Test
+    public void testPayload() throws Exception {
+        final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types", "de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-meta-types");
+        final ExternalResourceDescription externalResourceDescription = ExternalResourceFactory.createExternalResourceDescription(InMemoryFileTextAnnotationProvider.class, new File("src/test/resources/geneannotations_character_offsets_payload.tsv"), InMemoryFileTextAnnotationProvider.PARAM_WITH_HEADER, true);
+        final AnalysisEngine engine = AnalysisEngineFactory.createEngine(AnnotationAdderAnnotator.class, AnnotationAdderAnnotator.KEY_ANNOTATION_SOURCE, externalResourceDescription);
+        // Test doc1 (two gene annotations)
+        jCas.setDocumentText("BRCA PRKII are the genes of this sentence.");
+        final Header h = new Header(jCas);
+        h.setDocId("doc1");
+        h.addToIndexes();
+
+        engine.process(jCas);
+
+        final List<Gene> genes = new ArrayList<>(JCasUtil.select(jCas, Gene.class));
+        assertThat(genes).hasSize(2);
+
+        assertThat(genes.get(0).getBegin()).isEqualTo(0);
+        assertThat(genes.get(0).getEnd()).isEqualTo(4);
+        assertThat(genes.get(0).getSpecificType()).isEqualTo("protein");
+
+        assertThat(genes.get(1).getBegin()).isEqualTo(5);
+        assertThat(genes.get(1).getEnd()).isEqualTo(10);
+        assertThat(genes.get(1).getSpecificType()).isEqualTo("dna");
+
+        // Test doc2 (no gene annotations, there will be a warning on DEBUG level)
         jCas.reset();
         jCas.setDocumentText("There are no gene mentions in here");
         Header h2 = new Header(jCas);
@@ -66,6 +111,7 @@ public void testCharacterOffsets() throws Exception {
         final Gene gene = JCasUtil.selectSingle(jCas, Gene.class);
         assertThat(gene.getBegin()).isEqualTo(0);
         assertThat(gene.getEnd()).isEqualTo(6);
+        assertThat(gene.getComponentId()).isEqualTo("GoldData");
     }
 
     @Test
diff --git a/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderHelperTest.java b/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderHelperTest.java
new file mode 100644
index 000000000..bcb96ec08
--- /dev/null
+++ b/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderHelperTest.java
@@ -0,0 +1,24 @@
+package de.julielab.jcore.ae.annotationadder;
+
+import de.julielab.jcore.ae.annotationadder.annotationrepresentations.ExternalTextAnnotation;
+import de.julielab.jcore.types.Gene;
+import org.apache.uima.UIMAException;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.jcas.JCas;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+class AnnotationAdderHelperTest {
+
+    @Test
+    void setAnnotationPayloadsToFeatures() throws UIMAException {
+        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-biology-types");
+        Gene gene = new Gene(jCas);
+        ExternalTextAnnotation extAnnotation = new ExternalTextAnnotation("1", 0, 1, "dummy");
+        extAnnotation.addPayload("specificType", "protein");
+        AnnotationAdderHelper helper = new AnnotationAdderHelper();
+        helper.setAnnotationPayloadsToFeatures(gene, extAnnotation);
+        assertEquals("protein", gene.getSpecificType());
+    }
+}
\ No newline at end of file
diff --git a/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityAnnotationFormatTest.java b/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityAnnotationFormatTest.java
new file mode 100644
index 000000000..eb646e0e2
--- /dev/null
+++ b/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityAnnotationFormatTest.java
@@ -0,0 +1,27 @@
+package de.julielab.jcore.ae.annotationadder.annotationformat;
+
+import de.julielab.jcore.ae.annotationadder.annotationrepresentations.ExternalTextAnnotation;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;
+
+class SimpleTSVEntityAnnotationFormatTest {
+
+    @Test
+    void parse() {
+        SimpleTSVEntityAnnotationFormat format = new SimpleTSVEntityAnnotationFormat();
+        format.withHeader(true);
+        // should be ignored
+        assertNull(format.parse("# comment"));
+        // should be stored as header but not return something
+        assertNull(format.parse("docId\tbegin\tend\ttype\tspecificType\tcomponentId"));
+        ExternalTextAnnotation extAnnotation = format.parse("123\t0\t5\tde.julielab.jcore.types.Gene\tprotein\tGoldAnnotation");
+        assertEquals("123", extAnnotation.getDocumentId());
+        assertEquals(0, extAnnotation.getStart());
+        assertEquals(5, extAnnotation.getEnd());
+        assertEquals("de.julielab.jcore.types.Gene", extAnnotation.getUimaType());
+        assertEquals("protein", extAnnotation.getPayload("specificType"));
+        assertEquals("GoldAnnotation", extAnnotation.getPayload("componentId"));
+    }
+}
\ No newline at end of file
diff --git a/jcore-annotation-adder-ae/src/test/resources/geneannotations_character_offsets_payload.tsv b/jcore-annotation-adder-ae/src/test/resources/geneannotations_character_offsets_payload.tsv
new file mode 100644
index 000000000..7606678d6
--- /dev/null
+++ b/jcore-annotation-adder-ae/src/test/resources/geneannotations_character_offsets_payload.tsv
@@ -0,0 +1,4 @@
+docId	begin	end	uimaType	specificType	componentId
+doc1	0	4	de.julielab.jcore.types.Gene	protein	GoldData
+doc1	5	10	de.julielab.jcore.types.Gene	dna	GoldData
+doc3	0	6	de.julielab.jcore.types.Gene	gene	GoldData
\ No newline at end of file

From 166e9d026c924fbc03ea091993a488b12b71f52a Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 20 Sep 2021 13:46:06 +0200
Subject: [PATCH 090/269] Add the `WithHeader` parameter to the descriptor.

---
 .../ae/annotationadder/desc/jcore-annotation-adder-ae.xml  | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/jcore-annotation-adder-ae/src/main/resources/de/julielab/jcore/ae/annotationadder/desc/jcore-annotation-adder-ae.xml b/jcore-annotation-adder-ae/src/main/resources/de/julielab/jcore/ae/annotationadder/desc/jcore-annotation-adder-ae.xml
index 2a72b89f9..e30b428f9 100644
--- a/jcore-annotation-adder-ae/src/main/resources/de/julielab/jcore/ae/annotationadder/desc/jcore-annotation-adder-ae.xml
+++ b/jcore-annotation-adder-ae/src/main/resources/de/julielab/jcore/ae/annotationadder/desc/jcore-annotation-adder-ae.xml
@@ -79,6 +79,13 @@
                                 <multiValued>false</multiValued>
                                 <mandatory>false</mandatory>
                             </configurationParameter>
+                            <configurationParameter>
+                                <name>WithHeader</name>
+                                <description>Indicates whether the the input TSV file has a header line.</description>
+                                <type>Boolean</type>
+                                <multiValued>false</multiValued>
+                                <mandatory>false</mandatory>
+                            </configurationParameter>
                         </configurationParameters>
                         <configurationParameterSettings>
                             <nameValuePair>

From e58543db2db1e848a4b5924efeaaf431a9d95df5 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 23 Sep 2021 14:07:25 +0200
Subject: [PATCH 091/269] Allow the specification of column names in the
 descriptor.

Thus, it is not required to adapt the source file to provide the TSV header.
---
 .../AnnotationAdderAnnotator.java             |  2 +
 .../annotationformat/AnnotationFormat.java    |  4 +-
 .../DocumentClassAnnotationFormat.java        |  9 +++-
 .../SimpleTSVEntityAnnotationFormat.java      | 17 ++++---
 ...tyWithDocumentTextShaAnnotationFormat.java |  9 +++-
 .../InMemoryFileTextAnnotationProvider.java   | 11 ++--
 .../desc/jcore-annotation-adder-ae.xml        |  9 +++-
 .../AnnotationAdderAnnotatorTest.java         | 51 ++++++++++++++++++-
 .../SimpleTSVEntityAnnotationFormatTest.java  |  2 +-
 .../geneannotations_character_offsets.tsv     |  6 +--
 10 files changed, 99 insertions(+), 21 deletions(-)

diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotator.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotator.java
index b31fc7d05..802206a63 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotator.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotator.java
@@ -39,6 +39,7 @@ public enum OffsetMode {CHARACTER, TOKEN}
 	@ConfigurationParameter(name = PARAM_PREVENT_PROCESSED_MARK, mandatory = false, description = "This setting is only in effect if an input format is used that contains document text SHA256 digests while also writing the annotation results into a JeDIS database. If then a CAS document text, to which annotations should be added, does not match the digest given by an annotation, this CAS will not marked as being finished processing by DBCheckpointAE that may follow in the pipeline. The idea is that the mismatched documents require a reprocessing of the original annotation creation algorithm because their text has been changed relative to the annotation on file. By not setting the document as being finished processed, it is straightforward to process only those documents again that failed to add one or multiple annotations.")
     private boolean preventProcessedOnDigestMismatch;
 
+
     private List<AnnotationAdder> annotationAdders = Arrays.asList(new TextAnnotationListAdder(), new DocumentClassAnnotationAdder());
 
     /**
@@ -49,6 +50,7 @@ public enum OffsetMode {CHARACTER, TOKEN}
 	public void initialize(final UimaContext aContext) throws ResourceInitializationException {
         offsetMode = OffsetMode.valueOf(Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_OFFSET_MODE)).orElse(OffsetMode.CHARACTER.name()));
         defaultUimaType = (String) aContext.getConfigParameterValue(PARAM_DEFAULT_UIMA_TYPE);
+        preventProcessedOnDigestMismatch = Optional.ofNullable((Boolean) aContext.getConfigParameterValue(PARAM_PREVENT_PROCESSED_MARK)).orElse(false);
         try {
             annotationProvider = (AnnotationProvider<? extends AnnotationData>) aContext.getResourceObject(KEY_ANNOTATION_SOURCE);
         } catch (ResourceAccessException e) {
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/AnnotationFormat.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/AnnotationFormat.java
index 46d652dcf..a0c31a52f 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/AnnotationFormat.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/AnnotationFormat.java
@@ -5,5 +5,7 @@
 public interface AnnotationFormat<T extends AnnotationData> {
     T parse(String data);
 
-    void withHeader(boolean withHeader);
+    void hasHeader(boolean withHeader);
+
+    void setColumnNames(String[] header);
 }
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/DocumentClassAnnotationFormat.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/DocumentClassAnnotationFormat.java
index 48f03d136..bc24816e3 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/DocumentClassAnnotationFormat.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/DocumentClassAnnotationFormat.java
@@ -19,7 +19,12 @@ public ExternalDocumentClassAnnotation parse(String data) {
     }
 
     @Override
-    public void withHeader(boolean withHeader) {
-        // does nothing
+    public void hasHeader(boolean withHeader) {
+        // does nothing right now
+    }
+
+    @Override
+    public void setColumnNames(String[] header) {
+        // does nothing right now
     }
 }
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityAnnotationFormat.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityAnnotationFormat.java
index 1a71edfcc..bee28da11 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityAnnotationFormat.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityAnnotationFormat.java
@@ -8,7 +8,7 @@ public class SimpleTSVEntityAnnotationFormat implements AnnotationFormat<Externa
 
     @Override
     public ExternalTextAnnotation parse(String data) {
-        if (data == null || data.startsWith("#"))
+            if (data == null || data.startsWith("#"))
             return null;
         final String[] record = data.split("\t");
         if (record.length < 3)
@@ -25,16 +25,21 @@ public ExternalTextAnnotation parse(String data) {
             type = record[3];
         ExternalTextAnnotation externalTextAnnotation = new ExternalTextAnnotation(docId, begin, end, type);
         if (record.length > 4) {
-            if (header == null)
-                throw new IllegalStateException("There are columns exceeding the default 4-column format but no header was given to deliver their names.");
-            for (int i = 4; i < record.length; i++)
-                externalTextAnnotation.addPayload(header[i], record[i]);
+            if (header != null) {
+                for (int i = 4; i < record.length; i++)
+                    externalTextAnnotation.addPayload(header[i], record[i]);
+            }
         }
         return externalTextAnnotation;
     }
 
     @Override
-    public void withHeader(boolean withHeader) {
+    public void hasHeader(boolean withHeader) {
         this.withHeader = withHeader;
     }
+
+    @Override
+    public void setColumnNames(String[] header) {
+        this.header = header;
+    }
 }
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityWithDocumentTextShaAnnotationFormat.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityWithDocumentTextShaAnnotationFormat.java
index 9332a9d93..0c1c10824 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityWithDocumentTextShaAnnotationFormat.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityWithDocumentTextShaAnnotationFormat.java
@@ -23,7 +23,12 @@ public ExternalTextAnnotation parse(String data) {
     }
 
     @Override
-    public void withHeader(boolean withHeader) {
-        // does nothing
+    public void hasHeader(boolean withHeader) {
+        // does nothing right now
+    }
+
+    @Override
+    public void setColumnNames(String[] header) {
+        // does nothing right now
     }
 }
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/InMemoryFileTextAnnotationProvider.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/InMemoryFileTextAnnotationProvider.java
index 411223e98..1f6914340 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/InMemoryFileTextAnnotationProvider.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/InMemoryFileTextAnnotationProvider.java
@@ -15,9 +15,11 @@
 
 public class InMemoryFileTextAnnotationProvider implements AnnotationProvider<AnnotationList> {
     public static final String PARAM_ANNOTATION_FORMAT = "AnnotationFormatClass";
-    public static final String PARAM_WITH_HEADER = "WithHeader";
+    public static final String PARAM_INPUT_HAS_HEADER = "InputHasHeader";
+    public static final String PARAM_COLUMN_NAMES = "ColumnNames";
     private final static Logger log = LoggerFactory.getLogger(InMemoryFileTextAnnotationProvider.class);
     private AnnotationSource<AnnotationList> annotationSource;
+    private AnnotationFormat<ExternalTextAnnotation> format;
 
     @Override
     public AnnotationList<ExternalTextAnnotation> getAnnotations(String id) {
@@ -28,11 +30,12 @@ public AnnotationList<ExternalTextAnnotation> getAnnotations(String id) {
     public void load(DataResource dataResource) throws ResourceInitializationException {
         final ConfigurationParameterSettings parameterSettings = dataResource.getMetaData().getConfigurationParameterSettings();
         final String formatClassName = (String) Optional.ofNullable(parameterSettings.getParameterValue(PARAM_ANNOTATION_FORMAT)).orElse(SimpleTSVEntityAnnotationFormat.class.getCanonicalName());
-        final boolean withHeader = (boolean) Optional.ofNullable(parameterSettings.getParameterValue(PARAM_WITH_HEADER)).orElse(false);
-        AnnotationFormat<ExternalTextAnnotation> format;
+        final boolean hasHeader = (boolean) Optional.ofNullable(parameterSettings.getParameterValue(PARAM_INPUT_HAS_HEADER)).orElse(false);
+        final String[] columnNames = (String[])parameterSettings.getParameterValue(PARAM_COLUMN_NAMES);
         try {
             format = (AnnotationFormat<ExternalTextAnnotation>) Class.forName(formatClassName).getDeclaredConstructor().newInstance();
-            format.withHeader(withHeader);
+            format.hasHeader(hasHeader);
+            format.setColumnNames(columnNames);
         } catch (NoSuchMethodException | InvocationTargetException | InstantiationException | IllegalAccessException | ClassNotFoundException e) {
             log.error("Could not instantiate class {}", formatClassName);
             throw new ResourceInitializationException(e);
diff --git a/jcore-annotation-adder-ae/src/main/resources/de/julielab/jcore/ae/annotationadder/desc/jcore-annotation-adder-ae.xml b/jcore-annotation-adder-ae/src/main/resources/de/julielab/jcore/ae/annotationadder/desc/jcore-annotation-adder-ae.xml
index e30b428f9..71e138a6c 100644
--- a/jcore-annotation-adder-ae/src/main/resources/de/julielab/jcore/ae/annotationadder/desc/jcore-annotation-adder-ae.xml
+++ b/jcore-annotation-adder-ae/src/main/resources/de/julielab/jcore/ae/annotationadder/desc/jcore-annotation-adder-ae.xml
@@ -80,12 +80,19 @@
                                 <mandatory>false</mandatory>
                             </configurationParameter>
                             <configurationParameter>
-                                <name>WithHeader</name>
+                                <name>InputHasHeader</name>
                                 <description>Indicates whether the the input TSV file has a header line.</description>
                                 <type>Boolean</type>
                                 <multiValued>false</multiValued>
                                 <mandatory>false</mandatory>
                             </configurationParameter>
+                            <configurationParameter>
+                                <name>ColumnNames</name>
+                                <description>For column formats without a header. Required when the columns should be mapped to annotation type features. Then, the headers but correspond to the feature names and are case sensitive. When specified, the number of elements for this parameter must equal the number of columns in the input file. Then, the i-th parameter value will be set as the name of the i-th column.</description>
+                                <type>String</type>
+                                <multiValued>true</multiValued>
+                                <mandatory>false</mandatory>
+                            </configurationParameter>
                         </configurationParameters>
                         <configurationParameterSettings>
                             <nameValuePair>
diff --git a/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotatorTest.java b/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotatorTest.java
index 6aad5e94a..83f2aa54d 100644
--- a/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotatorTest.java
+++ b/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotatorTest.java
@@ -71,7 +71,7 @@ public void testCharacterOffsets() throws Exception {
     @Test
     public void testPayload() throws Exception {
         final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types", "de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-meta-types");
-        final ExternalResourceDescription externalResourceDescription = ExternalResourceFactory.createExternalResourceDescription(InMemoryFileTextAnnotationProvider.class, new File("src/test/resources/geneannotations_character_offsets_payload.tsv"), InMemoryFileTextAnnotationProvider.PARAM_WITH_HEADER, true);
+        final ExternalResourceDescription externalResourceDescription = ExternalResourceFactory.createExternalResourceDescription(InMemoryFileTextAnnotationProvider.class, new File("src/test/resources/geneannotations_character_offsets_payload.tsv"), InMemoryFileTextAnnotationProvider.PARAM_INPUT_HAS_HEADER, true);
         final AnalysisEngine engine = AnalysisEngineFactory.createEngine(AnnotationAdderAnnotator.class, AnnotationAdderAnnotator.KEY_ANNOTATION_SOURCE, externalResourceDescription);
         // Test doc1 (two gene annotations)
         jCas.setDocumentText("BRCA PRKII are the genes of this sentence.");
@@ -114,6 +114,55 @@ public void testPayload() throws Exception {
         assertThat(gene.getComponentId()).isEqualTo("GoldData");
     }
 
+    @Test
+    public void testHeaderParameter() throws Exception {
+        final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types", "de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-meta-types");
+        final ExternalResourceDescription externalResourceDescription = ExternalResourceFactory.createExternalResourceDescription(InMemoryFileTextAnnotationProvider.class, new File("src/test/resources/geneannotations_character_offsets.tsv"), InMemoryFileTextAnnotationProvider.PARAM_COLUMN_NAMES, new String[]{"docId", "begin", "end", "uimaType", "specificType", "componentId"});
+        final AnalysisEngine engine = AnalysisEngineFactory.createEngine(AnnotationAdderAnnotator.class, AnnotationAdderAnnotator.KEY_ANNOTATION_SOURCE, externalResourceDescription);
+        // Test doc1 (two gene annotations)
+        jCas.setDocumentText("BRCA PRKII are the genes of this sentence.");
+        final Header h = new Header(jCas);
+        h.setDocId("doc1");
+        h.addToIndexes();
+
+        engine.process(jCas);
+
+        final List<Gene> genes = new ArrayList<>(JCasUtil.select(jCas, Gene.class));
+        assertThat(genes).hasSize(2);
+
+        assertThat(genes.get(0).getBegin()).isEqualTo(0);
+        assertThat(genes.get(0).getEnd()).isEqualTo(4);
+        assertThat(genes.get(0).getSpecificType()).isEqualTo("additionalColumn1");
+        assertThat(genes.get(0).getComponentId()).isEqualTo("additionalColumn2");
+
+        assertThat(genes.get(1).getBegin()).isEqualTo(5);
+        assertThat(genes.get(1).getEnd()).isEqualTo(10);
+        assertThat(genes.get(1).getSpecificType()).isEqualTo("additionalColumn1");
+        assertThat(genes.get(1).getComponentId()).isEqualTo("additionalColumn2");
+
+        // Test doc2 (no gene annotations, there will be a warning on DEBUG level)
+        jCas.reset();
+        jCas.setDocumentText("There are no gene mentions in here");
+        Header h2 = new Header(jCas);
+        h2.setDocId("doc2");
+        h2.addToIndexes();
+        engine.process(jCas);
+        assertThat(JCasUtil.exists(jCas, Gene.class)).isFalse();
+
+        // Test doc3 (one gene annotation)
+        jCas.reset();
+        jCas.setDocumentText("PRKAVI does not exist, I think. But this is just a test so it doesn't matter.");
+        Header h3 = new Header(jCas);
+        h3.setDocId("doc3");
+        h3.addToIndexes();
+        engine.process(jCas);
+        final Gene gene = JCasUtil.selectSingle(jCas, Gene.class);
+        assertThat(gene.getBegin()).isEqualTo(0);
+        assertThat(gene.getEnd()).isEqualTo(6);
+        assertThat(gene.getSpecificType()).isEqualTo("additionalColumn1");
+        assertThat(gene.getComponentId()).isEqualTo("additionalColumn2");
+    }
+
     @Test
     public void testTokenOffsets() throws Exception {
         final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types", "de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-meta-types");
diff --git a/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityAnnotationFormatTest.java b/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityAnnotationFormatTest.java
index eb646e0e2..848526c03 100644
--- a/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityAnnotationFormatTest.java
+++ b/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityAnnotationFormatTest.java
@@ -11,7 +11,7 @@ class SimpleTSVEntityAnnotationFormatTest {
     @Test
     void parse() {
         SimpleTSVEntityAnnotationFormat format = new SimpleTSVEntityAnnotationFormat();
-        format.withHeader(true);
+        format.hasHeader(true);
         // should be ignored
         assertNull(format.parse("# comment"));
         // should be stored as header but not return something
diff --git a/jcore-annotation-adder-ae/src/test/resources/geneannotations_character_offsets.tsv b/jcore-annotation-adder-ae/src/test/resources/geneannotations_character_offsets.tsv
index a3b4799ab..33babd2dc 100644
--- a/jcore-annotation-adder-ae/src/test/resources/geneannotations_character_offsets.tsv
+++ b/jcore-annotation-adder-ae/src/test/resources/geneannotations_character_offsets.tsv
@@ -1,3 +1,3 @@
-doc1	0	4	de.julielab.jcore.types.Gene
-doc1	5	10	de.julielab.jcore.types.Gene
-doc3	0	6	de.julielab.jcore.types.Gene
\ No newline at end of file
+doc1	0	4	de.julielab.jcore.types.Gene	additionalColumn1	additionalColumn2
+doc1	5	10	de.julielab.jcore.types.Gene	additionalColumn1	additionalColumn2
+doc3	0	6	de.julielab.jcore.types.Gene	additionalColumn1	additionalColumn2
\ No newline at end of file

From 0aa2bd92c1d5a16f6422998bdb5ea7fc0d936ea6 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 23 Sep 2021 18:49:28 +0200
Subject: [PATCH 092/269] Add logger messages in error cases.

---
 .../acronymtagger/main/AcronymAnnotator.java  | 19 +++++++------------
 .../consumer/acronyms/AcronymWriter.java      |  8 +++++++-
 .../FileAnnotationSource.java                 | 10 +++++-----
 .../desc/jcore-annotation-adder-ae.xml        |  2 +-
 4 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/jcore-acronym-ae/src/main/java/de/julielab/jcore/ae/acronymtagger/main/AcronymAnnotator.java b/jcore-acronym-ae/src/main/java/de/julielab/jcore/ae/acronymtagger/main/AcronymAnnotator.java
index ad7877e80..3bb8fff9e 100644
--- a/jcore-acronym-ae/src/main/java/de/julielab/jcore/ae/acronymtagger/main/AcronymAnnotator.java
+++ b/jcore-acronym-ae/src/main/java/de/julielab/jcore/ae/acronymtagger/main/AcronymAnnotator.java
@@ -158,12 +158,9 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
 
 			LOGGER.info(" done");
 
-		} catch (AnnotatorContextException e) {
-			throw new ResourceInitializationException();
-		} catch (AnnotatorConfigurationException e) {
-			throw new ResourceInitializationException();
-		} catch (ResourceProcessException e) {
-			throw new ResourceInitializationException();
+		} catch (AnnotatorContextException | AnnotatorConfigurationException| ResourceProcessException e) {
+			LOGGER.error("Could not initialize acronym annotator", e);
+			throw new ResourceInitializationException(e);
 		}
 
 	}
@@ -242,14 +239,16 @@ public void process(JCas aJCas) {
 				ConsistencyAnnotator ca = new ConsistencyAnnotator();
 				ca.consistencyAnnotate(aJCas);
 			}
-			
+
 			if (postprocessing) {
 				Postprocessing.doPostprocessing(aJCas);
 			}
-			
+
 
 		} catch (StringIndexOutOfBoundsException e) {
 			LOGGER.error("typical Error in AcronymAnnotator.process() : StringIndexOutOfBounds");
+		} catch (Throwable t) {
+			LOGGER.error("Acronym resolution error: ", t);
 		}
 	}
 
@@ -557,10 +556,6 @@ private int findFullformStart(String potFF, String acro) {
 	/**
 	 * looks for the 'best' position in the sentence to start looking for a fullform
 	 * 
-	 * @param sentence
-	 * @param acroStart
-	 * @param maxTokens
-	 * @return
 	 */
 	private int getPotFullformStart(String sentence, int acroStart, int acroLength) {
 
diff --git a/jcore-acronym-writer/src/main/java/de/julielab/jcore/consumer/acronyms/AcronymWriter.java b/jcore-acronym-writer/src/main/java/de/julielab/jcore/consumer/acronyms/AcronymWriter.java
index b1aabca29..a406021b9 100644
--- a/jcore-acronym-writer/src/main/java/de/julielab/jcore/consumer/acronyms/AcronymWriter.java
+++ b/jcore-acronym-writer/src/main/java/de/julielab/jcore/consumer/acronyms/AcronymWriter.java
@@ -15,6 +15,8 @@
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
 import org.apache.uima.resource.ResourceInitializationException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.File;
 import java.io.IOException;
@@ -24,7 +26,7 @@
 
 @ResourceMetaData(name = "JCoRe Acronym Writer", description = "Writes acronym annotation to a text file.")
 public class AcronymWriter extends JCasAnnotator_ImplBase {
-
+private final static Logger log = LoggerFactory.getLogger(AcronymWriter.class);
 	public static final String PARAM_OUTPUTFILE = "OutputFile";
 
 	@ConfigurationParameter(name = PARAM_OUTPUTFILE)
@@ -38,6 +40,7 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
 		try {
 			os = FileUtilities.getOutputStreamToFile(new File(outputFile));
 		} catch (IOException e) {
+			log.error("Could not initialize acronym writer", e);
 			throw new ResourceInitializationException(e);
 		}
 	}
@@ -70,7 +73,10 @@ public void process(JCas jcas) throws AnalysisEngineProcessException {
 				++abbrCount;
 			}
 		} catch (CASRuntimeException | IOException e) {
+			log.error("Exception while writing acronyms", e);
 			throw new AnalysisEngineProcessException(e);
+		} catch (Throwable t) {
+			log.error("Exception while writing acronyms", t);
 		}
 	}
 
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/FileAnnotationSource.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/FileAnnotationSource.java
index c2a4cb586..69958d586 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/FileAnnotationSource.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/FileAnnotationSource.java
@@ -1,6 +1,6 @@
 package de.julielab.jcore.ae.annotationadder.annotationsources;
 
-import de.julielab.java.utilities.FileUtilities;
+import de.julielab.java.utilities.UriUtilities;
 import de.julielab.jcore.ae.annotationadder.annotationformat.AnnotationFormat;
 import de.julielab.jcore.ae.annotationadder.annotationrepresentations.AnnotationData;
 import de.julielab.jcore.ae.annotationadder.annotationrepresentations.AnnotationList;
@@ -9,8 +9,8 @@
 import org.slf4j.LoggerFactory;
 
 import java.io.BufferedReader;
-import java.io.File;
 import java.io.IOException;
+import java.net.URI;
 import java.util.Map;
 import java.util.Objects;
 import java.util.stream.Collectors;
@@ -24,8 +24,8 @@ public FileAnnotationSource(AnnotationFormat<T> format) {
         this.format = format;
     }
 
-    public void loadAnnotations(File annotationfile) {
-        try (BufferedReader br = FileUtilities.getReaderFromFile(annotationfile)) {
+    private void loadAnnotations(URI annotationUri) {
+        try (BufferedReader br = UriUtilities.getReaderFromUri(annotationUri)) {
             entitiesByDocId = br.lines().map(format::parse).filter(Objects::nonNull).collect(Collectors.groupingBy(AnnotationData::getDocumentId, Collectors.toCollection(AnnotationList::new)));
         } catch (IOException e) {
             e.printStackTrace();
@@ -35,7 +35,7 @@ public void loadAnnotations(File annotationfile) {
     @Override
     public void initialize(DataResource dataResource) {
         log.info("Loading entity annotations from {}", dataResource.getUri());
-        loadAnnotations(new File(dataResource.getUri()));
+        loadAnnotations(dataResource.getUri());
     }
 
     @Override
diff --git a/jcore-annotation-adder-ae/src/main/resources/de/julielab/jcore/ae/annotationadder/desc/jcore-annotation-adder-ae.xml b/jcore-annotation-adder-ae/src/main/resources/de/julielab/jcore/ae/annotationadder/desc/jcore-annotation-adder-ae.xml
index 71e138a6c..20ea1f3d1 100644
--- a/jcore-annotation-adder-ae/src/main/resources/de/julielab/jcore/ae/annotationadder/desc/jcore-annotation-adder-ae.xml
+++ b/jcore-annotation-adder-ae/src/main/resources/de/julielab/jcore/ae/annotationadder/desc/jcore-annotation-adder-ae.xml
@@ -88,7 +88,7 @@
                             </configurationParameter>
                             <configurationParameter>
                                 <name>ColumnNames</name>
-                                <description>For column formats without a header. Required when the columns should be mapped to annotation type features. Then, the headers but correspond to the feature names and are case sensitive. When specified, the number of elements for this parameter must equal the number of columns in the input file. Then, the i-th parameter value will be set as the name of the i-th column.</description>
+                                <description>For column formats without a header. Required when the columns should be mapped to annotation type features. Then, the headers must correspond to the feature names and are case sensitive. When specified, the number of elements for this parameter must equal the number of columns in the input file. Then, the i-th parameter value will be set as the name of the i-th column.</description>
                                 <type>String</type>
                                 <multiValued>true</multiValued>
                                 <mandatory>false</mandatory>

From 88f20530cf5a152c8b4171cf4814b923a36078cf Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 24 Sep 2021 11:02:52 +0200
Subject: [PATCH 093/269] Add more logging in error cases.

---
 .../julielab/jcore/ae/acronymtagger/main/AcronymAnnotator.java  | 2 +-
 .../java/de/julielab/jcore/consumer/acronyms/AcronymWriter.java | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/jcore-acronym-ae/src/main/java/de/julielab/jcore/ae/acronymtagger/main/AcronymAnnotator.java b/jcore-acronym-ae/src/main/java/de/julielab/jcore/ae/acronymtagger/main/AcronymAnnotator.java
index 3bb8fff9e..a8e588af9 100644
--- a/jcore-acronym-ae/src/main/java/de/julielab/jcore/ae/acronymtagger/main/AcronymAnnotator.java
+++ b/jcore-acronym-ae/src/main/java/de/julielab/jcore/ae/acronymtagger/main/AcronymAnnotator.java
@@ -234,7 +234,7 @@ public void process(JCas aJCas) {
 				annotate(sentenceText, aJCas, sentence.getBegin());
 			}
 
-			// if extra annotation is whished, do so :-)
+			// if extra annotation is wished, do so :-)
 			if (consistencyAnno) {
 				ConsistencyAnnotator ca = new ConsistencyAnnotator();
 				ca.consistencyAnnotate(aJCas);
diff --git a/jcore-acronym-writer/src/main/java/de/julielab/jcore/consumer/acronyms/AcronymWriter.java b/jcore-acronym-writer/src/main/java/de/julielab/jcore/consumer/acronyms/AcronymWriter.java
index a406021b9..ddc1ba416 100644
--- a/jcore-acronym-writer/src/main/java/de/julielab/jcore/consumer/acronyms/AcronymWriter.java
+++ b/jcore-acronym-writer/src/main/java/de/julielab/jcore/consumer/acronyms/AcronymWriter.java
@@ -43,10 +43,12 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
 			log.error("Could not initialize acronym writer", e);
 			throw new ResourceInitializationException(e);
 		}
+		log.trace("AcronymWriter successfully initialized.");
 	}
 
 	@Override
 	public void process(JCas jcas) throws AnalysisEngineProcessException {
+		log.trace("Processing with AcronymWriter");
 		try {
 			String pubmedId = JCoReTools.getDocId(jcas);
 			FSIterator<Annotation> it = jcas.getAnnotationIndex(Abbreviation.type).iterator();

From e86a2ad1f6ce4d4efa20a765fad34a5be00314bc Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 24 Sep 2021 12:14:13 +0200
Subject: [PATCH 094/269] Make removal of file name extension for docId
 optional in FileReader.

---
 .../AnnotationAdderAnnotator.java             | 38 +++++++++++--------
 .../AnnotationAdderAnnotatorTest.java         | 19 ++++++++++
 .../jcore/reader/file/main/FileReader.java    | 11 ++++--
 3 files changed, 49 insertions(+), 19 deletions(-)

diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotator.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotator.java
index 802206a63..00245937d 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotator.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotator.java
@@ -6,6 +6,7 @@
 import de.julielab.jcore.utility.JCoReTools;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.fit.descriptor.ConfigurationParameter;
 import org.apache.uima.fit.descriptor.ExternalResource;
 import org.apache.uima.fit.descriptor.ResourceMetaData;
@@ -67,23 +68,28 @@ public void initialize(final UimaContext aContext) throws ResourceInitialization
 	 * is where the actual work happens.
 	 */
 	@Override
-	public void process(final JCas aJCas) {
-        final String docId = JCoReTools.getDocId(aJCas);
-        if (docId == null)
-            log.error("The current document does not have a header. Cannot add external annotations.");
-        final AnnotationData annotations = annotationProvider.getAnnotations(docId);
-        final AnnotationAdderHelper helper = new AnnotationAdderHelper();
-        if (annotations != null) {
-            boolean success = false;
-            int adderNum = 0;
-            // We are now iterating through the available annotation adders for the one that handles the obtained annotation data
-            while (adderNum < annotationAdders.size() && !(success = annotationAdders.get(adderNum).addAnnotations(annotations, helper, adderConfiguration, aJCas, preventProcessedOnDigestMismatch))) {
-                ++adderNum;
+	public void process(final JCas aJCas) throws AnalysisEngineProcessException {
+        try {
+            final String docId = JCoReTools.getDocId(aJCas);
+            if (docId == null)
+                log.error("The current document does not have a header. Cannot add external annotations.");
+            final AnnotationData annotations = annotationProvider.getAnnotations(docId);
+            final AnnotationAdderHelper helper = new AnnotationAdderHelper();
+            if (annotations != null) {
+                boolean success = false;
+                int adderNum = 0;
+                // We are now iterating through the available annotation adders for the one that handles the obtained annotation data
+                while (adderNum < annotationAdders.size() && !(success = annotationAdders.get(adderNum).addAnnotations(annotations, helper, adderConfiguration, aJCas, preventProcessedOnDigestMismatch))) {
+                    ++adderNum;
+                }
+                if (!success)
+                    throw new IllegalArgumentException("There was no annotation adder to handle the annotation data of class " + annotations.getClass().getCanonicalName());
+            } else {
+                log.debug("No external annotations were delivered for document ID {}", docId);
             }
-            if (!success)
-                throw new IllegalArgumentException("There was no annotation adder to handle the annotation data of class " + annotations.getClass().getCanonicalName());
-        } else {
-            log.debug("No external annotations were delivered for document ID {}", docId);
+        } catch (Throwable t) {
+            log.error("Could not add annotations due to exception.", t);
+            throw new AnalysisEngineProcessException(t);
         }
     }
 
diff --git a/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotatorTest.java b/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotatorTest.java
index 83f2aa54d..48ee699e7 100644
--- a/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotatorTest.java
+++ b/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotatorTest.java
@@ -68,6 +68,25 @@ public void testCharacterOffsets() throws Exception {
         assertThat(gene.getEnd()).isEqualTo(6);
     }
 
+    @Test
+    public void testCharacterOffsets2() throws Exception {
+        final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types", "de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-meta-types");
+        final ExternalResourceDescription externalResourceDescription = ExternalResourceFactory.createExternalResourceDescription(InMemoryFileTextAnnotationProvider.class, new File("src/test/resources/test.txt"));
+        final AnalysisEngine engine = AnalysisEngineFactory.createEngine(AnnotationAdderAnnotator.class, AnnotationAdderAnnotator.KEY_ANNOTATION_SOURCE, externalResourceDescription);
+        // Test doc1 (two gene annotations)
+        jCas.setDocumentText("BRCA PRKII are the genes of this sentence.");
+        final Header h = new Header(jCas);
+        h.setDocId("10022127.txt");
+        h.addToIndexes();
+
+        engine.process(jCas);
+
+        final List<Gene> genes = new ArrayList<>(JCasUtil.select(jCas, Gene.class));
+        for (Gene g : genes) {
+            System.out.println(g);
+        }
+    }
+
     @Test
     public void testPayload() throws Exception {
         final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types", "de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-meta-types");
diff --git a/jcore-file-reader/src/main/java/de/julielab/jcore/reader/file/main/FileReader.java b/jcore-file-reader/src/main/java/de/julielab/jcore/reader/file/main/FileReader.java
index dee16f1d7..3ea69e29e 100644
--- a/jcore-file-reader/src/main/java/de/julielab/jcore/reader/file/main/FileReader.java
+++ b/jcore-file-reader/src/main/java/de/julielab/jcore/reader/file/main/FileReader.java
@@ -86,6 +86,8 @@ public class FileReader extends CollectionReader_ImplBase {
      */
     public static final String ORIG_FILES_EXT = "OriginalFileExt";
 
+    public static final String REMOVE_FILE_NAME_EXTENSION_FOR_DOC_ID = "RemoveFileNameExtensionForDocId";
+
     private ArrayList<File> files;
 
     private int fileIndex;
@@ -110,6 +112,8 @@ public class FileReader extends CollectionReader_ImplBase {
     private File origFolder;
     @ConfigurationParameter(name = ORIG_FILES_EXT, mandatory = false)
     private String origFileExt;
+    @ConfigurationParameter(name = REMOVE_FILE_NAME_EXTENSION_FOR_DOC_ID, mandatory = false, defaultValue = "true")
+    private boolean removeFileNameExtensionForDocId;
 
     /**
      * @see org.apache.uima.collection.CollectionReader_ImplBase#initialize()
@@ -149,6 +153,7 @@ public void initialize() throws ResourceInitializationException {
         } else {
             useFilenameAsDocId = filenameAsDocId;
         }
+        removeFileNameExtensionForDocId = Optional.ofNullable((Boolean) getConfigParameterValue(REMOVE_FILE_NAME_EXTENSION_FOR_DOC_ID)).orElse(true);
 
         allowedExtensionsArray = (String[]) getConfigParameterValue(ALLOWED_FILE_EXTENSIONS);
         final Set<String> allowedExtensions = new HashSet<>();
@@ -225,7 +230,7 @@ public void getNext(CAS aCAS) throws IOException, CollectionException {
 
         String origText = null;
         if (origFolder != null) {
-            File origFile = new File(origFolder, getFileName(file) + "." + origFileExt);
+            File origFile = new File(origFolder, getFileName(file, true) + "." + origFileExt);
             origText = IOStreamUtilities.getStringFromInputStream(FileUtilities.getInputStreamFromFile(origFile));
         }
 
@@ -329,7 +334,7 @@ public void getNext(CAS aCAS) throws IOException, CollectionException {
 
         if (useFilenameAsDocId) {
 
-            String filename = getFileName(file);
+            String filename = getFileName(file, removeFileNameExtensionForDocId);
 
             Header header = new Header(jcas);
 
@@ -415,7 +420,7 @@ private void createFileListByType(File inputDirectory, final Set<String> allowed
                 .forEach(files::add);
     }
 
-    private String getFileName(File fi) {
+    private String getFileName(File fi, boolean removeExtension) {
         String filename = fi.getName();
         int extDotIndex = filename.lastIndexOf('.');
         if (extDotIndex > 0) {

From 65317026804e73a80ebb6180d2aff6a168aa9e7f Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 27 Sep 2021 09:44:12 +0200
Subject: [PATCH 095/269] Add the RemoveFileNameExtensionForDocId parameter to
 the FileReader descriptor.

---
 .../jcore/reader/file/desc/jcore-file-reader.xml   | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/jcore-file-reader/src/main/resources/de/julielab/jcore/reader/file/desc/jcore-file-reader.xml b/jcore-file-reader/src/main/resources/de/julielab/jcore/reader/file/desc/jcore-file-reader.xml
index f5b30ff00..bda1bb0e5 100644
--- a/jcore-file-reader/src/main/resources/de/julielab/jcore/reader/file/desc/jcore-file-reader.xml
+++ b/jcore-file-reader/src/main/resources/de/julielab/jcore/reader/file/desc/jcore-file-reader.xml
@@ -74,6 +74,12 @@
         <multiValued>false</multiValued>
         <mandatory>false</mandatory>
       </configurationParameter>
+      <configurationParameter>
+          <name>RemoveFileNameExtensionForDocId</name>
+          <type>Boolean</type>
+          <multiValued>false</multiValued>
+          <mandatory>false</mandatory>
+      </configurationParameter>
     </configurationParameters>
     <configurationParameterSettings>
       <nameValuePair>
@@ -118,11 +124,17 @@
           <string>txt</string>
         </value>
       </nameValuePair>
+      <nameValuePair>
+          <name>RemoveFileNameExtensionForDocId</name>
+          <value>
+              <boolean>true</boolean>
+          </value>
+      </nameValuePair>
     </configurationParameterSettings>
     <typeSystemDescription>
       <imports>
         <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types" />
-      <import name="de.julielab.jcore.types.jcore-morpho-syntax-types" />
+        <import name="de.julielab.jcore.types.jcore-morpho-syntax-types" />
       </imports>
     </typeSystemDescription>
     <typePriorities />

From 52c2aa3e01cc9501b7259e8d24515ff50676aa45 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 27 Sep 2021 10:57:11 +0200
Subject: [PATCH 096/269] Set XMIWriter log message "Wrote file ..." to debug
 level.

---
 .../java/de/julielab/jcore/consumer/xmi/CasToXmiConsumer.java   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jcore-xmi-writer/src/main/java/de/julielab/jcore/consumer/xmi/CasToXmiConsumer.java b/jcore-xmi-writer/src/main/java/de/julielab/jcore/consumer/xmi/CasToXmiConsumer.java
index 6a33348dd..4762f809e 100644
--- a/jcore-xmi-writer/src/main/java/de/julielab/jcore/consumer/xmi/CasToXmiConsumer.java
+++ b/jcore-xmi-writer/src/main/java/de/julielab/jcore/consumer/xmi/CasToXmiConsumer.java
@@ -295,7 +295,7 @@ public void process(JCas jcas) throws AnalysisEngineProcessException {
 		String fileName = outFileName.toString();
 		try {
 			writeXmi(jcas.getCas(), fileName);
-			LOGGER.info(" Wrote file " + fileName);
+			LOGGER.debug(" Wrote file " + fileName);
 		} catch (IOException e) {
 			try {
 				throw new ResourceProcessException(e);

From c92516f7258dc607a45fe0cf7069c7c9f5a8fa48 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 27 Sep 2021 10:59:08 +0200
Subject: [PATCH 097/269] Make actual use of the "removeExtension" parameter in
 FileReader.

---
 .../jcore/reader/file/main/FileReader.java    | 248 +++++++++---------
 1 file changed, 129 insertions(+), 119 deletions(-)

diff --git a/jcore-file-reader/src/main/java/de/julielab/jcore/reader/file/main/FileReader.java b/jcore-file-reader/src/main/java/de/julielab/jcore/reader/file/main/FileReader.java
index 3ea69e29e..564ec30f0 100644
--- a/jcore-file-reader/src/main/java/de/julielab/jcore/reader/file/main/FileReader.java
+++ b/jcore-file-reader/src/main/java/de/julielab/jcore/reader/file/main/FileReader.java
@@ -25,7 +25,6 @@
 import de.julielab.jcore.types.pubmed.Header;
 import org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException;
 import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.CASException;
 import org.apache.uima.collection.CollectionException;
 import org.apache.uima.collection.CollectionReader_ImplBase;
 import org.apache.uima.fit.descriptor.ConfigurationParameter;
@@ -33,6 +32,8 @@
 import org.apache.uima.resource.ResourceInitializationException;
 import org.apache.uima.util.Progress;
 import org.apache.uima.util.ProgressImpl;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.*;
 import java.nio.file.FileVisitOption;
@@ -44,7 +45,6 @@
 import java.util.stream.Stream;
 
 public class FileReader extends CollectionReader_ImplBase {
-
     /**
      *
      */
@@ -85,9 +85,8 @@ public class FileReader extends CollectionReader_ImplBase {
      *
      */
     public static final String ORIG_FILES_EXT = "OriginalFileExt";
-
     public static final String REMOVE_FILE_NAME_EXTENSION_FOR_DOC_ID = "RemoveFileNameExtensionForDocId";
-
+    private final static Logger log = LoggerFactory.getLogger(FileReader.class);
     private ArrayList<File> files;
 
     private int fileIndex;
@@ -213,138 +212,143 @@ public boolean hasNext() {
      * @see org.apache.uima.collection.CollectionReader#getNext(org.apache.uima.cas.CAS)
      */
     @Override
-    public void getNext(CAS aCAS) throws IOException, CollectionException {
-        JCas jcas;
+    public void getNext(CAS aCAS) throws CollectionException {
+        log.trace("Reading next file, if present");
+        File file = null;
         try {
-            jcas = aCAS.getJCas();
-        } catch (CASException e) {
-            throw new CollectionException(e);
-        }
+            JCas jcas = aCAS.getJCas();
 
-        // open input stream to file
-        File file = files.get(fileIndex++);
+            // take the next file from the list and read its complete content
+            file = files.get(fileIndex++);
+            log.trace("Got next file: {}", file);
 
-        String text = IOStreamUtilities.getStringFromInputStream(FileUtilities.getInputStreamFromFile(file));
+            String text = IOStreamUtilities.getStringFromInputStream(FileUtilities.getInputStreamFromFile(file));
 
-        Pattern nws = Pattern.compile("[^\\s]+", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS);
+            Pattern nws = Pattern.compile("[^\\s]+", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS);
 
-        String origText = null;
-        if (origFolder != null) {
-            File origFile = new File(origFolder, getFileName(file, true) + "." + origFileExt);
-            origText = IOStreamUtilities.getStringFromInputStream(FileUtilities.getInputStreamFromFile(origFile));
-        }
+            String origText = null;
+            if (origFolder != null) {
+                File origFile = new File(origFolder, getFileName(file, true) + "." + origFileExt);
+                origText = IOStreamUtilities.getStringFromInputStream(FileUtilities.getInputStreamFromFile(origFile));
+            }
 
-        // sentence per line mode
-        if (sentencePerLine) {
-            BufferedReader rdr = new BufferedReader(new StringReader(text));
-            List<String> lines = new ArrayList<String>();
-            List<Integer> start = new ArrayList<Integer>();
-            List<Integer> end = new ArrayList<Integer>();
-            Integer tmp = 0;
-            String line;
-            while ((line = rdr.readLine()) != null) {
-                if (!Pattern.matches("\\s*", line)) {
-                    lines.add(line);
-                    start.add(tmp);
-                    end.add(tmp + line.length());
+            // sentence per line mode: every non-blank line of the input file becomes a Sentence
+            if (sentencePerLine) {
+                log.trace("Reading input file as one sentence per line.");
+                BufferedReader rdr = new BufferedReader(new StringReader(text));
+                List<String> lines = new ArrayList<String>();
+                List<Integer> start = new ArrayList<Integer>();
+                List<Integer> end = new ArrayList<Integer>();
+                Integer tmp = 0;
+                String line;
+                while ((line = rdr.readLine()) != null) {
+                    if (!Pattern.matches("\\s*", line)) {
+                        lines.add(line);
+                        start.add(tmp);
+                        end.add(tmp + line.length());
+                    }
+                    tmp += (line.length() + 1); // the + 1 stands for one line break character (assumes \n line endings)
                 }
-                tmp += (line.length() + 1);
-            }
-            rdr.close();
-
-            int index_tmp = 0;
-            Optional<String> newLine;
-            for (Integer i = 0; i < lines.size(); i++) {
-                boolean addSent2index = true;
-                Sentence sent = new Sentence(jcas);
-                if (origText != null) {
-                    newLine = Stream
-                            .of(lines.get(i).split("\\s+"))
-                            .map(x -> Pattern.quote(x))
-                            .reduce((x, y) -> x + "\\s*" + y);
-                    Pattern p = Pattern.compile(newLine.get(), Pattern.UNICODE_CHARACTER_CLASS);
-                    Matcher m = p.matcher(origText);
-                    if (m.find(index_tmp)) {
-                        int newStart = m.start();
-                        int newEnd = m.end();
-                        index_tmp = m.end() + 1;
-                        sent.setBegin(newStart);
-                        sent.setEnd(newEnd);
+                rdr.close();
+
+                int index_tmp = 0;
+                Optional<String> newLine;
+                for (Integer i = 0; i < lines.size(); i++) {
+                    boolean addSent2index = true;
+                    Sentence sent = new Sentence(jcas);
+                    if (origText != null) {
+                        newLine = Stream
+                                .of(lines.get(i).split("\\s+"))
+                                .map(x -> Pattern.quote(x))
+                                .reduce((x, y) -> x + "\\s*" + y);
+                        Pattern p = Pattern.compile(newLine.get(), Pattern.UNICODE_CHARACTER_CLASS);
+                        Matcher m = p.matcher(origText);
+                        if (m.find(index_tmp)) {
+                            int newStart = m.start();
+                            int newEnd = m.end();
+                            index_tmp = Math.min(m.end() + 1, origText.length()); // find(int) throws for offsets beyond the text end
+                            sent.setBegin(newStart);
+                            sent.setEnd(newEnd);
+                        } else {
+                            addSent2index = false;
+                        }
                     } else {
-                        addSent2index = false;
+                        sent.setBegin(start.get(i));
+                        sent.setEnd(end.get(i));
+                    }
+                    sent.setComponentId(this.getClass().getName() + " : Sentence per Line Mode");
+                    if (addSent2index) {
+                        sent.addToIndexes();
                     }
-                } else {
-                    sent.setBegin(start.get(i));
-                    sent.setEnd(end.get(i));
-                }
-                sent.setComponentId(this.getClass().getName() + " : Sentence per Line Mode");
-                if (addSent2index) {
-                    sent.addToIndexes();
                 }
             }
-        }
-        //token by token mode
-        if (tokenByToken) {
-            List<String> tokensList = new ArrayList<>();
-            List<Integer> tokStart = new ArrayList<>();
-            List<Integer> tokEnd = new ArrayList<>();
-
-
-            Integer numberOfTokens = 0;
-            Matcher m = nws.matcher(text);
-            while (m.find()) {
-                String token = m.group();
-                int start = m.start();
-                int end = m.end();
-                tokensList.add(token);
-                tokStart.add(start);
-                tokEnd.add(end);
-                numberOfTokens++;
-            }
+            // token by token mode: every run of non-whitespace characters becomes a Token
+            if (tokenByToken) {
+                log.trace("Reading input file as tokenized text with whitespace as token separator.");
+                List<String> tokensList = new ArrayList<>();
+                List<Integer> tokStart = new ArrayList<>();
+                List<Integer> tokEnd = new ArrayList<>();
+                Integer numberOfTokens = 0;
+                Matcher m = nws.matcher(text);
+                while (m.find()) {
+                    String token = m.group();
+                    int start = m.start();
+                    int end = m.end();
+                    tokensList.add(token);
+                    tokStart.add(start);
+                    tokEnd.add(end);
+                    numberOfTokens++;
+                }
 
 
-            int index_tmp = 0;
-            for (Integer j = 0; j < tokensList.size(); j++) {
-                boolean addToken2index = true;
-                Token token = new Token(jcas);
-                if (origText != null) {
-                    String tok = tokensList.get(j);
-                    int newStart = origText.indexOf(tok, index_tmp);
-                    int newEnd = newStart + tok.length();
-                    index_tmp = newEnd;
-                    token.setBegin(newStart);
-                    token.setEnd(newEnd);
-                } else {
-                    token.setBegin(tokStart.get(j));
-                    token.setEnd(tokEnd.get(j));
-                }
-                token.setComponentId(this.getClass().getName() + " : Tokenized Mode");
-                if (addToken2index) {
-                    token.addToIndexes();
+                int index_tmp = 0;
+                for (Integer j = 0; j < tokensList.size(); j++) {
+                    boolean addToken2index = true;
+                    Token token = new Token(jcas);
+                    if (origText != null) {
+                        String tok = tokensList.get(j);
+                        int newStart = origText.indexOf(tok, index_tmp);
+                        addToken2index = newStart >= 0; // as in sentence mode, unmatched tokens are not indexed
+                        if (addToken2index) {
+                            index_tmp = newStart + tok.length();
+                            token.setBegin(newStart);
+                            token.setEnd(index_tmp);
+                        }
+                    } else {
+                        token.setBegin(tokStart.get(j));
+                        token.setEnd(tokEnd.get(j));
+                    }
+                    token.setComponentId(this.getClass().getName() + " : Tokenized Mode");
+                    if (addToken2index) {
+                        token.addToIndexes();
+                    }
                 }
             }
-        }
 
-        // put document in CAS
-        if (origText != null) {
-            jcas.setDocumentText(origText);
-        } else {
-            jcas.setDocumentText(text);
-        }
-
-        if (useFilenameAsDocId) {
+            // put document in CAS; the original text wins because the offsets computed above refer to it
+            if (origText != null) {
+                jcas.setDocumentText(origText);
+            } else {
+                jcas.setDocumentText(text);
+            }
 
-            String filename = getFileName(file, removeFileNameExtensionForDocId);
+            if (useFilenameAsDocId) {
+                String filename = getFileName(file, removeFileNameExtensionForDocId);
+                log.trace("Setting the file name {} as docId to a new Header annotation.", filename);
 
-            Header header = new Header(jcas);
+                Header header = new Header(jcas);
 
-            // set ID
-            header.setDocId(filename);
+                // set ID
+                header.setDocId(filename);
 
-            // set publication date
-            addDateForID(header, jcas, filename);
+                // set publication date
+                addDateForID(header, jcas, filename);
 
-            header.addToIndexes();
+                header.addToIndexes();
+            }
+        } catch (Throwable t) {
+            log.error("Could not read file {}", file, t);
+            throw new CollectionException(t);
         }
     }
 
@@ -414,7 +418,11 @@ public Progress[] getProgress() {
 
     private void createFileListByType(File inputDirectory, final Set<String> allowedExtensions) throws IOException {
         Files.walk(inputDirectory.toPath(), useSubDirs ? Integer.MAX_VALUE : 1, FileVisitOption.FOLLOW_LINKS)
-                .filter(p -> { if (allowedExtensions.isEmpty()) return true; for (String ext : allowedExtensions) if (p.toString().endsWith(ext)) return true; return false;})
+                .filter(p -> {
+                    if (allowedExtensions.isEmpty()) return true;
+                    for (String ext : allowedExtensions) if (p.toString().endsWith(ext)) return true;
+                    return false;
+                })
                 .map(Path::toFile)
                 .filter(File::isFile)
                 .forEach(files::add);
@@ -422,9 +430,11 @@ private void createFileListByType(File inputDirectory, final Set<String> allowed
 
     private String getFileName(File fi, boolean removeExtension) {
         String filename = fi.getName();
-        int extDotIndex = filename.lastIndexOf('.');
-        if (extDotIndex > 0) {
-            filename = filename.substring(0, extDotIndex);
+        if (removeExtension) {
+            int extDotIndex = filename.lastIndexOf('.');
+            if (extDotIndex > 0) {
+                filename = filename.substring(0, extDotIndex);
+            }
         }
         if (fileNameSplitUnderscore) {
             int extUnderScoreIndex = filename.lastIndexOf('_');

From 4ac6bc49af2b361e7f2836f43282046aa4ce422c Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 27 Sep 2021 12:04:36 +0200
Subject: [PATCH 098/269] Ignore the UIMA type in column 4 if it is not found
 in the type system for AnnotationAdder.

---
 .../TextAnnotationListAdder.java              |  8 ++++++--
 .../AnnotationAdderAnnotatorTest.java         | 19 -------------------
 2 files changed, 6 insertions(+), 21 deletions(-)

diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/TextAnnotationListAdder.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/TextAnnotationListAdder.java
index 40436c2cb..8ae202449 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/TextAnnotationListAdder.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/TextAnnotationListAdder.java
@@ -36,8 +36,12 @@ public boolean addAnnotations(AnnotationData data, AnnotationAdderHelper helper,
         String jCasDocTextSha = null;
         boolean shaMismatchWasReported = false;
         for (ExternalTextAnnotation a : annotationList) {
-            String uimaType = a.getUimaType() == null ? configuration.getDefaultUimaType() : a.getUimaType();
-            if (uimaType == null)
+            String uimaType;
+            if (a.getUimaType() != null && jCas.getTypeSystem().getType(a.getUimaType()) != null)
+                uimaType = a.getUimaType();
+            else if (configuration.getDefaultUimaType() != null)
+                uimaType = configuration.getDefaultUimaType();
+            else
                 throw new IllegalArgumentException("Missing annotation type: Neither the annotation of document " + a.getDocumentId() + " with offsets " + a.getStart() + "-" + a.getEnd() + " provides a type nor is the default type set.");
             if (jCas.getTypeSystem().getType(uimaType) == null)
                 throw new IllegalArgumentException("The entity annotation type " + uimaType + " does not exist in the type system.");
diff --git a/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotatorTest.java b/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotatorTest.java
index 48ee699e7..83f2aa54d 100644
--- a/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotatorTest.java
+++ b/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotatorTest.java
@@ -68,25 +68,6 @@ public void testCharacterOffsets() throws Exception {
         assertThat(gene.getEnd()).isEqualTo(6);
     }
 
-    @Test
-    public void testCharacterOffsets2() throws Exception {
-        final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types", "de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-meta-types");
-        final ExternalResourceDescription externalResourceDescription = ExternalResourceFactory.createExternalResourceDescription(InMemoryFileTextAnnotationProvider.class, new File("src/test/resources/test.txt"));
-        final AnalysisEngine engine = AnalysisEngineFactory.createEngine(AnnotationAdderAnnotator.class, AnnotationAdderAnnotator.KEY_ANNOTATION_SOURCE, externalResourceDescription);
-        // Test doc1 (two gene annotations)
-        jCas.setDocumentText("BRCA PRKII are the genes of this sentence.");
-        final Header h = new Header(jCas);
-        h.setDocId("10022127.txt");
-        h.addToIndexes();
-
-        engine.process(jCas);
-
-        final List<Gene> genes = new ArrayList<>(JCasUtil.select(jCas, Gene.class));
-        for (Gene g : genes) {
-            System.out.println(g);
-        }
-    }
-
     @Test
     public void testPayload() throws Exception {
         final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types", "de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-meta-types");

From 574001d570d09f5a3b9e33f616282e963642d7a5 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 27 Sep 2021 13:11:39 +0200
Subject: [PATCH 099/269] Add more logging to the classes of the
 AnnotationAdder.

---
 .../AnnotationAdderAnnotator.java             |  1 +
 .../TextAnnotationListAdder.java              |  5 ++--
 .../AnnotationList.java                       | 29 ++++++++++++++++++-
 .../annotationsources/AnnotationSource.java   |  7 ++++-
 .../FileAnnotationSource.java                 |  9 +++---
 ...ryFileDocumentClassAnnotationProvider.java |  8 ++++-
 .../InMemoryFileTextAnnotationProvider.java   |  7 ++++-
 7 files changed, 55 insertions(+), 11 deletions(-)

diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotator.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotator.java
index 00245937d..ceaac7535 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotator.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotator.java
@@ -76,6 +76,7 @@ public void process(final JCas aJCas) throws AnalysisEngineProcessException {
             final AnnotationData annotations = annotationProvider.getAnnotations(docId);
             final AnnotationAdderHelper helper = new AnnotationAdderHelper();
             if (annotations != null) {
+                log.trace("Found annotations for document ID {}.", docId);
                 boolean success = false;
                 int adderNum = 0;
                 // We are now iterating through the available annotation adders for the one that handles the obtained annotation data
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/TextAnnotationListAdder.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/TextAnnotationListAdder.java
index 8ae202449..e6c433ce6 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/TextAnnotationListAdder.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/TextAnnotationListAdder.java
@@ -5,7 +5,6 @@
 import de.julielab.jcore.ae.annotationadder.annotationrepresentations.ExternalTextAnnotation;
 import de.julielab.jcore.types.ext.DBProcessingMetaData;
 import de.julielab.jcore.utility.JCoReAnnotationTools;
-import de.julielab.jcore.utility.JCoReTools;
 import org.apache.commons.codec.binary.Base64;
 import org.apache.commons.codec.digest.DigestUtils;
 import org.apache.uima.cas.CASException;
@@ -65,14 +64,14 @@ else if (configuration.getDefaultUimaType() != null)
                         final Annotation annotation = JCoReAnnotationTools.getAnnotationByClassName(jCas, uimaType);
                         helper.setAnnotationOffsetsRelativeToDocument(annotation, a, configuration);
                         helper.setAnnotationPayloadsToFeatures(annotation, a);
+                        log.trace("Adding annotation of type {} with offsets {}-{} to document with ID {}", uimaType, annotation.getBegin(), annotation.getEnd(), annotationList.getDocId());
                         annotation.addToIndexes();
                     } else {
                         log.trace("ExternalAnnotation for document {} has no entity offsets or offsets < 0, not adding anything to the CAS.", a.getDocumentId());
                     }
                 } else {
                     if (!shaMismatchWasReported) {
-                        final String docId = JCoReTools.getDocId(jCas);
-                        log.warn("The document with ID '{}' has a differing document text hash from a given annotation. The annotation will not be added to the document. Annotation hash: {}, current document text hash: {}", docId, shaFromAnnotation, jCasDocTextSha);
+                        log.warn("The document with ID '{}' has a differing document text hash from a given annotation. The annotation will not be added to the document. Annotation hash: {}, current document text hash: {}", annotationList.getDocId(), shaFromAnnotation, jCasDocTextSha);
                         shaMismatchWasReported = true;
                         if (preventProcessedOnDigestMismatch) {
                             try {
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationrepresentations/AnnotationList.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationrepresentations/AnnotationList.java
index afa5e074d..44da0c57c 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationrepresentations/AnnotationList.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationrepresentations/AnnotationList.java
@@ -1,8 +1,78 @@
 package de.julielab.jcore.ae.annotationadder.annotationrepresentations;
 
 import java.util.ArrayList;
+import java.util.Collection;
 
+/**
+ * A list of annotations that all belong to the same document. Every insertion reports the document ID of the
+ * incoming annotation(s) to {@link #setDocId(String)}, which rejects an ID that conflicts with the current one.
+ *
+ * @param <T> the kind of annotation data stored in this list
+ */
 public class AnnotationList<T extends AnnotationData> extends ArrayList<T> implements AnnotationData {
+    /**
+     * Appends the annotation after reporting its document ID to {@link #setDocId(String)}.
+     *
+     * @throws IllegalArgumentException if that document ID conflicts with the one already set on this list
+     */
+    @Override
+    public boolean add(T t) {
+        setDocId(t.getDocumentId());
+        return super.add(t);
+    }
+
+    /**
+     * Inserts the annotation at the given position after reporting its document ID to {@link #setDocId(String)}.
+     *
+     * @throws IllegalArgumentException if that document ID conflicts with the one already set on this list
+     */
+    @Override
+    public void add(int index, T element) {
+        setDocId(element.getDocumentId());
+        super.add(index, element);
+    }
+
+    /**
+     * Appends all given annotations. The document ID of every element - not just of an arbitrary one - is reported
+     * to {@link #setDocId(String)} first, so this method enforces the same single-document rule as {@code add}.
+     *
+     * @throws IllegalArgumentException if any document ID conflicts; in that case nothing is added
+     */
+    @Override
+    public boolean addAll(Collection<? extends T> c) {
+        adoptDocIds(c);
+        return super.addAll(c);
+    }
+
+    /**
+     * Inserts all given annotations at the given position, with the same per-element document ID check as
+     * {@link #addAll(Collection)}.
+     *
+     * @throws IllegalArgumentException if any document ID conflicts; in that case nothing is added
+     */
+    @Override
+    public boolean addAll(int index, Collection<? extends T> c) {
+        adoptDocIds(c);
+        return super.addAll(index, c);
+    }
+
+    /**
+     * Reports the document ID of each element to {@link #setDocId(String)} and restores the previous document ID if
+     * one of the elements is rejected, so that a failed bulk insertion leaves this list as it was.
+     */
+    private void adoptDocIds(Collection<? extends T> c) {
+        // A null collection is left to the superclass method called next, which throws the NullPointerException.
+        if (c == null)
+            return;
+        final String previousDocId = docId;
+        try {
+            for (T annotation : c)
+                setDocId(annotation.getDocumentId());
+        } catch (RuntimeException e) {
+            docId = previousDocId;
+            throw e;
+        }
+    }
 
     private String docId;
 
@@ -11,11 +81,18 @@ public String getDocId() {
     }
 
+    /**
+     * Sets the ID of the document whose annotations this list holds.
+     *
+     * @param docId the document ID; {@code null} is not checked and overwrites the current ID
+     * @throws IllegalArgumentException if a different, non-null document ID is already set
+     */
     public void setDocId(String docId) {
+        if (docId != null && this.docId != null && !docId.equals(this.docId))
+            throw new IllegalArgumentException("This annotation list already contains annotations for document with ID " + this.docId + " but the document ID should now be set to " + docId + ".");
         this.docId = docId;
     }
 
     @Override
-
     public String getDocumentId() {
         return docId;
     }
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/AnnotationSource.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/AnnotationSource.java
index d7a1daad9..5a18be30e 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/AnnotationSource.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/AnnotationSource.java
@@ -3,7 +3,12 @@
 import de.julielab.jcore.ae.annotationadder.annotationrepresentations.AnnotationData;
 import org.apache.uima.resource.DataResource;
 
+import java.io.IOException;
+import java.net.URI;
+
 public interface AnnotationSource<T extends AnnotationData> {
-    void initialize(DataResource dataResource);
+    void loadAnnotations(URI annotationUri) throws IOException;
+
+    void initialize(DataResource dataResource) throws IOException;
     T getAnnotations(String id);
 }
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/FileAnnotationSource.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/FileAnnotationSource.java
index 69958d586..845c42c95 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/FileAnnotationSource.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/FileAnnotationSource.java
@@ -24,16 +24,17 @@ public FileAnnotationSource(AnnotationFormat<T> format) {
         this.format = format;
     }
 
-    private void loadAnnotations(URI annotationUri) {
+    @Override
+    public void loadAnnotations(URI annotationUri) throws IOException {
         try (BufferedReader br = UriUtilities.getReaderFromUri(annotationUri)) {
             entitiesByDocId = br.lines().map(format::parse).filter(Objects::nonNull).collect(Collectors.groupingBy(AnnotationData::getDocumentId, Collectors.toCollection(AnnotationList::new)));
-        } catch (IOException e) {
-            e.printStackTrace();
         }
+        if (log.isTraceEnabled())
+            log.trace("Loaded {} entity annotations for {} document IDs.", entitiesByDocId.values().stream().flatMap(AnnotationList::stream).count(), entitiesByDocId.size());
     }
 
     @Override
-    public void initialize(DataResource dataResource) {
+    public void initialize(DataResource dataResource) throws IOException {
         log.info("Loading entity annotations from {}", dataResource.getUri());
         loadAnnotations(dataResource.getUri());
     }
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/InMemoryFileDocumentClassAnnotationProvider.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/InMemoryFileDocumentClassAnnotationProvider.java
index ab95d5759..731f114ce 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/InMemoryFileDocumentClassAnnotationProvider.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/InMemoryFileDocumentClassAnnotationProvider.java
@@ -6,6 +6,8 @@
 import org.apache.uima.resource.DataResource;
 import org.apache.uima.resource.ResourceInitializationException;
 
+import java.io.IOException;
+
 public class InMemoryFileDocumentClassAnnotationProvider implements AnnotationProvider<AnnotationList> {
     private AnnotationSource<AnnotationList<ExternalDocumentClassAnnotation>> annotationSource;
 
@@ -18,7 +20,11 @@ public AnnotationList<ExternalDocumentClassAnnotation> getAnnotations(String id)
     public void load(DataResource dataResource) throws ResourceInitializationException {
         // This logic could be made configurable if required so in the future.
         annotationSource = new FileAnnotationSource(new DocumentClassAnnotationFormat());
-        annotationSource.initialize(dataResource);
+        try {
+            annotationSource.initialize(dataResource);
+        } catch (IOException e) {
+            throw new ResourceInitializationException(e);
+        }
     }
 
 
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/InMemoryFileTextAnnotationProvider.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/InMemoryFileTextAnnotationProvider.java
index 1f6914340..ac89d5b1e 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/InMemoryFileTextAnnotationProvider.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/InMemoryFileTextAnnotationProvider.java
@@ -10,6 +10,7 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.IOException;
 import java.lang.reflect.InvocationTargetException;
 import java.util.Optional;
 
@@ -41,7 +42,11 @@ public void load(DataResource dataResource) throws ResourceInitializationExcepti
             throw new ResourceInitializationException(e);
         }
         annotationSource = new FileAnnotationSource(format);
-        annotationSource.initialize(dataResource);
+        try {
+            annotationSource.initialize(dataResource);
+        } catch (IOException e) {
+            throw new ResourceInitializationException(e);
+        }
     }
 
 
From d8fb38c379feebbf95d903d88512bc7edb8d9445 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 27 Sep 2021 13:20:29 +0200
Subject: [PATCH 100/269] Set the component ID for annotations created by the
 AnnotationAdder.

---
 .../jcore/ae/annotationadder/TextAnnotationListAdder.java       | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/TextAnnotationListAdder.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/TextAnnotationListAdder.java
index e6c433ce6..7626dce18 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/TextAnnotationListAdder.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/TextAnnotationListAdder.java
@@ -62,6 +62,8 @@ else if (configuration.getDefaultUimaType() != null)
                     // that the SHA was the same as it was at time of the original entity tagging.
                     if (a.getStart() >= 0) {
                         final Annotation annotation = JCoReAnnotationTools.getAnnotationByClassName(jCas, uimaType);
+                        if (annotation instanceof de.julielab.jcore.types.Annotation)
+                            ((de.julielab.jcore.types.Annotation)annotation).setComponentId(AnnotationAdderAnnotator.class.getSimpleName());
                         helper.setAnnotationOffsetsRelativeToDocument(annotation, a, configuration);
                         helper.setAnnotationPayloadsToFeatures(annotation, a);
                         log.trace("Adding annotation of type {} with offsets {}-{} to document with ID {}", uimaType, annotation.getBegin(), annotation.getEnd(), annotationList.getDocId());

From beb319b6f0d6bd204a11a4682de9e726d633c917 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 28 Sep 2021 08:20:28 +0200
Subject: [PATCH 101/269] Add support for JATS 1.3 tagset for PMC reader.

According to the 1.2-to-1.3 change description at https://jats.nlm.nih.gov/publishing/tag-library/1.3/chapter/version-1.3-chg.html, the changes introduced in JATS 1.3 do not seem to concern us. The 1.3 tagset is backward compatible with previous JATS versions and does not seem to bring extensions that we would actually use right now.
---
 .../jcore/multiplier/pmc/PMCMultiplier.java   |  5 +++-
 .../jcore/reader/pmc/CasPopulator.java        | 10 ++++++--
 .../reader/pmc/NoDataAvailableException.java  | 23 +++++++++++++++++++
 .../julielab/jcore/reader/pmc/PMCReader.java  |  4 +++-
 .../jcore/reader/pmc/parser/FrontParser.java  |  2 +-
 .../reader/pmc/parser/NxmlDocumentParser.java |  8 +++++++
 6 files changed, 47 insertions(+), 5 deletions(-)
 create mode 100644 jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/NoDataAvailableException.java

diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCMultiplier.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCMultiplier.java
index b723f6215..38d52f4b8 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCMultiplier.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCMultiplier.java
@@ -1,6 +1,7 @@
 package de.julielab.jcore.multiplier.pmc;
 
 import de.julielab.jcore.reader.pmc.CasPopulator;
+import de.julielab.jcore.reader.pmc.NoDataAvailableException;
 import de.julielab.jcore.reader.pmc.parser.ElementParsingException;
 import de.julielab.jcore.types.casmultiplier.JCoReURI;
 import org.apache.uima.analysis_component.JCasMultiplier_ImplBase;
@@ -37,7 +38,7 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException {
         try {
             casPopulator = new CasPopulator(currentUriBatch);
         } catch (IOException e) {
-            log.error("Exception occurred when trying to inizialize the NXML parser", e);
+            log.error("Exception occurred when trying to initialize the NXML parser", e);
             throw new AnalysisEngineProcessException(e);
         }
     }
@@ -60,6 +61,8 @@ public AbstractCas next() throws AnalysisEngineProcessException {
                 return cas;
             } catch (ElementParsingException e) {
                 log.error("Exception occurred why trying to parse {}", next, e);
+            } catch (NoDataAvailableException e) {
+                log.error("Could not populate the CAS due to preceding error. Returning null.");
             }
         }
         return null;
diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
index ff3a1e0f0..61e2851a5 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
@@ -21,7 +21,7 @@ public CasPopulator(Iterator<URI> nxmlIterator) throws IOException {
         nxmlDocumentParser.loadElementPropertyFile("/de/julielab/jcore/reader/pmc/resources/elementproperties.yml");
     }
 
-    public void populateCas(URI nxmlUri, JCas cas) throws ElementParsingException {
+    public void populateCas(URI nxmlUri, JCas cas) throws ElementParsingException, NoDataAvailableException {
         ElementParsingResult result = null;
         URI currentUri = nxmlUri;
         while (currentUri != null && result == null) {
@@ -30,7 +30,13 @@ public void populateCas(URI nxmlUri, JCas cas) throws ElementParsingException {
                 result = nxmlDocumentParser.parse();
             } catch (DocumentParsingException e) {
                 log.warn("Error occurred when trying to read from URI {} (ASCII string: {}): {}. Skipping document.", currentUri, currentUri.toASCIIString(), e.getMessage());
-                currentUri = nxmlIterator.next();
+                if (nxmlIterator.hasNext()) {
+                    currentUri = nxmlIterator.next();
+                } else {
+                    String msg = "Cannot just skip the errored document because there is no next document currently available. Returning without adding any data to the CAS.";
+                    log.warn(msg);
+                    throw new NoDataAvailableException(msg);
+                }
             }
         }
         StringBuilder sb = populateCas(result, new StringBuilder());
diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/NoDataAvailableException.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/NoDataAvailableException.java
new file mode 100644
index 000000000..41a611d26
--- /dev/null
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/NoDataAvailableException.java
@@ -0,0 +1,23 @@
+package de.julielab.jcore.reader.pmc;
+
+public class NoDataAvailableException extends Exception {
+
+    public NoDataAvailableException() {
+    }
+
+    public NoDataAvailableException(String message) {
+        super(message);
+    }
+
+    public NoDataAvailableException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+    public NoDataAvailableException(Throwable cause) {
+        super(cause);
+    }
+
+    public NoDataAvailableException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) {
+        super(message, cause, enableSuppression, writableStackTrace);
+    }
+}
diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/PMCReader.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/PMCReader.java
index d58f3f939..921fc10b5 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/PMCReader.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/PMCReader.java
@@ -54,10 +54,12 @@ public void getNext(JCas cas) throws CollectionException {
             next = pmcFiles.next();
             casPopulator.populateCas(next, cas);
             if (extractIdFromFilename)
-                ((Header)cas.getAnnotationIndex(Header.type).iterator().next()).setDocId(getIdFromFilename(next));
+                ((Header) cas.getAnnotationIndex(Header.type).iterator().next()).setDocId(getIdFromFilename(next));
         } catch (ElementParsingException e) {
             log.error("Exception occurred when trying to parse {}", next, e);
             throw new CollectionException(e);
+        } catch (NoDataAvailableException e) {
+            log.error("Could not populate CAS due to preceding error.");
         }
         completed++;
     }
diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FrontParser.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FrontParser.java
index b21a66aec..e1272094d 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FrontParser.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FrontParser.java
@@ -84,7 +84,7 @@ else if (xPathExists(String.format(pubDateFmt, "pmc-release")))
 					: getXPathValue("/article/front/journal-meta/journal-title-group/journal-title");
 			// there actually might be several abbreviated titles but here, we
 			// only use the first; our type system currently cannot represent
-			// more anyway. One could try decide for an preferred one since the
+			// more anyway. One could try to decide for a preferred one since the
 			// abbrev-type attribute disposes the source of the abbreviated
 			// title (e.g. publisher or nlm-ta).
 			Optional<String> abbrevJournalTitle = nxmlDocumentParser.getTagset() == Tagset.NLM_2_3
diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/NxmlDocumentParser.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/NxmlDocumentParser.java
index 069d038f1..c6a0e837b 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/NxmlDocumentParser.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/NxmlDocumentParser.java
@@ -94,6 +94,8 @@ private void setTagset() throws NavException, DocTypeNotFoundException, DocTypeN
                     tagset = Tagset.JATS_1_0;
                 else if (docType.contains("JATS-archivearticle1-mathml3.dtd"))
                     tagset = Tagset.JATS_1_2_MATH_ML_3;
+                else if (docType.contains("JATS-archivearticle1-3-mathml3.dtd"))
+                    tagset = Tagset.JATS_1_3;
                 else if (docType.contains("journalpublishing.dtd") || docType.contains("archivearticle.dtd"))
                     tagset = Tagset.NLM_2_3;
                 else if (docType.contains("journalpublishing3.dtd") || docType.contains("archivearticle3.dtd"))
@@ -210,6 +212,12 @@ public enum Tagset {
          * @see <url>https://jats.nlm.nih.gov/publishing/tag-library/1.2/index.html</url>
          */
         JATS_1_2_MATH_ML_3,
+        /**
+         * NISO JATS Version 1.3 (ANSI/NISO Z39.96-2021)
+         *
+         * @see <url>https://jats.nlm.nih.gov/publishing/tag-library/1.3/index.html</url>
+         */
+        JATS_1_3,
         /**
          * NLM Journal Publishing DTD v. 2.3
          *

From 53ae91dece4389120e6e5b8829f9deadc23b168a Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 28 Sep 2021 08:26:52 +0200
Subject: [PATCH 102/269] Add fallbacks in case of unknown XML tag sets for PMC
 reader.

Newer JATS versions are - until now - backward compatible with their previous version. Since we don't use any of the special capabilities of any JATS format, we just check for JATS in general and assign the latest version if we haven't handled the exact version.
We do the same for the NLM tagsets.
---
 .../de/julielab/jcore/reader/pmc/parser/FrontParser.java | 4 ++--
 .../jcore/reader/pmc/parser/NxmlDocumentParser.java      | 9 +++++++--
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FrontParser.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FrontParser.java
index e1272094d..af4a2b944 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FrontParser.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FrontParser.java
@@ -79,7 +79,7 @@ else if (xPathExists(String.format(pubDateFmt, "pmc-release")))
 			Optional<String> year = getXPathValue(String.format("/article/front/article-meta/pub-date[@pub-type='%s']/year", pubType));
 			Optional<String> month = getXPathValue(String.format("/article/front/article-meta/pub-date[@pub-type='%s']/month", pubType));
 			Optional<String> day = getXPathValue(String.format("/article/front/article-meta/pub-date[@pub-type='%s']/day", pubType));
-			Optional<String> journalTitle = nxmlDocumentParser.getTagset() == Tagset.NLM_2_3
+			Optional<String> journalTitle = nxmlDocumentParser.getTagset() == Tagset.NLM_2_3 || nxmlDocumentParser.getTagset() == Tagset.NLM_3_0
 					? getXPathValue("/article/front/journal-meta/journal-title")
 					: getXPathValue("/article/front/journal-meta/journal-title-group/journal-title");
 			// there actually might be several abbreviated titles but here, we
@@ -87,7 +87,7 @@ else if (xPathExists(String.format(pubDateFmt, "pmc-release")))
 			// more anyway. One could try to decide for a preferred one since the
 			// abbrev-type attribute disposes the source of the abbreviated
 			// title (e.g. publisher or nlm-ta).
-			Optional<String> abbrevJournalTitle = nxmlDocumentParser.getTagset() == Tagset.NLM_2_3
+			Optional<String> abbrevJournalTitle = nxmlDocumentParser.getTagset() == Tagset.NLM_2_3 || nxmlDocumentParser.getTagset() == Tagset.NLM_3_0
 					? getXPathValue("/article/front/journal-meta/abbrev-journal-title")
 					: getXPathValue("/article/front/journal-meta/journal-title-group/abbrev-journal-title");
 			Optional<String> volume = getXPathValue("/article/front/article-meta/volume");
diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/NxmlDocumentParser.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/NxmlDocumentParser.java
index c6a0e837b..2042b258c 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/NxmlDocumentParser.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/NxmlDocumentParser.java
@@ -100,8 +100,13 @@ else if (docType.contains("journalpublishing.dtd") || docType.contains("archivea
                     tagset = Tagset.NLM_2_3;
                 else if (docType.contains("journalpublishing3.dtd") || docType.contains("archivearticle3.dtd"))
                     tagset = Tagset.NLM_3_0;
-                else
-                    throw new DocTypeNotSupportedException("Unsupported document type: "  + docType);
+                else if (docType.contains("JATS")) {
+                    log.warn("Unknown document type: {}. Assigning the latest JATS tagset in assumption of backward compatibility.", docType);
+                    tagset = Tagset.JATS_1_3;
+                } else if (docType.contains("journalpublishing") || docType.contains("archivearticle")) {
+                    log.warn("Unknown document type: {}. Assigning the latest NLM tagset in assumption of backward compatibility.", docType);
+                    tagset = Tagset.NLM_3_0;
+                }
                 return;
             }
         }

From 69899e5e51e81c9b53a695adffb1714d28d4b689 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 6 Oct 2021 07:35:06 +0200
Subject: [PATCH 103/269] Log the doc ID for documents with long sentences in
 TokenAnnotator.

---
 jcore-jtbd-ae/pom.xml                                    | 5 +++++
 .../de/julielab/jcore/ae/jtbd/main/TokenAnnotator.java   | 9 +++++++--
 .../chunking/ConfigurableChunkerProviderImplAlt.java     | 4 +++-
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/jcore-jtbd-ae/pom.xml b/jcore-jtbd-ae/pom.xml
index c773cf55d..d4a7430a4 100644
--- a/jcore-jtbd-ae/pom.xml
+++ b/jcore-jtbd-ae/pom.xml
@@ -86,6 +86,11 @@
             <artifactId>jcore-types</artifactId>
             <version>${jcore-types-version}</version>
         </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-utilities</artifactId>
+            <version>${project.parent.version}</version>
+        </dependency>
         <dependency>
             <groupId>cc.mallet</groupId>
             <artifactId>mallet</artifactId>
diff --git a/jcore-jtbd-ae/src/main/java/de/julielab/jcore/ae/jtbd/main/TokenAnnotator.java b/jcore-jtbd-ae/src/main/java/de/julielab/jcore/ae/jtbd/main/TokenAnnotator.java
index 1ddd664f7..c073983a2 100644
--- a/jcore-jtbd-ae/src/main/java/de/julielab/jcore/ae/jtbd/main/TokenAnnotator.java
+++ b/jcore-jtbd-ae/src/main/java/de/julielab/jcore/ae/jtbd/main/TokenAnnotator.java
@@ -26,6 +26,7 @@
 import de.julielab.jcore.ae.jtbd.Unit;
 import de.julielab.jcore.types.Sentence;
 import de.julielab.jcore.types.Token;
+import de.julielab.jcore.utility.JCoReTools;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -153,8 +154,12 @@ public void process(final JCas aJCas) throws AnalysisEngineProcessException {
 				int length = sentence.getEnd() - sentence
 						.getBegin();
 				LOGGER.debug("going to next sentence having length: " + length);
-                if (length > 1000)
-                    LOGGER.warn("Current sentence has length {}.", length);
+                if (length > 1000) {
+                    if (LOGGER.isWarnEnabled()) {
+                        String docId = JCoReTools.getDocId(aJCas);
+                        LOGGER.warn("Current sentence has length {} (document ID {}).", length, docId);
+                    }
+                }
                 final String text = sentence.getCoveredText();
                 writeTokensToCAS(text, sentence.getBegin(), aJCas);
             }
diff --git a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ConfigurableChunkerProviderImplAlt.java b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ConfigurableChunkerProviderImplAlt.java
index aa1c07623..8d9e63b44 100644
--- a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ConfigurableChunkerProviderImplAlt.java
+++ b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ConfigurableChunkerProviderImplAlt.java
@@ -333,7 +333,7 @@ public boolean getCaseSensitive() {
 
     }
 
-    private InputStream readStreamFromFileSystemOrClassPath(String filePath) {
+    private InputStream readStreamFromFileSystemOrClassPath(String filePath) throws FileNotFoundException {
         InputStream is = null;
         File file = new File(filePath);
         if (file.exists()) {
@@ -351,6 +351,8 @@ private InputStream readStreamFromFileSystemOrClassPath(String filePath) {
             } catch (IOException e) {
                 e.printStackTrace();
             }
+        if (is == null)
+            throw new FileNotFoundException("Could not read contents from " + filePath);
         return is;
     }
 }

From 133c0de8ccdd4ac3d78be339e0bc84f030949a5e Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 29 Nov 2021 17:45:13 +0100
Subject: [PATCH 104/269] Activate GNormPlus features and re-add Maven pom
 section to copy dependencies to target/lib.

---
 jcore-banner-ae/pom.xml                       | 20 ++++++++++++++++++-
 .../main/java/banner/tagging/FeatureSet.java  |  6 +++---
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/jcore-banner-ae/pom.xml b/jcore-banner-ae/pom.xml
index 6235ec58d..ec5a25e53 100644
--- a/jcore-banner-ae/pom.xml
+++ b/jcore-banner-ae/pom.xml
@@ -71,7 +71,7 @@
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
         <version>2.6.0-SNAPSHOT</version>
-        <relativePath>..</relativePath>
+        <relativePath>../pom.xml</relativePath>
     </parent>
     <licenses>
         <license>
@@ -79,4 +79,22 @@
             <url>https://opensource.org/licenses/BSD-2-Clause</url>
         </license>
     </licenses>
+    <build>
+        <plugins>
+            <plugin>
+                <artifactId>maven-dependency-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <phase>prepare-package</phase>
+                        <goals>
+                            <goal>copy-dependencies</goal>
+                        </goals>
+                        <configuration>
+                            <outputDirectory>${project.build.directory}/lib</outputDirectory>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
 </project>
diff --git a/jcore-banner-ae/src/main/java/banner/tagging/FeatureSet.java b/jcore-banner-ae/src/main/java/banner/tagging/FeatureSet.java
index df6548577..009154e3c 100644
--- a/jcore-banner-ae/src/main/java/banner/tagging/FeatureSet.java
+++ b/jcore-banner-ae/src/main/java/banner/tagging/FeatureSet.java
@@ -111,9 +111,9 @@ private SerialPipes createPipe(TagFormat format, Lemmatiser lemmatiser, dragon.n
 		//siddhartha added these;
 		pipes.add(simFindFilename == null ? new Noop() : new SimFind(simFindFilename));
 		
-//		pipes.add(new ChemicalSuffix("CHEM_SUFF="));
-//		pipes.add(new MentionTypeHint("MENTION_TYPE="));
-//		pipes.add(new ProteinSymbols("PROT_SYM="));
+		pipes.add(new ChemicalSuffix("CHEM_SUFF="));
+		pipes.add(new MentionTypeHint("MENTION_TYPE="));
+		pipes.add(new ProteinSymbols("PROT_SYM="));
 		
 		pipes.add(new OffsetConjunctions(new int[][] { { -2 }, { -1 }, { 1 }, { 2 } }));
 		pipes.add(new TokenSequence2FeatureVectorSequence(true, true));

From 40f6ff6f2128d23dcbbb6cb88cb7999ecf1cfd0f Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 14 Jan 2022 11:33:50 +0100
Subject: [PATCH 105/269] Use the recursive deletion of the index file for
 PersistentStringIndexMapProvider.

File.delete() only deletes files and empty directories. As a result, Lucene indexes, which are non-empty directories, were not deleted even though they needed an update.
---
 .../jcore/consumer/es/sharedresources/LuceneIndex.java   | 2 +-
 .../PersistentStringIndexMapProvider.java                | 9 ++++++---
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/LuceneIndex.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/LuceneIndex.java
index a28c0a5c1..907c333a6 100644
--- a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/LuceneIndex.java
+++ b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/LuceneIndex.java
@@ -37,7 +37,7 @@ public LuceneIndex(String indexDirectory) {
                 IndexWriterConfig iwc = new IndexWriterConfig();
                 iw = new IndexWriter(directory, iwc);
             } else {
-                log.debug("Index directory {} already");
+                log.debug("Index directory {} already exists.", indexDirectory);
             }
         } catch (IOException e) {
             log.error("could not initialize Lucene index", e);
diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentStringIndexMapProvider.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentStringIndexMapProvider.java
index 93dd296f2..3a9334cb9 100644
--- a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentStringIndexMapProvider.java
+++ b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentStringIndexMapProvider.java
@@ -3,6 +3,7 @@
 import com.google.common.cache.CacheBuilder;
 import com.google.common.cache.CacheLoader;
 import com.google.common.cache.LoadingCache;
+import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.FilenameUtils;
 import org.apache.commons.lang3.NotImplementedException;
 import org.apache.uima.resource.DataResource;
@@ -125,13 +126,15 @@ public void load(DataResource aData) throws ResourceInitializationException {
             indexFile = new File("es-consumer-cache", resourceFileName);
             if (resourceFile.exists() && indexFile.exists() && resourceFile.lastModified() > indexFile.lastModified()) {
                 log.info("Resource file {} is newer than the existing cached index at {}. Creating new index.", resourceFile, indexFile);
-                indexFile.delete();
+                if (indexFile.isDirectory())
+                    FileUtils.deleteQuietly(indexFile);
+                else
+                    indexFile.delete();
             } else {
                 boolean indexFileExisted = indexFile.exists();
                 if (!indexFileExisted) {
                     log.info("Creating persistent cache for resource {} at {}.", uri, indexFile);
-                }
-                else {
+                } else {
                     log.info("Using existing persistent cache {} for resource {}.", indexFile, uri);
                     loadData = false;
                 }

From bc4c0bb0ac01d9f9f2e7eedd66e25aa9ce76ad02 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 17 Jan 2022 09:59:03 +0100
Subject: [PATCH 106/269] Fix #125.

The bug actually was in JCoReCondensedDocumentText rather than JSBD itself. New tests have been added to ensure the correct behavior.
---
 .../jcore/ae/jsbd/main/SentenceAnnotator.java |  2 +-
 .../utility/JCoReCondensedDocumentText.java   | 21 +++++++--
 .../JCoReCondensedDocumentTextTest.java       | 47 ++++++++++++++++++-
 3 files changed, 62 insertions(+), 8 deletions(-)

diff --git a/jcore-jsbd-ae/src/main/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotator.java b/jcore-jsbd-ae/src/main/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotator.java
index fe5cbd833..220eea9bb 100644
--- a/jcore-jsbd-ae/src/main/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotator.java
+++ b/jcore-jsbd-ae/src/main/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotator.java
@@ -193,7 +193,7 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException {
                         int end = borders.get(i);
 
                         // skip leading whites spaces
-                        while (start < end && Character.isWhitespace(aJCas.getDocumentText().charAt(start)))
+                        while (start < end && (Character.isWhitespace(documentText.getCodensedText().charAt(start))))
                             ++start;
 
                         // get the string between the current annotation borders and recognized sentences
diff --git a/jcore-utilities/src/main/java/de/julielab/jcore/utility/JCoReCondensedDocumentText.java b/jcore-utilities/src/main/java/de/julielab/jcore/utility/JCoReCondensedDocumentText.java
index 7067539ad..34b0e1f93 100644
--- a/jcore-utilities/src/main/java/de/julielab/jcore/utility/JCoReCondensedDocumentText.java
+++ b/jcore-utilities/src/main/java/de/julielab/jcore/utility/JCoReCondensedDocumentText.java
@@ -109,21 +109,26 @@ public void buildMap(JCas cas, Set<String> cutAwayTypes) throws ClassNotFoundExc
                 // Adapt offsets to remove superfluous white spaces from the condensed text
                 boolean precedingCharacterIsWS = lastBegin == 0 || Character.isWhitespace(cas.getDocumentText().charAt(lastBegin - 1));
                 boolean succeedingCharacterIsWS = lastEnd < cas.getDocumentText().length() && Character.isWhitespace(cas.getDocumentText().charAt(lastEnd));
-                if (precedingCharacterIsWS && succeedingCharacterIsWS)
+                boolean extendLastEnd = precedingCharacterIsWS && succeedingCharacterIsWS;
+                if (extendLastEnd)
                     ++lastEnd;
                 if (precedingCharacterIsWS && end >= cas.getDocumentText().length())
                     --begin;
                 // The current cut away annotation begins after the previous cut away annotation, thus there is no
                 // overlap and we can add the current state to the maps.
                 cutSum += lastEnd - lastBegin;
-                int condensedPosition = lastEnd - cutSum + 1;
-                condensedPos2SumCutMap.put(condensedPosition, cutSum);
+                int condensedPosition = lastEnd - cutSum;
+                if (condensedPosition == lastBegin && !extendLastEnd)
+                    ++condensedPosition;
                 // For original offsets we need to be able to know where the begin and the end of
                 // the cut away annotation was. This is exploited in getCondensedOffsetForOriginalOffset()
                 originalPos2SumCutMap.put(lastBegin, lastCutSum);
                 originalPos2SumCutMap.put(lastEnd, cutSum);
                 lastBegin = begin;
                 lastCutSum = cutSum;
+                if (condensedPosition + cutSum >= cas.getDocumentText().length())
+                    cutSum = cas.getDocumentText().length() -1 - condensedPosition;
+                condensedPos2SumCutMap.put(condensedPosition, cutSum);
                 sb.append(cas.getDocumentText(), lastEnd, begin);
             } else if (lastEnd < 0) {
                 // This is the first annotation
@@ -146,10 +151,16 @@ public void buildMap(JCas cas, Set<String> cutAwayTypes) throws ClassNotFoundExc
             if (precedingCharacterIsWS && (succeedingCharacterIsWS || lastEnd >= cas.getDocumentText().length()))
                 ++lastEnd;
             cutSum += lastEnd - lastBegin;
-            int condensedPosition = lastEnd - cutSum + 1;
-            condensedPos2SumCutMap.put(condensedPosition, cutSum);
+            int condensedPosition = lastEnd - cutSum;
             originalPos2SumCutMap.put(lastBegin, lastCutSum);
             originalPos2SumCutMap.put(lastEnd, cutSum);
+            // Avoid the situation where the computed original position includes the last cut away annotation.
+            // This can happen when a cut away annotation appears at the very end of the text. Then, the cutSum
+            // accounts for this last annotation at the end of the condensed text which would result in an original
+            // position _after_ the cut away annotation.
+            if (condensedPosition + cutSum >= cas.getDocumentText().length())
+                cutSum = cas.getDocumentText().length() -1 - condensedPosition;
+            condensedPos2SumCutMap.put(condensedPosition, cutSum);
         }
         // If lastEnd is still -1, we just did not find any of the cut away annotations. Thus, we just copy the whole text.
         if (lastEnd == -1)
diff --git a/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReCondensedDocumentTextTest.java b/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReCondensedDocumentTextTest.java
index 86ef54bf9..470baa250 100644
--- a/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReCondensedDocumentTextTest.java
+++ b/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReCondensedDocumentTextTest.java
@@ -30,7 +30,7 @@ public void testReduce() throws Exception {
 		assertEquals(13, condensedText.getOriginalOffsetForCondensedOffset(13));
 		assertEquals(15, condensedText.getOriginalOffsetForCondensedOffset(14));
 		assertEquals(30, condensedText.getOriginalOffsetForCondensedOffset(29));
-		
+
 		assertEquals(0, condensedText.getCondensedOffsetForOriginalOffset(0));
 		assertEquals(13, condensedText.getCondensedOffsetForOriginalOffset(13));
 		assertEquals(14, condensedText.getCondensedOffsetForOriginalOffset(15));
@@ -54,7 +54,7 @@ public void testReduce2() throws Exception {
 		assertEquals(13, condensedText.getOriginalOffsetForCondensedOffset(13));
 		assertEquals(15, condensedText.getOriginalOffsetForCondensedOffset(14));
 		assertEquals(31, condensedText.getOriginalOffsetForCondensedOffset(29));
-		
+
 		assertEquals(0, condensedText.getCondensedOffsetForOriginalOffset(0));
 		assertEquals(13, condensedText.getCondensedOffsetForOriginalOffset(13));
 		assertEquals(14, condensedText.getCondensedOffsetForOriginalOffset(15));
@@ -85,6 +85,49 @@ public void testReduce3() throws Exception {
 		assertEquals("This sentence has multiple references. This is a second sentence.", condensedText.getCodensedText());
 	}
 
+	@Test
+	public void testReduce4() throws Exception {
+		JCas jcas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
+				"de.julielab.jcore.types.jcore-document-structure-types");
+		jcas.setDocumentText("This sentence\n1\nhas references.");
+		InternalReference ref1 = new InternalReference(jcas, 14, 15);
+		ref1.addToIndexes();
+
+		JCoReCondensedDocumentText condensedText = new JCoReCondensedDocumentText(jcas,
+				new HashSet<>(Arrays.asList(InternalReference.class.getCanonicalName())));
+		assertEquals("This sentence\nhas references.", condensedText.getCodensedText());
+		assertEquals(0, condensedText.getOriginalOffsetForCondensedOffset(0));
+		assertEquals(16, condensedText.getOriginalOffsetForCondensedOffset(14));
+	}
+
+	@Test
+	public void testReduce5() throws Exception {
+		JCas jcas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
+				"de.julielab.jcore.types.jcore-document-structure-types");
+		jcas.setDocumentText("Leptin is an adipose-derived protein secreted by adipocytes and is expressed in adipose tissue.\n" +
+				"1\n" +
+				"It has the role of being a key regulator of several physiological pathways including body weight and regulation of food intake, inflammation, endocrine function, energy homeostasis, bone metabolism and immunity.\n" +
+				"2\n" +
+				"3\n" +
+				"Results from various studies indicate that leptin may play a significant role in bone physiology, independent of the central nervous system.\n");
+		InternalReference ref1 = new InternalReference(jcas, 96, 97);
+		ref1.addToIndexes();
+		InternalReference ref2 = new InternalReference(jcas, 310, 311);
+		ref2.addToIndexes();
+		InternalReference ref3 = new InternalReference(jcas, 312, 313);
+		ref3.addToIndexes();
+
+		JCoReCondensedDocumentText condensedText = new JCoReCondensedDocumentText(jcas,
+				new HashSet<>(Arrays.asList(InternalReference.class.getCanonicalName())));
+		System.out.println(condensedText.getCodensedText());
+		assertEquals("Leptin is an adipose-derived protein secreted by adipocytes and is expressed in adipose tissue.\n" +
+				"It has the role of being a key regulator of several physiological pathways including body weight and regulation of food intake, inflammation, endocrine function, energy homeostasis, bone metabolism and immunity.\n" +
+				"Results from various studies indicate that leptin may play a significant role in bone physiology, independent of the central nervous system.\n", condensedText.getCodensedText());
+		assertEquals(98, condensedText.getOriginalOffsetForCondensedOffset(96));
+		assertEquals(314, condensedText.getOriginalOffsetForCondensedOffset(308));
+	}
+
+
 	@Test
 	public void testCondensedOffsetsWithinCutawayAnnotations() throws Exception {
 		JCas jcas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",

From cdff1d5c7f742b71e0dc37e9ec3ed86e12c4fc88 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 17 Jan 2022 10:01:20 +0100
Subject: [PATCH 107/269] Fixes #126 (ESConsumer cache index updates).

---
 .../es/sharedresources/AbstractMapProvider.java   |  6 ++++--
 .../PersistentIndexAddonTermsProvider.java        | 15 +++++++++++++--
 .../PersistentStringIndexMapProvider.java         | 15 +++++++++++----
 3 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/AbstractMapProvider.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/AbstractMapProvider.java
index 7a181d55a..a02b81797 100644
--- a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/AbstractMapProvider.java
+++ b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/AbstractMapProvider.java
@@ -42,12 +42,14 @@ public void load(DataResource aData) throws ResourceInitializationException {
                 throw new IOException("Resource " + aData.getUri() + " not found");
             }
             br = new BufferedReader(is);
-            map = new HashMap<>();
+//            map = new HashMap<>();
             String line;
             String splitExpression = "\t";
+            int numEntries = 0;
             while ((line = br.readLine()) != null) {
                 if (line.trim().length() == 0 || line.startsWith("#"))
                     continue;
+                ++numEntries;
                 String[] split = line.split(splitExpression);
                 if (split.length != 2) {
                     splitExpression = "\\s+";
@@ -61,7 +63,7 @@ public void load(DataResource aData) throws ResourceInitializationException {
                 else
                     put(getKey(split[0]), getValue(split[1]));
             }
-            log.info("Finished reading resource {} and got {} elements.", aData.getUri(), map.size());
+            log.info("Finished reading resource {} and got {} entries.", aData.getUri(), numEntries);
         } catch (IOException e) {
             throw new ResourceInitializationException(e);
         } finally {
diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentIndexAddonTermsProvider.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentIndexAddonTermsProvider.java
index b20d466ef..b98514ee3 100644
--- a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentIndexAddonTermsProvider.java
+++ b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentIndexAddonTermsProvider.java
@@ -3,6 +3,7 @@
 import com.google.common.cache.CacheBuilder;
 import com.google.common.cache.CacheLoader;
 import com.google.common.cache.LoadingCache;
+import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.FilenameUtils;
 import org.apache.commons.lang3.NotImplementedException;
 import org.apache.uima.resource.DataResource;
@@ -12,6 +13,7 @@
 import org.slf4j.Logger;
 
 import java.io.File;
+import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URI;
 import java.time.Duration;
@@ -115,7 +117,7 @@ public Optional<String[]> load(String s) {
     public void load(DataResource aData) throws ResourceInitializationException {
         // prepare the persistent index
         URI uri = aData.getUri();
-        File indexFile;
+        File indexFile = null;
         boolean loadData = true;
         try {
             File resourceFile = new File(uri);
@@ -123,7 +125,13 @@ public void load(DataResource aData) throws ResourceInitializationException {
             indexFile = new File("es-consumer-cache", resourceFileName);
             if (resourceFile.exists() && indexFile.exists() && resourceFile.lastModified() > indexFile.lastModified()) {
                 log.info("Resource file {} is newer than the existing cached index at {}. Creating new index.", resourceFile, indexFile);
-                indexFile.delete();
+                if (indexFile.isDirectory()) {
+                    log.info("Deleting index directory {}", indexFile);
+                    FileUtils.deleteDirectory(indexFile);
+                } else {
+                    log.info("Deleting index file {}", indexFile);
+                    indexFile.delete();
+                }
             } else {
                 boolean indexFileExisted = indexFile.exists();
                 if (!indexFileExisted) {
@@ -138,6 +146,9 @@ public void load(DataResource aData) throws ResourceInitializationException {
         } catch (MalformedURLException e) {
             log.error("Could obtain file name from resource URI '{}'", uri, e);
             throw new IllegalStateException(e);
+        } catch (IOException e) {
+            log.error("Could not delete index file {}", indexFile, e);
+            throw new ResourceInitializationException(e);
         }
         if (loadData) {
             super.load(aData);
diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentStringIndexMapProvider.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentStringIndexMapProvider.java
index 3a9334cb9..2551cedea 100644
--- a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentStringIndexMapProvider.java
+++ b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentStringIndexMapProvider.java
@@ -13,6 +13,7 @@
 import org.slf4j.Logger;
 
 import java.io.File;
+import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URI;
 import java.time.Duration;
@@ -118,7 +119,7 @@ protected void put(String key, String value) {
     public void load(DataResource aData) throws ResourceInitializationException {
         // prepare the persistent index
         URI uri = aData.getUri();
-        File indexFile;
+        File indexFile = null;
         boolean loadData = true;
         try {
             File resourceFile = new File(uri);
@@ -126,10 +127,13 @@ public void load(DataResource aData) throws ResourceInitializationException {
             indexFile = new File("es-consumer-cache", resourceFileName);
             if (resourceFile.exists() && indexFile.exists() && resourceFile.lastModified() > indexFile.lastModified()) {
                 log.info("Resource file {} is newer than the existing cached index at {}. Creating new index.", resourceFile, indexFile);
-                if (indexFile.isDirectory())
-                    FileUtils.deleteQuietly(indexFile);
-                else
+                if (indexFile.isDirectory()) {
+                    log.info("Deleting index directory {}", indexFile);
+                    FileUtils.deleteDirectory(indexFile);
+                } else {
+                    log.info("Deleting index file {}", indexFile);
                     indexFile.delete();
+                }
             } else {
                 boolean indexFileExisted = indexFile.exists();
                 if (!indexFileExisted) {
@@ -143,6 +147,9 @@ public void load(DataResource aData) throws ResourceInitializationException {
         } catch (MalformedURLException e) {
             log.error("Could obtain file name from resource URI '{}'", uri, e);
             throw new IllegalStateException(e);
+        } catch (IOException e) {
+            log.error("Could not delete index file {}", indexFile, e);
+            throw new ResourceInitializationException(e);
         }
         if (loadData) {
             super.load(aData);

From 2570a478d4e982e7fd16f7ea6c58e796aa4154ce Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 17 Jan 2022 10:14:47 +0100
Subject: [PATCH 108/269] Update to CoStoSys 1.6.1-SNAPSHOT.

---
 jedis-parent/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jedis-parent/pom.xml b/jedis-parent/pom.xml
index 3daef871c..0b8807ef9 100644
--- a/jedis-parent/pom.xml
+++ b/jedis-parent/pom.xml
@@ -17,7 +17,7 @@
             <dependency>
                 <groupId>de.julielab</groupId>
                 <artifactId>costosys</artifactId>
-                <version>1.6.0-SNAPSHOT</version>
+                <version>1.6.1-SNAPSHOT</version>
             </dependency>
             <dependency>
                 <groupId>de.julielab</groupId>

From 3529c58a9f9739d4b76a8c6b852033402c8a76d4 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 17 Jan 2022 10:33:36 +0100
Subject: [PATCH 109/269] Use test model instead of local model for JSBD tests.

The local model was only used to reproduce the exact environment in which the original error occurred. This is no longer necessary, and the absolute path to the local model made the tests fail on any other machine.
---
 .../de/julielab/jcore/ae/jsbd/main/SentenceAnnotatorTest.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotatorTest.java b/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotatorTest.java
index 5a5b23a47..c3d0aa8a9 100644
--- a/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotatorTest.java
+++ b/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotatorTest.java
@@ -300,7 +300,7 @@ public void testErrordoc() throws Exception {
         XmiCasDeserializer.deserialize(new FileInputStream(Path.of("src", "test", "resources", "errordocs", "PMC5478802.xmi").toFile()), jCas.getCas());
         JCasUtil.select(jCas, Sentence.class).forEach(Annotation::removeFromIndexes);
         AnalysisEngine jsbd = AnalysisEngineFactory.createEngine(SentenceAnnotator.class, SentenceAnnotator.PARAM_MODEL_FILE,
-                "/Users/faessler/Coding/git/jcore-projects/jcore-jsbd-ae-biomedical-english/src/main/resources/de/julielab/jcore/ae/jsbd/model/jsbd-biomed-oversampled-abstracts-split-at-punctuation.mod.gz",
+                "de/julielab/jcore/ae/jsbd/model/test-model.gz",
                 SentenceAnnotator.PARAM_MAX_SENTENCE_LENGTH, 1000,
                 SentenceAnnotator.PARAM_SENTENCE_DELIMITER_TYPES, new String[]{
                         "de.julielab.jcore.types.Title", "de.julielab.jcore.types.AbstractText", "de.julielab.jcore.types.AbstractSectionHeading", "de.julielab.jcore.types.AbstractSection", "de.julielab.jcore.types.Section", "de.julielab.jcore.types.Paragraph", "de.julielab.jcore.types.Zone", "de.julielab.jcore.types.Caption", "de.julielab.jcore.types.Figure", "de.julielab.jcore.types.Table"},
@@ -322,7 +322,7 @@ public void testErrordoc2() throws Exception {
         XmiCasDeserializer.deserialize(new FileInputStream(Path.of("src", "test", "resources", "errordocs", "PMC8205280.xmi").toFile()), jCas.getCas());
         JCasUtil.select(jCas, Sentence.class).forEach(Annotation::removeFromIndexes);
         AnalysisEngine jsbd = AnalysisEngineFactory.createEngine(SentenceAnnotator.class, SentenceAnnotator.PARAM_MODEL_FILE,
-                "/Users/faessler/Coding/git/jcore-projects/jcore-jsbd-ae-biomedical-english/src/main/resources/de/julielab/jcore/ae/jsbd/model/jsbd-biomed-oversampled-abstracts-split-at-punctuation.mod.gz",
+                "de/julielab/jcore/ae/jsbd/model/test-model.gz",
                 SentenceAnnotator.PARAM_MAX_SENTENCE_LENGTH, 1000,
                 SentenceAnnotator.PARAM_SENTENCE_DELIMITER_TYPES, new String[]{
                         "de.julielab.jcore.types.Title", "de.julielab.jcore.types.AbstractText", "de.julielab.jcore.types.AbstractSectionHeading", "de.julielab.jcore.types.AbstractSection", "de.julielab.jcore.types.Section", "de.julielab.jcore.types.Paragraph", "de.julielab.jcore.types.Zone", "de.julielab.jcore.types.Caption", "de.julielab.jcore.types.Figure", "de.julielab.jcore.types.Table"},

From 8c74873cdde6bd19d916cc14b90d273667364dab Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 20 Jan 2022 09:42:23 +0100
Subject: [PATCH 110/269] Add an option to omit internal references with
 letters in them from document text condensation.

References like "Figure 2 shows..." are embedded in the text and should thus not be removed.
---
 .../jcore/ae/jsbd/main/SentenceAnnotator.java |  2 +-
 .../ae/jsbd/main/SentenceAnnotatorTest.java   | 28 +++++++++
 .../test/resources/errordocs/PMC5070457.xmi   |  5 ++
 .../utility/JCoReCondensedDocumentText.java   | 60 ++++++++++++++++++-
 .../JCoReCondensedDocumentTextTest.java       | 25 +++++++-
 5 files changed, 116 insertions(+), 4 deletions(-)
 create mode 100644 jcore-jsbd-ae/src/test/resources/errordocs/PMC5070457.xmi

diff --git a/jcore-jsbd-ae/src/main/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotator.java b/jcore-jsbd-ae/src/main/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotator.java
index 220eea9bb..d89ca98b7 100644
--- a/jcore-jsbd-ae/src/main/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotator.java
+++ b/jcore-jsbd-ae/src/main/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotator.java
@@ -155,7 +155,7 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException {
             JCoReCondensedDocumentText documentText;
             try {
                 // If there are no cut-away types, the document text will remain unchanged.
-                documentText = new JCoReCondensedDocumentText(aJCas, cutAwayTypes, Set.of(','));
+                documentText = new JCoReCondensedDocumentText(aJCas, cutAwayTypes, Set.of(','), true);
             } catch (ClassNotFoundException e1) {
                 LOGGER.error("Could not create the text without annotations to be cut away in document {}", JCoReTools.getDocId(aJCas), e1);
                 throw new AnalysisEngineProcessException(e1);
diff --git a/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotatorTest.java b/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotatorTest.java
index c3d0aa8a9..1e820d945 100644
--- a/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotatorTest.java
+++ b/jcore-jsbd-ae/src/test/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotatorTest.java
@@ -332,5 +332,33 @@ public void testErrordoc2() throws Exception {
         assertThatCode(() -> jsbd.process(jCas.getCas())).doesNotThrowAnyException();
     }
 
+    @Test
+    public void testErrordoc3() throws Exception {
+        // This document has multiple sentences that begin with a Figure reference mention ("Figure 2 shows...").
+        // By cutting away all the internal reference annotation spans for sentence tagging, the "Figure 2" was
+        // ultimately appended to the previous sentence, causing errors. Thus, the option to omit internal references
+        // with letters was added to the condensed document text. This is a test that everything is working as intended.
+        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
+                "de.julielab.jcore.types.jcore-document-structure-pubmed-types", "de.julielab.jcore.types.jcore-document-meta-pubmed-types",
+                "de.julielab.jcore.types.extensions.jcore-document-meta-extension-types", "de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types");
+
+        XmiCasDeserializer.deserialize(new FileInputStream(Path.of("src", "test", "resources", "errordocs", "PMC5070457.xmi").toFile()), jCas.getCas());
+        JCasUtil.select(jCas, Sentence.class).forEach(Annotation::removeFromIndexes);
+        AnalysisEngine jsbd = AnalysisEngineFactory.createEngine(SentenceAnnotator.class, SentenceAnnotator.PARAM_MODEL_FILE,
+                "de/julielab/jcore/ae/jsbd/model/test-model.gz",
+                SentenceAnnotator.PARAM_MAX_SENTENCE_LENGTH, 1000,
+                SentenceAnnotator.PARAM_SENTENCE_DELIMITER_TYPES, new String[]{
+                        "de.julielab.jcore.types.Title", "de.julielab.jcore.types.AbstractText", "de.julielab.jcore.types.AbstractSectionHeading", "de.julielab.jcore.types.AbstractSection", "de.julielab.jcore.types.Section", "de.julielab.jcore.types.Paragraph", "de.julielab.jcore.types.Zone", "de.julielab.jcore.types.Caption", "de.julielab.jcore.types.Figure", "de.julielab.jcore.types.Table"},
+                SentenceAnnotator.PARAM_CUT_AWAY_TYPES, new String[]{de.julielab.jcore.types.pubmed.InternalReference.class.getCanonicalName()}
+        );
+        assertThatCode(() -> jsbd.process(jCas.getCas())).doesNotThrowAnyException();
+        Collection<Sentence> sentences = JCasUtil.select(jCas, Sentence.class);
+        for (var s : sentences) {
+            String coveredText = s.getCoveredText();
+            if (coveredText.contains("They concluded"))
+                assertThat(coveredText).endsWith("filament19.");
+        }
+    }
+
 }
 
diff --git a/jcore-jsbd-ae/src/test/resources/errordocs/PMC5070457.xmi b/jcore-jsbd-ae/src/test/resources/errordocs/PMC5070457.xmi
new file mode 100644
index 000000000..dd0c227ca
--- /dev/null
+++ b/jcore-jsbd-ae/src/test/resources/errordocs/PMC5070457.xmi
@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<xmi:XMI xmlns:xmi="http://www.omg.org/XMI" xmlns:tcas="http:///uima/tcas.ecore" xmlns:cas="http:///uima/cas.ecore"
+         xmlns:pubmed="http:///de/julielab/jcore/types/pubmed.ecore"
+         xmlns:ext="http:///de/julielab/jcore/types/ext.ecore" xmlns:types="http:///de/julielab/jcore/types.ecore"
+         xmi:version="2.0"><cas:NULL xmi:id="0"/><pubmed:InternalReference xmi:id="1754" sofa="1161" begin="1870" end="1871" reftype="bibliography" refid="b1-8_27"/><pubmed:InternalReference xmi:id="2282" sofa="1161" begin="1914" end="1915" reftype="bibliography" refid="b2-8_27"/><pubmed:InternalReference xmi:id="2097" sofa="1161" begin="1917" end="1918" reftype="bibliography" refid="b3-8_27"/><pubmed:InternalReference xmi:id="1676" sofa="1161" begin="2015" end="2016" reftype="bibliography" refid="b2-8_27"/><pubmed:InternalReference xmi:id="2115" sofa="1161" begin="2047" end="2048" reftype="bibliography" refid="b3-8_27"/><pubmed:InternalReference xmi:id="468" sofa="1161" begin="2103" end="2104" reftype="bibliography" refid="b2-8_27"/><pubmed:InternalReference xmi:id="1562" sofa="1161" begin="2106" end="2107" reftype="bibliography" refid="b3-8_27"/><pubmed:InternalReference xmi:id="2570" sofa="1161" begin="4163" end="4171" reftype="figure" refid="f1-8_27"/><pubmed:InternalReference xmi:id="2070" sofa="1161" begin="4750" end="4761" reftype="figure" refid="f1-8_27"/><pubmed:InternalReference xmi:id="1024" sofa="1161" begin="5283" end="5294" reftype="figure" refid="f1-8_27"/><pubmed:InternalReference xmi:id="2512" sofa="1161" begin="5408" end="5409" reftype="bibliography" refid="b4-8_27"/><pubmed:InternalReference xmi:id="1242" sofa="1161" begin="5641" end="5642" reftype="bibliography" refid="b4-8_27"/><pubmed:InternalReference xmi:id="3228" sofa="1161" begin="5812" end="5813" reftype="bibliography" refid="b4-8_27"/><pubmed:InternalReference xmi:id="3157" sofa="1161" begin="6474" end="6480" reftype="displayformula" refid="FD1"/><pubmed:InternalReference xmi:id="2158" sofa="1161" begin="6485" end="6486" reftype="displayformula" refid="FD3"/><pubmed:InternalReference xmi:id="3139" sofa="1161" begin="6674" end="6679" reftype="displayformula" refid="FD4"/><pubmed:InternalReference xmi:id="893" sofa="1161" begin="6937" end="6942" reftype="displayformula" 
refid="FD4"/><pubmed:InternalReference xmi:id="2246" sofa="1161" begin="7037" end="7038" reftype="bibliography" refid="b5-8_27"/><pubmed:InternalReference xmi:id="2034" sofa="1161" begin="7066" end="7067" reftype="bibliography" refid="b6-8_27"/><pubmed:InternalReference xmi:id="947" sofa="1161" begin="7102" end="7103" reftype="bibliography" refid="b2-8_27"/><pubmed:InternalReference xmi:id="1855" sofa="1161" begin="7110" end="7115" reftype="displayformula" refid="FD5"/><pubmed:InternalReference xmi:id="324" sofa="1161" begin="7343" end="7344" reftype="bibliography" refid="b7-8_27"/><pubmed:InternalReference xmi:id="2416" sofa="1161" begin="7358" end="7359" reftype="bibliography" refid="b7-8_27"/><pubmed:InternalReference xmi:id="110" sofa="1161" begin="7820" end="7821" reftype="bibliography" refid="b7-8_27"/><pubmed:InternalReference xmi:id="573" sofa="1161" begin="8508" end="8509" reftype="bibliography" refid="b7-8_27"/><pubmed:InternalReference xmi:id="388" sofa="1161" begin="8541" end="8546" reftype="displayformula" refid="FD4"/><pubmed:InternalReference xmi:id="675" sofa="1161" begin="8893" end="8894" reftype="bibliography" refid="b7-8_27"/><pubmed:InternalReference xmi:id="1033" sofa="1161" begin="9037" end="9042" reftype="displayformula" refid="FD7"/><pubmed:InternalReference xmi:id="2726" sofa="1161" begin="9152" end="9153" reftype="bibliography" refid="b7-8_27"/><pubmed:InternalReference xmi:id="693" sofa="1161" begin="9164" end="9169" reftype="displayformula" refid="FD9"/><pubmed:InternalReference xmi:id="3197" sofa="1161" begin="9368" end="9373" reftype="displayformula" refid="FD6"/><pubmed:InternalReference xmi:id="2611" sofa="1161" begin="9531" end="9532" reftype="bibliography" refid="b7-8_27"/><pubmed:InternalReference xmi:id="3046" sofa="1161" begin="9829" end="9834" reftype="displayformula" refid="FD6"/><pubmed:InternalReference xmi:id="1195" sofa="1161" begin="10279" end="10280" reftype="bibliography" refid="b8-8_27"/><pubmed:InternalReference 
xmi:id="1002" sofa="1161" begin="10298" end="10299" reftype="bibliography" refid="b9-8_27"/><pubmed:InternalReference xmi:id="2486" sofa="1161" begin="10566" end="10568" reftype="bibliography" refid="b10-8_27"/><pubmed:InternalReference xmi:id="684" sofa="1161" begin="10764" end="10766" reftype="bibliography" refid="b11-8_27"/><pubmed:InternalReference xmi:id="658" sofa="1161" begin="10930" end="10932" reftype="bibliography" refid="b12-8_27"/><pubmed:InternalReference xmi:id="3253" sofa="1161" begin="11609" end="11611" reftype="bibliography" refid="b13-8_27"/><pubmed:InternalReference xmi:id="298" sofa="1161" begin="11803" end="11808" reftype="displayformula" refid="FD3"/><pubmed:InternalReference xmi:id="3055" sofa="1161" begin="11947" end="11952" reftype="displayformula" refid="FD2"/><pubmed:InternalReference xmi:id="1" sofa="1161" begin="12009" end="12010" reftype="bibliography" refid="b5-8_27"/><pubmed:InternalReference xmi:id="884" sofa="1161" begin="12182" end="12184" reftype="bibliography" refid="b14-8_27"/><pubmed:InternalReference xmi:id="2753" sofa="1161" begin="12353" end="12355" reftype="bibliography" refid="b15-8_27"/><pubmed:InternalReference xmi:id="1588" sofa="1161" begin="12635" end="12637" reftype="bibliography" refid="b16-8_27"/><pubmed:InternalReference xmi:id="119" sofa="1161" begin="12645" end="12647" reftype="bibliography" refid="b17-8_27"/><pubmed:InternalReference xmi:id="3019" sofa="1161" begin="12665" end="12667" reftype="bibliography" refid="b18-8_27"/><pubmed:InternalReference xmi:id="144" sofa="1161" begin="12683" end="12685" reftype="bibliography" refid="b15-8_27"/><pubmed:InternalReference xmi:id="2142" sofa="1161" begin="13896" end="13898" reftype="bibliography" refid="b19-8_27"/><pubmed:InternalReference xmi:id="615" sofa="1161" begin="13914" end="13916" reftype="bibliography" refid="b20-8_27"/><pubmed:InternalReference xmi:id="1354" sofa="1161" begin="14189" end="14191" reftype="bibliography" 
refid="b20-8_27"/><pubmed:InternalReference xmi:id="2441" sofa="1161" begin="14309" end="14311" reftype="bibliography" refid="b19-8_27"/><pubmed:InternalReference xmi:id="1478" sofa="1161" begin="14313" end="14321" reftype="figure" refid="f2-8_27"/><pubmed:InternalReference xmi:id="1763" sofa="1161" begin="14399" end="14410" reftype="figure" refid="f2-8_27"/><pubmed:InternalReference xmi:id="67" sofa="1161" begin="14880" end="14891" reftype="figure" refid="f2-8_27"/><pubmed:InternalReference xmi:id="315" sofa="1161" begin="15123" end="15125" reftype="bibliography" refid="b21-8_27"/><pubmed:InternalReference xmi:id="755" sofa="1161" begin="15338" end="15349" reftype="figure" refid="f2-8_27"/><pubmed:InternalReference xmi:id="213" sofa="1161" begin="15623" end="15634" reftype="figure" refid="f1-8_27"/><pubmed:InternalReference xmi:id="85" sofa="1161" begin="15905" end="15916" reftype="figure" refid="f2-8_27"/><pubmed:InternalReference xmi:id="2917" sofa="1161" begin="16555" end="16557" reftype="bibliography" refid="b19-8_27"/><pubmed:InternalReference xmi:id="1519" sofa="1161" begin="16567" end="16569" reftype="bibliography" refid="b20-8_27"/><pubmed:InternalReference xmi:id="1902" sofa="1161" begin="16571" end="16582" reftype="figure" refid="f2-8_27"/><pubmed:InternalReference xmi:id="451" sofa="1161" begin="17020" end="17022" reftype="bibliography" refid="b12-8_27"/><pubmed:InternalReference xmi:id="3314" sofa="1161" begin="17393" end="17401" reftype="figure" refid="f3-8_27"/><pubmed:InternalReference xmi:id="1571" sofa="1161" begin="17505" end="17513" reftype="figure" refid="f2-8_27"/><pubmed:InternalReference xmi:id="2771" sofa="1161" begin="17515" end="17526" reftype="figure" refid="f3-8_27"/><pubmed:InternalReference xmi:id="2735" sofa="1161" begin="17648" end="17659" reftype="figure" refid="f2-8_27"/><pubmed:InternalReference xmi:id="2043" sofa="1161" begin="17699" end="17725" reftype="figure" refid="f2-8_27"/><pubmed:InternalReference xmi:id="649" sofa="1161" 
begin="17782" end="17800" reftype="figure" refid="f3-8_27"/><pubmed:InternalReference xmi:id="2106" sofa="1161" begin="17909" end="17911" reftype="bibliography" refid="b22-8_27"/><pubmed:InternalReference xmi:id="1074" sofa="1161" begin="17919" end="17930" reftype="figure" refid="f2-8_27"/><pubmed:InternalReference xmi:id="2052" sofa="1161" begin="18077" end="18088" reftype="figure" refid="f3-8_27"/><pubmed:InternalReference xmi:id="528" sofa="1161" begin="18108" end="18119" reftype="figure" refid="f3-8_27"/><pubmed:InternalReference xmi:id="2986" sofa="1161" begin="19005" end="19013" reftype="figure" refid="f4-8_27"/><pubmed:InternalReference xmi:id="2007" sofa="1161" begin="19443" end="19444" reftype="bibliography" refid="b2-8_27"/><pubmed:InternalReference xmi:id="1965" sofa="1161" begin="19652" end="19663" reftype="figure" refid="f4-8_27"/><pubmed:InternalReference xmi:id="2629" sofa="1161" begin="20314" end="20325" reftype="figure" refid="f2-8_27"/><pubmed:InternalReference xmi:id="3071" sofa="1161" begin="20431" end="20432" reftype="bibliography" refid="b2-8_27"/><pubmed:InternalReference xmi:id="537" sofa="1161" begin="20542" end="20544" reftype="bibliography" refid="b23-8_27"/><pubmed:InternalReference xmi:id="2744" sofa="1161" begin="20578" end="20580" reftype="bibliography" refid="b24-8_27"/><pubmed:InternalReference xmi:id="2647" sofa="1161" begin="20669" end="20671" reftype="bibliography" refid="b24-8_27"/><pubmed:InternalReference xmi:id="1410" sofa="1161" begin="20941" end="20942" reftype="bibliography" refid="b2-8_27"/><pubmed:InternalReference xmi:id="10" sofa="1161" begin="21190" end="21201" reftype="figure" refid="f4-8_27"/><pubmed:InternalReference xmi:id="2088" sofa="1161" begin="21283" end="21284" reftype="bibliography" refid="b2-8_27"/><pubmed:InternalReference xmi:id="2943" sofa="1161" begin="21422" end="21423" reftype="bibliography" refid="b2-8_27"/><pubmed:InternalReference xmi:id="2638" sofa="1161" begin="21464" end="21470" 
reftype="displayformula" refid="FD19a"/><pubmed:InternalReference xmi:id="2881" sofa="1161" begin="21470" end="21470" reftype="displayformula" refid="FD19b"/><pubmed:InternalReference xmi:id="2552" sofa="1161" begin="21592" end="21593" reftype="bibliography" refid="b2-8_27"/><pubmed:InternalReference xmi:id="2848" sofa="1161" begin="21647" end="21655" reftype="figure" refid="f5-8_27"/><pubmed:InternalReference xmi:id="240" sofa="1161" begin="21725" end="21736" reftype="figure" refid="f5-8_27"/><pubmed:InternalReference xmi:id="1938" sofa="1161" begin="21992" end="21998" reftype="displayformula" refid="FD17"/><pubmed:InternalReference xmi:id="1929" sofa="1161" begin="22044" end="22050" reftype="displayformula" refid="FD16"/><pubmed:InternalReference xmi:id="256" sofa="1161" begin="22157" end="22168" reftype="figure" refid="f5-8_27"/><pubmed:InternalReference xmi:id="2602" sofa="1161" begin="22387" end="22393" reftype="displayformula" refid="FD17"/><pubmed:InternalReference xmi:id="956" sofa="1161" begin="22412" end="22418" reftype="displayformula" refid="FD16"/><pubmed:InternalReference xmi:id="802" sofa="1161" begin="22762" end="22768" reftype="displayformula" refid="FD19a"/><pubmed:InternalReference xmi:id="1828" sofa="1161" begin="22768" end="22768" reftype="displayformula" refid="FD19b"/><pubmed:InternalReference xmi:id="3089" sofa="1161" begin="23301" end="23303" reftype="bibliography" refid="b25-8_27"/><pubmed:InternalReference xmi:id="564" sofa="1161" begin="23306" end="23313" reftype="displayformula" refid="FD16"/><pubmed:InternalReference xmi:id="2273" sofa="1161" begin="23314" end="23316" reftype="displayformula" refid="FD19a"/><pubmed:InternalReference xmi:id="2593" sofa="1161" begin="23316" end="23316" reftype="displayformula" refid="FD19b"/><pubmed:InternalReference xmi:id="555" sofa="1161" begin="23601" end="23602" reftype="bibliography" refid="b8-8_27"/><pubmed:InternalReference xmi:id="902" sofa="1161" begin="23620" end="23621" reftype="bibliography" 
refid="b9-8_27"/><pubmed:InternalReference xmi:id="2959" sofa="1161" begin="23716" end="23718" reftype="bibliography" refid="b26-8_27"/><pubmed:InternalReference xmi:id="1794" sofa="1161" begin="24186" end="24195" reftype="figure" refid="f3-8_27"/><pubmed:InternalReference xmi:id="1122" sofa="1161" begin="24390" end="24398" reftype="figure" refid="f6-8_27"/><pubmed:InternalReference xmi:id="2167" sofa="1161" begin="25003" end="25011" reftype="figure" refid="f9-8_27"/><pubmed:InternalReference xmi:id="3332" sofa="1161" begin="25189" end="25191" reftype="bibliography" refid="b23-8_27"/><pubmed:InternalReference xmi:id="911" sofa="1161" begin="25280" end="25286" reftype="displayformula" refid="FD16"/><pubmed:InternalReference xmi:id="1956" sofa="1161" begin="26033" end="26044" reftype="figure" refid="f2-8_27"/><pubmed:InternalReference xmi:id="1846" sofa="1161" begin="26629" end="26631" reftype="bibliography" refid="b27-8_27"/><pubmed:InternalReference xmi:id="2355" sofa="1161" begin="27412" end="27413" reftype="bibliography" refid="b2-8_27"/><pubmed:InternalReference xmi:id="2503" sofa="1161" begin="27415" end="27416" reftype="bibliography" refid="b3-8_27"/><pubmed:InternalReference xmi:id="19" sofa="1161" begin="27445" end="27446" reftype="bibliography" refid="b2-8_27"/><pubmed:InternalReference xmi:id="2432" sofa="1161" begin="27448" end="27449" reftype="bibliography" refid="b3-8_27"/><pubmed:InternalReference xmi:id="1545" sofa="1161" begin="27916" end="27922" reftype="displayformula" refid="FD19a"/><pubmed:InternalReference xmi:id="519" sofa="1161" begin="27922" end="27922" reftype="displayformula" refid="FD19b"/><pubmed:InternalReference xmi:id="2717" sofa="1161" begin="27925" end="27933" reftype="figure" refid="f7-8_27"/><pubmed:InternalReference xmi:id="2193" sofa="1161" begin="28120" end="28121" reftype="bibliography" refid="b2-8_27"/><pubmed:InternalReference xmi:id="820" sofa="1161" begin="28133" end="28140" reftype="displayformula" 
refid="FD16"/><pubmed:InternalReference xmi:id="2377" sofa="1161" begin="28141" end="28143" reftype="displayformula" refid="FD19a"/><pubmed:InternalReference xmi:id="42" sofa="1161" begin="28143" end="28143" reftype="displayformula" refid="FD19b"/><pubmed:InternalReference xmi:id="2450" sofa="1161" begin="28145" end="28153" reftype="figure" refid="f8-8_27"/><pubmed:InternalReference xmi:id="1947" sofa="1161" begin="28237" end="28238" reftype="bibliography" refid="b5-8_27"/><pubmed:InternalReference xmi:id="3181" sofa="1161" begin="28382" end="28392" reftype="figure" refid="f11-8_27"/><pubmed:InternalReference xmi:id="1649" sofa="1161" begin="28397" end="28399" reftype="figure" refid="f12-8_27"/><pubmed:InternalReference xmi:id="2656" sofa="1161" begin="28462" end="28463" reftype="bibliography" refid="b2-8_27"/><pubmed:InternalReference xmi:id="2839" sofa="1161" begin="28533" end="28534" reftype="bibliography" refid="b2-8_27"/><pubmed:InternalReference xmi:id="1363" sofa="1161" begin="28643" end="28645" reftype="bibliography" refid="b30-8_27"/><pubmed:InternalReference xmi:id="859" sofa="1161" begin="28647" end="28655" reftype="figure" refid="f9-8_27"/><pubmed:InternalReference xmi:id="1597" sofa="1161" begin="28689" end="28690" reftype="bibliography" refid="b2-8_27"/><pubmed:InternalReference xmi:id="725" sofa="1161" begin="28733" end="28742" reftype="figure" refid="f10-8_27"/><pubmed:InternalReference xmi:id="1213" sofa="1161" begin="28830" end="28831" reftype="bibliography" refid="b2-8_27"/><pubmed:InternalReference xmi:id="1323" sofa="1161" begin="28841" end="28847" reftype="displayformula" refid="FD2"/><pubmed:InternalReference xmi:id="2459" sofa="1161" begin="28849" end="28850" reftype="displayformula" refid="FD4"/><pubmed:InternalReference xmi:id="2291" sofa="1161" begin="28855" end="28857" reftype="displayformula" refid="FD14"/><pubmed:InternalReference xmi:id="1837" sofa="1161" begin="28896" end="28905" reftype="figure" 
refid="f8-8_27"/><pubmed:InternalReference xmi:id="2815" sofa="1161" begin="28910" end="28911" reftype="figure" refid="f9-8_27"/><pubmed:InternalReference xmi:id="3219" sofa="1161" begin="29011" end="29012" reftype="bibliography" refid="b7-8_27"/><pubmed:InternalReference xmi:id="938" sofa="1161" begin="29470" end="29475" reftype="displayformula" refid="FD8"/><pubmed:InternalReference xmi:id="1920" sofa="1161" begin="29770" end="29775" reftype="displayformula" refid="FD7"/><pubmed:InternalReference xmi:id="1186" sofa="1161" begin="29867" end="29873" reftype="displayformula" refid="FD11a"/><pubmed:InternalReference xmi:id="1452" sofa="1161" begin="29873" end="29873" reftype="displayformula" refid="FD11b"/><pubmed:InternalReference xmi:id="2899" sofa="1161" begin="29873" end="29873" reftype="displayformula" refid="FD11c"/><pubmed:InternalReference xmi:id="2124" sofa="1161" begin="29966" end="29975" reftype="figure" refid="f10-8_27"/><pubmed:InternalReference xmi:id="1049" sofa="1161" begin="29995" end="29996" reftype="bibliography" refid="b7-8_27"/><pubmed:InternalReference xmi:id="357" sofa="1161" begin="30246" end="30257" reftype="figure" refid="f2-8_27"/><pubmed:InternalReference xmi:id="2762" sofa="1161" begin="30378" end="30386" reftype="figure" refid="f8-8_27"/><pubmed:InternalReference xmi:id="1058" sofa="1161" begin="30505" end="30506" reftype="bibliography" refid="b3-8_27"/><pubmed:InternalReference xmi:id="2400" sofa="1161" begin="30628" end="30637" reftype="figure" refid="f11-8_27"/><pubmed:InternalReference xmi:id="2926" sofa="1161" begin="30786" end="30787" reftype="bibliography" refid="b2-8_27"/><pubmed:InternalReference xmi:id="1785" sofa="1161" begin="30821" end="30829" reftype="figure" refid="f8-8_27"/><pubmed:InternalReference xmi:id="2908" sofa="1161" begin="30855" end="30864" reftype="figure" refid="f7-8_27"/><pubmed:InternalReference xmi:id="135" sofa="1161" begin="30866" end="30867" reftype="figure" refid="f9-8_27"/><pubmed:InternalReference 
xmi:id="3271" sofa="1161" begin="30872" end="30874" reftype="figure" refid="f10-8_27"/><pubmed:InternalReference xmi:id="404" sofa="1161" begin="30989" end="30990" reftype="bibliography" refid="b3-8_27"/><pubmed:InternalReference xmi:id="2620" sofa="1161" begin="30992" end="30999" reftype="displayformula" refid="FD17"/><pubmed:InternalReference xmi:id="2536" sofa="1161" begin="31000" end="31002" reftype="displayformula" refid="FD19a"/><pubmed:InternalReference xmi:id="1911" sofa="1161" begin="31002" end="31002" reftype="displayformula" refid="FD19b"/><pubmed:InternalReference xmi:id="1503" sofa="1161" begin="31206" end="31216" reftype="figure" refid="f11-8_27"/><pubmed:InternalReference xmi:id="606" sofa="1161" begin="31292" end="31301" reftype="figure" refid="f11-8_27"/><pubmed:InternalReference xmi:id="1731" sofa="1161" begin="31394" end="31396" reftype="bibliography" refid="b24-8_27"/><pubmed:InternalReference xmi:id="2061" sofa="1161" begin="31519" end="31525" reftype="displayformula" refid="FD17"/><pubmed:InternalReference xmi:id="204" sofa="1161" begin="31644" end="31656" reftype="figure" refid="f11-8_27"/><pubmed:InternalReference xmi:id="1803" sofa="1161" begin="31676" end="31677" reftype="bibliography" refid="b3-8_27"/><pubmed:InternalReference xmi:id="3115" sofa="1161" begin="31820" end="31832" reftype="figure" refid="f11-8_27"/><pubmed:InternalReference xmi:id="94" sofa="1161" begin="32127" end="32139" reftype="figure" refid="f12-8_27"/><pubmed:InternalReference xmi:id="2995" sofa="1161" begin="32362" end="32371" reftype="figure" refid="f2-8_27"/><pubmed:InternalReference xmi:id="546" sofa="1161" begin="32421" end="32423" reftype="bibliography" refid="b15-8_27"/><pubmed:InternalReference xmi:id="624" sofa="1161" begin="32522" end="32523" reftype="bibliography" refid="b3-8_27"/><pubmed:InternalReference xmi:id="1260" sofa="1161" begin="32625" end="32627" reftype="bibliography" refid="b31-8_27"/><pubmed:InternalReference xmi:id="2255" sofa="1161" 
begin="32688" end="32690" reftype="bibliography" refid="b32-8_27"/><pubmed:InternalReference xmi:id="2079" sofa="1161" begin="32698" end="32700" reftype="bibliography" refid="b33-8_27"/><pubmed:InternalReference xmi:id="1168" sofa="1161" begin="33177" end="33189" reftype="figure" refid="f11-8_27"/><pubmed:InternalReference xmi:id="2324" sofa="1161" begin="33339" end="33348" reftype="figure" refid="f13-8_27"/><pubmed:InternalReference xmi:id="1871" sofa="1161" begin="33403" end="33404" reftype="bibliography" refid="b3-8_27"/><pubmed:InternalReference xmi:id="489" sofa="1161" begin="33536" end="33544" reftype="figure" refid="f8-8_27"/><pubmed:InternalReference xmi:id="2264" sofa="1161" begin="33593" end="33595" reftype="bibliography" refid="b31-8_27"/><pubmed:InternalReference xmi:id="1819" sofa="1161" begin="33687" end="33689" reftype="bibliography" refid="b32-8_27"/><pubmed:InternalReference xmi:id="977" sofa="1161" begin="33947" end="33953" reftype="displayformula" refid="FD19a"/><pubmed:InternalReference xmi:id="1177" sofa="1161" begin="33953" end="33953" reftype="displayformula" refid="FD19b"/><pubmed:InternalReference xmi:id="231" sofa="1161" begin="34058" end="34067" reftype="figure" refid="f13-8_27"/><pubmed:InternalReference xmi:id="764" sofa="1161" begin="34359" end="34368" reftype="figure" refid="f13-8_27"/><pubmed:InternalReference xmi:id="1640" sofa="1161" begin="34454" end="34466" reftype="figure" refid="f11-8_27"/><pubmed:InternalReference xmi:id="289" sofa="1161" begin="34791" end="34800" reftype="figure" refid="f14-8_27"/><pubmed:InternalReference xmi:id="3106" sofa="1161" begin="34949" end="34950" reftype="bibliography" refid="b3-8_27"/><pubmed:InternalReference xmi:id="1113" sofa="1161" begin="34998" end="35007" reftype="figure" refid="f15-8_27"/><pubmed:InternalReference xmi:id="3287" sofa="1161" begin="35120" end="35121" reftype="bibliography" refid="b3-8_27"/><pubmed:InternalReference xmi:id="929" sofa="1161" begin="35417" end="35422" 
reftype="displayformula" refid="FD6"/><pubmed:InternalReference xmi:id="3028" sofa="1161" begin="35649" end="35650" reftype="bibliography" refid="b7-8_27"/><pubmed:InternalReference xmi:id="2977" sofa="1161" begin="36047" end="36048" reftype="bibliography" refid="b2-8_27"/><pubmed:InternalReference xmi:id="2133" sofa="1161" begin="36564" end="36572" reftype="figure" refid="f6-8_27"/><pubmed:InternalReference xmi:id="2229" sofa="1161" begin="36792" end="36800" reftype="figure" refid="f9-8_27"/><pubmed:InternalReference xmi:id="1974" sofa="1161" begin="37189" end="37190" reftype="bibliography" refid="b8-8_27"/><pubmed:InternalReference xmi:id="2679" sofa="1161" begin="37208" end="37209" reftype="bibliography" refid="b9-8_27"/><pubmed:InternalReference xmi:id="1251" sofa="1161" begin="37320" end="37326" reftype="displayformula" refid="FD17"/><pubmed:InternalReference xmi:id="1487" sofa="1161" begin="37532" end="37534" reftype="bibliography" refid="b11-8_27"/><pubmed:InternalReference xmi:id="3323" sofa="1161" begin="37554" end="37556" reftype="bibliography" refid="b12-8_27"/><pubmed:InternalReference xmi:id="1667" sofa="1161" begin="37899" end="37908" reftype="figure" refid="f7-8_27"/><pubmed:InternalReference xmi:id="1436" sofa="1161" begin="37909" end="37910" reftype="figure" refid="f8-8_27"/><pubmed:InternalReference xmi:id="597" sofa="1161" begin="37915" end="37917" reftype="figure" refid="f12-8_27"/><pubmed:InternalReference xmi:id="3341" sofa="1161" begin="37918" end="37920" reftype="figure" refid="f15-8_27"/><pubmed:InternalReference xmi:id="3148" sofa="1161" begin="38027" end="38032" reftype="figure" refid="f3-8_27"/><pubmed:InternalReference xmi:id="2780" sofa="1161" begin="38037" end="38043" reftype="figure" refid="f14-8_27"/><pubmed:InternalReference xmi:id="2025" sofa="1161" begin="38318" end="38319" reftype="bibliography" refid="b2-8_27"/><pubmed:InternalReference xmi:id="3244" sofa="1161" begin="38321" end="38322" reftype="bibliography" 
refid="b3-8_27"/><pubmed:InternalReference xmi:id="2688" sofa="1161" begin="38403" end="38412" reftype="figure" refid="f13-8_27"/><pubmed:InternalReference xmi:id="3037" sofa="1161" begin="38685" end="38687" reftype="bibliography" refid="b34-8_27"/><pubmed:InternalReference xmi:id="1388" sofa="1161" begin="38763" end="38765" reftype="bibliography" refid="b35-8_27"/><pubmed:InternalReference xmi:id="2176" sofa="1161" begin="38771" end="38779" reftype="figure" refid="f2-8_27"/><pubmed:InternalReference xmi:id="836" sofa="1161" begin="39070" end="39072" reftype="bibliography" refid="b36-8_27"/><pubmed:InternalReference xmi:id="2789" sofa="1161" begin="39161" end="39162" reftype="bibliography" refid="b2-8_27"/><pubmed:InternalReference xmi:id="2308" sofa="1161" begin="39287" end="39288" reftype="bibliography" refid="b2-8_27"/><pubmed:InternalReference xmi:id="920" sofa="1161" begin="39359" end="39361" reftype="bibliography" refid="b37-8_27"/><pubmed:InternalReference xmi:id="379" sofa="1161" begin="39623" end="39631" reftype="figure" refid="f6-8_27"/><pubmed:InternalReference xmi:id="187" sofa="1161" begin="39633" end="39640" reftype="displayformula" refid="FD18a"/><pubmed:InternalReference xmi:id="2872" sofa="1161" begin="39640" end="39640" reftype="displayformula" refid="FD18b"/><pubmed:InternalReference xmi:id="3080" sofa="1161" begin="39645" end="39647" reftype="displayformula" refid="FD19a"/><pubmed:InternalReference xmi:id="640" sofa="1161" begin="39647" end="39647" reftype="displayformula" refid="FD19b"/><pubmed:InternalReference xmi:id="2798" sofa="1161" begin="39720" end="39722" reftype="bibliography" refid="b38-8_27"/><types:Paragraph xmi:id="718" sofa="1161" begin="87" end="1745" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1512" sofa="1161" begin="1762" end="2333" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="350" sofa="1161" begin="2334" end="2875" 
componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1496" sofa="1161" begin="2922" end="3194" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1292" sofa="1161" begin="3256" end="4102" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="3064" sofa="1161" begin="4163" end="4494" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="582" sofa="1161" begin="4495" end="5125" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="3190" sofa="1161" begin="5126" end="6164" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="3166" sofa="1161" begin="6165" end="6432" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="2579" sofa="1161" begin="6464" end="6871" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1372" sofa="1161" begin="6872" end="6967" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1714" sofa="1161" begin="6968" end="7287" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="2409" sofa="1161" begin="7345" end="7793" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1106" sofa="1161" begin="7794" end="9374" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="2832" sofa="1161" begin="9375" end="9604" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1694" sofa="1161" begin="9675" end="11289" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="153" sofa="1161" begin="9785" end="9815" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1147" sofa="1161" begin="9816" end="10035" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="2386" sofa="1161" begin="10036" end="10084" 
componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="2710" sofa="1161" begin="10085" end="10188" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="2425" sofa="1161" begin="10189" end="10261" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1626" sofa="1161" begin="10262" end="10681" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1445" sofa="1161" begin="10682" end="10750" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1067" sofa="1161" begin="10751" end="11204" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1042" sofa="1161" begin="11205" end="11288" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1099" sofa="1161" begin="11393" end="12041" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="829" sofa="1161" begin="12042" end="12619" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="249" sofa="1161" begin="12620" end="13816" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="877" sofa="1161" begin="13881" end="14312" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1339" sofa="1161" begin="14313" end="16570" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1299" sofa="1161" begin="16571" end="17392" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="427" sofa="1161" begin="17393" end="18265" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="2521" sofa="1161" begin="18313" end="18849" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="3280" sofa="1161" begin="18850" end="20301" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="2672" sofa="1161" begin="20302" end="21523" 
componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="773" sofa="1161" begin="21524" end="22841" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1633" sofa="1161" begin="22842" end="23344" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1864" sofa="1161" begin="23345" end="23622" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="413" sofa="1161" begin="23701" end="25115" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="2586" sofa="1161" begin="25149" end="26568" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="2317" sofa="1161" begin="26569" end="27149" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="711" sofa="1161" begin="27210" end="27494" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="780" sofa="1161" begin="27495" end="28056" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="2857" sofa="1161" begin="28057" end="28400" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="420" sofa="1161" begin="28401" end="28732" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="2545" sofa="1161" begin="28733" end="28935" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="2468" sofa="1161" begin="28936" end="30245" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="35" sofa="1161" begin="30246" end="31446" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1332" sofa="1161" begin="31447" end="32424" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="3237" sofa="1161" begin="32425" end="32691" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="103" sofa="1161" begin="32692" end="33338" 
componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="845" sofa="1161" begin="33339" end="34324" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="2151" sofa="1161" begin="34325" end="34790" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="2952" sofa="1161" begin="34791" end="34997" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="2393" sofa="1161" begin="34998" end="35168" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1154" sofa="1161" begin="35195" end="35332" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="3132" sofa="1161" begin="35333" end="35363" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1991" sofa="1161" begin="35364" end="35722" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="397" sofa="1161" begin="35723" end="36049" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1812" sofa="1161" begin="36050" end="36356" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="128" sofa="1161" begin="36357" end="36405" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="633" sofa="1161" begin="36406" end="36801" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1740" sofa="1161" begin="36802" end="36874" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="180" sofa="1161" begin="36875" end="37210" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="787" sofa="1161" begin="37211" end="37279" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1619" sofa="1161" begin="37280" end="37810" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="852" sofa="1161" begin="37811" end="37898" 
componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="2665" sofa="1161" begin="37899" end="38016" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="28" sofa="1161" begin="38017" end="38323" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="2202" sofa="1161" begin="38324" end="38767" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1235" sofa="1161" begin="38768" end="39073" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="2209" sofa="1161" begin="39074" end="39163" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="173" sofa="1161" begin="39164" end="39289" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="1747" sofa="1161" begin="39290" end="39632" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Paragraph xmi:id="3004" sofa="1161" begin="39633" end="39794" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="443" sofa="1161" begin="0" end="86" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="document"/><types:Title xmi:id="747" sofa="1161" begin="1746" end="1748" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="307" sofa="1161" begin="2876" end="2878" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="3011" sofa="1161" begin="3195" end="3198" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="333" sofa="1161" begin="4085" end="4088" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="3098" sofa="1161" begin="4103" end="4106" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="667" sofa="1161" begin="6127" end="6130" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="3173" sofa="1161" begin="6418" end="6421" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title 
xmi:id="1284" sofa="1161" begin="6433" end="6436" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="3124" sofa="1161" begin="6503" end="6506" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="986" sofa="1161" begin="6951" end="6954" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="281" sofa="1161" begin="7122" end="7125" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="1315" sofa="1161" begin="7288" end="7291" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="2528" sofa="1161" begin="8555" end="8558" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="2185" sofa="1161" begin="8722" end="8725" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="1554" sofa="1161" begin="8823" end="8826" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="273" sofa="1161" begin="9051" end="9055" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="1091" sofa="1161" begin="9179" end="9184" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="265" sofa="1161" begin="9225" end="9230" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="460" sofa="1161" begin="9271" end="9276" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="794" sofa="1161" begin="9605" end="9607" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="3306" sofa="1161" begin="11289" end="11291" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="1131" sofa="1161" begin="11320" end="11323" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="1139" sofa="1161" begin="11775" end="11779" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="2807" sofa="1161" begin="12020" end="12024" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title 
xmi:id="2300" sofa="1161" begin="13794" end="13798" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="2238" sofa="1161" begin="13817" end="13820" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="1346" sofa="1161" begin="14712" end="14716" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="994" sofa="1161" begin="18266" end="18269" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="2935" sofa="1161" begin="20828" end="20832" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="51" sofa="1161" begin="21068" end="21072" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="1580" sofa="1161" begin="21303" end="21308" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="1276" sofa="1161" begin="21328" end="21333" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="1983" sofa="1161" begin="21425" end="21430" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="511" sofa="1161" begin="21445" end="21450" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="2495" sofa="1161" begin="23623" end="23626" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="589" sofa="1161" begin="25116" end="25119" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="1419" sofa="1161" begin="27150" end="27152" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="2478" sofa="1161" begin="31865" end="31869" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="1083" sofa="1161" begin="32244" end="32248" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="2824" sofa="1161" begin="32301" end="32305" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="59" sofa="1161" begin="35169" end="35171" 
componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:Title xmi:id="2864" sofa="1161" begin="38050" end="38054" componentId="de.julielab.jcore.reader.pmc.PMCReader"/><types:SectionTitle xmi:id="2561" sofa="1161" begin="1749" end="1761" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="2890" sofa="1161" begin="2879" end="2921" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="2968" sofa="1161" begin="3199" end="3255" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="2333" sofa="1161" begin="4107" end="4162" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="1658" sofa="1161" begin="6437" end="6463" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="1306" sofa="1161" begin="7292" end="7344" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="434" sofa="1161" begin="9608" end="9674" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="868" sofa="1161" begin="11292" end="11319" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="1685" sofa="1161" begin="11324" end="11392" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="2016" sofa="1161" begin="13821" end="13880" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="341" sofa="1161" begin="18270" end="18312" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="76" sofa="1161" begin="23627" end="23700" componentId="de.julielab.jcore.reader.pmc.PMCReader" 
titleType="section" depth="0"/><types:SectionTitle xmi:id="702" sofa="1161" begin="25120" end="25148" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="222" sofa="1161" begin="27153" end="27209" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:SectionTitle xmi:id="1379" sofa="1161" begin="35172" end="35194" componentId="de.julielab.jcore.reader.pmc.PMCReader" titleType="section" depth="0"/><types:Section xmi:id="1701" sofa="1161" begin="1746" end="2876" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="2561" depth="0" label="1."/><types:Section xmi:id="2216" sofa="1161" begin="2876" end="9605" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="2890" depth="0" label="2."/><types:Section xmi:id="1465" sofa="1161" begin="3195" end="4103" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="2968" depth="1" label="2.1"/><types:Section xmi:id="2697" sofa="1161" begin="4103" end="6433" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="2333" depth="1" label="2.2"/><types:Section xmi:id="1222" sofa="1161" begin="6433" end="7288" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="1658" depth="1" label="2.3"/><types:Section xmi:id="498" sofa="1161" begin="7288" end="9605" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="1306" depth="1" label="2.4"/><types:Section xmi:id="366" sofa="1161" begin="9605" end="11289" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="434" depth="0" label="3."/><types:Section xmi:id="734" sofa="1161" begin="11289" end="27150" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="868" depth="0" label="4."/><types:Section xmi:id="1606" sofa="1161" begin="11320" end="13817" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="1685" depth="1" label="4.1"/><types:Section xmi:id="160" sofa="1161" 
begin="13817" end="18266" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="2016" depth="1" label="4.2"/><types:Section xmi:id="2342" sofa="1161" begin="18266" end="23623" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="341" depth="1" label="4.3"/><types:Section xmi:id="1397" sofa="1161" begin="23623" end="25116" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="76" depth="1" label="4.4"/><types:Section xmi:id="3206" sofa="1161" begin="25116" end="27150" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="702" depth="1" label="4.5"/><types:Section xmi:id="1011" sofa="1161" begin="27150" end="35169" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="222" depth="0" label="5."/><types:Section xmi:id="1772" sofa="1161" begin="35169" end="39795" componentId="de.julielab.jcore.reader.pmc.PMCReader" sectionHeading="1379" depth="0" label="6."/><pubmed:OtherID xmi:id="196" sofa="1161" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" id="27857605" source="PubMed"/><types:AuthorInfo xmi:id="477" sofa="1161" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="Hiroyuki" affiliation="af2-8_27" lastName="Ohshima"/><types:AuthorInfo xmi:id="965" sofa="1161" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" foreName="Toshio" affiliation="af1-8_27" lastName="Mitsui"/><types:Keyword xmi:id="811" sofa="1161" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" name="isotonic velocity transient"/><types:Keyword xmi:id="1204" sofa="1161" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" name="energy liberation rate"/><types:Keyword xmi:id="1427" sofa="1161" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" name="muscle stffness"/><types:Keyword xmi:id="1998" sofa="1161" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" name="isometric tension 
transient"/><types:Keyword xmi:id="3262" sofa="1161" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" name="force-velocity relation"/><types:Journal xmi:id="1528" sofa="1161" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" pubDate="3296" volume="8" title="Biophysics" pages="27--39"/><types:AbstractText xmi:id="1721" sofa="1161" begin="87" end="1746" componentId="de.julielab.jcore.reader.pmc.PMCReader" structuredAbstractParts=""/><pubmed:Header xmi:id="1880" sofa="1161" begin="0" end="1746" componentId="de.julielab.jcore.reader.pmc.PMCReader" source="PubMed Central" docId="PMC5070457" copyright="©2012 THE BIOPHYSICAL SOCIETY OF JAPAN" truncated="false" authors="1461" pubTypeList="1528" doi="10.2142/biophysics.8.27" otherIDs="2475"/><pubmed:ManualDescriptor xmi:id="2364" sofa="1161" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" keywordList="1269"/><types:Date xmi:id="3296" sofa="1161" begin="0" end="0" componentId="de.julielab.jcore.reader.pmc.PMCReader" day="25" month="1" year="2012"/><types:Gene xmi:id="4198" sofa="1161" begin="832" end="843" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T0" specificType="protein"/><types:Gene xmi:id="5446" sofa="1161" begin="856" end="886" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T1" specificType="protein"/><types:Gene xmi:id="4086" sofa="1161" begin="898" end="913" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T2" specificType="Gene"/><types:Gene xmi:id="5334" sofa="1161" begin="966" end="971" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T3" specificType="Gene"/><types:Gene xmi:id="6182" sofa="1161" begin="1002" end="1007" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T4" specificType="Gene"/><types:Gene xmi:id="6198" sofa="1161" begin="1228" end="1254" confidence="1.0" 
componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T5" specificType="Gene"/><types:Gene xmi:id="5654" sofa="1161" begin="2743" end="2769" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T6" specificType="Gene"/><types:Gene xmi:id="6630" sofa="1161" begin="3279" end="3297" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T7" specificType="Gene"/><types:Gene xmi:id="5638" sofa="1161" begin="3307" end="3312" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T8" specificType="Gene"/><types:Gene xmi:id="5462" sofa="1161" begin="3350" end="3355" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T9" specificType="Gene"/><types:Gene xmi:id="4870" sofa="1161" begin="3387" end="3392" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T10" specificType="Gene"/><types:Gene xmi:id="4054" sofa="1161" begin="3418" end="3429" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T11" specificType="Gene"/><types:Gene xmi:id="5798" sofa="1161" begin="3569" end="3580" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T12" specificType="Gene"/><types:Gene xmi:id="5878" sofa="1161" begin="3618" end="3620" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T13" specificType="Gene"/><types:Gene xmi:id="6038" sofa="1161" begin="3984" end="3986" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T14" specificType="Gene"/><types:Gene xmi:id="4790" sofa="1161" begin="4436" end="4451" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T15" specificType="Gene"/><types:Gene xmi:id="4582" sofa="1161" begin="4737" end="4739" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T16" specificType="Gene"/><types:Gene xmi:id="4966" sofa="1161" begin="4744" end="4746" confidence="1.0" 
componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T17" specificType="Gene"/><types:Gene xmi:id="6566" sofa="1161" begin="4840" end="4845" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T18" specificType="Gene"/><types:Gene xmi:id="5238" sofa="1161" begin="4884" end="4900" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T19" specificType="Gene"/><types:Gene xmi:id="4614" sofa="1161" begin="4936" end="4952" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T20" specificType="Gene"/><types:Gene xmi:id="4502" sofa="1161" begin="4956" end="4958" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T21" specificType="Gene"/><types:Gene xmi:id="3782" sofa="1161" begin="4981" end="4986" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T22" specificType="Gene"/><types:Gene xmi:id="5350" sofa="1161" begin="5140" end="5146" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T23" specificType="Gene"/><types:Gene xmi:id="3590" sofa="1161" begin="5156" end="5158" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T24" specificType="Gene"/><types:Gene xmi:id="5494" sofa="1161" begin="5337" end="5338" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T25" specificType="Gene"/><types:Gene xmi:id="5414" sofa="1161" begin="5561" end="5563" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T26" specificType="Gene"/><types:Gene xmi:id="3478" sofa="1161" begin="5663" end="5679" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T27" specificType="Gene"/><types:Gene xmi:id="6166" sofa="1161" begin="5741" end="5756" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T28" specificType="Gene"/><types:Gene xmi:id="4742" sofa="1161" begin="5819" end="5834" 
confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T29" specificType="Gene"/><types:Gene xmi:id="4358" sofa="1161" begin="5995" end="6001" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T30" specificType="Gene"/><types:Gene xmi:id="3446" sofa="1161" begin="6015" end="6030" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T31" specificType="Gene"/><types:Gene xmi:id="3638" sofa="1161" begin="6219" end="6225" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T32" specificType="Gene"/><types:Gene xmi:id="5670" sofa="1161" begin="6247" end="6252" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T33" specificType="Gene"/><types:Gene xmi:id="6358" sofa="1161" begin="6282" end="6288" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T34" specificType="Gene"/><types:Gene xmi:id="5830" sofa="1161" begin="6726" end="6728" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T35" specificType="Gene"/><types:Gene xmi:id="4246" sofa="1161" begin="6730" end="6731" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T36" specificType="Gene"/><types:Gene xmi:id="5686" sofa="1161" begin="7383" end="7398" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T37" specificType="Gene"/><types:Gene xmi:id="5366" sofa="1161" begin="7427" end="7443" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T38" specificType="Gene"/><types:Gene xmi:id="3686" sofa="1161" begin="7478" end="7483" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T39" specificType="Gene"/><types:Gene xmi:id="4470" sofa="1161" begin="7856" end="7871" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T40" specificType="Gene"/><types:Gene xmi:id="3814" sofa="1161" begin="8002" 
end="8016" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T41" specificType="Gene"/><types:Gene xmi:id="4774" sofa="1161" begin="8086" end="8102" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T42" specificType="Gene"/><types:Gene xmi:id="5894" sofa="1161" begin="8240" end="8261" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T43" specificType="Gene"/><types:Gene xmi:id="6230" sofa="1161" begin="8365" end="8388" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T44" specificType="Gene"/><types:Gene xmi:id="6086" sofa="1161" begin="8633" end="8649" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T45" specificType="Gene"/><types:Gene xmi:id="6102" sofa="1161" begin="8758" end="8779" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T46" specificType="Gene"/><types:Gene xmi:id="3830" sofa="1161" begin="8784" end="8799" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T47" specificType="Gene"/><types:Gene xmi:id="3718" sofa="1161" begin="9202" end="9207" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T48" specificType="Gene"/><types:Gene xmi:id="3846" sofa="1161" begin="9248" end="9253" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T49" specificType="Gene"/><types:Gene xmi:id="4838" sofa="1161" begin="9272" end="9275" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T50" specificType="Gene"/><types:Gene xmi:id="4390" sofa="1161" begin="9332" end="9334" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T51" specificType="Gene"/><types:Gene xmi:id="4758" sofa="1161" begin="9335" end="9337" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T52" specificType="Gene"/><types:Gene xmi:id="5910" sofa="1161" 
begin="9440" end="9446" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T53" specificType="Gene"/><types:Gene xmi:id="3398" sofa="1161" begin="9462" end="9467" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T54" specificType="Gene"/><types:Gene xmi:id="4550" sofa="1161" begin="10358" end="10364" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T55" specificType="Gene"/><types:Gene xmi:id="6534" sofa="1161" begin="10369" end="10374" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T56" specificType="Gene"/><types:Gene xmi:id="6646" sofa="1161" begin="10682" end="10689" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T57" specificType="Gene"/><types:Gene xmi:id="6406" sofa="1161" begin="10726" end="10741" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T58" specificType="Gene"/><types:Gene xmi:id="3542" sofa="1161" begin="10830" end="10841" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T59" specificType="protein"/><types:Gene xmi:id="5718" sofa="1161" begin="10982" end="10985" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T60" specificType="Gene"/><types:Gene xmi:id="5254" sofa="1161" begin="11068" end="11085" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T61" specificType="Gene"/><types:Gene xmi:id="6262" sofa="1161" begin="11096" end="11101" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T62" specificType="Gene"/><types:Gene xmi:id="4374" sofa="1161" begin="11339" end="11351" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T63" specificType="Gene"/><types:Gene xmi:id="5174" sofa="1161" begin="11364" end="11369" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T64" 
specificType="Gene"/><types:Gene xmi:id="4118" sofa="1161" begin="11434" end="11440" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T65" specificType="Gene"/><types:Gene xmi:id="6006" sofa="1161" begin="11459" end="11464" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T66" specificType="Gene"/><types:Gene xmi:id="6054" sofa="1161" begin="11590" end="11592" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T67" specificType="Gene"/><types:Gene xmi:id="3734" sofa="1161" begin="11648" end="11664" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T68" specificType="Gene"/><types:Gene xmi:id="3974" sofa="1161" begin="11731" end="11733" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T69" specificType="Gene"/><types:Gene xmi:id="5286" sofa="1161" begin="11933" end="11935" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T70" specificType="Gene"/><types:Gene xmi:id="5526" sofa="1161" begin="12213" end="12219" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T71" specificType="Gene"/><types:Gene xmi:id="5398" sofa="1161" begin="12462" end="12477" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T72" specificType="Gene"/><types:Gene xmi:id="4262" sofa="1161" begin="12490" end="12495" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T73" specificType="Gene"/><types:Gene xmi:id="5590" sofa="1161" begin="12861" end="12867" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T74" specificType="Gene"/><types:Gene xmi:id="6438" sofa="1161" begin="12897" end="12902" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T75" specificType="Gene"/><types:Gene xmi:id="6614" sofa="1161" begin="12982" end="12988" confidence="1.0" 
componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T76" specificType="Gene"/><types:Gene xmi:id="6518" sofa="1161" begin="13842" end="13845" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T77" specificType="Gene"/><types:Gene xmi:id="3702" sofa="1161" begin="13849" end="13860" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T78" specificType="Gene"/><types:Gene xmi:id="5158" sofa="1161" begin="13865" end="13870" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T79" specificType="Gene"/><types:Gene xmi:id="6374" sofa="1161" begin="13949" end="13960" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T80" specificType="protein"/><types:Gene xmi:id="5190" sofa="1161" begin="13967" end="13972" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T81" specificType="protein"/><types:Gene xmi:id="5062" sofa="1161" begin="14131" end="14142" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T82" specificType="protein"/><types:Gene xmi:id="4886" sofa="1161" begin="14156" end="14170" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T83" specificType="protein"/><types:Gene xmi:id="4214" sofa="1161" begin="14229" end="14240" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T84" specificType="protein"/><types:Gene xmi:id="5782" sofa="1161" begin="14295" end="14319" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T85" specificType="protein"/><types:Gene xmi:id="5574" sofa="1161" begin="14513" end="14527" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T86" specificType="Gene"/><types:Gene xmi:id="6342" sofa="1161" begin="14533" end="14538" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T87" specificType="Gene"/><types:Gene xmi:id="5222" 
sofa="1161" begin="14755" end="14760" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T88" specificType="Gene"/><types:Gene xmi:id="6294" sofa="1161" begin="14936" end="14943" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T89" specificType="Gene"/><types:Gene xmi:id="4806" sofa="1161" begin="14963" end="14974" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T90" specificType="Gene"/><types:Gene xmi:id="3926" sofa="1161" begin="15001" end="15016" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T91" specificType="Gene"/><types:Gene xmi:id="5110" sofa="1161" begin="15071" end="15078" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T92" specificType="Gene"/><types:Gene xmi:id="6470" sofa="1161" begin="15138" end="15143" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T93" specificType="Gene"/><types:Gene xmi:id="5046" sofa="1161" begin="15310" end="15317" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T94" specificType="Gene"/><types:Gene xmi:id="3414" sofa="1161" begin="15327" end="15334" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T95" specificType="Gene"/><types:Gene xmi:id="6118" sofa="1161" begin="15338" end="15349" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T96" specificType="Gene"/><types:Gene xmi:id="3798" sofa="1161" begin="15355" end="15366" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T97" specificType="Gene"/><types:Gene xmi:id="4406" sofa="1161" begin="15506" end="15522" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T98" specificType="Gene"/><types:Gene xmi:id="6454" sofa="1161" begin="15545" end="15552" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T99" 
specificType="Gene"/><types:Gene xmi:id="3894" sofa="1161" begin="15669" end="15684" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T100" specificType="Gene"/><types:Gene xmi:id="4822" sofa="1161" begin="15733" end="15755" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T101" specificType="Gene"/><types:Gene xmi:id="4102" sofa="1161" begin="16482" end="16493" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T102" specificType="protein"/><types:Gene xmi:id="3622" sofa="1161" begin="16503" end="16510" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T103" specificType="protein"/><types:Gene xmi:id="6582" sofa="1161" begin="16622" end="16635" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T104" specificType="Gene"/><types:Gene xmi:id="6310" sofa="1161" begin="16771" end="16782" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T105" specificType="Gene"/><types:Gene xmi:id="4998" sofa="1161" begin="17046" end="17057" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T106" specificType="Gene"/><types:Gene xmi:id="3990" sofa="1161" begin="17115" end="17120" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T107" specificType="Gene"/><types:Gene xmi:id="4230" sofa="1161" begin="17535" end="17546" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T108" specificType="Gene"/><types:Gene xmi:id="4534" sofa="1161" begin="17572" end="17577" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T109" specificType="Gene"/><types:Gene xmi:id="3766" sofa="1161" begin="18013" end="18018" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T110" specificType="Gene"/><types:Gene xmi:id="4182" sofa="1161" begin="18103" end="18106" confidence="1.0" 
componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T111" specificType="Gene"/><types:Gene xmi:id="6326" sofa="1161" begin="18253" end="18264" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T112" specificType="Gene"/><types:Gene xmi:id="6662" sofa="1161" begin="18512" end="18523" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T113" specificType="Gene"/><types:Gene xmi:id="5318" sofa="1161" begin="18875" end="18887" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T114" specificType="protein"/><types:Gene xmi:id="3430" sofa="1161" begin="18901" end="18916" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T115" specificType="protein"/><types:Gene xmi:id="4486" sofa="1161" begin="19027" end="19030" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T116" specificType="Gene"/><types:Gene xmi:id="6390" sofa="1161" begin="19075" end="19090" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T117" specificType="Gene"/><types:Gene xmi:id="3942" sofa="1161" begin="19158" end="19173" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T118" specificType="Gene"/><types:Gene xmi:id="3878" sofa="1161" begin="19208" end="19213" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T119" specificType="Gene"/><types:Gene xmi:id="6022" sofa="1161" begin="19293" end="19308" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T120" specificType="Gene"/><types:Gene xmi:id="6550" sofa="1161" begin="19321" end="19326" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T121" specificType="Gene"/><types:Gene xmi:id="4950" sofa="1161" begin="19571" end="19586" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T122" specificType="Gene"/><types:Gene xmi:id="5814" 
sofa="1161" begin="19800" end="19815" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T123" specificType="Gene"/><types:Gene xmi:id="6214" sofa="1161" begin="19985" end="19996" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T124" specificType="Gene"/><types:Gene xmi:id="4566" sofa="1161" begin="20042" end="20057" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T125" specificType="Gene"/><types:Gene xmi:id="4630" sofa="1161" begin="20071" end="20086" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T126" specificType="Gene"/><types:Gene xmi:id="5142" sofa="1161" begin="20285" end="20300" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T127" specificType="Gene"/><types:Gene xmi:id="5270" sofa="1161" begin="20331" end="20346" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T128" specificType="Gene"/><types:Gene xmi:id="3382" sofa="1161" begin="20461" end="20472" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T129" specificType="Gene"/><types:Gene xmi:id="3366" sofa="1161" begin="21138" end="21156" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T130" specificType="Gene"/><types:Gene xmi:id="4422" sofa="1161" begin="21190" end="21201" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T131" specificType="Gene"/><types:Gene xmi:id="3462" sofa="1161" begin="21265" end="21274" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T132" specificType="Gene"/><types:Gene xmi:id="5206" sofa="1161" begin="21329" end="21332" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T133" specificType="Gene"/><types:Gene xmi:id="6070" sofa="1161" begin="22472" end="22487" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T134" 
specificType="Gene"/><types:Gene xmi:id="3862" sofa="1161" begin="22514" end="22529" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T135" specificType="Gene"/><types:Gene xmi:id="3350" sofa="1161" begin="22647" end="22652" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T136" specificType="Gene"/><types:Gene xmi:id="5382" sofa="1161" begin="22954" end="22957" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T137" specificType="Gene"/><types:Gene xmi:id="5734" sofa="1161" begin="24186" end="24195" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T138" specificType="Gene"/><types:Gene xmi:id="4918" sofa="1161" begin="24328" end="24333" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T139" specificType="protein"/><types:Gene xmi:id="6422" sofa="1161" begin="24374" end="24379" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T140" specificType="protein"/><types:Gene xmi:id="5974" sofa="1161" begin="24446" end="24457" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T141" specificType="Gene"/><types:Gene xmi:id="5430" sofa="1161" begin="24602" end="24613" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T142" specificType="protein"/><types:Gene xmi:id="4934" sofa="1161" begin="24618" end="24623" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T143" specificType="Gene"/><types:Gene xmi:id="5030" sofa="1161" begin="24709" end="24728" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T144" specificType="Gene"/><types:Gene xmi:id="4678" sofa="1161" begin="24752" end="24767" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T145" specificType="Gene"/><types:Gene xmi:id="5958" sofa="1161" begin="24885" end="24896" confidence="1.0" 
componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T146" specificType="Gene"/><types:Gene xmi:id="4070" sofa="1161" begin="25217" end="25228" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T147" specificType="Gene"/><types:Gene xmi:id="5014" sofa="1161" begin="25536" end="25547" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T148" specificType="Gene"/><types:Gene xmi:id="3958" sofa="1161" begin="26193" end="26208" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T149" specificType="Gene"/><types:Gene xmi:id="6598" sofa="1161" begin="26321" end="26336" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T150" specificType="Gene"/><types:Gene xmi:id="4278" sofa="1161" begin="26970" end="26973" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T151" specificType="Gene"/><types:Gene xmi:id="6278" sofa="1161" begin="26974" end="26977" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T152" specificType="Gene"/><types:Gene xmi:id="5766" sofa="1161" begin="26978" end="26980" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T153" specificType="Gene"/><types:Gene xmi:id="5542" sofa="1161" begin="27702" end="27708" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T154" specificType="Gene"/><types:Gene xmi:id="3510" sofa="1161" begin="28514" end="28524" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T155" specificType="Gene"/><types:Gene xmi:id="4598" sofa="1161" begin="28639" end="28655" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T156" specificType="Gene"/><types:Gene xmi:id="4150" sofa="1161" begin="28777" end="28788" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T157" specificType="Gene"/><types:Gene xmi:id="4518" sofa="1161" 
begin="28806" end="28808" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T158" specificType="Gene"/><types:Gene xmi:id="4006" sofa="1161" begin="29057" end="29072" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T159" specificType="Gene"/><types:Gene xmi:id="5926" sofa="1161" begin="29221" end="29233" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T160" specificType="protein"/><types:Gene xmi:id="5302" sofa="1161" begin="29250" end="29255" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T161" specificType="protein"/><types:Gene xmi:id="3654" sofa="1161" begin="29302" end="29308" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T162" specificType="Gene"/><types:Gene xmi:id="4326" sofa="1161" begin="29427" end="29443" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T163" specificType="Gene"/><types:Gene xmi:id="3526" sofa="1161" begin="29501" end="29507" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T164" specificType="Gene"/><types:Gene xmi:id="6486" sofa="1161" begin="29534" end="29539" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T165" specificType="Gene"/><types:Gene xmi:id="4902" sofa="1161" begin="29602" end="29617" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T166" specificType="Gene"/><types:Gene xmi:id="4038" sofa="1161" begin="29660" end="29666" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T167" specificType="Gene"/><types:Gene xmi:id="4166" sofa="1161" begin="29689" end="29694" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T168" specificType="Gene"/><types:Gene xmi:id="4710" sofa="1161" begin="29720" end="29726" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T169" 
specificType="Gene"/><types:Gene xmi:id="3574" sofa="1161" begin="29743" end="29748" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T170" specificType="Gene"/><types:Gene xmi:id="3670" sofa="1161" begin="30014" end="30019" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T171" specificType="Gene"/><types:Gene xmi:id="3494" sofa="1161" begin="30118" end="30129" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T172" specificType="Gene"/><types:Gene xmi:id="5990" sofa="1161" begin="30154" end="30168" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T173" specificType="Gene"/><types:Gene xmi:id="4694" sofa="1161" begin="30224" end="30244" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T174" specificType="Gene"/><types:Gene xmi:id="5606" sofa="1161" begin="30333" end="30344" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T175" specificType="Gene"/><types:Gene xmi:id="5622" sofa="1161" begin="30600" end="30626" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T176" specificType="Gene"/><types:Gene xmi:id="6134" sofa="1161" begin="30718" end="30729" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T177" specificType="Gene"/><types:Gene xmi:id="3558" sofa="1161" begin="30925" end="30936" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T178" specificType="Gene"/><types:Gene xmi:id="5126" sofa="1161" begin="31063" end="31074" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T179" specificType="protein"/><types:Gene xmi:id="4454" sofa="1161" begin="31596" end="31607" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T180" specificType="Gene"/><types:Gene xmi:id="6502" sofa="1161" begin="31771" end="31782" confidence="1.0" 
componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T181" specificType="Gene"/><types:Gene xmi:id="4294" sofa="1161" begin="31870" end="31921" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T182" specificType="Gene"/><types:Gene xmi:id="5702" sofa="1161" begin="31923" end="31930" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T183" specificType="Gene"/><types:Gene xmi:id="4310" sofa="1161" begin="32030" end="32041" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T184" specificType="Gene"/><types:Gene xmi:id="5750" sofa="1161" begin="32065" end="32072" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T185" specificType="Gene"/><types:Gene xmi:id="5942" sofa="1161" begin="32219" end="32240" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T186" specificType="Gene"/><types:Gene xmi:id="4726" sofa="1161" begin="32692" end="32700" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T187" specificType="Gene"/><types:Gene xmi:id="6678" sofa="1161" begin="33133" end="33144" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T188" specificType="Gene"/><types:Gene xmi:id="4342" sofa="1161" begin="33504" end="33505" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T189" specificType="Gene"/><types:Gene xmi:id="5510" sofa="1161" begin="33506" end="33511" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T190" specificType="Gene"/><types:Gene xmi:id="5078" sofa="1161" begin="34204" end="34219" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T191" specificType="Gene"/><types:Gene xmi:id="4134" sofa="1161" begin="34224" end="34229" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T192" specificType="Gene"/><types:Gene xmi:id="3750" sofa="1161" 
begin="34429" end="34466" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T193" specificType="Gene"/><types:Gene xmi:id="4982" sofa="1161" begin="34530" end="34533" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T194" specificType="Gene"/><types:Gene xmi:id="4854" sofa="1161" begin="34714" end="34740" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T195" specificType="Gene"/><types:Gene xmi:id="4022" sofa="1161" begin="36078" end="36104" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T196" specificType="Gene"/><types:Gene xmi:id="3606" sofa="1161" begin="36184" end="36189" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T197" specificType="Gene"/><types:Gene xmi:id="4438" sofa="1161" begin="36262" end="36273" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T198" specificType="Gene"/><types:Gene xmi:id="5094" sofa="1161" begin="36514" end="36544" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T199" specificType="Gene"/><types:Gene xmi:id="5478" sofa="1161" begin="37211" end="37218" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T200" specificType="Gene"/><types:Gene xmi:id="5846" sofa="1161" begin="37255" end="37270" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T201" specificType="Gene"/><types:Gene xmi:id="3910" sofa="1161" begin="37381" end="37392" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T202" specificType="Gene"/><types:Gene xmi:id="6150" sofa="1161" begin="37569" end="37584" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T203" specificType="Gene"/><types:Gene xmi:id="5862" sofa="1161" begin="38474" end="38489" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T204" 
specificType="Gene"/><types:Gene xmi:id="5558" sofa="1161" begin="38494" end="38499" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T205" specificType="Gene"/><types:Gene xmi:id="4662" sofa="1161" begin="39111" end="39126" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T206" specificType="Gene"/><types:Gene xmi:id="4646" sofa="1161" begin="39184" end="39200" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T207" specificType="Gene"/><types:Gene xmi:id="6246" sofa="1161" begin="39762" end="39778" confidence="1.0" componentId="de.julielab.jcore.ae.banner.BANNERAnnotator" id="T208" specificType="Gene"/><types:ArgumentMention xmi:id="7006" sofa="1161" begin="832" end="843" ref="4198" role="Theme"/><types:ArgumentMention xmi:id="7019" sofa="1161" begin="856" end="886" ref="5446" role="Theme2"/><types:ArgumentMention xmi:id="6996" sofa="1161" begin="10830" end="10841" ref="3542" role="Theme"/><types:ArgumentMention xmi:id="6704" sofa="1161" begin="13949" end="13960" ref="6374" role="Theme"/><types:ArgumentMention xmi:id="7089" sofa="1161" begin="13967" end="13972" ref="5190" role="Theme2"/><types:ArgumentMention xmi:id="6906" sofa="1161" begin="14131" end="14142" ref="5062" role="Theme"/><types:ArgumentMention xmi:id="7153" sofa="1161" begin="14156" end="14170" ref="4886" role="Theme2"/><types:ArgumentMention xmi:id="6748" sofa="1161" begin="14229" end="14240" ref="4214" role="Theme"/><types:ArgumentMention xmi:id="6848" sofa="1161" begin="14295" end="14319" ref="5782" role="Theme2"/><types:ArgumentMention xmi:id="6714" sofa="1161" begin="16482" end="16493" ref="4102" role="Theme"/><types:ArgumentMention xmi:id="6956" sofa="1161" begin="16503" end="16510" ref="3622" role="Theme2"/><types:ArgumentMention xmi:id="6804" sofa="1161" begin="18875" end="18887" ref="5318" role="Theme"/><types:ArgumentMention xmi:id="6838" sofa="1161" begin="18901" end="18916" ref="3430" 
role="Theme2"/><types:ArgumentMention xmi:id="6781" sofa="1161" begin="24328" end="24333" ref="4918" role="Theme"/><types:ArgumentMention xmi:id="6791" sofa="1161" begin="24374" end="24379" ref="6422" role="Theme"/><types:ArgumentMention xmi:id="7059" sofa="1161" begin="24602" end="24613" ref="5430" role="Theme"/><types:ArgumentMention xmi:id="7029" sofa="1161" begin="29221" end="29233" ref="5926" role="Theme"/><types:ArgumentMention xmi:id="6694" sofa="1161" begin="29250" end="29255" ref="5302" role="Theme2"/><types:ArgumentMention xmi:id="6966" sofa="1161" begin="31063" end="31074" ref="5126" role="Theme"/><types:EventMention xmi:id="6976" sofa="1161" begin="844" end="849" id="E1" specificType="Binding" arguments="6834"/><types:EventMention xmi:id="7133" sofa="1161" begin="10809" end="10817" id="E2" specificType="Gene_expression" arguments="7016"/><types:EventMention xmi:id="6758" sofa="1161" begin="13936" end="13943" id="E3" specificType="Binding" arguments="6744"/><types:EventMention xmi:id="6882" sofa="1161" begin="14143" end="14148" id="E4" specificType="Binding" arguments="6902"/><types:EventMention xmi:id="6814" sofa="1161" begin="14251" end="14255" id="E5" specificType="Binding" arguments="7163"/><types:EventMention xmi:id="7099" sofa="1161" begin="16494" end="16499" id="E6" specificType="Binding" arguments="6858"/><types:EventMention xmi:id="6862" sofa="1161" begin="18892" end="18897" id="E7" specificType="Binding" arguments="7129"/><types:EventMention xmi:id="6916" sofa="1161" begin="24266" end="24276" id="E9" specificType="Binding" arguments="7119"/><types:EventMention xmi:id="6936" sofa="1161" begin="24347" end="24354" id="E8" specificType="Binding" arguments="7126"/><types:EventMention xmi:id="7039" sofa="1161" begin="24584" end="24594" id="E10" specificType="Gene_expression" arguments="6778"/><types:EventMention xmi:id="6724" sofa="1161" begin="29238" end="29243" id="E11" specificType="Binding" arguments="7122"/><types:EventMention xmi:id="7069" 
sofa="1161" begin="31030" end="31040" id="E12" specificType="Positive_regulation" arguments="6801"/><types:Sentence xmi:id="8791" sofa="1161" begin="0" end="86" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9767" sofa="1161" begin="87" end="198" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7263" sofa="1161" begin="199" end="439" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9319" sofa="1161" begin="440" end="624" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8583" sofa="1161" begin="625" end="735" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8231" sofa="1161" begin="736" end="914" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9703" sofa="1161" begin="915" end="1045" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9119" sofa="1161" begin="1046" end="1128" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8783" sofa="1161" begin="1129" end="1255" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9975" sofa="1161" begin="1256" end="1457" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8279" sofa="1161" begin="1458" end="1629" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7727" sofa="1161" begin="1630" end="1745" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9423" sofa="1161" begin="1749" end="1761" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8479" sofa="1161" begin="1762" end="1869" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9759" sofa="1161" begin="1873" end="1913" 
componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7279" sofa="1161" begin="1920" end="2080" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8359" sofa="1161" begin="2081" end="2278" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8831" sofa="1161" begin="2279" end="2333" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8335" sofa="1161" begin="2334" end="2452" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8303" sofa="1161" begin="2456" end="2508" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7935" sofa="1161" begin="2512" end="2575" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="10015" sofa="1161" begin="2576" end="2770" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8383" sofa="1161" begin="2771" end="2829" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9583" sofa="1161" begin="2830" end="2875" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9575" sofa="1161" begin="2879" end="2921" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8895" sofa="1161" begin="2922" end="3130" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9807" sofa="1161" begin="3131" end="3194" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9631" sofa="1161" begin="3199" end="3255" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9727" sofa="1161" begin="3256" end="3356" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9335" sofa="1161" begin="3357" end="3533" 
componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9663" sofa="1161" begin="3534" end="3621" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9591" sofa="1161" begin="3622" end="3756" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9679" sofa="1161" begin="3757" end="3894" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9239" sofa="1161" begin="3895" end="3968" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8591" sofa="1161" begin="3969" end="4084" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8327" sofa="1161" begin="4089" end="4102" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7447" sofa="1161" begin="4107" end="4162" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7983" sofa="1161" begin="4172" end="4343" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8503" sofa="1161" begin="4344" end="4410" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9919" sofa="1161" begin="4411" end="4494" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9159" sofa="1161" begin="4495" end="4704" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9751" sofa="1161" begin="4705" end="4762" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9199" sofa="1161" begin="4763" end="4830" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7823" sofa="1161" begin="4831" end="5009" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8815" sofa="1161" begin="5010" end="5097" 
componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8415" sofa="1161" begin="5098" end="5125" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7839" sofa="1161" begin="5126" end="5227" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7623" sofa="1161" begin="5228" end="5295" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8031" sofa="1161" begin="5296" end="5407" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9383" sofa="1161" begin="5411" end="5470" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8551" sofa="1161" begin="5471" end="5640" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7575" sofa="1161" begin="5644" end="5722" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8079" sofa="1161" begin="5723" end="5811" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8127" sofa="1161" begin="5815" end="5899" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7999" sofa="1161" begin="5900" end="5975" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9023" sofa="1161" begin="5976" end="6082" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9495" sofa="1161" begin="6083" end="6126" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9263" sofa="1161" begin="6131" end="6164" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7895" sofa="1161" begin="6165" end="6314" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7479" sofa="1161" begin="6315" end="6417" 
componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7399" sofa="1161" begin="6422" end="6432" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8023" sofa="1161" begin="6437" end="6463" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8687" sofa="1161" begin="6464" end="6502" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="10039" sofa="1161" begin="6507" end="6639" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9311" sofa="1161" begin="6640" end="6871" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7751" sofa="1161" begin="6872" end="6950" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7775" sofa="1161" begin="6955" end="6967" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7679" sofa="1161" begin="6968" end="7101" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7967" sofa="1161" begin="7105" end="7121" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8047" sofa="1161" begin="7126" end="7287" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7631" sofa="1161" begin="7292" end="7344" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8063" sofa="1161" begin="7345" end="7523" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8143" sofa="1161" begin="7524" end="7615" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7407" sofa="1161" begin="7616" end="7704" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7343" sofa="1161" begin="7705" end="7793" 
componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9287" sofa="1161" begin="7794" end="7913" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9863" sofa="1161" begin="7914" end="8017" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9879" sofa="1161" begin="8018" end="8085" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8439" sofa="1161" begin="8086" end="8187" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9799" sofa="1161" begin="8188" end="8228" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9247" sofa="1161" begin="8229" end="8303" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9127" sofa="1161" begin="8304" end="8389" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9887" sofa="1161" begin="8390" end="8420" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8839" sofa="1161" begin="8421" end="8447" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9535" sofa="1161" begin="8448" end="8554" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8743" sofa="1161" begin="8559" end="8707" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7703" sofa="1161" begin="8708" end="8721" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7351" sofa="1161" begin="8726" end="8822" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8495" sofa="1161" begin="8827" end="8943" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8823" sofa="1161" begin="8944" end="9031" 
componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8615" sofa="1161" begin="9032" end="9050" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7567" sofa="1161" begin="9056" end="9178" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9967" sofa="1161" begin="9179" end="9184" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8407" sofa="1161" begin="9185" end="9224" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8151" sofa="1161" begin="9225" end="9230" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9143" sofa="1161" begin="9231" end="9270" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8191" sofa="1161" begin="9271" end="9276" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7455" sofa="1161" begin="9277" end="9374" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9503" sofa="1161" begin="9375" end="9604" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9743" sofa="1161" begin="9608" end="9674" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9511" sofa="1161" begin="9675" end="9784" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8655" sofa="1161" begin="9785" end="9815" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7599" sofa="1161" begin="9816" end="9862" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9391" sofa="1161" begin="9863" end="10035" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7759" sofa="1161" begin="10036" end="10084" 
componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7591" sofa="1161" begin="10085" end="10188" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8623" sofa="1161" begin="10189" end="10261" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8575" sofa="1161" begin="10262" end="10411" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7943" sofa="1161" begin="10412" end="10570" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8799" sofa="1161" begin="10571" end="10681" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9095" sofa="1161" begin="10682" end="10750" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9639" sofa="1161" begin="10751" end="10914" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9431" sofa="1161" begin="10915" end="11041" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7503" sofa="1161" begin="11042" end="11204" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7431" sofa="1161" begin="11205" end="11288" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7527" sofa="1161" begin="11292" end="11319" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8983" sofa="1161" begin="11324" end="11392" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7415" sofa="1161" begin="11393" end="11507" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9927" sofa="1161" begin="11508" end="11544" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="10047" sofa="1161" begin="11545" end="11593" 
componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9375" sofa="1161" begin="11594" end="11689" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9951" sofa="1161" begin="11690" end="11753" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7199" sofa="1161" begin="11754" end="11774" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9735" sofa="1161" begin="11780" end="11903" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9991" sofa="1161" begin="11904" end="12008" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7791" sofa="1161" begin="12010" end="12019" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7391" sofa="1161" begin="12025" end="12041" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8519" sofa="1161" begin="12042" end="12083" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8679" sofa="1161" begin="12084" end="12287" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7647" sofa="1161" begin="12288" end="12356" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7951" sofa="1161" begin="12357" end="12619" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9039" sofa="1161" begin="12620" end="12936" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9255" sofa="1161" begin="12937" end="13252" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8311" sofa="1161" begin="13253" end="13358" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7607" sofa="1161" begin="13359" end="13535" 
componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8135" sofa="1161" begin="13536" end="13723" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9911" sofa="1161" begin="13724" end="13793" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9479" sofa="1161" begin="13799" end="13816" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8927" sofa="1161" begin="13821" end="13880" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8775" sofa="1161" begin="13881" end="14072" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9327" sofa="1161" begin="14073" end="14321" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7319" sofa="1161" begin="14322" end="14395" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8015" sofa="1161" begin="14396" end="14528" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7535" sofa="1161" begin="14529" end="14657" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8847" sofa="1161" begin="14658" end="14711" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8935" sofa="1161" begin="14717" end="14944" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8223" sofa="1161" begin="14945" end="15017" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7975" sofa="1161" begin="15018" end="15059" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9407" sofa="1161" begin="15060" end="15170" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9343" sofa="1161" begin="15171" end="15350" 
componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9655" sofa="1161" begin="15351" end="15917" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7487" sofa="1161" begin="15918" end="16029" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7991" sofa="1161" begin="16030" end="16219" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9855" sofa="1161" begin="16220" end="16300" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8255" sofa="1161" begin="16301" end="16399" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8863" sofa="1161" begin="16400" end="16466" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8071" sofa="1161" begin="16467" end="16566" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7495" sofa="1161" begin="16583" end="16679" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9351" sofa="1161" begin="16680" end="16871" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7543" sofa="1161" begin="16872" end="16979" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8263" sofa="1161" begin="16980" end="17171" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7695" sofa="1161" begin="17172" end="17392" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8007" sofa="1161" begin="17402" end="17660" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7359" sofa="1161" begin="17661" end="17801" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8711" sofa="1161" begin="17802" end="17912" 
componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9167" sofa="1161" begin="17913" end="18052" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8663" sofa="1161" begin="18053" end="18107" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8727" sofa="1161" begin="18120" end="18141" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9543" sofa="1161" begin="18142" end="18265" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9207" sofa="1161" begin="18270" end="18312" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9999" sofa="1161" begin="18313" end="18453" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9183" sofa="1161" begin="18454" end="18580" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9791" sofa="1161" begin="18581" end="18701" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7687" sofa="1161" begin="18702" end="18849" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9487" sofa="1161" begin="18850" end="18936" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8639" sofa="1161" begin="18941" end="19014" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="10007" sofa="1161" begin="19015" end="19123" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7207" sofa="1161" begin="19124" end="19228" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9959" sofa="1161" begin="19229" end="19390" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9151" sofa="1161" begin="19391" end="19523" 
componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8951" sofa="1161" begin="19524" end="19648" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7471" sofa="1161" begin="19649" end="19787" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7871" sofa="1161" begin="19788" end="19930" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8287" sofa="1161" begin="19931" end="20058" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8399" sofa="1161" begin="20059" end="20177" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8239" sofa="1161" begin="20178" end="20301" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9815" sofa="1161" begin="20302" end="20422" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8599" sofa="1161" begin="20423" end="20581" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8039" sofa="1161" begin="20582" end="20827" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9871" sofa="1161" begin="20833" end="20932" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7767" sofa="1161" begin="20933" end="21054" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9695" sofa="1161" begin="21055" end="21067" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9399" sofa="1161" begin="21073" end="21248" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9895" sofa="1161" begin="21249" end="21282" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9271" sofa="1161" begin="21284" end="21302" 
componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7183" sofa="1161" begin="21303" end="21308" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7743" sofa="1161" begin="21309" end="21327" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8447" sofa="1161" begin="21328" end="21333" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7855" sofa="1161" begin="21334" end="21395" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9055" sofa="1161" begin="21396" end="21421" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9031" sofa="1161" begin="21425" end="21430" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9687" sofa="1161" begin="21431" end="21444" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9295" sofa="1161" begin="21445" end="21450" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8943" sofa="1161" begin="21451" end="21523" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9903" sofa="1161" begin="21524" end="21591" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8247" sofa="1161" begin="21593" end="21724" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8703" sofa="1161" begin="21737" end="21789" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9599" sofa="1161" begin="21790" end="22051" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8487" sofa="1161" begin="22052" end="22244" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9447" sofa="1161" begin="22245" end="22280" 
componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8527" sofa="1161" begin="22281" end="22419" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8199" sofa="1161" begin="22420" end="22509" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7799" sofa="1161" begin="22510" end="22586" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8055" sofa="1161" begin="22587" end="22659" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8207" sofa="1161" begin="22660" end="22710" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7815" sofa="1161" begin="22711" end="22841" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7551" sofa="1161" begin="22842" end="22958" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8759" sofa="1161" begin="22959" end="23037" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9063" sofa="1161" begin="23038" end="23102" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8767" sofa="1161" begin="23103" end="23344" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7919" sofa="1161" begin="23345" end="23460" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7375" sofa="1161" begin="23461" end="23622" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7423" sofa="1161" begin="23627" end="23700" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7327" sofa="1161" begin="23701" end="23803" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8959" sofa="1161" begin="23804" end="23893" 
componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7847" sofa="1161" begin="23894" end="24083" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9935" sofa="1161" begin="24084" end="24232" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7863" sofa="1161" begin="24233" end="24389" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9647" sofa="1161" begin="24399" end="24431" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8319" sofa="1161" begin="24432" end="24510" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9615" sofa="1161" begin="24511" end="24614" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8111" sofa="1161" begin="24615" end="24684" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8807" sofa="1161" begin="24685" end="24729" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8607" sofa="1161" begin="24730" end="24866" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9463" sofa="1161" begin="24867" end="24950" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8271" sofa="1161" begin="24951" end="25012" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9671" sofa="1161" begin="25013" end="25115" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9223" sofa="1161" begin="25120" end="25148" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7231" sofa="1161" begin="25149" end="25288" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8903" sofa="1161" begin="25289" end="25412" 
componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8543" sofa="1161" begin="25413" end="25518" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7439" sofa="1161" begin="25519" end="25632" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7311" sofa="1161" begin="25633" end="25880" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9455" sofa="1161" begin="25881" end="26045" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7191" sofa="1161" begin="26046" end="26209" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9359" sofa="1161" begin="26210" end="26337" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9559" sofa="1161" begin="26338" end="26568" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7959" sofa="1161" begin="26569" end="26710" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9567" sofa="1161" begin="26711" end="26895" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="10063" sofa="1161" begin="26896" end="27041" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7511" sofa="1161" begin="27042" end="27149" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7167" sofa="1161" begin="27153" end="27209" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8119" sofa="1161" begin="27210" end="27348" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9071" sofa="1161" begin="27349" end="27411" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8559" sofa="1161" begin="27418" end="27494" 
componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7831" sofa="1161" begin="27495" end="27608" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8167" sofa="1161" begin="27609" end="27741" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8647" sofa="1161" begin="27742" end="28056" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7335" sofa="1161" begin="28057" end="28239" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8567" sofa="1161" begin="28240" end="28328" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8511" sofa="1161" begin="28329" end="28400" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9983" sofa="1161" begin="28401" end="28461" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7671" sofa="1161" begin="28465" end="28532" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8087" sofa="1161" begin="28536" end="28732" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8215" sofa="1161" begin="28743" end="28809" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9215" sofa="1161" begin="28810" end="28912" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9783" sofa="1161" begin="28913" end="28935" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8967" sofa="1161" begin="28936" end="29032" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7247" sofa="1161" begin="29033" end="29189" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8423" sofa="1161" begin="29190" end="29374" 
componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7927" sofa="1161" begin="29375" end="29477" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9191" sofa="1161" begin="29478" end="29618" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8975" sofa="1161" begin="29619" end="29776" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8631" sofa="1161" begin="29777" end="29801" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9007" sofa="1161" begin="29802" end="29820" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7879" sofa="1161" begin="29821" end="29902" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9607" sofa="1161" begin="29903" end="29924" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8991" sofa="1161" begin="29925" end="29976" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7655" sofa="1161" begin="29977" end="30075" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9519" sofa="1161" begin="30076" end="30245" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9279" sofa="1161" begin="30258" end="30496" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8455" sofa="1161" begin="30497" end="30627" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8671" sofa="1161" begin="30638" end="30696" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7239" sofa="1161" begin="30697" end="30754" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7639" sofa="1161" begin="30755" end="30875" 
componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8295" sofa="1161" begin="30876" end="30957" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9831" sofa="1161" begin="30958" end="31116" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9087" sofa="1161" begin="31117" end="31217" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="10023" sofa="1161" begin="31218" end="31446" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8751" sofa="1161" begin="31447" end="31527" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7911" sofa="1161" begin="31528" end="31657" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7559" sofa="1161" begin="31658" end="31675" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7663" sofa="1161" begin="31677" end="31833" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9439" sofa="1161" begin="31834" end="31864" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8391" sofa="1161" begin="31870" end="32052" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8535" sofa="1161" begin="32053" end="32140" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9015" sofa="1161" begin="32141" end="32243" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8871" sofa="1161" begin="32249" end="32300" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8911" sofa="1161" begin="32306" end="32424" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7215" sofa="1161" begin="32425" end="32513" 
componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9551" sofa="1161" begin="32514" end="32691" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7807" sofa="1161" begin="32692" end="32795" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8471" sofa="1161" begin="32796" end="32905" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9719" sofa="1161" begin="32906" end="33023" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7223" sofa="1161" begin="33024" end="33190" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7367" sofa="1161" begin="33191" end="33338" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8719" sofa="1161" begin="33349" end="33402" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8343" sofa="1161" begin="33406" end="33556" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8463" sofa="1161" begin="33557" end="33737" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9623" sofa="1161" begin="33738" end="33882" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9367" sofa="1161" begin="33883" end="34025" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8879" sofa="1161" begin="34026" end="34093" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8431" sofa="1161" begin="34094" end="34277" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9415" sofa="1161" begin="34278" end="34321" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9047" sofa="1161" begin="34325" end="34507" 
componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7463" sofa="1161" begin="34508" end="34579" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7303" sofa="1161" begin="34580" end="34790" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7711" sofa="1161" begin="34801" end="34932" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9711" sofa="1161" begin="34933" end="34997" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9839" sofa="1161" begin="35008" end="35103" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8695" sofa="1161" begin="35104" end="35168" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9135" sofa="1161" begin="35172" end="35194" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7287" sofa="1161" begin="35195" end="35285" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8159" sofa="1161" begin="35286" end="35332" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7615" sofa="1161" begin="35333" end="35363" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="10031" sofa="1161" begin="35364" end="35567" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7783" sofa="1161" begin="35568" end="35664" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7583" sofa="1161" begin="35665" end="35722" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9103" sofa="1161" begin="35723" end="35792" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8095" sofa="1161" begin="35793" end="35879" 
componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7175" sofa="1161" begin="35880" end="35951" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7271" sofa="1161" begin="35952" end="36046" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8175" sofa="1161" begin="36050" end="36152" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9943" sofa="1161" begin="36153" end="36274" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8735" sofa="1161" begin="36275" end="36356" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7719" sofa="1161" begin="36357" end="36405" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8999" sofa="1161" begin="36406" end="36573" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9079" sofa="1161" begin="36574" end="36708" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9527" sofa="1161" begin="36709" end="36801" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8103" sofa="1161" begin="36802" end="36874" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8367" sofa="1161" begin="36875" end="37048" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7383" sofa="1161" begin="37049" end="37210" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8183" sofa="1161" begin="37211" end="37279" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7735" sofa="1161" begin="37280" end="37460" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8855" sofa="1161" begin="37461" end="37748" 
componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7903" sofa="1161" begin="37749" end="37810" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8919" sofa="1161" begin="37811" end="37898" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7255" sofa="1161" begin="37911" end="38016" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7887" sofa="1161" begin="38017" end="38049" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9471" sofa="1161" begin="38055" end="38258" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8351" sofa="1161" begin="38259" end="38317" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="10055" sofa="1161" begin="38324" end="38533" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9775" sofa="1161" begin="38534" end="38688" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7519" sofa="1161" begin="38689" end="38767" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="7295" sofa="1161" begin="38768" end="38816" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9231" sofa="1161" begin="38817" end="39069" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9847" sofa="1161" begin="39074" end="39160" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9111" sofa="1161" begin="39164" end="39286" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8887" sofa="1161" begin="39290" end="39363" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9823" sofa="1161" begin="39364" end="39539" 
componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9303" sofa="1161" begin="39543" end="39622" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="9175" sofa="1161" begin="39641" end="39694" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><types:Sentence xmi:id="8375" sofa="1161" begin="39695" end="39794" componentId="de.julielab.jcore.ae.jsbd.main.SentenceAnnotator"/><tcas:DocumentAnnotation xmi:id="10071" sofa="1161" begin="0" end="39795" language="x-unspecified"/><ext:DBProcessingMetaData xmi:id="10076" sofa="1161" begin="0" end="0" subsetTable="gepi.errordoc" doNotMarkAsProcessed="false"><primaryKey>PMC5070457</primaryKey></ext:DBProcessingMetaData><ext:FlattenedRelation xmi:id="10093" sofa="1161" begin="844" end="849" id="FE0" arguments="10086" relations="10090" rootRelation="6976"/><ext:FlattenedRelation xmi:id="10111" sofa="1161" begin="10809" end="10817" id="FE1" arguments="10105" relations="10108" rootRelation="7133"/><ext:FlattenedRelation xmi:id="10130" sofa="1161" begin="13936" end="13943" id="FE2" arguments="10123" relations="10127" rootRelation="6758"/><ext:FlattenedRelation xmi:id="10149" sofa="1161" begin="14143" end="14148" id="FE3" arguments="10142" relations="10146" rootRelation="6882"/><ext:FlattenedRelation xmi:id="10168" sofa="1161" begin="14251" end="14255" id="FE4" arguments="10161" relations="10165" rootRelation="6814"/><ext:FlattenedRelation xmi:id="10187" sofa="1161" begin="16494" end="16499" id="FE5" arguments="10180" relations="10184" rootRelation="7099"/><ext:FlattenedRelation xmi:id="10206" sofa="1161" begin="18892" end="18897" id="FE6" arguments="10199" relations="10203" rootRelation="6862"/><ext:FlattenedRelation xmi:id="10224" sofa="1161" begin="24266" end="24276" id="FE7" arguments="10218" relations="10221" rootRelation="6916"/><ext:FlattenedRelation xmi:id="10242" sofa="1161" begin="24347" end="24354" id="FE8" arguments="10236" 
relations="10239" rootRelation="6936"/><ext:FlattenedRelation xmi:id="10260" sofa="1161" begin="24584" end="24594" id="FE9" arguments="10254" relations="10257" rootRelation="7039"/><ext:FlattenedRelation xmi:id="10279" sofa="1161" begin="29238" end="29243" id="FE10" arguments="10272" relations="10276" rootRelation="6724"/><ext:FlattenedRelation xmi:id="10302" sofa="1161" begin="31030" end="31040" id="FE11" arguments="10291" relations="10299" agents="10294" patients="10296" rootRelation="7069"/><cas:Sofa xmi:id="1161" sofaNum="1" sofaID="_InitialView" mimeType="text" sofaString="Theory of muscle contraction mechanism with cooperative interaction among crossbridges&#10;The power stroke model was criticized and a model was proposed for muscle contraction mechanism (Mitsui, 1999). The proposed model was further developed and calculations based on the model well reproduced major experimental data on the steady filament sliding (Mitsui and Ohshima, 2008) and on the transient phenomena (Mitsui, Takai and Ohshima, 2011). In this review more weight is put on explanation of the basic ideas of the model, especially logical necessity of the model, leaving mathematical details to the above-mentioned papers. A thermodynamic relationship that any models based upon the sliding filament theory should fulfill is derived. The model which fulfills the thermodynamic relationship is constructed on the assumption that a myosin head bound to an actin filament forms a complex with three actin molecules. In shortening muscles, the complex moves along the actin filament changing the partner actin molecules with steps of about 5.5 nm. This process is made possible through cooperative interaction among cross-bridges. The ATP hydrolysis energy is liberated by fraction at each step through chemical reactions between myosin and actin molecules. 
The cooperativity among crossbridges disappears in length-clamped muscles, in agreement with experimental observations that the cross-bridge produces force independently in the isometric tetanus state. The distance of the head movement per ATP hydrolysis cycle is expected to be about 5.5 nm or a few times of it under the condition of the in vitro single head experiments. Calculation results are surveyed illustrating that they are in good agreement with major experimental observations.&#10;1.&#10;Introduction&#10;The power stroke model was criticized and a model was proposed for the muscle contraction mechanism in ref. 1. The model was further developed in refs. 2, 3. Calculations by the model well explained major experimental data on the steady filament sliding2 and on the transient phenomena3 (as surveyed later in Sect. 5). Some readers of refs. 2, 3, however, commented that it was difficult to understand the model since the explanations were too brief on some basic ideas, especially on logical necessity of the model. In this review, we will try to fill up the deficiency.&#10;A thermodynamic relationship which any model based upon the sliding filament theory should fulfill is derived in Sect. 2. Requirements asked to any models are listed in Sect. 3. In Sect. 4, basic concepts of the proposed model are explained. The requirements are fulfilled by considering cooperative interaction among crossbridges and by fractional liberation of ATP energy through chemical reactions between myosin and actin molecules. In Sect. 5, calculation results by the model are surveyed. In Sect. 6, summary and discussion are given.&#10;2.&#10;Derivation of a thermodynamic relationship&#10;Generally the first step to construct a molecular model in material physics is to look for a thermodynamic relationship among parameters to appear in models and put restrictions on the construction of models. 
Let us follow this procedure for a study of muscle contraction.&#10;2.1&#10;Work done by one crossbridge using ATP hydrolysis energy&#10;It is assumed that the myosin head exerts force on actin filament only when it is attached to actin. The mean force exerted on the actin filament by one attached myosin head is denoted as p and the translation distance of the attached head during one ATP hydrolysis cycle as D. Then pD gives the work done by the myosin head utilizing the ATP hydrolysis energy, eA. Measured macroscopic quantities during muscle contraction are the tension T, contraction velocity V and the rate of heat production H. Usually the contraction velocity V is defined by the shortening velocity of muscle of length Lml divided by Lml so that unit of V is s−1. The rate of muscle work W is TV and the rate of energy production is W+H. The portion of eA used for work is given by eAW/(W + H), and therefore, by the first law of thermodynamics, we have&#10;(1)&#10;pD=eAW/(W+H).&#10;2.2&#10;Relation between p and T in the sliding filament theory&#10;Figure 1 schematically shows a portion of the sarcomere producing the tension T at full filament overlap: (a) side view and (b) projection of the sarcomere along the filament axis. The projection consists of rhombic cells of edge a or of hexagons. The hexagon contains one myosin filament and has the same area as the rhombic cell.&#10;Since the tension T is given in units of N/m2, let us consider a flat cylinder whose bases are 1 m2 wide and vertical to the filaments and thickness is the half sarcomere length s/2 between the planes Z and M. Such cylinders are indicated as C1 and C2 in Figure 1(a). The Z plane is considered fixed and thus the tension T is downward. Then the actin filaments in C1 exert upward force on myosin filaments balancing the tension T, while the myosin filaments in C2 exert upward force on actin filaments balancing T. 
In such ways, each flat cylinder produces the tension T in the sliding filament theory. Below we shall consider C1.&#10;The number of myosin heads in C1 is denoted as Nhs, whose numerical value can be obtained as follows. The edge of the rhombus unit cell is indicated as a in Figure 1(b). It is known that a varies depending upon T but, as an approximation, a may be set equal to 42 nm following ref. 4. Then the area of the unit cell is a2 sin 60°=1528×10−18 m2. Microscopic photographs of muscle suggest that the ratio of sarcomere area in the base of C1 is not very far from 1, and is set as 0.9 as an approximation following ref. 4. Then the number of myosin filaments in C1 is 0.9/(a2 sin 60°) = 5.89×1014 m−2. The period of the myosin filament is denoted as d, and set d equal to 14.3 nm as in ref. 4. The myosin filament has a three-fold rotation symmetry and has 6 myosin heads per d. The length of myosin filament minus that of the bare zone is about 1.43 µm. Thus the number of myosin heads on one myosin filament in half sarcomere is 6×(1.43×10−6/2)/14.3×10−9=300. Thus we have an approximate value of Nhs as&#10;(2)&#10;Nhs=300×5.89×1014m−2=1.76×1017m−2&#10;The symbol r is defined as the ratio of the number of myosin heads attached to an actin filament per total number of myosin heads at the same moment. Then the relation between the mean force p produced by one attached head and the tension T is given by&#10;(3)&#10;p=T/(rNhs)&#10;2.3&#10;Thermodynamic relationship&#10;Combining Eqs. 1 and 3 with W=TV gives&#10;(4)&#10;D/r=eANhsV/(W+H)This is the required thermodynamic relationship which any model based on the sliding filament theory should satisfy. If the units of the quantities in Eq. 4 are presented in parentheses, they are D (m), eA (J), Nhs (1/m2), V (1/s), (W+H) (J/m3s), so that the unit of the right-handed term is (J) (1/m2) (1/s) (m3s)/J=m, the same as the unit of D/r.&#10;As T approaches to 0, W=TV becomes negligible compared to H, and Eq. 
4 becomes&#10;(5)&#10;D/r=eANhsV/H&#10;Let us use eA=8.0×10−20 J/molecule, Vmax=2.25 (1/s) (V under no load)5, H=35.2 kW/m2 at T=0 at 0°C6, as was done in Sect. 2.1 of ref. 2. Then Eq. 5 gives&#10;(6)&#10;D/r=900 nm,     for T=0 at 0°CThis value of D/r is large but those of a similar order of magnitude are obtained from other experimental data in the next section.&#10;2.4&#10;D/r values estimated from the data by Harada et al.7&#10;Harada et al.7 observed the motion of actin filaments on a glass strip covered by myosin molecules, and measured the velocity of the actin filament and rate of hydrolysis of ATP. They estimated from their data that D is larger than 60 nm at 30°C and than 200 nm at 20°C. As mentioned by several authors, however, their way to estimate D seems too speculative. Nevertheless, the order of magnitude of D/r can be estimated from their data as follows.&#10;According to Harada et al.7, the mean sliding velocity of the actin filaments does not depend upon the filament length. They determined the rate of hydrolysis of ATP by the Pi production rate per 1 nm of the actin filament. Here let us simplify their system by supposing the following model. Myosin molecules are arranged on a straight line with the mutual separation of dH on the glass strip. The length of the line is more than 1 m. A straight actin filament of 1 m is on the myosin line and moves along it. Now the stress T is regarded as a friction force against the actin filament movement. Then relations similar to Eqs. 1, 3 and 4 can be derived. If the quantities concerning the experiment by Harada et al.7 are expressed with suffixes H, Eq. 4 becomes&#10;(7)&#10;DH/rH=eANHVH/(HH+WH)Harada et al. determined the average distance between myosin molecules on the glass strip by electron microscope as about 12 nm. 
Hence we set.&#10;(8)&#10;dH=12 nmSince the length of the actin filament is 1 m and myosin molecule is two-headed, we have&#10;(9)&#10;NH=2/dH=2/12×10−9=0.17×109In Table 1 of the paper by Harada et al.7, values of ν (µm/s) and (Pi/s per nm) are given. With our notations, VH (m/s) = ν×10−6 and HH + WH (J/s per m) = εATP (Pi/s per nm)×109. Then Eq. 7 becomes&#10;(10)&#10;DH (nm)/rH=170ν/(Pi/s  per nm)By using the values given in Table 1 of the paper by Harada et al.7 and NH in Eq. 9, we have&#10;(11a)&#10;DH/rH=850 nm for actin filament at 22°C&#10;(11b)&#10;DH/rH=350 nm for actin filament at 30°C&#10;(11c)&#10;DH/rH=1280 nm For thin filament at 22°CThese values of DH/rH are as large as D/r=900 nm in Eq. 6.&#10;Presumably, one can get valuable information on the mechanism of myosin sliding on the actin filament by extending the ingenious experiment by Harada et al.7, changing experimental parameters and using muscles of different kinds.&#10;3.&#10;Requirements for any models based upon the sliding filament theory&#10;Requirements which any models based upon the sliding filament theory should fulfill may be listed as follows.&#10;Large D/r in shortening muscle&#10;According to Eq. 6, D/r=900 nm for T=0 at 0°C. This relation is derived solely based upon the filament sliding theory and thus any model based upon the sliding filament theory should give this order of magnitude of D/r.&#10;Chemical reactions associated with the large D/r&#10;Coupling between the force production and chemical reaction should be explained in a reasonable manner.&#10;Crossbridges produces force independently in the isometric tetanus state&#10;Ramsey and Street8 and Gordon et al.9 examined variation of active tension with overlap between myosin and actin filaments in fibers of frog muscles. They observed a roughly linear decline of active tension with extension of the fiber beyond the length at which it gave maximum force (cf. Fig. 1 in ref. 10). 
These observations indicate that cross-bridges produces force independently when the muscle length is clamped.&#10;Small D observed in experiments with single myosin molecule in vitro&#10;Molloy et al.11 studied the movement and force generation produced by a single myosin head and found the movement during one ATP hydrolysis cycle to be about 4 nm. Kitamura et al.12 developed a new assay for direct manipulation of S-1 making visualization possible with a fluorescent label. Their results show that a myosin head moves along the actin filament with 5.3 nm steps, often producing displacement of 11 to 30 nm for each ATP hydrolysis cycle.&#10;The model should explain major experimental data on muscle properties or behaviors.&#10;4.&#10;Construction of a new model&#10;4.1&#10;Ratio r of the myosin heads attached to actin filaments to all heads&#10;In Sect. 2.2, the ratio of the number of myosin heads attached to actin filament per total number is denoted as r. Here let us estimate the value of r. The isometric tension per head is denoted as p0. Ishijima et al.13 did single-molecule analysis of the actomyosin motor using nano-manipulation. Their experimental results indicate that p0 is close to 5.7 pN. We adopt this value:&#10;(12)&#10;p0=5.7 pN.According to Eq. 3,p=T/(rNhs), and thus denoting the macroscopic isometric tension as T0, we have r = T0/(p0Nhs). If we use the above value of p0 and Nhs in Eq. 2 with the experimental values of T0=4.1×105 N/m2 in ref. 5, we have&#10;(13)&#10;r=0.41, at T=T0.&#10;X-ray data are favorable to this r value. By X-ray diffraction study of the equatorial reflections from a sartorius muscle, Matsubara et al.14 estimated that about 80% of myosin heads migrate around the thin filament in isometric tetanus muscle. This percentage is higher (about 92∼99%) according to Yagi et al.15. 
Since these percentages are about twice r=41%, it seems reasonable to suppose that one of two heads of a myosin molecule attaches to actin and produces positive or negative force while the other exists in the vicinity of the thin filament as non-attached relief.&#10;Podolsky et al.16, Huxley17, Huxley and Kress18 and Yagi et al.15 observed that the intensity ratio of the [1,0] and [1,1] equatorial reflections increases only minimally as shortening velocity increases, indicating that the total number of myosin heads in the vicinity of the actin filament decreases only slightly. In the power stroke model, the number of the myosin heads producing tension depends upon the tension and the X-ray observation is explained by assuming that weakly attached crossbridges exist in addition to strongly attached crossbridges and the muscle stiffness is determined by the strongly attached crossbridges. Then a question arises how this weakly attached crossbridge is related to the force production mechanism. It seems reasonable to consider that both the strongly attached and weakly attached crossbridges are substantially related to the force production in positive or negative ways. In our model both are counted to calculate the ratio r, i.e., it is assumed that, as an approximation, r is constant and independent of the shortening velocity, as the X-ray data suggest. Referring to Eq. 13, the following value of r seems close to reality.&#10;(14)&#10;r=0.41, for any T&#10;4.2&#10;Formation of complex MA3 of myosin head and actin molecules&#10;Andreeva et al.19 and Xiao et al.20 studied the way of binding of a myosin head to an actin filament by using tryptic digestion of myofibrils and measuring optical polarization and dichroism. 
They concluded that in the rigor rabbit psoas muscle each myosin head binds to two actin monomers in a thin filament20, suggesting the possibility that the myosin head may first bind to one and then to two monomers in the actin filament19.&#10;Figure 2 shows an example of possible mechanism of how such binding change occurs. In Figure 2(a), an ATP-activated single myosin head (S−1) is indicated as M, which sits at a position apart from the actin filament. The actin filament is represented by the periodic array of potential wells which correspond to the possible binding sites for M. The wells are arranged with a period of the strand L:&#10;(15)&#10;L=5.46 nmThe helical structure of the actin filament will disturb the periodic potential arrangement but its effect does not seem essential and is neglected here. Figure 2(b) shows the state at the moment M attaches to actin 1. Attachment of the myosin head may cause large strain in actin molecules. Protein molecules are structurally polar. (Note that G-actin has the permanent dipole moment of 600 Debye21.) Hence the actin filament is piezoelectric. Consequently, it is a possibility that the strain produces an electric polarization as symbolically indicated by the electric charges - in actin 1 and + in actin 2 in Figure 2(c). The myosin head is negatively charged and the Coulomb interaction between the head charge and the induced polarization charges raise the potential well at actin molecule 1 and lower the well at actin 2, resulting in the potential energy distribution for the head shown in Figure 1(c), which might be wide over the two actin molecules and has two narrow inner potential wells at the actin molecule 1 and 2, such that M can jump from one to the other due to the thermal fluctuation and is statistically distributed in these wells as schematically drawn in Figure 2(d). The negative charge of the head will tend to stabilize the charges caused by the polarization in wells 1 and 2. 
The head at well 2 will newly produce the polarization charge − in well 2 and + in well 3 similarly to the case of (c) but somewhat differently due to the difference in prehistory from (c). The induced charges in this case are symbolically shown by small − and + in (d). Accordingly, the structural changes in (d) will be similar to (c) but somewhat different from (c). The potential distribution is supposed to be such as shown in (d). Presumably the myosin head bound to Actin 1 and 2 is observed in the experiment of ref. 19 and ref. 20.&#10;Figure 2(d) shows the potential barrier U* which a myosin head M has to cross over to move from well 2 to 3. In our model, it is assumed that U* is relatively large if there is no force acting on the myosin head but sensitively depends upon the force on the head, as discussed in detail in Sect. 4.3. Accordingly, the head M moves by about 5.46 nm from well 1 to 2 and stops if there is no force on the head. As mentioned in Sect. 3, Kitamura et al.12 observed that a single myosin head moves by 5.3 nm or about a few times of 5.3 nm along the actin filament during one ATP hydrolysis cycle in vitro. This observation seem to indicate that U* is large in the absence of the force on the head but there is some variation of the force depending upon experimental conditions and causes the difference of the number of steps.&#10;Figure 3 illustrates the change of molecular structures associated with the change in potential distribution in Figure 2. Figure 3(a) shows a myosin head just after attachment to actin filament (the shape is speculated), which corresponds to the state in Figure 2(b). Then the potential changes occur from Figure 2(b) to (c) and (d), causing the shift of the binding site of the head from Figure 3(a) to (b). The shape of the head in (b) is depicted referring to the shape in Figure 6 of the paper by Rayment, et al.22. 
As in Figure 2(d), it is assumed that the molecular deformation occurs mainly in the shadowed three actin molecules together with the head. The shadowed complex in Figure 3(b) is denoted as MA3. Figure 3(c) is a modeling of (b). The tilting angle of the neck domain relative to the vertical z axis is denoted as θeq in the case of a single myosin head.&#10;4.3&#10;Cooperative interaction among crossbridges&#10;As mentioned in Section 3, the experimental observations indicate that crossbridges produce force independently in isometric tetanus muscle. Based on this observation it is commonly assumed that the myosin head produces force independently in any contracting muscles. This assumption, however, seems too speculative because the experiments were done in the special case of clamped length. Below we shall discuss contracting muscles in general and treat the length-clamped experiment as a specific case that the sliding velocity is zero.&#10;In our model, 41% of the myosin heads are bound to actin filaments as indicated by Eq. 14. Then crossbridge may appear in one of the three states shown in Figure 4. The complex MA3 is depicted by shadowing and connected to a myosin filament by the thin bar denoted as tail. The end of the crossbridge at the myosin filament is indicated as K and that at the actin filament as J. The right direction corresponds to the sliding direction of the myosin filament against the actin filament and is called forward and the left direction backward. The tail has bending freedom (cf. Sect. 3.4 of ref. 2), which is represented by one hinge depicted as a black square for simplicity. The force which the cross-bridge exerts on the myosin filament is denoted as p(y), where y is shortening of the crossbridge. In Figure 4(a), the tilting angle of the neck domain is equal to the equilibrium angle θeq and the crossbridge produces no force (p(y)=0). 
In (b), the myosin filament is set in left from (a) and the neck domain tilts to left from θeq and pulls the myosin filament forward (p(y)&gt;0). The tail is straightened and the elastic force by the myosin head will be straightforwardly transmitted to the myosin filament. In (c), the myosin filament is set in right from (a) and the crossbridge pushes the myosin filament backward (p(y)&lt;0). The tail is bent and the effect of the backward force by the crossbridge will be weakly transmitted to the myosin filament.&#10;As shown in Figure 2(d), the myosin molecule has to cross over the potential barrier U* to move from well 2 to 3 in MA3. In ref. 2, the probability Q that the myosin head crosses over U* is discussed based upon Eyring theory of rate process23 as was done by Huxley and Simmons24. (It should be noted that our model is quite different from the model discussed in ref. 24, although Eyring theory is used during formulation in both models.) Considering that U* is a function of the shortening of the crossbridge y, Q is given by&#10;(16)&#10;Q(y)=Aexp(−U*(y)/kT)where A is a constant, k the Boltzmann constant and T the absolute temperature. In ref. 2, U *(y) is expressed by U*(y)=U*0−afJ(y) (Eq. 3-5-2) and fJ(y) =−p(y) (Eq. 3-4-2), where U and a are constants. Thus we have&#10;(17)&#10;U*(y)=U*0+ap(y)The stiffness of the elongated crossbridge (i.e., myosin head + tail) is denoted as κf in the case of Figure 4(b) and that of the bent crossbridge in (c) as κb. Then, following Eq. 3-4-6 in ref. 2, p(y) is given by&#10;(18a)&#10;p(y)=−κfy, for y&lt;0&#10;(18b)&#10;p(y)=−κby, for y&gt;0Values of the stiffnesses are given by Eqs. 4-1-13 and 4-1-14 in ref. 2:&#10;(19a)&#10;κf=2.80 pN/nm&#10;(19b)&#10;κb=0.26 pN/nmEq. 19 means that the crossridge is elastically non-linear.&#10;We leave mathematical discussion based upon these equations to ref. 2, and explain only implication of the equations here. 
Figure 5 illustrates what happens with the model during the filament sliding. Figure 5(a) shows the state that p(y)&lt;0 and the stiffness is κb. (The head corresponds to the weakly attached head in term of the power stroke model.) When the myosin filament moves forward, p(y) negatively increases and thus the potential barrier U*(y) decreases by Eq. 17 and the transition probability Q increase by Eq, 16. As an statistical average, there may be a critical y (denoted as yc) at which the head crosses over U *. Figure 5(b) shows the state after the head crosses over U* and forms a new complex MA3. Now p(y)&gt;0 and the stiffness is κf. (The head corresponds to the strongly attached head in term of the power stroke model.) U*(y) is large by Eq. 17 and Q is small by Eq. 16. Hence the head does not cross over U* and pulls the myosin filament at the same position. The myosin filament continues to move forward and the state (c) is realized. The situation at (c) is identical to (a) except for binding actin mates. Filament sliding takes place repeating such steps. Since the stiffness κf is much larger than κb (cf. Eq. 19), the time-averaged force produced by the cross-bridge becomes positive.&#10;The filament sliding causes the shift of MA3, while the filament sliding is a result of force production by all MA3. Thus the force production of each crossbridge is helped by other crossbridges. In this sense, there is a mutual cooperation among crossbridges. Generally cooperative system is mathematically discussed by using a feedback loop that output of each element is affected by output of all elements in the system (cf. for instance, Sect. 22 of ref. 25). Eqs. 16∼19 imply such a feedback loop.&#10;Cooperative interaction among crossbridges is caused by the filament sliding and disappears when the sliding stops. 
Accordingly, crossbridges produce force independently in the isometric tetanus state, in agreement with the observation by Ramsey and Street8 and Gordon et al.9.&#10;4.4&#10;energy liberation and chemical reactions associated with force production&#10;Lymn and Taylor26 proposed the chemical cycle relating the force production in the power stroke model. In their scheme, dissociations of Pi and ADP play the important role in force production. In our model, the way of energy flow and chemical reaction is quite different from theirs and dissociations of Pi and ADP do not play any important role in connection with force production. Portions of the ATP hydration energy eA stored in the head are used in formation the complex MA3 (cf. Fig. 3(b)) and for the force generation steps. Each step of force generation is associated with the chemical reactions: dissociation from one actin molecule and binding to the neighboring actin molecule. Figure 6 is an illustration of the steps. On the right, myosin head is depicted as a box with the energy G stored in it. The level of G is lowered successively associated with the step of force production of the myosin head. An actin filament is depicted as a box with many shelves on the left. Shelf Ai corresponds to i-th actin molecule. The force fJ from the myosin filament to the head lowers the potential barrier U* and lets the head dissociate from Ai−1 and bind to Ai. With this step, a myosin head produces force by spending the energy indicated as g. Calculation result on this scheme is shown later in Figure 9. The step energy liberation seems to be related with thermal fluctuations as discussed in next section.&#10;4.5&#10;Role of thermal fluctuations&#10;Based upon Eyring theory of rate process23, the probability Q for a myosin head to cross over U* is given by Q(y)=Aexp(−U*(y)/kT) (Eq. 16). In usual textbooks, Eyring theory is discussed in case that a single material particle crosses over a potential barrier U*. 
Then, the energy for the particle to cross over U* is supplied by the thermal energy of the surroundings. In the case of a myosin head, however, the head has much internal freedom for structural and thermal fluctuation. Then it becomes possible that the fluctuation occurs adiabatic in a limited time scale in the way that the decrease of structural fluctuation energy is compensated by increase of the thermal fluctuation energy of constituent atoms, and vise versa. Thus the internal energy of the head can be used to excite the mode of motion of center of gravity of the head and to help the head to cross over U* in Figure 2(d). Then the difference between the potential energies at wells 2 and 3 is converted into the elastic energy of the crossbridge to be used to pull the myosin filament. In this way the internal energy of the head originally supplied by ATP hydrolysis is used by parts to pull the myosin filament. The thermal fluctuation plays an important role for force production but it is not against the second law of thermodynamics because contribution from thermal fluctuation of surroundings to force production is zero as time average.&#10;This idea is similar to the model proposed by Huxley in 195727 in the sense that the thermal energy plays an important role to produce force. In Huxley model, however, it is assumed that excitation of thermal fluctuation having energy comparable to the ATP hydration energy eA (about 21 kT) occurs to produce the power stroke. Then the probability for such excitation is approximately proportional to exp(−eA/kT) = exp(−21)=7.6×10−10, which seem too small to be realistic. In our model, a fraction of εATP is used in each cycle of force production and there is no such difficulty.&#10;5.&#10;Comparison of calculation results with experimental data&#10;In this section it is shown that calculations based upon our model well explain observed relations between muscle properties or behaviors. 
The presented figures are reproduction of the figures in refs. 2, 3. Readers may refer to refs. 2, 3 for detailed discussion on the calculations.&#10;The ratio of the stiffness of the muscle at T=0 to that at T= T0 is about 0.35 where T0 is the isometric tension. This T-dependency of stiffness was attributed to the variation of the number of the attached myosin heads in the power stroke model. In our model, the T-dependency is explained based upon the difference between the stiffness κf of the cross-bridge having the straightened tail and κb for the bent tail (cf. Eq. 19). Figure 7 shows the calculated relative stiffness S/S(T0) of muscle as a function of T/T0, in comparison with the experimental data.&#10;The force-velocity relation is calculated in Sect. 4.2 of ref. 2 based upon Eqs. 16∼19. Figure 8 shows the result by the red curve in comparison with the experimental data by Edman5. The data points deviate from the red curve as the tension increases beyond Tdev/T0=0.66. The origin of this deviation is discussed later with Figures 11 and 12.&#10;The energy liberation rate is discussed in Sect. 4.3 of ref. 2. The obtained expression for the rate is given by Eq. 4-3-13 in ref. 2. The functional form of the expression is quite similar to the empirical equation for the rate given by Hill30. Figure 9 shows calculation result in ref. 2 in comparison with the experimental data.&#10;Figure 10 shows the distance D over which a myosin head translates using eA. Calculation was done2 by using Eqs. 2, 4 and 14 and the values given by the curves in Figures 8 and 9. D is 369 nm at T/T0=0.&#10;Large values of D can be obtained also from the experiment by Harada et al.7 cited in Sect. 2.4. They observed that most actin filaments shorter than about 40 nm dissociated from the myosin-coated surface while longer ones remained bound to the surface. 
This fact suggests that 2 or 3 myosin heads are bound to an actin filament of 40 nm so that at least one of the myosin heads remains to attach to the filament though the others detach. In Sect. 2.4, the mean distance between neighboring myosin molecules dH is set equal to 12 nm (Eq. 8). Then the number of the myosin heads which are under the actin filament of 40 nm is 2×40/12=6.7 considering two heads of the myosin molecule. Let us denote the ratio of the number of myosin heads attached to the actin filament per that of all myosin heads under the actin filament as rH as in Eq. 7. Then we have 6.7 rH=2∼3. Thus rH=0.30∼0.45. By using values of D/rH= 850, 350, 1280 nm in Eq. 11, we have D=260∼380, 110∼160. 380∼580 nm at T/T0=0. They are as large as 369 nm at T/T0=0 in Figure 10. Also Harada et al.7 observed that an actin filament of about 40 nm sometimes stopped the movement. Presumably this is the case that only one myosin head remains to attach to an actin filament so that there is no cooperatively working mate and the actin filament stops.&#10;Figure 2(d) schematically shows the general case that the existence probability of the myosin head is distributed in wells 1 and 2. Figure 8 shows that the experimental data deviate from the calculation result (the red curve) for T/T0 &gt; Tdev/T0=0.66. In ref. 3, the origin of this deviation is attributed to variation of the existence probability of the myosin head in MA3 complex. Figure 11 illustrates the two cases of the probability distribution. In the case (a), the myosin head exists solely in well 2. This case is discussed in ref. 2 and leads us to the red curve in Figure 8 as well as the curves in Figures 7, 9 and 10. In the case (b) the existence probability of the myosin head is finite in well 1. This case is discussed in ref. 3. Eqs. 17∼19 mean that U* decreases and stimulates forward motion of the myosin head when forward force on the head increases. 
Similarly, U*12 decrease when backward force on the head increases as T approaches T0 in Figure 11b. Here it should be noted that implication of the potential distribution in Figure 11 is quite different from the potential diagram in Figure 6 of the paper by Huxley and Simmons24 although they are somewhat similar in appearance.&#10;In Sect. 4.3, the potential barrier U* is expressed by U*(y)=U*0+ap(y) (Eq. 17). This expression is used in combination with the assumption that the myosin head exists solely in well 2 as shown in Figure 11(a). In Sect.7 of ref. 3, it is discussed how the potential height U*(y) varies when the existence probability of the myosin head becomes finite in well 1 as shown in Figure 11(b). As a result, U*(y) is given by&#10;(20)&#10;U*(y)=U*0+ap(y)+c(ycH−yc*))Here c is a constant and (ycH−yc*) is a parameter concerning structural modulation of MA3 due to finite existence probability of the myosin head in well 1. Calculated (ycH−yc*) is given as a function of T/T0 by the green curve in Figure 12(a). The filament sliding velocity ν* to be observed is given in relation with the Hill-type velocity νH by&#10;(21)&#10;ν=νHexp(−bν(ycH−yc*))Calculation is done by setting&#10;(22)&#10;bν=c/kT=7.9 (1/nm)Obtained ν is shown by the blue curve Figure 2b in comparison with the Experimental data by Edman15.&#10;Various workers studied the isometric tension transient and isotonic velocity transient. In ref. 3, calculations are done to explain the experimental data on isometric tension transient by Ford et al.31 and on the isotonic velocity transient by Civan and Podolsky32.&#10;Huxley33 divided the transient responses to the sudden reduction of length or of load into four phases. The first phase of the isometric tension transient is the step change of tension due to sudden length change. The first phase of the isotonic velocity transient is the step change of muscle length due to the sudden load change. 
The initial state in these changes is the isometric tetanus state, in which the existence probability of the myosin head is finite in well 1 as shown in Figure 11(b). Calculation is done assuming that the existence probability does not change during the first phases since the length or load changes are very fast.&#10;Figure 13 shows calculation results reported in Sect. 4 of ref. 3. Here the length change per half sarcomere is denoted as ΔLhs and the relative tension is given by T/T0obs in both transients (cf. Figure 8 for T0obs). The experimental data by Ford et al.31 are shown by circles for the isometric tension transient and the data by Civan and Podolsky32 by squares for the isotonic velocity transient. Calculation results are shown by the green curve for the isometric tension transient and by the brown curve for the isotonic velocity transient. Calculation is done by using the values of stiffnesses given by Eq. 19, which implies that the elasticity of crossbridge is largely nonlinear. The curvatures of the curves in Figure 13 reflect the nonlinearity. To get good agreement with the experimental data, it is assumed that extensibility ratios of the crossbridge, myosin filament and actin filament are 0.22, 0.26 and 0.52, respectively. A comment is given on these values in Sect. 6.&#10;As mentioned above, the curves in Figure 13 are obtained assuming that the existence probability of the myosin head in well 1 of Figure 11(b) does not change during the first phases. Thus the structure of MA3 becomes unstable just after the first phases. The complex muscle behaviors after the first phases are considered to correspond to the process that the existence probability of the myosin head in MA3 changes into the stable probability at a given condition.&#10;Figure 14 shows T/T0obs as functions of time t for various length change steps ΔLhs after the first phase in the isometric tension transient. Sect. 5 of ref. 
3 gives mathematical details on the calculation.&#10;Figure 15 shows time course of length change ΔLhs’ after the first phase in the isotonic velocity change. Sect. 6 of ref. 3 gives mathematical details on the calculation.&#10;6.&#10;Summary and Discussion&#10;In Sect. 3, requirements for any models based upon the filament sliding theory are listed. They are discussed with the same item numbers.&#10;Large D/r in shortening muscle&#10;Note that the large magnitude of D/r such as 900 nm (Eq. 6) is obtained based upon the sliding filament theory itself and thus any model based upon the sliding filament theory should give such large D/r. Large values of D/r are obtained also from the experimental data by Harada et al.7 in Sect. 2.4. Our model is constructed to give the large values of D/r.&#10;The order of magnitude of D is about 10 nm in the power stroke model. Combining D/r=900 nm and D=10 nm gives r=0.011, which seems too small to be realistic. Hence the power stroke model does not seem to fulfill this requirement. More detailed discussion on difficulty of the power stroke model is given in Sect. 2.2 of ref. 2.&#10;The cooperation between the myosin and actin molecules is important for force production in our model. In the power stroke model, the actin filament is treated as a relatively passive element like a ladder for a myosin head. In general protein molecules seem to play more active role in biological systems.&#10;Chemical reactions associated with the large D/r&#10;In our model, the ATP hydration energy is spent by fraction through repeated chemical reactions between the myosin head and actin filament, as illustrated in Figure 6. The small amount of energy liberated per one step make the contribution of the thermal fluctuation possible as discussed in Sect. 4.5. This scheme well explains the energy liberation rate vs. 
T/T0 relation as shown in Figure 9.&#10;Crossbridges produces force independently in the isometric tetanus state&#10;As discussed at the end of Sect. 4.3, the cooperative interaction among crossbridges is caused by the filament sliding and disappears when the sliding velocity becomes zero. Accordingly, crossbridges produce force independently in the isometric tetanus state, in agreement with the observation by Ramsey and Street8 and Gordon et al.9.&#10;Small D observed in experiments with single myosin molecule in vitro&#10;In the expression of U*(y)=U*0 + ap(y) (Eq. 17), U*0 is relatively large and the probability for the myosin head to cross over U* is small when the force p(y) on the head is small. As discussed in Sect. 4.2, in the in vitro experiments by Molloy et al.11 and Kitamura et al.12, the single myosin molecule has no partner of cooperation and the force on the head is close to 0 though there may be some fluctuation of the force depending upon the experimental conditions. Hence D is expected to be about 5.46 nm or a few times of it.&#10;The model should explain many observed relations between muscle properties or behaviors&#10;Figures 7∼9 and 12∼15 show good agreement between calculation results based upon the model and the experimental data.&#10;Combining Eq. 3 and Eq. 14 gives&#10;(23)&#10;p/p0=T/T0This relation means that we can get an expression of a quantity as a function of macroscopic parameter T/T0 when we get an expression of the quantity as a function of microscopic parameter p/p0. This was very convenient in theoretical treatment in refs. 2, 3.&#10;To get good agreement between the calculation results and experimental data in Figure 13, it is assumed that extensibility ratios of the crossbridge, myosin filament and actin filament are 0.22, 0.26 and 0.52. These values are approximately in agreement with the extensibility ratios 0.31, 0.27 and 0.42 estimated through X-ray diffraction by Wakabayashi et al.34. 
The ratio is assumed to be 1, 0 and 0 in the power stroke model (cf. ref. 35).&#10;In Figure 2, piezoelectric effect is considered. Protein molecules are structurally polar and any biological system should be treated as a system of four variables (stress, strain, electric field and polarization) from the physical viewpoint, as in the discussion on the flagellar rotary motor in ref. 36.&#10;Merit of the two-headed structure of myosin molecule is discussed in Sect. 5.2 of ref. 2.&#10;Cooperativity among myosin molecules is discussed concerning cytoplasmic streaming in Characean algae in Sect. 5.3 of ref. 2.&#10;Our model has some similarity with the ratchet model (cf. e.g., ref. 37). In the ratchet model, however, concrete picture seems lacking on chemical reaction and energy liberation process during the force production as asked by Requirement 2 in Sect. 3. Chemical reaction and energy liberation process in our model are illustrated in figure 6.&#10;Eqs. 18 and 19 mean elastic nonlinearity of the cross-bridge. 
Recently Kaya and Higuchi38 studied nonlinear elasticity of single myosin molecules in myofilament.&#10;"/><cas:FSArray xmi:id="1461" elements="965 477"/><cas:FSArray xmi:id="2475" elements="196"/><cas:FSArray xmi:id="1269" elements="1427 3262 1204 1998 811"/><cas:FSArray xmi:id="6834" elements="7006 7019"/><cas:FSArray xmi:id="7016" elements="6996"/><cas:FSArray xmi:id="6744" elements="6704 7089"/><cas:FSArray xmi:id="6902" elements="6906 7153"/><cas:FSArray xmi:id="7163" elements="6748 6848"/><cas:FSArray xmi:id="6858" elements="6714 6956"/><cas:FSArray xmi:id="7129" elements="6804 6838"/><cas:FSArray xmi:id="7119" elements="6781"/><cas:FSArray xmi:id="7126" elements="6791"/><cas:FSArray xmi:id="6778" elements="7059"/><cas:FSArray xmi:id="7122" elements="7029 6694"/><cas:FSArray xmi:id="6801" elements="6966"/><cas:FSArray xmi:id="10086" elements="7006 7019"/><cas:FSArray xmi:id="10090" elements="6976"/><cas:FSArray xmi:id="10105" elements="6996"/><cas:FSArray xmi:id="10108" elements="7133"/><cas:FSArray xmi:id="10123" elements="6704 7089"/><cas:FSArray xmi:id="10127" elements="6758"/><cas:FSArray xmi:id="10142" elements="6906 7153"/><cas:FSArray xmi:id="10146" elements="6882"/><cas:FSArray xmi:id="10161" elements="6748 6848"/><cas:FSArray xmi:id="10165" elements="6814"/><cas:FSArray xmi:id="10180" elements="6714 6956"/><cas:FSArray xmi:id="10184" elements="7099"/><cas:FSArray xmi:id="10199" elements="6804 6838"/><cas:FSArray xmi:id="10203" elements="6862"/><cas:FSArray xmi:id="10218" elements="6781"/><cas:FSArray xmi:id="10221" elements="6916"/><cas:FSArray xmi:id="10236" elements="6791"/><cas:FSArray xmi:id="10239" elements="6936"/><cas:FSArray xmi:id="10254" elements="7059"/><cas:FSArray xmi:id="10257" elements="7039"/><cas:FSArray xmi:id="10272" elements="7029 6694"/><cas:FSArray xmi:id="10276" elements="6724"/><cas:FSArray xmi:id="10291" elements="6966"/><cas:FSArray xmi:id="10299" elements="7069"/><cas:FSArray xmi:id="10294" elements=""/><cas:FSArray 
xmi:id="10296" elements="6966"/><cas:View sofa="1161" members="1754 2282 2097 1676 2115 468 1562 2570 2070 1024 2512 1242 3228 3157 2158 3139 893 2246 2034 947 1855 324 2416 110 573 388 675 1033 2726 693 3197 2611 3046 1195 1002 2486 684 658 3253 298 3055 1 884 2753 1588 119 3019 144 2142 615 1354 2441 1478 1763 67 315 755 213 85 2917 1519 1902 451 3314 1571 2771 2735 2043 649 2106 1074 2052 528 2986 2007 1965 2629 3071 537 2744 2647 1410 10 2088 2943 2638 2881 2552 2848 240 1938 1929 256 2602 956 802 1828 3089 564 2273 2593 555 902 2959 1794 1122 2167 3332 911 1956 1846 2355 2503 19 2432 1545 519 2717 2193 820 2377 42 2450 1947 3181 1649 2656 2839 1363 859 1597 725 1213 1323 2459 2291 1837 2815 3219 938 1920 1186 1452 2899 2124 1049 357 2762 1058 2400 2926 1785 2908 135 3271 404 2620 2536 1911 1503 606 1731 2061 204 1803 3115 94 2995 546 624 1260 2255 2079 1168 2324 1871 489 2264 1819 977 1177 231 764 1640 289 3106 1113 3287 929 3028 2977 2133 2229 1974 2679 1251 1487 3323 1667 1436 597 3341 3148 2780 2025 3244 2688 3037 1388 2176 836 2789 2308 920 379 187 2872 3080 640 2798 718 1512 350 1496 1292 3064 582 3190 3166 2579 1372 1714 2409 1106 2832 1694 153 1147 2386 2710 2425 1626 1445 1067 1042 1099 829 249 877 1339 1299 427 2521 3280 2672 773 1633 1864 413 2586 2317 711 780 2857 420 2545 2468 35 1332 3237 103 845 2151 2952 2393 1154 3132 1991 397 1812 128 633 1740 180 787 1619 852 2665 28 2202 1235 2209 173 1747 3004 443 747 307 3011 333 3098 667 3173 1284 3124 986 281 1315 2528 2185 1554 273 1091 265 460 794 3306 1131 1139 2807 2300 2238 1346 994 2935 51 1580 1276 1983 511 2495 589 1419 2478 1083 2824 59 2864 2561 2890 2968 2333 1658 1306 434 868 1685 2016 341 76 702 222 1379 1701 2216 1465 2697 1222 498 366 734 1606 160 2342 1397 3206 1011 1772 196 477 965 811 1204 1427 1998 3262 1528 1721 1880 2364 3296 4198 5446 4086 5334 6182 6198 5654 6630 5638 5462 4870 4054 5798 5878 6038 4790 4582 4966 6566 5238 4614 4502 3782 5350 3590 5494 5414 3478 6166 4742 4358 3446 
3638 5670 6358 5830 4246 5686 5366 3686 4470 3814 4774 5894 6230 6086 6102 3830 3718 3846 4838 4390 4758 5910 3398 4550 6534 6646 6406 3542 5718 5254 6262 4374 5174 4118 6006 6054 3734 3974 5286 5526 5398 4262 5590 6438 6614 6518 3702 5158 6374 5190 5062 4886 4214 5782 5574 6342 5222 6294 4806 3926 5110 6470 5046 3414 6118 3798 4406 6454 3894 4822 4102 3622 6582 6310 4998 3990 4230 4534 3766 4182 6326 6662 5318 3430 4486 6390 3942 3878 6022 6550 4950 5814 6214 4566 4630 5142 5270 3382 3366 4422 3462 5206 6070 3862 3350 5382 5734 4918 6422 5974 5430 4934 5030 4678 5958 4070 5014 3958 6598 4278 6278 5766 5542 3510 4598 4150 4518 4006 5926 5302 3654 4326 3526 6486 4902 4038 4166 4710 3574 3670 3494 5990 4694 5606 5622 6134 3558 5126 4454 6502 4294 5702 4310 5750 5942 4726 6678 4342 5510 5078 4134 3750 4982 4854 4022 3606 4438 5094 5478 5846 3910 6150 5862 5558 4662 4646 6246 7006 7019 6996 6704 7089 6906 7153 6748 6848 6714 6956 6804 6838 6781 6791 7059 7029 6694 6966 6976 7133 6758 6882 6814 7099 6862 6916 6936 7039 6724 7069 8791 9767 7263 9319 8583 8231 9703 9119 8783 9975 8279 7727 9423 8479 9759 7279 8359 8831 8335 8303 7935 10015 8383 9583 9575 8895 9807 9631 9727 9335 9663 9591 9679 9239 8591 8327 7447 7983 8503 9919 9159 9751 9199 7823 8815 8415 7839 7623 8031 9383 8551 7575 8079 8127 7999 9023 9495 9263 7895 7479 7399 8023 8687 10039 9311 7751 7775 7679 7967 8047 7631 8063 8143 7407 7343 9287 9863 9879 8439 9799 9247 9127 9887 8839 9535 8743 7703 7351 8495 8823 8615 7567 9967 8407 8151 9143 8191 7455 9503 9743 9511 8655 7599 9391 7759 7591 8623 8575 7943 8799 9095 9639 9431 7503 7431 7527 8983 7415 9927 10047 9375 9951 7199 9735 9991 7791 7391 8519 8679 7647 7951 9039 9255 8311 7607 8135 9911 9479 8927 8775 9327 7319 8015 7535 8847 8935 8223 7975 9407 9343 9655 7487 7991 9855 8255 8863 8071 7495 9351 7543 8263 7695 8007 7359 8711 9167 8663 8727 9543 9207 9999 9183 9791 7687 9487 8639 10007 7207 9959 9151 8951 7471 7871 8287 8399 8239 9815 8599 8039 9871 7767 
9695 9399 9895 9271 7183 7743 8447 7855 9055 9031 9687 9295 8943 9903 8247 8703 9599 8487 9447 8527 8199 7799 8055 8207 7815 7551 8759 9063 8767 7919 7375 7423 7327 8959 7847 9935 7863 9647 8319 9615 8111 8807 8607 9463 8271 9671 9223 7231 8903 8543 7439 7311 9455 7191 9359 9559 7959 9567 10063 7511 7167 8119 9071 8559 7831 8167 8647 7335 8567 8511 9983 7671 8087 8215 9215 9783 8967 7247 8423 7927 9191 8975 8631 9007 7879 9607 8991 7655 9519 9279 8455 8671 7239 7639 8295 9831 9087 10023 8751 7911 7559 7663 9439 8391 8535 9015 8871 8911 7215 9551 7807 8471 9719 7223 7367 8719 8343 8463 9623 9367 8879 8431 9415 9047 7463 7303 7711 9711 9839 8695 9135 7287 8159 7615 10031 7783 7583 9103 8095 7175 7271 8175 9943 8735 7719 8999 9079 9527 8103 8367 7383 8183 7735 8855 7903 8919 7255 7887 9471 8351 10055 9775 7519 7295 9231 9847 9111 8887 9823 9303 9175 8375 10071 10076 10093 10111 10130 10149 10168 10187 10206 10224 10242 10260 10279 10302"/></xmi:XMI>
\ No newline at end of file
diff --git a/jcore-utilities/src/main/java/de/julielab/jcore/utility/JCoReCondensedDocumentText.java b/jcore-utilities/src/main/java/de/julielab/jcore/utility/JCoReCondensedDocumentText.java
index 34b0e1f93..9ceb5f84e 100644
--- a/jcore-utilities/src/main/java/de/julielab/jcore/utility/JCoReCondensedDocumentText.java
+++ b/jcore-utilities/src/main/java/de/julielab/jcore/utility/JCoReCondensedDocumentText.java
@@ -1,11 +1,15 @@
 package de.julielab.jcore.utility;
 
+import de.julielab.jcore.types.InternalReference;
 import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
 
 import java.util.Map.Entry;
 import java.util.NavigableMap;
 import java.util.Set;
 import java.util.TreeMap;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 /**
  * This class is helpful when some parts of the CAS document text should be cut
@@ -22,6 +26,11 @@ public class JCoReCondensedDocumentText {
     private String condensedText;
     private JCas cas;
     private Set<Character> cutAwayFillCharacters;
+    private boolean skipInternalReferencesWithLetters;
+
+    public boolean isSkipInternalReferencesWithLetters() {
+        return skipInternalReferencesWithLetters;
+    }
 
     /**
      * <p>
@@ -35,7 +44,41 @@ public class JCoReCondensedDocumentText {
      * @throws ClassNotFoundException If <tt>cutAwayTypes</tt> contains non-existing type names.
      */
     public JCoReCondensedDocumentText(JCas cas, Set<String> cutAwayTypes) throws ClassNotFoundException {
-        this(cas, cutAwayTypes, null);
+        this(cas, cutAwayTypes, false);
+    }
+
+    /**
+     * <p>
+     * Cuts away the covered text of annotations of a type in <tt>cutAwayTypes</tt>
+     * from the <tt>cas</tt> document text. If <tt>cutAwayTypes</tt> is null or
+     * empty, this class' methods will return the original CAS data.
+     * </p>
+     *
+     * @param cas          The CAS for which the document text should be cut.
+     * @param cutAwayTypes The types for cutting. May be null.
+     * @throws ClassNotFoundException If <tt>cutAwayTypes</tt> contains non-existing type names.
+     */
+    public JCoReCondensedDocumentText(JCas cas, Set<String> cutAwayTypes, boolean skipInternalReferencesWithLetters) throws ClassNotFoundException {
+        this(cas, cutAwayTypes, null, skipInternalReferencesWithLetters);
+    }
+
+    /**
+     * <p>
+     * Cuts away the covered text of annotations of a type in <tt>cutAwayTypes</tt>
+     * from the <tt>cas</tt> document text. If <tt>cutAwayTypes</tt> is null or
+     * empty, this class' methods will return the original CAS data.
+     * </p>
+     * <p>The <tt>cutAwayFillCharacters</tt> set may provide characters that, when being the only character between
+     * two cut-away annotations, will add to the span of text being cut away. This way, enumerations of references
+     * (e.g. "4,6,8") can be completely removed, for example.</p>
+     *
+     * @param cas                   The CAS for which the document text should be cut.
+     * @param cutAwayTypes          The types for cutting. May be null.
+     * @param cutAwayFillCharacters Characters that, when being the only separator between two cut away annotations, are also cut away.
+     * @throws ClassNotFoundException If <tt>cutAwayTypes</tt> contains non-existing type names.
+     */
+    public JCoReCondensedDocumentText(JCas cas, Set<String> cutAwayTypes, Set<Character> cutAwayFillCharacters) throws ClassNotFoundException{
+        this(cas, cutAwayTypes, cutAwayFillCharacters, false);
     }
 
     /**
@@ -53,9 +96,10 @@ public JCoReCondensedDocumentText(JCas cas, Set<String> cutAwayTypes) throws Cla
      * @param cutAwayFillCharacters Characters that, when being the only separator between two cut away annotations, are also cut away.
      * @throws ClassNotFoundException If <tt>cutAwayTypes</tt> contains non-existing type names.
      */
-    public JCoReCondensedDocumentText(JCas cas, Set<String> cutAwayTypes, Set<Character> cutAwayFillCharacters) throws ClassNotFoundException {
+    public JCoReCondensedDocumentText(JCas cas, Set<String> cutAwayTypes, Set<Character> cutAwayFillCharacters, boolean skipInternalReferencesWithLetters) throws ClassNotFoundException {
         this.cas = cas;
         this.cutAwayFillCharacters = cutAwayFillCharacters;
+        this.skipInternalReferencesWithLetters = skipInternalReferencesWithLetters;
         buildMap(cas, cutAwayTypes);
     }
 
@@ -84,6 +128,7 @@ public JCas getCas() {
     public void buildMap(JCas cas, Set<String> cutAwayTypes) throws ClassNotFoundException {
         if (cutAwayTypes == null || cutAwayTypes.isEmpty())
             return;
+        Pattern letterP = Pattern.compile("[a-zA-Z]");
         StringBuilder sb = new StringBuilder();
         condensedPos2SumCutMap = new TreeMap<>();
         condensedPos2SumCutMap.put(0, 0);
@@ -103,6 +148,17 @@ public void buildMap(JCas cas, Set<String> cutAwayTypes) throws ClassNotFoundExc
             int begin = merger.getCurrentBegin();
             int end = merger.getCurrentEnd();
 
+            // Only remove InternalReferences without letters. Those are just numbers in
+            // PMC and often lead to errors because they are not really part of the sentence. Table and figure
+            // references, on the other hand, are embedded in the text. Rule of thumb: Remove references
+            // that don't have a letter.
+            if (skipInternalReferencesWithLetters && (merger.getAnnotation() instanceof InternalReference || merger.getAnnotation() instanceof de.julielab.jcore.types.pubmed.InternalReference)) {
+                String coveredText = ((Annotation)merger.getAnnotation()).getCoveredText();
+                Matcher letterM = letterP.matcher(coveredText);
+                if (letterM.find())
+                    continue;
+            }
+
             boolean moreThanOneCharacterDistance = begin - lastEnd > 2;
             boolean previousCharacterIsCutAwayDelimiter = cutAwayFillCharacters == null || cutAwayFillCharacters.isEmpty() || (begin - lastEnd == 2 && cutAwayFillCharacters.contains(cas.getDocumentText().charAt(begin - 1)));
             if (lastEnd > 0 && begin > lastEnd && (previousCharacterIsCutAwayDelimiter || moreThanOneCharacterDistance)) {
diff --git a/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReCondensedDocumentTextTest.java b/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReCondensedDocumentTextTest.java
index 470baa250..da51e1c59 100644
--- a/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReCondensedDocumentTextTest.java
+++ b/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReCondensedDocumentTextTest.java
@@ -119,7 +119,6 @@ public void testReduce5() throws Exception {
 
 		JCoReCondensedDocumentText condensedText = new JCoReCondensedDocumentText(jcas,
 				new HashSet<>(Arrays.asList(InternalReference.class.getCanonicalName())));
-		System.out.println(condensedText.getCodensedText());
 		assertEquals("Leptin is an adipose-derived protein secreted by adipocytes and is expressed in adipose tissue.\n" +
 				"It has the role of being a key regulator of several physiological pathways including body weight and regulation of food intake, inflammation, endocrine function, energy homeostasis, bone metabolism and immunity.\n" +
 				"Results from various studies indicate that leptin may play a significant role in bone physiology, independent of the central nervous system.\n", condensedText.getCodensedText());
@@ -127,6 +126,30 @@ public void testReduce5() throws Exception {
 		assertEquals(314, condensedText.getOriginalOffsetForCondensedOffset(308));
 	}
 
+	@Test
+	public void testReduce6() throws Exception {
+		// Test the option that keeps internal references containing letters in the condensed text instead of cutting them away.
+		JCas jcas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
+				"de.julielab.jcore.types.jcore-document-structure-types");
+		jcas.setDocumentText("Andreeva et al.19 and Xiao et al.20 studied the way of binding of a myosin head to an actin filament by using tryptic digestion of myofibrils and measuring optical polarization and dichroism. They concluded that in the rigor rabbit psoas muscle each myosin head binds to two actin monomers in a thin filament20, suggesting the possibility that the myosin head may first bind to one and then to two monomers in the actin filament19.\n" +
+				"Figure 2 shows an example of possible mechanism of how such binding change occurs.");
+		InternalReference ref1 = new InternalReference(jcas, 15, 17);
+		ref1.addToIndexes();
+		InternalReference ref2 = new InternalReference(jcas, 33, 35);
+		ref2.addToIndexes();
+		InternalReference ref3 = new InternalReference(jcas, 308, 310);
+		ref3.addToIndexes();
+		InternalReference ref4 = new InternalReference(jcas, 428, 430);
+		ref4.addToIndexes();
+		InternalReference ref5 = new InternalReference(jcas, 432, 440);
+		ref5.addToIndexes();
+
+		JCoReCondensedDocumentText condensedText = new JCoReCondensedDocumentText(jcas,
+				new HashSet<>(Arrays.asList(InternalReference.class.getCanonicalName())), true);
+		assertEquals("Andreeva et al. and Xiao et al. studied the way of binding of a myosin head to an actin filament by using tryptic digestion of myofibrils and measuring optical polarization and dichroism. They concluded that in the rigor rabbit psoas muscle each myosin head binds to two actin monomers in a thin filament, suggesting the possibility that the myosin head may first bind to one and then to two monomers in the actin filament.\n" +
+				"Figure 2 shows an example of possible mechanism of how such binding change occurs.", condensedText.getCodensedText());
+	}
+
 
 	@Test
 	public void testCondensedOffsetsWithinCutawayAnnotations() throws Exception {

From 20c7a4569c5777f2e77cd8a6308cce8ed666c153 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 20 Jan 2022 10:00:01 +0100
Subject: [PATCH 111/269] Remove internal reference spans included in BANNER
 gene mentions.

---
 .../jcore/ae/banner/BANNERAnnotator.java      | 33 +++++++++++++++-
 .../jcore/ae/banner/desc/jcore-banner-ae.xml  |  1 +
 .../jcore/ae/banner/BANNERAnnotatorTest.java  | 38 ++++++++++++++++++-
 3 files changed, 69 insertions(+), 3 deletions(-)

diff --git a/jcore-banner-ae/src/main/java/de/julielab/jcore/ae/banner/BANNERAnnotator.java b/jcore-banner-ae/src/main/java/de/julielab/jcore/ae/banner/BANNERAnnotator.java
index 1f6077e17..0d8837ff6 100644
--- a/jcore-banner-ae/src/main/java/de/julielab/jcore/ae/banner/BANNERAnnotator.java
+++ b/jcore-banner-ae/src/main/java/de/julielab/jcore/ae/banner/BANNERAnnotator.java
@@ -9,8 +9,10 @@
 import banner.types.Mention;
 import banner.types.Sentence;
 import de.julielab.jcore.types.EntityMention;
+import de.julielab.jcore.types.pubmed.InternalReference;
 import de.julielab.jcore.utility.JCoReAnnotationTools;
 import de.julielab.jcore.utility.JCoReTools;
+import de.julielab.jcore.utility.index.JCoReOverlapAnnotationIndex;
 import dragon.nlp.tool.Tagger;
 import dragon.nlp.tool.lemmatiser.EngLemmatiser;
 import org.apache.commons.configuration.ConfigurationException;
@@ -34,6 +36,7 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.Arrays;
+import java.util.List;
 import java.util.Map;
 import java.util.Optional;
 import java.util.stream.Collectors;
@@ -145,6 +148,7 @@ public void process(JCas jcas) throws AnalysisEngineProcessException {
         String docId = "<unknown>";
         try {
             docId = JCoReTools.getDocId(jcas);
+            JCoReOverlapAnnotationIndex<InternalReference> intRefIndex = new JCoReOverlapAnnotationIndex<>(jcas, InternalReference.type);
             FSIterator<Annotation> sentIt = jcas.getAnnotationIndex(de.julielab.jcore.types.Sentence.type).iterator();
             int geneCount = 0;
             int sentCount = 0;
@@ -164,8 +168,15 @@ public void process(JCas jcas) throws AnalysisEngineProcessException {
                     String typeName = typeMap.getOrDefault(entityType.getText(),
                             EntityMention.class.getCanonicalName());
                     Annotation a = JCoReAnnotationTools.getAnnotationByClassName(jcas, typeName);
-                    a.setBegin(sentenceBegin + mention.getStartChar());
-                    a.setEnd(sentenceBegin + mention.getEndChar());
+                    int originalBegin = sentenceBegin + mention.getStartChar();
+                    int originalEnd = sentenceBegin + mention.getEndChar();
+                    a.setBegin(originalBegin);
+                    a.setEnd(originalEnd);
+                    excludeReferenceAnnotationSpans(a, intRefIndex);
+                    if (a.getEnd() <= a.getBegin()) {
+                        log.error("After removing internal reference spans from the gene, it has no positive span any more. The original text was {} with offsets {}-{}. The new offsets are {}-{}.", jcas.getDocumentText().substring(originalBegin, originalEnd), originalBegin, originalEnd, a.getBegin(), a.getEnd());
+                        continue;
+                    }
                     if (a instanceof de.julielab.jcore.types.Annotation) {
                         de.julielab.jcore.types.Annotation jcoreA = (de.julielab.jcore.types.Annotation) a;
                         jcoreA.setId("BANNER, " + docId + ": " + geneCount++);
@@ -184,4 +195,22 @@ public void process(JCas jcas) throws AnalysisEngineProcessException {
             throw new AnalysisEngineProcessException(e);
         }
     }
+
+    /**
+     * Internal references can actually look like a part of a gene, e.g. "filament19" where "19" is a reference.
+     * Exclude those spans from the gene mentions.
+     * @param a The gene annotation.
+     * @param intRefIndex The reference index.
+     */
+    private void excludeReferenceAnnotationSpans(Annotation a, JCoReOverlapAnnotationIndex<? extends Annotation> intRefIndex) {
+        List<? extends Annotation> annotationsInGene = intRefIndex.search(a);
+        for (Annotation overlappingAnnotation : annotationsInGene) {
+            if (overlappingAnnotation.getBegin() == a.getBegin()) {
+                a.setBegin(overlappingAnnotation.getEnd());
+            }
+            if (overlappingAnnotation.getEnd() == a.getEnd()) {
+                a.setEnd(overlappingAnnotation.getBegin());
+            }
+        }
+    }
 }
diff --git a/jcore-banner-ae/src/main/resources/de/julielab/jcore/ae/banner/desc/jcore-banner-ae.xml b/jcore-banner-ae/src/main/resources/de/julielab/jcore/ae/banner/desc/jcore-banner-ae.xml
index b98b5f42f..6eddce439 100644
--- a/jcore-banner-ae/src/main/resources/de/julielab/jcore/ae/banner/desc/jcore-banner-ae.xml
+++ b/jcore-banner-ae/src/main/resources/de/julielab/jcore/ae/banner/desc/jcore-banner-ae.xml
@@ -29,6 +29,7 @@
         <import name="de.julielab.jcore.types.jcore-document-structure-types" />
         <import name="de.julielab.jcore.types.jcore-morpho-syntax-types" />
         <import name="de.julielab.jcore.types.jcore-semantics-biology-types" />
+        <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types" />
       </imports>
     </typeSystemDescription>
     <typePriorities />
diff --git a/jcore-banner-ae/src/test/java/de/julielab/jcore/ae/banner/BANNERAnnotatorTest.java b/jcore-banner-ae/src/test/java/de/julielab/jcore/ae/banner/BANNERAnnotatorTest.java
index 489ecd37d..61f748892 100644
--- a/jcore-banner-ae/src/test/java/de/julielab/jcore/ae/banner/BANNERAnnotatorTest.java
+++ b/jcore-banner-ae/src/test/java/de/julielab/jcore/ae/banner/BANNERAnnotatorTest.java
@@ -12,6 +12,7 @@
 
 import de.julielab.jcore.types.Gene;
 import de.julielab.jcore.types.Sentence;
+import de.julielab.jcore.types.pubmed.InternalReference;
 import org.apache.uima.UIMAException;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
@@ -34,7 +35,8 @@ public void testProcess() throws Exception {
 		// just tag a single sentence with a test model that actually used that sentence as training data.
 		JCas jcas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
 				"de.julielab.jcore.types.jcore-document-meta-types",
-				"de.julielab.jcore.types.jcore-semantics-biology-types");
+				"de.julielab.jcore.types.jcore-semantics-biology-types",
+				"de.julielab.jcore.types.jcore-document-structure-pubmed-types");
 		// this is sentence P00055040A0000 from the test BC2GM train data
 		jcas.setDocumentText(
 				"Ten out-patients with pustulosis palmaris et plantaris were examined with direct immunofluorescence (IF) technique for deposition of fibrinogen, fibrin or its degradation products (FR-antigen) in affected and unaffected skin, together with heparin-precipitable fraction (HPF), cryoglobulin and total plasma fibrinogen in the blood.");
@@ -59,6 +61,40 @@ public void testProcess() throws Exception {
 		assertEquals("fibrinogen", geneList.get(4).getCoveredText());
 	}
 
+	@Test
+	public void testInternalReferenceExclusion() throws Exception {
+		// Internal references in papers, e.g. for bibliography, often appear as numbers. If such a number is
+		// directly appended to a gene name, it is mostly included into the gene name by BANNER.
+		// Thus, such reference spans are removed afterwards in the annotator and this test is checking that it works.
+		JCas jcas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
+				"de.julielab.jcore.types.jcore-document-meta-types",
+				"de.julielab.jcore.types.jcore-semantics-biology-types",
+				"de.julielab.jcore.types.jcore-document-structure-pubmed-types");
+		// this is sentence P00055040A0000 from the test BC2GM train data EXCEPT the '19' following 'fibrinogen' which
+		// is our internal reference for this test.
+		jcas.setDocumentText(
+				"Ten out-patients with pustulosis palmaris et plantaris were examined with direct immunofluorescence (IF) technique for deposition of fibrinogen19, fibrin or its degradation products (FR-antigen) in affected and unaffected skin, together with heparin-precipitable fraction (HPF), cryoglobulin and total plasma fibrinogen in the blood.");
+		new Sentence(jcas, 0, jcas.getDocumentText().length()).addToIndexes();
+		new InternalReference(jcas, 143, 145).addToIndexes();
+		AnalysisEngine bannerAe = AnalysisEngineFactory.createEngine(BANNERAnnotator.class,
+				BANNERAnnotator.PARAM_CONFIG_FILE, "src/test/resources/banner_ae_test.xml", BANNERAnnotator.PARAM_TYPE_MAPPING, new String[] {"GENE=de.julielab.jcore.types.Gene"});
+		bannerAe.process(jcas);
+
+		// expected result from the GENE.eval.small file:
+		// P00055040A0000|116 125|fibrinogen
+		// P00055040A0000|127 132|fibrin
+		// P00055040A0000|158 167|FR-antigen
+		// P00055040A0000|243 254|cryoglobulin
+		// P00055040A0000|269 278|fibrinogen
+		// However, we ignore the offsets because the eval offsets ignore white spaces
+		List<Gene> geneList = new ArrayList<Gene>(JCasUtil.select(jcas, Gene.class));
+		assertEquals("fibrinogen", geneList.get(0).getCoveredText());
+		assertEquals("fibrin", geneList.get(1).getCoveredText());
+		assertEquals("FR-antigen", geneList.get(2).getCoveredText());
+		assertEquals("cryoglobulin", geneList.get(3).getCoveredText());
+		assertEquals("fibrinogen", geneList.get(4).getCoveredText());
+	}
+
 	@Test
 	public void testMultithreading() throws Exception {
 		List<Thread> ts = new ArrayList<>();

From 94f8e7d7a535df08d1c6eaf5509d0eac23c698c3 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 20 Jan 2022 10:31:10 +0100
Subject: [PATCH 112/269] Remove internal reference spans included in flair
 gene mentions.

Flair exhibits the same behaviour as BANNER, to no surprise. Numbers appended to gene names are just included into the recognized gene name. Thus, explicit exclusion of references is here also necessary.
---
 .../jcore/ae/banner/BANNERAnnotatorTest.java  |  3 +-
 .../jcore/ae/flairner/FlairNerAnnotator.java  | 23 ++++++++++
 .../ae/flairner/FlairNerAnnotatorTest.java    | 44 ++++++++++++++++---
 3 files changed, 62 insertions(+), 8 deletions(-)

diff --git a/jcore-banner-ae/src/test/java/de/julielab/jcore/ae/banner/BANNERAnnotatorTest.java b/jcore-banner-ae/src/test/java/de/julielab/jcore/ae/banner/BANNERAnnotatorTest.java
index 61f748892..ed1ce4cee 100644
--- a/jcore-banner-ae/src/test/java/de/julielab/jcore/ae/banner/BANNERAnnotatorTest.java
+++ b/jcore-banner-ae/src/test/java/de/julielab/jcore/ae/banner/BANNERAnnotatorTest.java
@@ -113,7 +113,8 @@ private void tagalot() throws UIMAException {
         // just tag a single sentence with a test model that actually used that sentence as training data.
         JCas jcas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
                 "de.julielab.jcore.types.jcore-document-meta-types",
-                "de.julielab.jcore.types.jcore-semantics-biology-types");
+                "de.julielab.jcore.types.jcore-semantics-biology-types",
+				"de.julielab.jcore.types.jcore-document-structure-pubmed-types");
         // this is sentence P00055040A0000 from the test BC2GM train data
         jcas.setDocumentText(
                 "Maintenance of skeletal muscle mass is regulated by the balance between anabolic and catabolic processes. Mammalian target of rapamycin (mTOR) is an evolutionarily conserved serine/threonine kinase, and is known to play vital roles in protein synthesis. Recent findings have continued to refine our understanding of the function of mTOR in maintaining skeletal muscle mass. mTOR controls the anabolic and catabolic signaling of skeletal muscle mass, resulting in the modulation of muscle hypertrophy and muscle wastage. This review will highlight the fundamental role of mTOR in skeletal muscle growth by summarizing the phenotype of skeletal-specific mTOR deficiency. In addition, the evidence that mTOR is a dual regulator of anabolism and catabolism in skeletal muscle mass will be discussed. A full understanding of mTOR signaling in the maintenance of skeletal muscle mass could help to develop mTOR-targeted therapeutics to prevent muscle wasting.");
diff --git a/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java b/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java
index 215b07718..8154b0f5c 100644
--- a/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java
+++ b/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java
@@ -9,9 +9,11 @@
 import de.julielab.jcore.types.EntityMention;
 import de.julielab.jcore.types.Sentence;
 import de.julielab.jcore.types.Token;
+import de.julielab.jcore.types.pubmed.InternalReference;
 import de.julielab.jcore.utility.JCoReAnnotationTools;
 import de.julielab.jcore.utility.JCoReTools;
 import de.julielab.jcore.utility.index.Comparators;
+import de.julielab.jcore.utility.index.JCoReOverlapAnnotationIndex;
 import de.julielab.jcore.utility.index.JCoReTreeMapAnnotationIndex;
 import de.julielab.jcore.utility.index.TermGenerators;
 import org.apache.uima.UimaContext;
@@ -24,6 +26,7 @@
 import org.apache.uima.fit.descriptor.TypeCapability;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.DoubleArray;
+import org.apache.uima.jcas.tcas.Annotation;
 import org.apache.uima.resource.ResourceInitializationException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -169,6 +172,7 @@ public void process(final JCas aJCas) throws AnalysisEngineProcessException {
                 log.debug("Document {} does not have any tokens", JCoReTools.getDocId(aJCas));
         }
         try {
+            JCoReOverlapAnnotationIndex<InternalReference> intRefIndex = new JCoReOverlapAnnotationIndex<>(aJCas, InternalReference.type);
             final AnnotationAdderHelper helper = new AnnotationAdderHelper();
             log.trace("Sending document sentences to flair for entity tagging.");
             final NerTaggingResponse taggingResponse = connector.tagSentences(StreamSupport.stream(sentIndex.spliterator(), false));
@@ -178,6 +182,7 @@ public void process(final JCas aJCas) throws AnalysisEngineProcessException {
                 final Sentence sentence = sentenceMap.get(entity.getDocumentId());
                 EntityMention em = (EntityMention) JCoReAnnotationTools.getAnnotationByClassName(aJCas, entityClass);
                 helper.setAnnotationOffsetsRelativeToSentence(sentence, em, entity, adderConfig);
+                excludeReferenceAnnotationSpans(em, intRefIndex);
                 em.setSpecificType(entity.getTag());
                 em.setConfidence(String.valueOf(entity.getLabelConfidence()));
                 em.setComponentId(componentId);
@@ -250,6 +255,24 @@ private void addTokenEmbeddings(JCas aJCas, Map<String, Sentence> sentenceMap, A
         }
     }
 
+    /**
+     * Internal references can actually look like a part of a gene, e.g. "filament19" where "19" is a reference.
+     * Exclude those spans from the gene mentions.
+     * @param a The gene annotation.
+     * @param intRefIndex The reference index.
+     */
+    private void excludeReferenceAnnotationSpans(Annotation a, JCoReOverlapAnnotationIndex<? extends Annotation> intRefIndex) {
+        List<? extends Annotation> annotationsInGene = intRefIndex.search(a);
+        for (Annotation overlappingAnnotation : annotationsInGene) {
+            if (overlappingAnnotation.getBegin() == a.getBegin()) {
+                a.setBegin(overlappingAnnotation.getEnd());
+            }
+            if (overlappingAnnotation.getEnd() == a.getEnd()) {
+                a.setEnd(overlappingAnnotation.getBegin());
+            }
+        }
+    }
+
     @Override
     public void collectionProcessComplete() throws AnalysisEngineProcessException {
         try {
diff --git a/jcore-flair-ner-ae/src/test/java/de/julielab/jcore/ae/flairner/FlairNerAnnotatorTest.java b/jcore-flair-ner-ae/src/test/java/de/julielab/jcore/ae/flairner/FlairNerAnnotatorTest.java
index 9c5171fd6..56fc4d046 100644
--- a/jcore-flair-ner-ae/src/test/java/de/julielab/jcore/ae/flairner/FlairNerAnnotatorTest.java
+++ b/jcore-flair-ner-ae/src/test/java/de/julielab/jcore/ae/flairner/FlairNerAnnotatorTest.java
@@ -4,6 +4,7 @@
 import de.julielab.jcore.types.Gene;
 import de.julielab.jcore.types.Sentence;
 import de.julielab.jcore.types.Token;
+import de.julielab.jcore.types.pubmed.InternalReference;
 import de.julielab.jcore.utility.index.Comparators;
 import de.julielab.jcore.utility.index.JCoReTreeMapAnnotationIndex;
 import de.julielab.jcore.utility.index.TermGenerators;
@@ -43,7 +44,7 @@ public class FlairNerAnnotatorTest {
 
     @Test
     public void testAnnotatorWithoutWordEmbeddings() throws Exception {
-        final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-biology-types");
+        final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-structure-pubmed-types");
         final AnalysisEngine engine = AnalysisEngineFactory.createEngine(FlairNerAnnotator.class, FlairNerAnnotator.PARAM_ANNOTATION_TYPE, Gene.class.getCanonicalName(), FlairNerAnnotator.PARAM_FLAIR_MODEL, "src/test/resources/genes-small-model.pt");
         String text = "Knockdown of SUB1 homolog by siRNA inhibits the early stages of HIV-1 replication in 293T cells infected with VSV-G pseudotyped HIV-1 .";
         jCas.setDocumentText(text);
@@ -69,10 +70,41 @@ public void testAnnotatorWithoutWordEmbeddings() throws Exception {
         engine.collectionProcessComplete();
     }
 
+    @Test
+    public void testAnnotatorWithoutWordEmbeddings2() throws Exception {
+        // The "2" appended to "homolog" is marked as an internal reference. The annotator must cut it off the
+        // gene mention so that the covered text is "SUB1 homolog" and not "SUB1 homolog2".
+        final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-structure-pubmed-types");
+        final AnalysisEngine engine = AnalysisEngineFactory.createEngine(FlairNerAnnotator.class, FlairNerAnnotator.PARAM_ANNOTATION_TYPE, Gene.class.getCanonicalName(), FlairNerAnnotator.PARAM_FLAIR_MODEL, "src/test/resources/genes-small-model.pt");
+        String text = "Knockdown of SUB1 homolog2 by siRNA inhibits the early stages of HIV-1 replication in 293T cells infected with VSV-G pseudotyped HIV-1 .";
+        jCas.setDocumentText(text);
+        Sentence s = new Sentence(jCas, 0, text.length());
+        addTokens(jCas);
+        s.addToIndexes();
+        new InternalReference(jCas, 25, 26).addToIndexes();
+        engine.process(jCas);
+        List<String> foundGenes = new ArrayList<>();
+        JCoReTreeMapAnnotationIndex<Long, Token> tokenIndex = new JCoReTreeMapAnnotationIndex<>(TermGenerators.longOffsetTermGenerator(), TermGenerators.longOffsetTermGenerator(), jCas, Token.type);
+        for (Annotation a : jCas.getAnnotationIndex(Gene.type)) {
+            Gene g = (Gene) a;
+            foundGenes.add(g.getCoveredText());
+            assertThat(g.getSpecificType()).isEqualTo("Gene");
+            final Iterator<Token> tokenIt = tokenIndex.searchFuzzy(g).iterator();
+            while (tokenIt.hasNext()) {
+                Token token = tokenIt.next();
+                assertThat(token.getEmbeddingVectors()).isNull();
+            }
+            assertThat(Double.parseDouble(g.getConfidence())).isGreaterThan(0.64);
+            assertThat(g.getComponentId()).isEqualTo(FlairNerAnnotator.class.getSimpleName());
+        }
+        assertThat(foundGenes).containsExactly("SUB1 homolog", "HIV-1", "VSV-G", "HIV-1");
+        engine.collectionProcessComplete();
+    }
+
     @Test
     public void testAnnotatorWithEntityWordEmbeddings() throws Exception {
         embeddingsCache.clear();
-        final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-biology-types");
+        final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-structure-pubmed-types");
         final AnalysisEngine engine = AnalysisEngineFactory.createEngine(FlairNerAnnotator.class, FlairNerAnnotator.PARAM_STORE_EMBEDDINGS, ENTITIES, FlairNerAnnotator.PARAM_ANNOTATION_TYPE, Gene.class.getCanonicalName(), FlairNerAnnotator.PARAM_FLAIR_MODEL, "src/test/resources/genes-small-model.pt", FlairNerAnnotator.PARAM_COMPONENT_ID, "ATotallyDifferentComponentId");
         String text = "Knockdown of SUB1 homolog by siRNA inhibits the early stages of HIV-1 replication in 293T cells infected with VSV-G pseudotyped HIV-1 .";
         jCas.setDocumentText(text);
@@ -111,7 +141,7 @@ public void testAnnotatorWithEntityWordEmbeddings() throws Exception {
 
     @Test(dependsOnMethods = "testAnnotatorWithEntityWordEmbeddings")
     public void testAnnotatorWithEntitySubWordEmbeddings() throws Exception {
-        final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-biology-types");
+        final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-structure-pubmed-types");
         final AnalysisEngine engine = AnalysisEngineFactory.createEngine(FlairNerAnnotator.class, FlairNerAnnotator.PARAM_STORE_EMBEDDINGS, ENTITIES, FlairNerAnnotator.PARAM_ANNOTATION_TYPE, Gene.class.getCanonicalName(), FlairNerAnnotator.PARAM_FLAIR_MODEL, "src/test/resources/genes-small-model.pt");
         String text = "Knockdown of SUB1 homolog by siRNA inhibits the early stages of HIV-1 replication in 293T cells infected with VSV-G pseudotyped HIV-1 .";
         jCas.setDocumentText(text);
@@ -179,7 +209,7 @@ private double l2Norm(double[] vector) {
 
     @Test
     public void testAnnotatorWithAllEmbeddings() throws Exception {
-        final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-biology-types");
+        final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-structure-pubmed-types");
         final AnalysisEngine engine = AnalysisEngineFactory.createEngine(FlairNerAnnotator.class, FlairNerAnnotator.PARAM_STORE_EMBEDDINGS, FlairNerAnnotator.StoreEmbeddings.ALL, FlairNerAnnotator.PARAM_ANNOTATION_TYPE, Gene.class.getCanonicalName(), FlairNerAnnotator.PARAM_FLAIR_MODEL, "src/test/resources/genes-small-model.pt");
         String text = "Knockdown of SUB1 homolog by siRNA inhibits the early stages of HIV-1 replication in 293T cells infected with VSV-G pseudotyped HIV-1 .";
         jCas.setDocumentText(text);
@@ -214,7 +244,7 @@ private void addSentences(JCas jCas) {
 
     @Test
     public void testAnnotator2() throws Exception {
-        final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-biology-types");
+        final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-structure-pubmed-types");
         final AnalysisEngine engine = AnalysisEngineFactory.createEngine(FlairNerAnnotator.class, FlairNerAnnotator.PARAM_ANNOTATION_TYPE, Gene.class.getCanonicalName(), FlairNerAnnotator.PARAM_FLAIR_MODEL, "src/test/resources/genes-small-model.pt");
         // The sentence detection and tokenization was done by the jcore-j[st]bd-biomedical-english JCoRe project components, using the executable (java -jar) command line artifact created when building the components.
         String text = "Synergistic lethal effect between hydrogen peroxide and neocuproine ( 2,9-dimethyl 1,10-phenanthroline ) in Escherichia coli .\n" +
@@ -241,7 +271,7 @@ public void testAnnotator2() throws Exception {
 
     @Test
     public void testAnnotatorOnOffsetIssueDocument() throws Exception {
-        final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-meta-pubmed-types", "de.julielab.jcore.types.extensions.jcore-document-meta-extension-types");
+        final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-meta-pubmed-types", "de.julielab.jcore.types.extensions.jcore-document-meta-extension-types", "de.julielab.jcore.types.jcore-document-structure-pubmed-types");
         final AnalysisEngine engine = AnalysisEngineFactory.createEngine(FlairNerAnnotator.class, FlairNerAnnotator.PARAM_ANNOTATION_TYPE, Gene.class.getCanonicalName(), FlairNerAnnotator.PARAM_FLAIR_MODEL, "src/test/resources/genes-small-model.pt");
 
         XmiCasDeserializer.deserialize(new FileInputStream(Path.of("src", "test", "resources", "1681975.xmi").toString()), jCas.getCas());
@@ -259,7 +289,7 @@ public void testAnnotatorOnOffsetIssueDocument() throws Exception {
 
     @Test
     public void testEmbeddings2() throws Exception {
-        final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-biology-types");
+        final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-structure-pubmed-types");
         final AnalysisEngine engine = AnalysisEngineFactory.createEngine(FlairNerAnnotator.class, FlairNerAnnotator.PARAM_ANNOTATION_TYPE, Gene.class.getCanonicalName(), FlairNerAnnotator.PARAM_FLAIR_MODEL, "src/test/resources/genes-small-model.pt", FlairNerAnnotator.PARAM_STORE_EMBEDDINGS, ENTITIES);
         // The sentence detection and tokenization was done by the jcore-j[st]bd-biomedical-english JCoRe project components, using the executable (java -jar) command line artifact created when building the components.
         String text = "We show that tal controls gene expression and tissue folding in Drosophila , thus acting as a link between patterning and morphogenesis .\n tal function is mediated by several 33-nucleotide-long open reading frames ( ORFs )";

From 7a2e154fa127e4eef2c2ce0384cc046126be9b83 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 24 Jan 2022 13:38:40 +0100
Subject: [PATCH 113/269] Fixes #127.

---
 jcore-pmc-reader/README.md                    |  4 +-
 .../jcore/multiplier/pmc/PMCMultiplier.java   | 31 ++++++++-
 .../jcore/reader/pmc/CasPopulator.java        |  5 +-
 .../jcore/reader/pmc/PMCMultiplierReader.java | 12 +++-
 .../julielab/jcore/reader/pmc/PMCReader.java  |  3 +-
 .../jcore/reader/pmc/PMCReaderBase.java       |  5 ++
 .../pmc/parser/DefaultElementParser.java      | 10 +--
 .../reader/pmc/parser/NxmlDocumentParser.java |  2 +-
 .../pmc/desc/jcore-pmc-multiplier-reader.xml  |  7 +++
 .../reader/pmc/desc/jcore-pmc-reader.xml      |  7 +++
 .../elementproperties-no-bib-refs.yml         | 63 +++++++++++++++++++
 .../pmc/resources/elementproperties.yml       |  1 -
 .../jcore/reader/pmc/PMCReaderTest.java       | 49 +++++++++++++++
 .../jcore-uri-multiplier-types.xml            | 13 ++++
 14 files changed, 193 insertions(+), 19 deletions(-)
 create mode 100644 jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/resources/elementproperties-no-bib-refs.yml

diff --git a/jcore-pmc-reader/README.md b/jcore-pmc-reader/README.md
index 4fb82a46a..f42e43e76 100644
--- a/jcore-pmc-reader/README.md
+++ b/jcore-pmc-reader/README.md
@@ -102,7 +102,9 @@ The following properties are currently supported:
 | paths                  | list of objects | Allows to specify a relative or absolute XPath like sequence of element names in the form `abstract/sec/title` and properties that should be applied to elements matching this path. |
 | type                   | string          | The UIMA type that should be used to annotate the text contents of the element |
 
-The `attribute` and `path` properties define criteria where the base properties are overwritten by the properties specified in association with the given attribute-value combination or path. For example, it is possible to include a certain element for document text but omit it if has a specific element as parent or some attribute value.
+The `attribute` and `path` properties define criteria where the base properties are overwritten by the properties specified in association with the given attribute-value combination or path. Attributes are addressed by specifying `name` and `value` keys. The `name` is the name of the attribute to test and `value` is the value the attribute must have for the property override to take effect. Paths require the `path` key followed by a slash-separated sequence of element names that ends with the name of the XML element for which the rule should hold. The path does not need to start from the root; it only needs to be long enough to identify the element unambiguously.
+
+For example, it is possible to include a certain element for document text but omit it if it has a specific element as parent or some attribute value.
 
 Here is an example taken directly from the `elementproperties.yml` file:
 ```yml
diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCMultiplier.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCMultiplier.java
index 38d52f4b8..f15b5d983 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCMultiplier.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCMultiplier.java
@@ -2,8 +2,10 @@
 
 import de.julielab.jcore.reader.pmc.CasPopulator;
 import de.julielab.jcore.reader.pmc.NoDataAvailableException;
+import de.julielab.jcore.reader.pmc.PMCMultiplierReader;
 import de.julielab.jcore.reader.pmc.parser.ElementParsingException;
 import de.julielab.jcore.types.casmultiplier.JCoReURI;
+import de.julielab.jcore.types.casmultiplier.MultiplierConfigParameters;
 import org.apache.uima.analysis_component.JCasMultiplier_ImplBase;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.AbstractCas;
@@ -12,6 +14,7 @@
 import org.apache.uima.fit.descriptor.TypeCapability;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.StringArray;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -21,13 +24,13 @@
 import java.util.Iterator;
 
 @ResourceMetaData(name = "JCoRe Pubmed Central NXML Multiplier", description = "This multiplier expect to receive URIs to NXML documents in the form of JCoReURI feature structures. All JCoReURI FS in the annotation indexes are read and output as new CASes.")
-@OperationalProperties(outputsNewCases = true, multipleDeploymentAllowed = true, modifiesCas = false)
+@OperationalProperties(outputsNewCases = true, modifiesCas = false)
 @TypeCapability(outputs = {"de.julielab.jcore.types.TitleType", "de.julielab.jcore.types.Title", "de.julielab.jcore.types.TextObject", "de.julielab.jcore.types.Table", "de.julielab.jcore.types.SectionTitle", "de.julielab.jcore.types.Section", "de.julielab.jcore.types.PubType", "de.julielab.jcore.types.Paragraph", "de.julielab.jcore.types.OtherPub", "de.julielab.jcore.types.pubmed.OtherID", "de.julielab.jcore.types.pubmed.ManualDescriptor", "de.julielab.jcore.types.Keyword", "de.julielab.jcore.types.Journal", "de.julielab.jcore.types.pubmed.Header", "de.julielab.jcore.types.Footnote", "de.julielab.jcore.types.Figure", "uima.tcas.DocumentAnnotation", "de.julielab.jcore.types.Date", "de.julielab.jcore.types.CaptionType", "de.julielab.jcore.types.Caption", "de.julielab.jcore.types.AutoDescriptor", "de.julielab.jcore.types.AuthorInfo", "de.julielab.jcore.types.AbstractText", "de.julielab.jcore.types.AbstractSectionHeading", "de.julielab.jcore.types.AbstractSection"})
 public class PMCMultiplier extends JCasMultiplier_ImplBase {
     private final static Logger log = LoggerFactory.getLogger(PMCMultiplier.class);
     private Iterator<URI> currentUriBatch;
     private CasPopulator casPopulator;
-
+    private Boolean omitBibReferences = null;
 
     @Override
     public void process(JCas aJCas) throws AnalysisEngineProcessException {
@@ -35,14 +38,48 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException {
         if (log.isDebugEnabled())
             log.debug("Received batch of {} NXML URIs", jcoreUris.size());
         currentUriBatch = jcoreUris.stream().map(JCoReURI::getUri).map(URI::create).iterator();
+        determineOmitBibReferences(aJCas);
         try {
-            casPopulator = new CasPopulator(currentUriBatch);
+            casPopulator = new CasPopulator(currentUriBatch, omitBibReferences);
         } catch (IOException e) {
             log.error("Exception occurred when trying to initialize the NXML parser", e);
             throw new AnalysisEngineProcessException(e);
         }
     }
 
+    /**
+     * Reads the multiplier configuration parameters passed from the multiplier reader in the given CAS and sets
+     * the <code>omitBibReferences</code> field accordingly. Each entry of the parameter array is expected to have
+     * the form <code>&lt;key&gt;=&lt;value&gt;</code>; whitespace around the equals sign is optional.
+     * When no parameters are given, or none for bibliography references, the field is set to <code>false</code>.
+     *
+     * @param aJCas The CAS received from the multiplier reader.
+     * @throws AnalysisEngineProcessException If a parameter entry is not a key-value pair.
+     */
+    private void determineOmitBibReferences(JCas aJCas) throws AnalysisEngineProcessException {
+        // Reset for every batch so the field is never null (it is unboxed by CasPopulator) and never stale.
+        omitBibReferences = false;
+        try {
+            MultiplierConfigParameters multiplierConfigParameters = JCasUtil.selectSingle(aJCas, MultiplierConfigParameters.class);
+            StringArray parameters = multiplierConfigParameters.getParameters();
+            for (int i = 0; i < parameters.size(); ++i) {
+                // The reader writes "<key>=<value>" without blanks, so whitespace around '=' must be optional.
+                String[] paramPair = parameters.get(i).trim().split("\\s*=\\s*", 2);
+                if (paramPair.length != 2) {
+                    String msg = "Error while parsing multiplier configuration parameters passed from the multiplier reader. The parameter array contains the entry \"" + parameters.get(i) + "\". The expected format is <key>=<value>.";
+                    log.error(msg);
+                    throw new AnalysisEngineProcessException(new IllegalArgumentException(msg));
+                }
+                if (paramPair[0].equals(PMCMultiplierReader.PARAM_OMIT_BIB_REFERENCES)) {
+                    omitBibReferences = Boolean.parseBoolean(paramPair[1]);
+                }
+            }
+        } catch (IllegalArgumentException e) {
+            omitBibReferences = false;
+            // nothing further; there were no parameters given, so the default applies
+        }
+    }
+
 
     @Override
     public boolean hasNext() {
diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
index 61e2851a5..d3b402b36 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
@@ -15,10 +15,11 @@ public class CasPopulator {
     private NxmlDocumentParser nxmlDocumentParser;
     private Iterator<URI> nxmlIterator;
 
-    public CasPopulator(Iterator<URI> nxmlIterator) throws IOException {
+    public CasPopulator(Iterator<URI> nxmlIterator, Boolean omitBibReferences) throws IOException {
         this.nxmlIterator = nxmlIterator;
         nxmlDocumentParser = new NxmlDocumentParser();
-        nxmlDocumentParser.loadElementPropertyFile("/de/julielab/jcore/reader/pmc/resources/elementproperties.yml");
+        String settings = omitBibReferences ? "/de/julielab/jcore/reader/pmc/resources/elementproperties-no-bib-refs.yml" : "/de/julielab/jcore/reader/pmc/resources/elementproperties.yml";
+        nxmlDocumentParser.loadElementPropertyFile(settings);
     }
 
     public void populateCas(URI nxmlUri, JCas cas) throws ElementParsingException, NoDataAvailableException {
diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/PMCMultiplierReader.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/PMCMultiplierReader.java
index 5527a249c..4c349098c 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/PMCMultiplierReader.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/PMCMultiplierReader.java
@@ -1,6 +1,7 @@
 package de.julielab.jcore.reader.pmc;
 
 import de.julielab.jcore.types.casmultiplier.JCoReURI;
+import de.julielab.jcore.types.casmultiplier.MultiplierConfigParameters;
 import org.apache.uima.UimaContext;
 import org.apache.uima.collection.CollectionException;
 import org.apache.uima.ducc.Workitem;
@@ -8,6 +9,7 @@
 import org.apache.uima.fit.descriptor.ResourceMetaData;
 import org.apache.uima.fit.descriptor.TypeCapability;
 import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.StringArray;
 import org.apache.uima.resource.ResourceInitializationException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -24,6 +26,7 @@ public class PMCMultiplierReader extends PMCReaderBase {
     public static final String PARAM_WHITELIST = PMCReaderBase.PARAM_WHITELIST;
     public static final String PARAM_SEND_CAS_TO_LAST = "SendCasToLast";
     public static final String PARAM_BATCH_SIZE = "BatchSize";
+    public static final String PARAM_OMIT_BIB_REFERENCES = PMCReaderBase.PARAM_OMIT_BIB_REFERENCES;
     private final static Logger log = LoggerFactory.getLogger(PMCMultiplierReader.class);
     @ConfigurationParameter(name = PARAM_SEND_CAS_TO_LAST, mandatory = false, defaultValue = "false", description = "UIMA DUCC relevant parameter when using a CAS multiplier. When set to true, the worker CAS from the collection reader is forwarded to the last component in the pipeline. This can be used to send information about the progress to the CAS consumer in order to have it perform batch operations. For this purpose, a feature structure of type WorkItem from the DUCC library is added to the worker CAS. This feature structure has information about the current progress.")
     private boolean sendCasToLast;
@@ -51,9 +54,16 @@ public void getNext(JCas jCas) throws CollectionException {
                 log.error("Exception with URI: " + uri.toString(), e);
                 throw new CollectionException(e);
             }
-
             completed++;
         }
+        // Send configuration parameters to the multiplier if necessary
+        if (omitBibReferences) {
+            MultiplierConfigParameters parameters = new MultiplierConfigParameters(jCas);
+            StringArray paramArray = new StringArray(jCas, 1);
+            paramArray.set(0, PMCReaderBase.PARAM_OMIT_BIB_REFERENCES+"="+omitBibReferences);
+            parameters.setParameters(paramArray);
+            parameters.addToIndexes();
+        }
         if (sendCasToLast) {
             Workitem workitem = new Workitem(jCas);
             // Send the work item CAS also to the consumer. Normally, only the CASes emitted by the CAS multiplier
diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/PMCReader.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/PMCReader.java
index 921fc10b5..86a5fac26 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/PMCReader.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/PMCReader.java
@@ -33,6 +33,7 @@ public class PMCReader extends PMCReaderBase {
     public static final String PARAM_SEARCH_ZIP = PMCReaderBase.PARAM_SEARCH_ZIP;
     public static final String PARAM_WHITELIST = PMCReaderBase.PARAM_WHITELIST;
     public static final String PARAM_EXTRACT_ID_FROM_FILENAME = PMCReaderBase.PARAM_EXTRACT_ID_FROM_FILENAME;
+    public static final String PARAM_OMIT_BIB_REFERENCES = PMCReaderBase.PARAM_OMIT_BIB_REFERENCES;
     private static final Logger log = LoggerFactory.getLogger(PMCReader.class);
     private CasPopulator casPopulator;
 
@@ -40,7 +41,7 @@ public class PMCReader extends PMCReaderBase {
     public void initialize(UimaContext context) throws ResourceInitializationException {
         super.initialize(context);
         try {
-            casPopulator = new CasPopulator(pmcFiles);
+            casPopulator = new CasPopulator(pmcFiles, omitBibReferences);
         } catch (IOException e) {
             log.error("Exception occurred when trying to initialize NXML parser", e);
             throw new ResourceInitializationException(e);
diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/PMCReaderBase.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/PMCReaderBase.java
index a9fdd3890..73e16a0a0 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/PMCReaderBase.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/PMCReaderBase.java
@@ -28,6 +28,7 @@ public abstract class PMCReaderBase extends JCasCollectionReader_ImplBase {
     public static final String PARAM_SEARCH_ZIP = "SearchInZipFiles";
     public static final String PARAM_WHITELIST = "WhitelistFile";
     public static final String PARAM_EXTRACT_ID_FROM_FILENAME = "ExtractIdFromFilename";
+    public static final String PARAM_OMIT_BIB_REFERENCES = "OmitBibliographyReferences";
     private final static Logger log = LoggerFactory.getLogger(PMCReaderBase.class);
     @ConfigurationParameter(name = PARAM_INPUT, description = "The path to an NXML file or a directory with NXML files and possibly subdirectories holding more NXML files.")
     protected File input;
@@ -44,6 +45,9 @@ public abstract class PMCReaderBase extends JCasCollectionReader_ImplBase {
     @ConfigurationParameter(name = PARAM_EXTRACT_ID_FROM_FILENAME, mandatory = false, description = "Used for NXML documents that carry their ID in the file name but not in the document itself. Extracts the string after the last path separator and the first dot after the separator and sets it to the docId feature of the Header annotation.")
     protected boolean extractIdFromFilename;
 
+    @ConfigurationParameter(name = PARAM_OMIT_BIB_REFERENCES, mandatory = false, defaultValue = "false", description = "If set to true, references to the bibliography are omitted from the CAS text.")
+    protected boolean omitBibReferences;
+
     protected Iterator<URI> pmcFiles;
 
     protected int completed;
@@ -60,6 +64,7 @@ public void initialize(UimaContext context) throws ResourceInitializationExcepti
         searchRecursively = Optional.ofNullable((Boolean) getConfigParameterValue(PARAM_RECURSIVELY)).orElse(false);
         searchZip = Optional.ofNullable((Boolean) getConfigParameterValue(PARAM_SEARCH_ZIP)).orElse(false);
         whitelistFile = Optional.ofNullable((String) getConfigParameterValue(PARAM_WHITELIST)).map(File::new).orElse(null);
+        omitBibReferences = Optional.ofNullable((Boolean) getConfigParameterValue(PARAM_OMIT_BIB_REFERENCES)).orElse(false);
         log.info("Reading PubmedCentral NXML file(s) from {}", input);
         try {
             Set<String> whitelist = readWhitelist(whitelistFile);
diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/DefaultElementParser.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/DefaultElementParser.java
index ac2f3cd23..42e1dc5a6 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/DefaultElementParser.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/DefaultElementParser.java
@@ -54,7 +54,7 @@ public DefaultElementParser(NxmlDocumentParser nxmlDocumentParser) {
 
 	@Override
 	protected void beforeParseElement() throws ElementParsingException {
-		// since this parser does not know the element is is used upon, set
+		// since this parser does not know the element it is used upon, set
 		// it first for the parsing result creation
 		try {
 			elementName = vn.toString(vn.getCurrentIndex());
@@ -138,12 +138,6 @@ protected void editResult(ElementParsingResult result) throws NavException {
 		if (typeName.equals(ElementProperties.TYPE_NONE))
 			return;
 
-		// @SuppressWarnings("unchecked")
-		// Map<String, Object> defaultFeatureValues = (Map<String, Object>)
-		// nxmlDocumentParser
-		// .getTagProperties(elementName)
-		// .getOrDefault(ElementProperties.DEFAULT_FEATURE_VALUES,
-		// Collections.emptyMap());
 		@SuppressWarnings("unchecked")
 		Map<String, Object> defaultFeatureValues = (Map<String, Object>) getApplicableProperties()
 				.orElse(Collections.emptyMap())
@@ -276,8 +270,6 @@ private Optional<Map<String, Object>> getApplicableProperties() throws NavExcept
 				String attributeValue = attributesOfElement.get(attribute.get(ElementProperties.NAME));
 				if (attributeValue != null && attributeValue.equals(attribute.get(ElementProperties.VALUE))
 						&& attribute.containsKey(ElementProperties.OMIT_ELEMENT)) {
-					// omitElement = (boolean)
-					// attribute.get(ElementProperties.OMIT_ELEMENT);
 					applicableProperties = Optional.of(attribute);
 				}
 			}
diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/NxmlDocumentParser.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/NxmlDocumentParser.java
index 2042b258c..9f75ba8db 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/NxmlDocumentParser.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/NxmlDocumentParser.java
@@ -86,7 +86,7 @@ public void reset(InputStream is, JCas cas) throws DocumentParsingException {
      * @throws NavException
      * @throws DocTypeNotFoundException
      */
-    private void setTagset() throws NavException, DocTypeNotFoundException, DocTypeNotSupportedException {
+    private void setTagset() throws NavException, DocTypeNotFoundException {
         for (int i = 0; i < vn.getTokenCount(); i++) {
             if (vn.getTokenType(i) == VTDNav.TOKEN_DTD_VAL) {
                 String docType = StringUtils.normalizeSpace(vn.toString(i)).replaceAll("'", "\"");
diff --git a/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/desc/jcore-pmc-multiplier-reader.xml b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/desc/jcore-pmc-multiplier-reader.xml
index 224b668eb..5f1655fc7 100644
--- a/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/desc/jcore-pmc-multiplier-reader.xml
+++ b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/desc/jcore-pmc-multiplier-reader.xml
@@ -49,6 +49,13 @@
                 <multiValued>false</multiValued>
                 <mandatory>false</mandatory>
             </configurationParameter>
+            <configurationParameter>
+                <name>OmitBibliographyReferences</name>
+                <description>If set to true, references to the bibliography are omitted from the CAS text.</description>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
         </configurationParameters>
         <configurationParameterSettings>
             <nameValuePair>
diff --git a/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/desc/jcore-pmc-reader.xml b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/desc/jcore-pmc-reader.xml
index 284f41cdd..478806bfb 100644
--- a/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/desc/jcore-pmc-reader.xml
+++ b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/desc/jcore-pmc-reader.xml
@@ -42,6 +42,13 @@
                 <multiValued>false</multiValued>
                 <mandatory>false</mandatory>
             </configurationParameter>
+            <configurationParameter>
+                <name>OmitBibliographyReferences</name>
+                <description>If set to true, references to the bibliography are omitted from the CAS text.</description>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
         </configurationParameters>
         <configurationParameterSettings>
             <nameValuePair>
diff --git a/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/resources/elementproperties-no-bib-refs.yml b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/resources/elementproperties-no-bib-refs.yml
new file mode 100644
index 000000000..16d5355bb
--- /dev/null
+++ b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/resources/elementproperties-no-bib-refs.yml
@@ -0,0 +1,63 @@
+article-title:
+    block-element: true
+    type: de.julielab.jcore.types.Title
+title:
+    block-element: true
+    type: de.julielab.jcore.types.Title
+    default-feature-values:
+        titleType: other
+    paths:
+        - path: sec/title
+          type: de.julielab.jcore.types.SectionTitle
+          default-feature-values:
+            titleType: section
+        - path: abstract/sec/title
+          type: de.julielab.jcore.types.AbstractSectionHeading
+          default-feature-values:
+            titleType: abstractSection
+abstract:
+    block-element: true
+    type: de.julielab.jcore.types.AbstractText
+label:
+    block-element: true
+    type: de.julielab.jcore.types.Title
+    paths:
+        - path: list-item/label
+          omit-element: true
+sec:
+    block-element: true
+    type: de.julielab.jcore.types.Section
+    paths:
+        - path: abstract/sec
+          type: de.julielab.jcore.types.AbstractSection
+p:
+    block-element: true
+    type: de.julielab.jcore.types.Paragraph
+list:
+    block-element: true
+    type: de.julielab.jcore.types.List
+list-item:
+    block-element: true
+    type: de.julielab.jcore.types.ListItem
+caption:
+    block-element: true
+    type: de.julielab.jcore.types.Caption
+    default-feature-values:
+        captionType: other
+fn-group:
+    block-element: true
+front:
+    omit-element: true
+back:
+    omit-element: true
+fig-group:
+    omit-element: true
+floats-group:
+    omit-element: true
+array:
+    omit-element: true
+xref:
+    attributes:
+        - name: ref-type
+          value: bibr
+          omit-element: true
\ No newline at end of file
diff --git a/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/resources/elementproperties.yml b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/resources/elementproperties.yml
index f8b5d3429..230bbf929 100644
--- a/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/resources/elementproperties.yml
+++ b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/resources/elementproperties.yml
@@ -56,4 +56,3 @@ floats-group:
     omit-element: true
 array:
     omit-element: true
-
diff --git a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCReaderTest.java b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCReaderTest.java
index 7d5547754..3a79a51e8 100644
--- a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCReaderTest.java
+++ b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCReaderTest.java
@@ -23,6 +23,8 @@
 
 import java.util.List;
 import java.util.*;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
 
@@ -359,6 +361,53 @@ public void testFigureReferencesAnnotated() throws Exception {
         assertThat(figRefs).extracting("refid").containsExactly("Fig1", "Fig2");
     }
 
+    @Test
+    public void testBibliographyReferencesAnnotated() throws Exception {
+        JCas cas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-document-meta-pubmed-types",
+                "de.julielab.jcore.types.jcore-document-structure-pubmed-types");
+        CollectionReader reader = CollectionReaderFactory.createReader(PMCReader.class, PMCReader.PARAM_INPUT,
+                "src/test/resources/documents-recursive/PMC2847692.nxml.gz");
+        reader.getNext(cas.getCas());
+        Collection<InternalReference> refs = JCasUtil.select(cas, InternalReference.class);
+        // Without a filter on bibliographic references, there should be 76 references to the bibliography
+        List<InternalReference> bibliography = refs.stream().filter(r -> r.getReftype().equalsIgnoreCase("bibliography")).collect(Collectors.toList());
+        assertThat(bibliography).hasSize(76);
+
+        // RegEx for something like "2004a"
+        Matcher yearReferenceMatcher = Pattern.compile("[0-9]{4}[ab]?").matcher(cas.getDocumentText());
+        int numReferencePatternsInText = 0;
+        while (yearReferenceMatcher.find()) {
+            ++numReferencePatternsInText;
+        }
+        // Some of the matched patterns are not references, so the count is higher than the number of references.
+        assertThat(numReferencePatternsInText).isEqualTo(84);
+    }
+
+    @Test
+    public void testBibliographyReferencesOmitted() throws Exception {
+        JCas cas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-document-meta-pubmed-types",
+                "de.julielab.jcore.types.jcore-document-structure-pubmed-types");
+        CollectionReader reader = CollectionReaderFactory.createReader(PMCReader.class, PMCReader.PARAM_INPUT,
+                "src/test/resources/documents-recursive/PMC2847692.nxml.gz",
+                PMCMultiplierReader.PARAM_OMIT_BIB_REFERENCES, true);
+        reader.getNext(cas.getCas());
+        Collection<InternalReference> refs = JCasUtil.select(cas, InternalReference.class);
+        // Since we set the omission parameter to true, there should be no bibliographic references
+        List<InternalReference> bibliography = refs.stream().filter(r -> r.getReftype().equalsIgnoreCase("bibliography")).collect(Collectors.toList());
+        assertThat(bibliography).isEmpty();
+
+        // RegEx for something like "2004a"
+        Matcher yearReferenceMatcher = Pattern.compile("[0-9]{4}[ab]?").matcher(cas.getDocumentText());
+        int numReferencePatternsInText = 0;
+        while (yearReferenceMatcher.find()) {
+            ++numReferencePatternsInText;
+        }
+        // In the test above, where we have the same document but with bib. references, there were 84 occurrences
+        // of the pattern. 76 of those were actual references. Thus, after removing the references, 8 pattern
+        // occurrences should remain.
+        assertThat(numReferencePatternsInText).isEqualTo(8);
+    }
+
     @Test
     public void testPmcReaderDescriptor() throws Exception {
         // read a whole directory with subdirectories
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/casmultiplier/jcore-uri-multiplier-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/casmultiplier/jcore-uri-multiplier-types.xml
index 5f6a3459b..fe06797e8 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/casmultiplier/jcore-uri-multiplier-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/casmultiplier/jcore-uri-multiplier-types.xml
@@ -19,5 +19,18 @@
         </featureDescription>
       </features>
     </typeDescription>
+    <typeDescription>
+      <name>de.julielab.jcore.types.casmultiplier.MultiplierConfigParameters</name>
+      <description>A list of strings in properties format to specify parameters and their values. The format is &lt;key&gt;=&lt;value&gt;. May be used to transfer configuration properties from the multiplier reader to the multiplier.
+      Normally in UIMA, the multiplier would just have the configuration parameters itself. However, it can be confusing that the basic reader - without a successive multiplier - has some parameters that the multiplier reader does not exhibit because they must be set on the multiplier. Using this annotation, parameter settings can be sent to the multiplier which then does not need further configuration on its own.</description>
+      <supertypeName>uima.tcas.Annotation</supertypeName>
+      <features>
+        <featureDescription>
+          <name>parameters</name>
+          <description>An array of strings holding key-value pairs in the format &lt;key&gt;=&lt;value&gt;.</description>
+          <rangeTypeName>uima.cas.StringArray</rangeTypeName>
+        </featureDescription>
+      </features>
+    </typeDescription>
   </types>
 </typeSystemDescription>
\ No newline at end of file

From 7aa1b354c789597b300460474cf8ce9f2b474ef6 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 24 Jan 2022 14:13:09 +0100
Subject: [PATCH 114/269] Remove gene annotations completely covered by an
 internal reference annotation span.

---
 .../src/main/java/banner/tagging/pipe/LemmaPOS.java   |  2 --
 .../de/julielab/jcore/ae/banner/BANNERAnnotator.java  |  9 ++++++++-
 .../julielab/jcore/ae/flairner/FlairNerAnnotator.java | 11 +++++++++++
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/jcore-banner-ae/src/main/java/banner/tagging/pipe/LemmaPOS.java b/jcore-banner-ae/src/main/java/banner/tagging/pipe/LemmaPOS.java
index 8068cfa1b..36e8a7cd5 100644
--- a/jcore-banner-ae/src/main/java/banner/tagging/pipe/LemmaPOS.java
+++ b/jcore-banner-ae/src/main/java/banner/tagging/pipe/LemmaPOS.java
@@ -43,13 +43,11 @@ public LemmaPOS(Lemmatiser lemmatiser, Tagger posTagger) {
     public void setLemmatiser(Lemmatiser lemmatiser) {
         initResourcesMap();
         getResources().lemmatiser = lemmatiser;
-        System.out.println("Setting lemmatiser to " + Thread.currentThread());
     }
 
     public void setPosTagger(Tagger posTagger) {
         initResourcesMap();
         getResources().posTagger = posTagger;
-        System.out.println("Setting PoS Tagger to " + Thread.currentThread());
     }
 
     synchronized private void initResourcesMap() {
diff --git a/jcore-banner-ae/src/main/java/de/julielab/jcore/ae/banner/BANNERAnnotator.java b/jcore-banner-ae/src/main/java/de/julielab/jcore/ae/banner/BANNERAnnotator.java
index 0d8837ff6..1b48675d2 100644
--- a/jcore-banner-ae/src/main/java/de/julielab/jcore/ae/banner/BANNERAnnotator.java
+++ b/jcore-banner-ae/src/main/java/de/julielab/jcore/ae/banner/BANNERAnnotator.java
@@ -174,7 +174,7 @@ public void process(JCas jcas) throws AnalysisEngineProcessException {
                     a.setEnd(originalEnd);
                     excludeReferenceAnnotationSpans(a, intRefIndex);
                     if (a.getEnd() <= a.getBegin()) {
-                        log.error("After removing internal reference spans from the gene, it has no positive span any more. The original text was {} with offsets {}-{}. The new offsets are {}-{}.", jcas.getDocumentText().substring(originalBegin, originalEnd), originalBegin, originalEnd, a.getBegin(), a.getEnd());
+                        // It seems there was nothing left of a gene mention outside the internal reference; skip
                         continue;
                     }
                     if (a instanceof de.julielab.jcore.types.Annotation) {
@@ -211,6 +211,13 @@ private void excludeReferenceAnnotationSpans(Annotation a, JCoReOverlapAnnotatio
             if (overlappingAnnotation.getEnd() == a.getEnd()) {
                 a.setEnd(overlappingAnnotation.getBegin());
             }
+            // Set zero-character spans on genes that are completely enclosed by a reference. Those are cases
+            // like, for instance, "Supplementary Figs. S12 and S13, Tables S2 and S3" where S12, S13 and even
+            // Tables S2 are annotated as genes.
+            if (overlappingAnnotation.getBegin() <= a.getBegin() && overlappingAnnotation.getEnd() >= a.getEnd()) {
+                a.setBegin(0);
+                a.setEnd(0);
+            }
         }
     }
 }
diff --git a/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java b/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java
index 8154b0f5c..76184a17a 100644
--- a/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java
+++ b/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java
@@ -183,6 +183,10 @@ public void process(final JCas aJCas) throws AnalysisEngineProcessException {
                 EntityMention em = (EntityMention) JCoReAnnotationTools.getAnnotationByClassName(aJCas, entityClass);
                 helper.setAnnotationOffsetsRelativeToSentence(sentence, em, entity, adderConfig);
                 excludeReferenceAnnotationSpans(em, intRefIndex);
+                if (em.getEnd() <= em.getBegin()) {
+                    // It seems there was nothing left of a gene mention outside the internal reference; skip
+                    continue;
+                }
                 em.setSpecificType(entity.getTag());
                 em.setConfidence(String.valueOf(entity.getLabelConfidence()));
                 em.setComponentId(componentId);
@@ -270,6 +274,13 @@ private void excludeReferenceAnnotationSpans(Annotation a, JCoReOverlapAnnotatio
             if (overlappingAnnotation.getEnd() == a.getEnd()) {
                 a.setEnd(overlappingAnnotation.getBegin());
             }
+            // Set zero-character spans on genes that are completely enclosed by a reference. Those are cases
+            // like, for instance, "Supplementary Figs. S12 and S13, Tables S2 and S3" where S12, S13 and even
+            // Tables S2 are annotated as genes.
+            if (overlappingAnnotation.getBegin() <= a.getBegin() && overlappingAnnotation.getEnd() >= a.getEnd()) {
+                a.setBegin(0);
+                a.setEnd(0);
+            }
         }
     }
 

From 7f0a16e7015d2a351f6f9906612cf17e894273b6 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 3 Feb 2022 15:41:52 +0100
Subject: [PATCH 115/269] Remove gene annotations when the covered text is
 blank.

The previous effort to remove internal references from gene spans sometimes resulted in blank gene names.
---
 .../main/java/de/julielab/jcore/ae/banner/BANNERAnnotator.java  | 2 +-
 .../java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/jcore-banner-ae/src/main/java/de/julielab/jcore/ae/banner/BANNERAnnotator.java b/jcore-banner-ae/src/main/java/de/julielab/jcore/ae/banner/BANNERAnnotator.java
index 1b48675d2..b5c7e816e 100644
--- a/jcore-banner-ae/src/main/java/de/julielab/jcore/ae/banner/BANNERAnnotator.java
+++ b/jcore-banner-ae/src/main/java/de/julielab/jcore/ae/banner/BANNERAnnotator.java
@@ -173,7 +173,7 @@ public void process(JCas jcas) throws AnalysisEngineProcessException {
                     a.setBegin(originalBegin);
                     a.setEnd(originalEnd);
                     excludeReferenceAnnotationSpans(a, intRefIndex);
-                    if (a.getEnd() <= a.getBegin()) {
+                    if (a.getEnd() <= a.getBegin() || a.getCoveredText().isBlank()) {
                         // It seems there was nothing left of a gene mention outside the internal reference; skip
                         continue;
                     }
diff --git a/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java b/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java
index 76184a17a..de2382319 100644
--- a/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java
+++ b/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java
@@ -183,7 +183,7 @@ public void process(final JCas aJCas) throws AnalysisEngineProcessException {
                 EntityMention em = (EntityMention) JCoReAnnotationTools.getAnnotationByClassName(aJCas, entityClass);
                 helper.setAnnotationOffsetsRelativeToSentence(sentence, em, entity, adderConfig);
                 excludeReferenceAnnotationSpans(em, intRefIndex);
-                if (em.getEnd() <= em.getBegin()) {
+                if (em.getEnd() <= em.getBegin() || em.getCoveredText().isBlank()) {
                     // It seems there was nothing left of a gene mention outside the internal reference; skip
                     continue;
                 }

From 3159c5760252e946159b0c6f3e807b17e981f05a Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 3 Feb 2022 15:42:41 +0100
Subject: [PATCH 116/269] Fix a regular expression bug where the PMC multiplier
 could not receive its parameters.

---
 .../java/de/julielab/jcore/multiplier/pmc/PMCMultiplier.java    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCMultiplier.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCMultiplier.java
index f15b5d983..e4b80fac7 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCMultiplier.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCMultiplier.java
@@ -52,7 +52,7 @@ private void determineOmitBibReferences(JCas aJCas) throws AnalysisEngineProcess
             MultiplierConfigParameters multiplierConfigParameters = JCasUtil.selectSingle(aJCas, MultiplierConfigParameters.class);
             StringArray parameters = multiplierConfigParameters.getParameters();
             for (int i = 0; i < parameters.size(); ++i) {
-                String[] paramPair = parameters.get(i).split("\\s+=\\s+");
+                String[] paramPair = parameters.get(i).split("\\s*=\\s*");
                 if (paramPair.length != 2) {
                     String msg = "Error while parsing multiplier configuration parameters passed from the multiplier reader. The parameter array contains the entry \"" + parameters.get(i) + "\". The expected format is <key>=<value>.";
                     log.error(msg);

From fc2c1f710b13fbe224e1f306acb92250ed93b720 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 3 Feb 2022 15:44:17 +0100
Subject: [PATCH 117/269] Allow relative file paths.

---
 .../PersistentIndexAddonTermsProvider.java                | 8 +++++++-
 .../sharedresources/PersistentStringIndexMapProvider.java | 8 +++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentIndexAddonTermsProvider.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentIndexAddonTermsProvider.java
index b98514ee3..18d45b5b0 100644
--- a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentIndexAddonTermsProvider.java
+++ b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentIndexAddonTermsProvider.java
@@ -120,7 +120,13 @@ public void load(DataResource aData) throws ResourceInitializationException {
         File indexFile = null;
         boolean loadData = true;
         try {
-            File resourceFile = new File(uri);
+            File resourceFile;
+            try {
+                resourceFile = new File(uri);
+            } catch (IllegalArgumentException e) {
+                // to support relative file paths like file:resources/somefile.txt
+                resourceFile = new File(uri.getSchemeSpecificPart());
+            }
             String resourceFileName = FilenameUtils.getName(uri.toURL().getPath());
             indexFile = new File("es-consumer-cache", resourceFileName);
             if (resourceFile.exists() && indexFile.exists() && resourceFile.lastModified() > indexFile.lastModified()) {
diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentStringIndexMapProvider.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentStringIndexMapProvider.java
index 2551cedea..39994dc9c 100644
--- a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentStringIndexMapProvider.java
+++ b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/PersistentStringIndexMapProvider.java
@@ -122,7 +122,13 @@ public void load(DataResource aData) throws ResourceInitializationException {
         File indexFile = null;
         boolean loadData = true;
         try {
-            File resourceFile = new File(uri);
+            File resourceFile;
+            try {
+                resourceFile = new File(uri);
+            } catch (IllegalArgumentException e) {
+                // to support relative file paths like file:resources/somefile.txt
+                resourceFile = new File(uri.getSchemeSpecificPart());
+            }
             String resourceFileName = FilenameUtils.getName(uri.toURL().getPath());
             indexFile = new File("es-consumer-cache", resourceFileName);
             if (resourceFile.exists() && indexFile.exists() && resourceFile.lastModified() > indexFile.lastModified()) {

From bf31e93091fc3edf9b071e6335e5ff95b289a5a1 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 3 Feb 2022 15:45:11 +0100
Subject: [PATCH 118/269] Add the actual flattened event type to the event
 flattener descriptor.

---
 .../jcore/ae/eventflattener/desc/jcore-event-flattener-ae.xml    | 1 +
 1 file changed, 1 insertion(+)

diff --git a/jcore-event-flattener-ae/src/main/resources/de/julielab/jcore/ae/eventflattener/desc/jcore-event-flattener-ae.xml b/jcore-event-flattener-ae/src/main/resources/de/julielab/jcore/ae/eventflattener/desc/jcore-event-flattener-ae.xml
index ff351724b..7e3a1f520 100644
--- a/jcore-event-flattener-ae/src/main/resources/de/julielab/jcore/ae/eventflattener/desc/jcore-event-flattener-ae.xml
+++ b/jcore-event-flattener-ae/src/main/resources/de/julielab/jcore/ae/eventflattener/desc/jcore-event-flattener-ae.xml
@@ -13,6 +13,7 @@
         <typeSystemDescription>
       <imports>
         <import name="de.julielab.jcore.types.jcore-semantics-biology-types" />
+          <import name="de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types" />
       </imports>
     </typeSystemDescription>
         <fsIndexCollection />

From 37f37d4ff3a945c56cc716ac6589140bf7b6c425 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 10 Feb 2022 11:55:30 +0100
Subject: [PATCH 119/269] PMCReader: Fix a bug where figure captions were
 labeled as table captions

---
 .../java/de/julielab/jcore/reader/pmc/parser/FigParser.java | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FigParser.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FigParser.java
index 9149d8af9..428903fbb 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FigParser.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FigParser.java
@@ -30,7 +30,7 @@ public FigParser(NxmlDocumentParser nxmlDocumentParser) {
 	@Override
 	protected void parseElement(ElementParsingResult figResult) throws ElementParsingException {
 		try {
-			Optional<String> tableWrapId = getXPathValue("@id");
+			Optional<String> figureId = getXPathValue("@id");
 			Optional<ParsingResult> labelResult = parseXPath("label");
 			Optional<String> labelString = getXPathValue("label");
 			Optional<ParsingResult> captionResult = parseXPath("caption");
@@ -38,7 +38,7 @@ protected void parseElement(ElementParsingResult figResult) throws ElementParsin
 			captionResult.ifPresent(r -> {
 				ElementParsingResult result = (ElementParsingResult) r;
 				Caption caption = (Caption) result.getAnnotation();
-				caption.setCaptionType("table");
+				caption.setCaptionType("figure");
 				figResult.addSubResult(r);
 			});
 			labelResult.ifPresent(figResult::addSubResult);
@@ -52,7 +52,7 @@ protected void parseElement(ElementParsingResult figResult) throws ElementParsin
 			labelString.ifPresent(figure::setObjectLabel);
 			captionResult.map(r -> (Caption) ((ElementParsingResult) r).getAnnotation())
 					.ifPresent(figure::setObjectCaption);
-			tableWrapId.ifPresent(figure::setObjectId);
+			figureId.ifPresent(figure::setObjectId);
 
 			figResult.setAnnotation(figure);
 		} catch (NavException | XPathParseException | XPathEvalException e) {

From 14a04cdcbec913d6e58aab11d093e90d56132dd5 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 14 Feb 2022 17:18:10 +0100
Subject: [PATCH 120/269] Add the PMCDBMultiplier.

The PMCReader was also adapted to work directly with input streams.
---
 jcore-pmc-db-reader/LICENSE                   |  26 +++
 jcore-pmc-db-reader/README.md                 |  34 +++
 jcore-pmc-db-reader/component.meta            |  21 ++
 jcore-pmc-db-reader/pom.xml                   |  71 ++++++
 .../jcore/multiplier/pmc/PMCDBMultiplier.java | 221 ++++++++++++++++++
 .../pmc/desc/jcore-pmc-db-multiplier.xml      |  71 ++++++
 .../PMCDBMultiplierHashComparisonTest.java    | 213 +++++++++++++++++
 .../multiplier/pmc/PMCDBMultiplierTest.java   | 103 ++++++++
 .../test/resources/testdocs/PMC6949206.xml    |   6 +
 .../test/resources/testdocs/PMC7511315.xml    |  28 +++
 jcore-pmc-reader/LICENSE                      |   2 +-
 .../jcore/reader/pmc/CasPopulator.java        |  17 ++
 .../jcore/reader/pmc/NXMLURIIterator.java     |  11 +-
 .../reader/pmc/parser/NxmlDocumentParser.java |   2 +-
 .../src/main/resources/LICENSE.txt            |   2 +-
 15 files changed, 822 insertions(+), 6 deletions(-)
 create mode 100644 jcore-pmc-db-reader/LICENSE
 create mode 100644 jcore-pmc-db-reader/README.md
 create mode 100644 jcore-pmc-db-reader/component.meta
 create mode 100644 jcore-pmc-db-reader/pom.xml
 create mode 100644 jcore-pmc-db-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplier.java
 create mode 100644 jcore-pmc-db-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-db-multiplier.xml
 create mode 100644 jcore-pmc-db-reader/src/test/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplierHashComparisonTest.java
 create mode 100644 jcore-pmc-db-reader/src/test/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplierTest.java
 create mode 100644 jcore-pmc-db-reader/src/test/resources/testdocs/PMC6949206.xml
 create mode 100644 jcore-pmc-db-reader/src/test/resources/testdocs/PMC7511315.xml

diff --git a/jcore-pmc-db-reader/LICENSE b/jcore-pmc-db-reader/LICENSE
new file mode 100644
index 000000000..d0f946a29
--- /dev/null
+++ b/jcore-pmc-db-reader/LICENSE
@@ -0,0 +1,26 @@
+BSD 2-Clause License
+
+Copyright (c) 2022, JULIE Lab
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/jcore-pmc-db-reader/README.md b/jcore-pmc-db-reader/README.md
new file mode 100644
index 000000000..f97bc30d2
--- /dev/null
+++ b/jcore-pmc-db-reader/README.md
@@ -0,0 +1,34 @@
+# JCoRe Pubmed Central DB Reader
+
+**Descriptor Path**:
+```
+de.julielab.jcore.multiplier.pmc.desc.jcore-pmc-db-multiplier
+```
+
+JeDIS database reader for PMC base documents.
+
+
+
+**1. Parameters**
+
+| Parameter Name | Parameter Type | Mandatory | Multivalued | Description |
+|----------------|----------------|-----------|-------------|-------------|
+| param1 | UIMA-Type | Boolean | Boolean | Description |
+| param2 | UIMA-Type | Boolean | Boolean | Description |
+
+**2. Predefined Settings**
+
+| Parameter Name | Parameter Syntax | Example |
+|----------------|------------------|---------|
+| param1 | Syntax-Description | `Example` |
+| param2 | Syntax-Description | `Example` |
+
+**3. Capabilities**
+
+| Type | Input | Output |
+|------|:-----:|:------:|
+| de.julielab.jcore.types.TYPE |  | `+` |
+| de.julielab.jcore.types.ace.TYPE | `+` |  |
+
+
+[1] Some Literature?
diff --git a/jcore-pmc-db-reader/component.meta b/jcore-pmc-db-reader/component.meta
new file mode 100644
index 000000000..c57c78fa7
--- /dev/null
+++ b/jcore-pmc-db-reader/component.meta
@@ -0,0 +1,21 @@
+{
+    "categories": [
+        "multiplier",
+        "reader"
+    ],
+    "description": "JeDIS database reader for PMC base documents.",
+    "descriptors": [
+        {
+            "category": "multiplier",
+            "location": "de.julielab.jcore.multiplier.pmc.desc.jcore-pmc-db-multiplier"
+        }
+    ],
+    "exposable": true,
+    "group": "general",
+    "maven-artifact": {
+        "artifactId": "jcore-pmc-db-reader",
+        "groupId": "de.julielab",
+        "version": "2.6.0-SNAPSHOT"
+    },
+    "name": "JCoRe Pubmed Central DB Reader"
+}
diff --git a/jcore-pmc-db-reader/pom.xml b/jcore-pmc-db-reader/pom.xml
new file mode 100644
index 000000000..21d363909
--- /dev/null
+++ b/jcore-pmc-db-reader/pom.xml
@@ -0,0 +1,71 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>jcore-pmc-db-reader</artifactId>
+    <packaging>jar</packaging>
+    <groupId>de.julielab</groupId>
+
+    <parent>
+        <groupId>de.julielab</groupId>
+        <artifactId>jedis-parent</artifactId>
+        <version>2.6.0-SNAPSHOT</version>
+        <relativePath>../jedis-parent</relativePath>
+    </parent>
+
+    <dependencies>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-descriptor-creator</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-db-reader</artifactId>
+            <version>2.6.0-SNAPSHOT</version>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-pmc-reader</artifactId>
+            <version>${project.parent.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-types</artifactId>
+            <version>${jcore-types-version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-db-test-utilities</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.assertj</groupId>
+            <artifactId>assertj-core</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-utilities</artifactId>
+            <version>${jcore-utilities-version}</version>
+        </dependency>
+    </dependencies>
+    <name>JCoRe Pubmed Central DB Reader</name>
+    <organization>
+        <name>JULIE Lab Jena, Germany</name>
+        <url>http://www.julielab.de</url>
+    </organization>
+    <url>https://github.com/JULIELab/jcore-base/tree/master/jcore-pmc-db-reader</url>
+    <description>JeDIS database reader for PMC base documents.</description>
+</project>
diff --git a/jcore-pmc-db-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplier.java b/jcore-pmc-db-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplier.java
new file mode 100644
index 000000000..551b8dacb
--- /dev/null
+++ b/jcore-pmc-db-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplier.java
@@ -0,0 +1,221 @@
+package de.julielab.jcore.multiplier.pmc;
+
+import de.julielab.costosys.configuration.FieldConfig;
+import de.julielab.costosys.dbconnection.CoStoSysConnection;
+import de.julielab.jcore.reader.db.DBMultiplier;
+import de.julielab.jcore.reader.db.DBReader;
+import de.julielab.jcore.reader.pmc.CasPopulator;
+import de.julielab.jcore.reader.pmc.NoDataAvailableException;
+import de.julielab.jcore.reader.pmc.PMCReaderBase;
+import de.julielab.jcore.reader.pmc.parser.ElementParsingException;
+import de.julielab.jcore.types.casflow.ToVisit;
+import de.julielab.jcore.types.casmultiplier.RowBatch;
+import de.julielab.jcore.types.pubmed.Header;
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.AbstractCas;
+import org.apache.uima.cas.FeatureStructure;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.StringArray;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.*;
+import java.util.stream.Collectors;
+
+public class PMCDBMultiplier extends DBMultiplier {
+    public static final String PARAM_OMIT_BIB_REFERENCES = PMCReaderBase.PARAM_OMIT_BIB_REFERENCES;
+    public static final String PARAM_ADD_SHA_HASH = "AddShaHash";
+    public static final String PARAM_TABLE_DOCUMENT = "DocumentTable";
+    public static final String PARAM_TABLE_DOCUMENT_SCHEMA = "DocumentTableSchema";
+    public static final String PARAM_TO_VISIT_KEYS = "ToVisitKeys";
+    protected static final byte[] comma = ",".getBytes();
+    private final static Logger log = LoggerFactory.getLogger(PMCDBMultiplier.class);
+    @ConfigurationParameter(name = PARAM_OMIT_BIB_REFERENCES, mandatory = false, defaultValue = "false", description = "If set to true, references to the bibliography are omitted from the CAS text.")
+    protected boolean omitBibReferences;
+    @ConfigurationParameter(name = PARAM_ADD_SHA_HASH, mandatory = false, description = "For use with AnnotationDefinedFlowController. Possible values: document_text, defaults to 'document_text' and thus doesn't need to be specified manually at the moment. This parameter needs to match the value for the same parameter given to the XMIDBWriter in this pipeline. Then, a comparison between the existing hash in the database and the new hash of the CAS read in this pipeline can be made. In case the hashes match, the CAS skips all component except the DBCheckpointAE to mark the document as processed.")
+    private String documentItemToHash;
+    @ConfigurationParameter(name = PARAM_TABLE_DOCUMENT, mandatory = false, description = "For use with AnnotationDefinedFlowController. String parameter indicating the name of the " +
+            "table where the XMI data and, thus, the hash is stored. The name must be schema qualified. Note that in this component, only the ToVisit annotation is created that determines which components to apply to a CAS with matching (unchanged) hash. The logic to actually control the CAS flow is contained in the AnnotationDefinedFlowController.")
+    private String xmiStorageDataTable;
+    @ConfigurationParameter(name = PARAM_TABLE_DOCUMENT_SCHEMA, mandatory = false, description = "For use with AnnotationDefinedFlowController. The name of the schema that the document table - given with the " + PARAM_TABLE_DOCUMENT + " parameter - adheres to. Only the primary key part is required for hash value retrieval.")
+    private String xmiStorageDataTableSchema;
+    @ConfigurationParameter(name = PARAM_TO_VISIT_KEYS, mandatory = false, description = "For use with AnnotationDefinedFlowController. The delegate AE keys of the AEs this CAS should still applied on although the hash has not changed. Can be null or empty indicating that no component should be applied to the CAS. This is, however, the task of the AnnotationDefinedFlowController.")
+    private String[] toVisitKeys;
+
+    private CasPopulator casPopulator;
+    private Map<String, String> docId2HashMap;
+
+    @Override
+    public void initialize(UimaContext aContext) throws ResourceInitializationException {
+        super.initialize(aContext);
+        xmiStorageDataTable = (String) aContext.getConfigParameterValue(PARAM_TABLE_DOCUMENT);
+        xmiStorageDataTableSchema = (String) aContext.getConfigParameterValue(PARAM_TABLE_DOCUMENT_SCHEMA);
+        documentItemToHash = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_ADD_SHA_HASH)).orElse("document_text");
+        toVisitKeys = (String[]) aContext.getConfigParameterValue(PARAM_TO_VISIT_KEYS);
+        omitBibReferences = Optional.ofNullable((Boolean) aContext.getConfigParameterValue(PARAM_OMIT_BIB_REFERENCES)).orElse(false);
+        // We don't know yet which tables to read. Thus, we leave the row mapping out.
+        // We will know them once DBMultiplier#process(JCas) has been run.
+        initialized = false;
+
+        if (!(xmiStorageDataTable == null && xmiStorageDataTableSchema == null) && !(xmiStorageDataTable != null && xmiStorageDataTableSchema != null && documentItemToHash != null)) {
+            String errorMsg = String.format("From the parameters '%s' and '%s' some are specified and some aren't. To activate hash value comparison in order to add aggregate component keys for CAS visit, specify all those parameters. Otherwise, specify none.", PARAM_TABLE_DOCUMENT, PARAM_TABLE_DOCUMENT_SCHEMA);
+            log.error(errorMsg);
+            throw new ResourceInitializationException(new IllegalArgumentException(errorMsg));
+        }
+
+        try {
+            casPopulator = new CasPopulator(omitBibReferences);
+        } catch (IOException e) {
+            String errorMsg = "Could not initialize the PMC CasPopulator.";
+            log.error(errorMsg);
+            throw new ResourceInitializationException(e);
+        }
+    }
+
+    @Override
+    public void process(JCas aJCas) throws AnalysisEngineProcessException {
+        super.process(aJCas);
+        docId2HashMap = fetchCurrentHashesFromDatabase(JCasUtil.selectSingle(aJCas, RowBatch.class));
+    }
+
+    @Override
+    public AbstractCas next() throws AnalysisEngineProcessException {
+        JCas jCas = getEmptyJCas();
+        try {
+            if (documentDataIterator.hasNext()) {
+                byte[][] documentData = documentDataIterator.next();
+                String pkString = DBReader.setDBProcessingMetaData(dbc, readDataTable, tableName, documentData, jCas);
+                populateCas(jCas, documentData, pkString);
+                setToVisitAnnotation(jCas, pkString);
+            }
+        } catch (Exception e) {
+            log.error("Exception occurred: ", e);
+            throw new AnalysisEngineProcessException(e);
+        }
+        return jCas;
+    }
+
+    private void populateCas(JCas jCas, byte[][] documentData, String pkString) throws NoDataAvailableException, ElementParsingException {
+        List<Integer> pkIndices = dbc.getPrimaryKeyIndices();
+
+        // get index of xmlData;
+        // assumes that only one byte[] in arrayArray contains this data
+        // and that this byte[] is at the only index position that holds no
+        // primary key
+        List<Integer> allIndices = new ArrayList<Integer>();
+        for (int i = 0; i < documentData.length; i++) {
+            allIndices.add(i);
+        }
+        List<Integer> xmlIndices = new ArrayList<>(allIndices);
+        for (Integer pkIndex : pkIndices)
+            xmlIndices.remove(pkIndex);
+        int xmlIndex = xmlIndices.get(0);
+        try {
+            casPopulator.populateCas(new ByteArrayInputStream(documentData[xmlIndex]), jCas);
+        } catch (Exception e) {
+            log.error("Could not parse document {}.", pkString, e);
+            throw e;
+        }
+        // It actually happens that some PMC XML documents do not contain their own ID. We can use the ID obtained
+        // via the database primary key, which in turn might be derived from the original file name or some meta file.
+        Header header = JCasUtil.selectSingle(jCas, Header.class);
+        if (header.getDocId().isBlank()) {
+            log.debug("Document has no docId set. Derived the ID {} from the primary key and setting it as the Header#docId feature.", pkString);
+            header.setDocId(pkString);
+        }
+    }
+
+    /**
+     * <p>Fetches the hashes of the currently stored documents in the database.</p>
+     *
+     * @param rowBatch The annotation specifying which documents should be fetched by the multiplier and then be processed by the aggregate.
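+     * @return A map from a string representation of the RowBatch's document IDs to the hashes for the respective IDs, or <tt>null</tt> if no document table is configured, the table does not exist or the RowBatch has no identifiers.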
+     * @throws AnalysisEngineProcessException If the SQL request fails.
+     */
+    private Map<String, String> fetchCurrentHashesFromDatabase(RowBatch rowBatch) throws AnalysisEngineProcessException {
+        if (xmiStorageDataTable != null && dbc.tableExists(xmiStorageDataTable) && rowBatch.getIdentifiers() != null && rowBatch.getIdentifiers().size() > 0) {
+            String hashColumn = documentItemToHash + "_sha256";
+            // Extract the document IDs in this RowBatch. The IDs could be composite keys.
+            List<String[]> documentIds = new ArrayList<>(rowBatch.getIdentifiers().size());
+            Iterator<FeatureStructure> documentIDsIt = rowBatch.getIdentifiers().iterator();
+            while (documentIDsIt.hasNext()) {
+                StringArray pkArray = (StringArray) documentIDsIt.next();
+                documentIds.add(pkArray.toStringArray());
+            }
+            Map<String, String> id2hash = new HashMap<>(documentIds.size());
+            // This is the map we want to fill that lets us look up the hash of the document text by document ID.
+            String sql = null;
+            // Query the database for the document IDs in the current RowBatch and retrieve hashes.
+            try (CoStoSysConnection conn = dbc.obtainOrReserveConnection()) {
+                FieldConfig xmiTableSchema = dbc.getFieldConfiguration(xmiStorageDataTableSchema);
+                String idQuery = documentIds.stream()
+                        .map(key -> Arrays.stream(key).map(part -> "%s='" + part + "'").toArray(String[]::new))
+                        .map(xmiTableSchema::expandPKNames).map(expandedKeys -> String.join(" AND ", expandedKeys))
+                        .collect(Collectors.joining(" OR "));
+                sql = String.format("SELECT %s,%s FROM %s WHERE %s", xmiTableSchema.getPrimaryKeyString(), hashColumn, xmiStorageDataTable, idQuery);
+                ResultSet rs = conn.createStatement().executeQuery(sql);
+                while (rs.next()) {
+                    StringBuilder pkSb = new StringBuilder();
+                    for (int i = 0; i < xmiTableSchema.getPrimaryKey().length; i++)
+                        pkSb.append(rs.getString(i + 1)).append(',');
+                    // Remove trailing comma
+                    pkSb.deleteCharAt(pkSb.length() - 1);
+                    String hash = rs.getString(xmiTableSchema.getPrimaryKey().length + 1);
+                    id2hash.put(pkSb.toString(), hash);
+                }
+            } catch (SQLException e) {
+                log.error("Could not retrieve hashes from the database. SQL query was '{}':", sql, e);
+                throw new AnalysisEngineProcessException(e);
+            }
+            return id2hash;
+        }
+        return null;
+    }
+
+    /**
+     * <p>Creates a {@link ToVisit} annotation based on document text hash comparison and the defined parameter values.</p>
+     * <p>Computes the hash of the newly read CAS and compares it to the hash for the same document retrieved from the
+     * database, if present. If there was a hash in the database and the hash values are equal, creates the <tt>ToVisit</tt>
+     * annotation and adds the toVisitKeys passed in the configuration of this component.</p>
+     *
+     * @param jCas The newly read JCas.
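+     * @param pkString The string representation of the document's primary key, used to look up the existing hash of the document.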
+     */
+    private void setToVisitAnnotation(JCas jCas, String pkString) {
+        if (xmiStorageDataTable != null && dbc.tableExists(xmiStorageDataTable)) {
+            String existingHash = docId2HashMap.get(pkString);
+            if (existingHash != null) {
+                String newHash = getHash(jCas);
+                if (existingHash.equals(newHash)) {
+                    if (log.isTraceEnabled())
+                        log.trace("Document {} has a document text hash that equals the one present in the database. Creating a ToVisit annotation routing it only to the components with delegate keys {}.", pkString, toVisitKeys);
+                    ToVisit toVisit = new ToVisit(jCas);
+                    if (toVisitKeys != null && toVisitKeys.length != 0) {
+                        StringArray keysArray = new StringArray(jCas, toVisitKeys.length);
+                        keysArray.copyFromArray(toVisitKeys, 0, 0, toVisitKeys.length);
+                        toVisit.setDelegateKeys(keysArray);
+                    }
+                    toVisit.addToIndexes();
+                }
+            } else {
+                log.trace("No existing hash was found for document {}", pkString);
+            }
+        }
+    }
+
+    private String getHash(JCas newCas) {
+        final String documentText = newCas.getDocumentText();
+        final byte[] sha = DigestUtils.sha256(documentText.getBytes());
+        return Base64.encodeBase64String(sha);
+    }
+}
diff --git a/jcore-pmc-db-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-db-multiplier.xml b/jcore-pmc-db-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-db-multiplier.xml
new file mode 100644
index 000000000..1bf858c07
--- /dev/null
+++ b/jcore-pmc-db-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-db-multiplier.xml
@@ -0,0 +1,71 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+    <primitive>true</primitive>
+    <annotatorImplementationName>de.julielab.jcore.multiplier.pmc.PMCDBMultiplier</annotatorImplementationName>
+    <analysisEngineMetaData>
+        <name>JCoRe Abstract Database Multiplier</name>
+        <description>A multiplier that receives document IDs to read from a database table from the DBMultiplierReader. The reader also delivers the path to the corpus storage system (CoStoSys) configuration and additional tables for joining with the main data table. This multiplier class is abstract and cannot be used directly.Extending classes must implement the next() method to actually read documents from the database and populate CASes with them. This component is a part of the Jena Document Information System, JeDIS.</description>
+        <vendor>JULIE Lab Jena, Germany</vendor>
+        <copyright>JULIE Lab Jena, Germany</copyright>
+        <configurationParameters>
+            <configurationParameter>
+                <name>OmitBibliographyReferences</name>
+                <description>If set to true, references to the bibliography are omitted from the CAS text.</description>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>AddShaHash</name>
+                <description>For use with AnnotationDefinedFlowController. Possible values: document_text, defaults to 'document_text' and thus doesn't need to be specified manually at the moment. This parameter needs to match the value for the same parameter given to the XMIDBWriter in this pipeline. Then, a comparison between the existing hash in the database and the new hash of the CAS read in this pipeline can be made. In case the hashes match, the CAS skips all component except the DBCheckpointAE to mark the document as processed.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>DocumentTable</name>
+                <description>For use with AnnotationDefinedFlowController. String parameter indicating the name of the table where the XMI data and, thus, the hash is stored. The name must be schema qualified. Note that in this component, only the ToVisit annotation is created that determines which components to apply to a CAS with matching (unchanged) hash. The logic to actually control the CAS flow is contained in the AnnotationDefinedFlowController.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>DocumentTableSchema</name>
+                <description>For use with AnnotationDefinedFlowController. The name of the schema that the document table - given with the DocumentTable parameter - adheres to. Only the primary key part is required for hash value retrieval.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>ToVisitKeys</name>
+                <description>For use with AnnotationDefinedFlowController. The delegate AE keys of the AEs this CAS should still applied on although the hash has not changed. Can be null or empty indicating that no component should be applied to the CAS. This is, however, the task of the AnnotationDefinedFlowController.</description>
+                <type>String</type>
+                <multiValued>true</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+        </configurationParameters>
+        <configurationParameterSettings>
+            <nameValuePair>
+                <name>OmitBibliographyReferences</name>
+                <value>
+                    <boolean>false</boolean>
+                </value>
+            </nameValuePair>
+        </configurationParameterSettings>
+        <typeSystemDescription>
+            <imports>
+                <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types" />
+                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types" />
+                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types" />
+            </imports>
+        </typeSystemDescription>
+        <fsIndexCollection/>
+        <capabilities/>
+        <operationalProperties>
+            <modifiesCas>true</modifiesCas>
+            <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+            <outputsNewCASes>true</outputsNewCASes>
+        </operationalProperties>
+    </analysisEngineMetaData>
+</analysisEngineDescription>
\ No newline at end of file
diff --git a/jcore-pmc-db-reader/src/test/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplierHashComparisonTest.java b/jcore-pmc-db-reader/src/test/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplierHashComparisonTest.java
new file mode 100644
index 000000000..a4f02e11a
--- /dev/null
+++ b/jcore-pmc-db-reader/src/test/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplierHashComparisonTest.java
@@ -0,0 +1,213 @@
+package de.julielab.jcore.multiplier.pmc;
+
+
+import de.julielab.costosys.dbconnection.CoStoSysConnection;
+import de.julielab.costosys.dbconnection.DataBaseConnector;
+import de.julielab.jcore.db.test.DBTestUtils;
+import de.julielab.jcore.types.casflow.ToVisit;
+import de.julielab.jcore.types.casmultiplier.RowBatch;
+import de.julielab.jcore.utility.JCoReTools;
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.configuration2.ex.ConfigurationException;
+import org.apache.uima.UIMAException;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.JCasIterator;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.fit.factory.TypeSystemDescriptionFactory;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSArray;
+import org.apache.uima.jcas.cas.StringArray;
+import org.apache.uima.resource.metadata.TypeSystemDescription;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.testcontainers.containers.PostgreSQLContainer;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Path;
+import java.sql.PreparedStatement;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/**
+ * This test is an adaption of the XMLDBMultiplierTest in jcore-xml-db-reader. It tests whether the hash code comparison
+ * works as intended.
+ */
+public class PMCDBMultiplierHashComparisonTest {
+
+    private static final String SOURCE_XML_TABLE = "source_xml_table";
+    private static final String TARGET_XMI_TABLE = "target_xmi_table";
+    private static final String PMCID_FIELD_NAME = "pmcid";
+    private static final String DOCID_FIELD_NAME = "docid";
+    private static final String XML_FIELD_NAME = "xml";
+    private static final String BASE_DOCUMENT_FIELD_NAME = "base_document";
+    private static final String HASH_FIELD_NAME = "documentText_sha256";
+    private static final String MAX_XMI_ID_FIELD_NAME = "max_xmi_id";
+    private static final String SOFA_MAPPING_FIELD_NAME = "sofa_mapping";
+    private static final String SUBSET_TABLE = "test_subset";
+    public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:"+DataBaseConnector.POSTGRES_VERSION);
+    private static String costosysConfig;
+
+    @BeforeAll
+    public static void setup() throws SQLException, UIMAException, IOException, ConfigurationException {
+        postgres.start();
+        DBTestUtils.createAndSetHiddenConfig(Path.of("src", "test", "resources", "hiddenConfig").toString(), postgres);
+
+        DataBaseConnector dbc = DBTestUtils.getDataBaseConnector(postgres);
+        dbc.setActiveTableSchema("pmc");
+        costosysConfig = DBTestUtils.createTestCostosysConfig("pmc", 2, postgres);
+        new File(costosysConfig).deleteOnExit();
+        try (CoStoSysConnection conn = dbc.obtainOrReserveConnection()) {
+            // We create two tables. One is the XML table the multiplier reads from and maps the contents to the JCas.
+            // The other is a simulation of an XMI table used to serialize CAS instances via the jcore-xmi-db-writer.
+            // We need that target table to test the hash value comparison mechanism: If a document does not exist
+            // in the target table or has a non-matching hash on its document text, proceed as normal.
+            // But if the hash matches, we want to reserve the possibility to skip most of the subsequent pipeline.
+            // For this, we could use the AnnotationDefinedFlowController from jcore-flow-controllers. This controller
+            // looks for annotations of the ToVisit type that specify which exact components in an aggregate should
+            // be applied to the CAS carrying the ToVisit annotation.
+            prepareSourceXMLTable(dbc, conn);
+            prepareTargetXMITable(dbc, conn);
+        }
+        dbc.defineSubset(SUBSET_TABLE, SOURCE_XML_TABLE, "Test subset");
+        assertThat(dbc.getNumRows(SOURCE_XML_TABLE)).isEqualTo(10); // rows inserted by prepareSourceXMLTable
+        assertThat(dbc.getNumRows(TARGET_XMI_TABLE)).isEqualTo(5); // rows inserted by prepareTargetXMITable
+
+        dbc.close();
+    }
+
+    /** Creates SOURCE_XML_TABLE and batch-inserts 10 minimal JATS documents with the PMC IDs 0 to 9. */
+    private static void prepareSourceXMLTable(DataBaseConnector dbc, CoStoSysConnection conn) throws SQLException {
+        String xmlFmt = "<!DOCTYPE article PUBLIC \"-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD with MathML3 v1.3 20210610//EN\" \"JATS-archivearticle1-3-mathml3.dtd\">\n" +
+                "<article><front><article-meta><article-id pub-id-type=\"pmc\">%d</article-id><volume>42</volume></article-meta></front>\n" +
+                "<body><sec><p>This is text nr %d.</p></sec></body>\n</article>";
+        dbc.createTable(SOURCE_XML_TABLE, "Test table for hash comparison test.");
+        String sql = String.format("INSERT INTO %s (%s,%s) VALUES (?,XMLPARSE(CONTENT ?))", SOURCE_XML_TABLE, PMCID_FIELD_NAME, XML_FIELD_NAME);
+        try (PreparedStatement ps = conn.prepareStatement(sql)) { // try-with-resources closes the statement after the batch insert
+            for (int i = 0; i < 10; i++) {
+                ps.setString(1, String.valueOf(i));
+                ps.setString(2, String.format(xmlFmt, i, i));
+                ps.addBatch();
+            }
+            ps.executeBatch();
+        }
+    }
+
+    /** Creates TARGET_XMI_TABLE with a hash column and inserts documents 0 to 4; document 3 gets a deliberately wrong hash. */
+    private static void prepareTargetXMITable(DataBaseConnector dbc, CoStoSysConnection conn) throws SQLException {
+        // The PMC parser tries to format blocks of content using newlines which makes the test a bit awkward.
+        // The test might break if this formatting is changed.
+        String documentTextFmt = "\nThis is text nr %d.\n\n";
+        dbc.createTable(TARGET_XMI_TABLE, "xmi_text", "Test table for hash comparison test.");
+        dbc.assureColumnsExist(TARGET_XMI_TABLE, List.of(HASH_FIELD_NAME), "text");
+        String sql = String.format("INSERT INTO %s (%s,%s,%s,%s,%s) VALUES (?,XMLPARSE(CONTENT ?),?,?,?)", TARGET_XMI_TABLE, DOCID_FIELD_NAME, BASE_DOCUMENT_FIELD_NAME, HASH_FIELD_NAME, MAX_XMI_ID_FIELD_NAME, SOFA_MAPPING_FIELD_NAME);
+        try (PreparedStatement ps = conn.prepareStatement(sql)) { // try-with-resources closes the statement after the batch insert
+            // Note that we only add half of the documents compared to the source XML import. This way we test
+            // if the code behaves right when the target document does not yet exist at all.
+            for (int i = 0; i < 5; i++) {
+                String documentText = String.format(documentTextFmt, i);
+                ps.setString(1, String.valueOf(i));
+                ps.setString(2, documentText);
+                // For one document in the "target XMI" table we put in a wrong hash. Thus, this document should not trigger
+                // the "toVisit" mechanism.
+                ps.setString(3, i != 3 ? getHash(documentText) : "someanotherhash");
+                ps.setInt(4, 0);
+                ps.setString(5, "dummy");
+                ps.addBatch();
+            }
+            ps.executeBatch();
+        }
+    }
+
+    @AfterAll
+    public static void tearDown() {
+        postgres.stop(); // stops the PostgreSQL test container that setup() started
+    }
+
+    /** Returns the Base64-encoded SHA-256 digest of the UTF-8 bytes of {@code str}, independent of the platform charset. */
+    private static String getHash(String str) {
+        return Base64.encodeBase64String(DigestUtils.sha256(str));
+    }
+
+    /**
+     * Builds the multiplier input: a JCas with one indexed RowBatch that references the source XML table, the "pmc" schema, the subset table and the primary keys 0 to 9.
+     *
+     * @return The JCas holding the RowBatch.
+     * @throws UIMAException If some UIMA operation fails.
+     */
+    private JCas prepareCas() throws UIMAException {
+        JCas cas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-document-meta-pubmed-types", "de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types", "de.julielab.jcore.types.jcore-casflow-types");
+        StringArray tables = new StringArray(cas, 1);
+        tables.set(0, SOURCE_XML_TABLE);
+        StringArray schemas = new StringArray(cas, 1);
+        schemas.set(0, "pmc");
+        // One single-element primary key per source document
+        FSArray primaryKeys = new FSArray(cas, 10);
+        for (int docNr = 0; docNr < 10; docNr++) {
+            StringArray primaryKey = new StringArray(cas, 1);
+            primaryKey.set(0, String.valueOf(docNr));
+            primaryKeys = JCoReTools.addToFSArray(primaryKeys, primaryKey);
+        }
+        RowBatch batch = new RowBatch(cas);
+        batch.setTables(tables);
+        batch.setTableSchemas(schemas);
+        batch.setTableName(SUBSET_TABLE);
+        batch.setIdentifiers(primaryKeys);
+        batch.setCostosysConfiguration(costosysConfig);
+        batch.addToIndexes();
+        return cas;
+    }
+
+    @Test
+    public void testHashComparison() throws Exception {
+        JCas inputCas = prepareCas();
+        TypeSystemDescription typeSystem = TypeSystemDescriptionFactory.createTypeSystemDescription("de.julielab.jcore.types.jcore-document-meta-pubmed-types", "de.julielab.jcore.types.jcore-document-structure-types", "de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types", "de.julielab.jcore.types.extensions.jcore-document-meta-extension-types", "de.julielab.jcore.types.jcore-casflow-types");
+        AnalysisEngine multiplier = AnalysisEngineFactory.createEngine(PMCDBMultiplier.class, typeSystem,
+                PMCDBMultiplier.PARAM_ADD_SHA_HASH, "documentText",
+                PMCDBMultiplier.PARAM_TABLE_DOCUMENT, TARGET_XMI_TABLE,
+                PMCDBMultiplier.PARAM_TABLE_DOCUMENT_SCHEMA, "xmi_text",
+                PMCDBMultiplier.PARAM_TO_VISIT_KEYS, "ThisIsTheVisitKey");
+        List<String> collectedKeys = new ArrayList<>();
+        JCasIterator casIterator = multiplier.processAndOutputNewCASes(inputCas);
+        while (casIterator.hasNext()) {
+            JCas documentCas = casIterator.next();
+            for (ToVisit toVisit : JCasUtil.select(documentCas, ToVisit.class)) {
+                toVisit.getDelegateKeys().forEach(key -> collectedKeys.add(key));
+            }
+            documentCas.release();
+        }
+        // Four documents in the target table carry the correct hash, so the delegate key must show up four times.
+        assertThat(collectedKeys).containsExactly("ThisIsTheVisitKey", "ThisIsTheVisitKey", "ThisIsTheVisitKey", "ThisIsTheVisitKey");
+    }
+
+    @Test
+    public void testHashComparison2() throws Exception {
+        JCas inputCas = prepareCas();
+        TypeSystemDescription typeSystem = TypeSystemDescriptionFactory.createTypeSystemDescription("de.julielab.jcore.types.jcore-document-meta-pubmed-types", "de.julielab.jcore.types.jcore-document-structure-types", "de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types", "de.julielab.jcore.types.extensions.jcore-document-meta-extension-types", "de.julielab.jcore.types.jcore-casflow-types");
+        // No keys to visit are configured here, i.e. the whole subsequent pipeline should be skipped.
+        // This is signalled by ToVisit annotations whose delegate keys are null.
+        AnalysisEngine multiplier = AnalysisEngineFactory.createEngine(PMCDBMultiplier.class, typeSystem,
+                PMCDBMultiplier.PARAM_ADD_SHA_HASH, "documentText",
+                PMCDBMultiplier.PARAM_TABLE_DOCUMENT, TARGET_XMI_TABLE,
+                PMCDBMultiplier.PARAM_TABLE_DOCUMENT_SCHEMA, "xmi_text");
+        JCasIterator casIterator = multiplier.processAndOutputNewCASes(inputCas);
+        List<ToVisit> keylessToVisits = new ArrayList<>();
+        while (casIterator.hasNext()) {
+            JCas documentCas = casIterator.next();
+            JCasUtil.select(documentCas, ToVisit.class).stream()
+                    .filter(toVisit -> toVisit.getDelegateKeys() == null)
+                    .forEach(keylessToVisits::add);
+            documentCas.release();
+        }
+        // Four documents in the target table carry the correct hash, so we expect four ToVisit annotations without keys.
+        assertThat(keylessToVisits).hasSize(4);
+    }
+}
diff --git a/jcore-pmc-db-reader/src/test/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplierTest.java b/jcore-pmc-db-reader/src/test/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplierTest.java
new file mode 100644
index 000000000..f8d65f822
--- /dev/null
+++ b/jcore-pmc-db-reader/src/test/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplierTest.java
@@ -0,0 +1,103 @@
+package de.julielab.jcore.multiplier.pmc;
+
+import de.julielab.costosys.Constants;
+import de.julielab.costosys.dbconnection.DBCIterator;
+import de.julielab.costosys.dbconnection.DataBaseConnector;
+import de.julielab.jcore.db.test.DBTestUtils;
+import de.julielab.jcore.types.casmultiplier.RowBatch;
+import de.julielab.jcore.types.pubmed.Header;
+import de.julielab.jcore.utility.JCoReTools;
+import org.apache.commons.configuration2.ex.ConfigurationException;
+import org.apache.uima.UIMAException;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.JCasIterator;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSArray;
+import org.apache.uima.jcas.cas.StringArray;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+import org.testcontainers.containers.PostgreSQLContainer;
+
+import java.io.File;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Spliterators;
+import java.util.stream.Collectors;
+import java.util.stream.StreamSupport;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+class PMCDBMultiplierTest {
+    public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:" + DataBaseConnector.POSTGRES_VERSION);
+    private static String costosysConfig;
+
+    @BeforeAll
+    public static void setup() throws ConfigurationException {
+        postgres.start();
+        DBTestUtils.createAndSetHiddenConfig(Path.of("src", "test", "resources", "hiddenConfig").toString(), postgres);
+        // Import the test documents into a fresh data table and define a subset table on it.
+        DataBaseConnector dbc = DBTestUtils.getDataBaseConnector(postgres);
+        dbc.setActiveTableSchema("pmc");
+        costosysConfig = DBTestUtils.createTestCostosysConfig("pmc", 2, postgres);
+        new File(costosysConfig).deleteOnExit();
+        dbc.withConnectionExecute(d -> d.createTable(Constants.DEFAULT_DATA_TABLE_NAME, "Test data table."));
+        dbc.withConnectionExecute(d -> d.importFromXMLFile(Path.of("src", "test", "resources", "testdocs").toString(), Constants.DEFAULT_DATA_TABLE_NAME));
+        dbc.withConnectionExecute(d -> d.createSubsetTable("testsubset", Constants.DEFAULT_DATA_TABLE_NAME, "Test subset."));
+        dbc.withConnectionExecute(d -> d.initSubset("testsubset", Constants.DEFAULT_DATA_TABLE_NAME));
+        assertThat(dbc.countRowsOfDataTable(Constants.DEFAULT_DATA_TABLE_NAME, null)).isEqualTo(2); // one row per document ID asserted below
+        DBCIterator<byte[][]> documentIterator = (DBCIterator<byte[][]>) dbc.withConnectionQuery(d -> d.queryDataTable(Constants.DEFAULT_DATA_TABLE_NAME, null));
+        // check that the documents are actually in the database as expected
+        List<String> docIds = StreamSupport.stream(Spliterators.spliteratorUnknownSize(documentIterator, 0), false).map(b -> new String(b[0], StandardCharsets.UTF_8)).collect(Collectors.toList());
+        assertThat(docIds).containsExactlyInAnyOrder("PMC6949206", "PMC7511315");
+    }
+
+    @Test
+    public void next() throws Exception {
+        // The multiplier is expected to emit one CAS per primary key in the RowBatch. Only the document IDs
+        // from the Header annotations are verified here; the document text is not checked.
+        AnalysisEngine engine = AnalysisEngineFactory.createEngine(PMCDBMultiplier.class);
+        JCasIterator jCasIterator = engine.processAndOutputNewCASes(prepareCas());
+        List<String> docIds = new ArrayList<>();
+        while (jCasIterator.hasNext()) {
+            JCas newCas = jCasIterator.next();
+            docIds.add(JCasUtil.selectSingle(newCas, Header.class).getDocId());
+            newCas.release();
+        }
+        assertThat(docIds).containsExactlyInAnyOrder("PMC6949206", "PMC7511315");
+    }
+
+    /**
+     * Builds the multiplier input: a JCas with one indexed RowBatch that references the default data table, the "pmc" schema, the "testsubset" table and both test document IDs.
+     *
+     * @return The JCas holding the RowBatch.
+     * @throws UIMAException If some UIMA operation fails.
+     */
+    private JCas prepareCas() throws UIMAException {
+        JCas cas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-document-meta-types", "de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types", "de.julielab.jcore.types.jcore-casflow-types");
+        StringArray tables = new StringArray(cas, 1);
+        tables.set(0, Constants.DEFAULT_DATA_TABLE_NAME);
+        StringArray schemas = new StringArray(cas, 1);
+        schemas.set(0, "pmc");
+        // One single-element primary key per test document
+        FSArray primaryKeys = new FSArray(cas, 2);
+        for (String pmcId : List.of("PMC6949206", "PMC7511315")) {
+            StringArray primaryKey = new StringArray(cas, 1);
+            primaryKey.set(0, pmcId);
+            primaryKeys = JCoReTools.addToFSArray(primaryKeys, primaryKey);
+        }
+        // Assemble the RowBatch the multiplier reads its documents from
+        RowBatch batch = new RowBatch(cas);
+        batch.setTables(tables);
+        batch.setTableSchemas(schemas);
+        batch.setTableName("testsubset");
+        batch.setIdentifiers(primaryKeys);
+        batch.setCostosysConfiguration(costosysConfig);
+        batch.addToIndexes();
+        return cas;
+    }
+}
\ No newline at end of file
diff --git a/jcore-pmc-db-reader/src/test/resources/testdocs/PMC6949206.xml b/jcore-pmc-db-reader/src/test/resources/testdocs/PMC6949206.xml
new file mode 100644
index 000000000..d7bbf8d2e
--- /dev/null
+++ b/jcore-pmc-db-reader/src/test/resources/testdocs/PMC6949206.xml
@@ -0,0 +1,6 @@
+<!DOCTYPE article
+PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD with MathML3 v1.3 20210610//EN" "JATS-archivearticle1-3-mathml3.dtd">
+<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/"
+         article-type="research-article" dtd-version="1.3"><?properties open_access?><processing-meta base-tagset="archiving" mathml-version="3.0" table-model="xhtml" tagset-family="jats"><restricted-by>pmc</restricted-by></processing-meta><front><journal-meta><journal-id journal-id-type="nlm-ta">Protein Cell</journal-id><journal-id journal-id-type="iso-abbrev">Protein Cell</journal-id><journal-title-group><journal-title>Protein &#x00026; Cell</journal-title></journal-title-group><issn pub-type="ppub">1674-800X</issn><issn pub-type="epub">1674-8018</issn><publisher><publisher-name>Higher Education Press</publisher-name><publisher-loc>Beijing</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="pmid">31037510</article-id><article-id pub-id-type="pmc">PMC6949206</article-id><article-id pub-id-type="publisher-id">623</article-id><article-id pub-id-type="doi">10.1007/s13238-019-0623-2</article-id><article-categories><subj-group subj-group-type="heading"><subject>Research Article</subject></subj-group></article-categories><title-group><article-title>Rescue of premature aging defects in Cockayne syndrome stem cells by CRISPR/Cas9-mediated gene correction</article-title></title-group><contrib-group><contrib contrib-type="author" equal-contrib="yes"><name><surname>Wang</surname><given-names>Si</given-names></name><xref ref-type="aff" rid="Aff1">1</xref><xref ref-type="aff" rid="Aff2">2</xref><xref ref-type="aff" rid="Aff5">5</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name><surname>Min</surname><given-names>Zheying</given-names></name><xref ref-type="aff" rid="Aff1">1</xref><xref ref-type="aff" rid="Aff13">13</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name><surname>Ji</surname><given-names>Qianzhao</given-names></name><xref ref-type="aff" rid="Aff2">2</xref><xref ref-type="aff" rid="Aff4">4</xref></contrib><contrib 
contrib-type="author"><name><surname>Geng</surname><given-names>Lingling</given-names></name><xref ref-type="aff" rid="Aff5">5</xref></contrib><contrib contrib-type="author"><name><surname>Su</surname><given-names>Yao</given-names></name><xref ref-type="aff" rid="Aff5">5</xref></contrib><contrib contrib-type="author"><name><surname>Liu</surname><given-names>Zunpeng</given-names></name><xref ref-type="aff" rid="Aff3">3</xref><xref ref-type="aff" rid="Aff4">4</xref></contrib><contrib contrib-type="author"><name><surname>Hu</surname><given-names>Huifang</given-names></name><xref ref-type="aff" rid="Aff3">3</xref><xref ref-type="aff" rid="Aff4">4</xref></contrib><contrib contrib-type="author"><name><surname>Wang</surname><given-names>Lixia</given-names></name><xref ref-type="aff" rid="Aff2">2</xref><xref ref-type="aff" rid="Aff4">4</xref></contrib><contrib contrib-type="author"><name><surname>Zhang</surname><given-names>Weiqi</given-names></name><xref ref-type="aff" rid="Aff2">2</xref><xref ref-type="aff" rid="Aff4">4</xref><xref ref-type="aff" rid="Aff5">5</xref><xref ref-type="aff" rid="Aff6">6</xref><xref ref-type="aff" rid="Aff7">7</xref></contrib><contrib contrib-type="author"><name><surname>Suzuiki</surname><given-names>Keiichiro</given-names></name><xref ref-type="aff" rid="Aff9">9</xref><xref ref-type="aff" rid="Aff10">10</xref></contrib><contrib contrib-type="author"><name><surname>Huang</surname><given-names>Yu</given-names></name><xref ref-type="aff" rid="Aff11">11</xref></contrib><contrib contrib-type="author"><name><surname>Zhang</surname><given-names>Puyao</given-names></name><xref ref-type="aff" rid="Aff1">1</xref></contrib><contrib contrib-type="author"><name><surname>Tang</surname><given-names>Tie-Shan</given-names></name><xref ref-type="aff" rid="Aff4">4</xref><xref ref-type="aff" rid="Aff6">6</xref><xref ref-type="aff" rid="Aff12">12</xref></contrib><contrib contrib-type="author" 
corresp="yes"><name><surname>Qu</surname><given-names>Jing</given-names></name><address><email>qujing@ioz.ac.cn</email></address><xref ref-type="aff" rid="Aff3">3</xref><xref ref-type="aff" rid="Aff4">4</xref><xref ref-type="aff" rid="Aff6">6</xref></contrib><contrib contrib-type="author" corresp="yes"><name><surname>Yu</surname><given-names>Yang</given-names></name><address><email>yuyang5012@hotmail.com</email></address><xref ref-type="aff" rid="Aff1">1</xref></contrib><contrib contrib-type="author" corresp="yes"><name><surname>Liu</surname><given-names>Guang-Hui</given-names></name><address><email>ghliu@ibp.ac.cn</email></address><xref ref-type="aff" rid="Aff2">2</xref><xref ref-type="aff" rid="Aff4">4</xref><xref ref-type="aff" rid="Aff5">5</xref><xref ref-type="aff" rid="Aff6">6</xref><xref ref-type="aff" rid="Aff8">8</xref></contrib><contrib contrib-type="author" corresp="yes"><name><surname>Qiao</surname><given-names>Jie</given-names></name><address><email>jie.qiao@263.net</email></address><xref ref-type="aff" rid="Aff1">1</xref><xref ref-type="aff" rid="Aff13">13</xref></contrib><aff id="Aff1"><label>1</label><institution-wrap><institution-id institution-id-type="GRID">grid.411642.4</institution-id><institution-id institution-id-type="ISNI">0000 0004 0605 3760</institution-id><institution>Department of Obstetrics and Gynecology, Center for Reproductive Medicine, </institution><institution>Peking University Third Hospital, </institution></institution-wrap>Beijing, 100191 China </aff><aff id="Aff2"><label>2</label><institution-wrap><institution-id institution-id-type="GRID">grid.9227.e</institution-id><institution-id institution-id-type="ISNI">0000000119573309</institution-id><institution>National Laboratory of Biomacromolecules, CAS Center for Excellence in Biomacromolecules, Institute of Biophysics, </institution><institution>Chinese Academy of Sciences, </institution></institution-wrap>Beijing, 100101 China </aff><aff 
id="Aff3"><label>3</label><institution-wrap><institution-id institution-id-type="GRID">grid.9227.e</institution-id><institution-id institution-id-type="ISNI">0000000119573309</institution-id><institution>State Key Laboratory of Stem Cell and Reproductive Biology, Institute of Zoology, </institution><institution>Chinese Academy of Sciences, </institution></institution-wrap>Beijing, 100101 China </aff><aff id="Aff4"><label>4</label><institution-wrap><institution-id institution-id-type="GRID">grid.410726.6</institution-id><institution-id institution-id-type="ISNI">0000 0004 1797 8419</institution-id><institution>University of Chinese Academy of Sciences, </institution></institution-wrap>Beijing, 100049 China </aff><aff id="Aff5"><label>5</label><institution-wrap><institution-id institution-id-type="GRID">grid.413259.8</institution-id><institution-id institution-id-type="ISNI">0000 0004 0632 3337</institution-id><institution>Advanced Innovation Center for Human Brain Protection, National Clinical Research Center for Geriatric Disorders, </institution><institution>Xuanwu Hospital Capital Medical University, </institution></institution-wrap>Beijing, 100053 China </aff><aff id="Aff6"><label>6</label><institution-wrap><institution-id institution-id-type="GRID">grid.9227.e</institution-id><institution-id institution-id-type="ISNI">0000000119573309</institution-id><institution>Institute for Stem cell and Regeneration, </institution><institution>Chinese Academy of Sciences, </institution></institution-wrap>Beijing, 100101 China </aff><aff id="Aff7"><label>7</label><institution-wrap><institution-id institution-id-type="GRID">grid.9227.e</institution-id><institution-id institution-id-type="ISNI">0000000119573309</institution-id><institution>Key Laboratory of Genomic and Precision Medicine, Beijing Institute of Genomics, </institution><institution>Chinese Academy of Sciences, </institution></institution-wrap>Beijing, 100101 China </aff><aff 
id="Aff8"><label>8</label><institution-wrap><institution-id institution-id-type="GRID">grid.24696.3f</institution-id><institution-id institution-id-type="ISNI">0000 0004 0369 153X</institution-id><institution>Beijing Institute for Brain Disorders, </institution></institution-wrap>Beijing, 100069 China </aff><aff id="Aff9"><label>9</label><institution-wrap><institution-id institution-id-type="GRID">grid.136593.b</institution-id><institution-id institution-id-type="ISNI">0000 0004 0373 3971</institution-id><institution>Institute for Advanced Co-Creation Studies, </institution><institution>Osaka University, </institution></institution-wrap>Osaka, 560-8531 Japan </aff><aff id="Aff10"><label>10</label><institution-wrap><institution-id institution-id-type="GRID">grid.136593.b</institution-id><institution-id institution-id-type="ISNI">0000 0004 0373 3971</institution-id><institution>Graduate School of Engineering Science, </institution><institution>Osaka University, </institution></institution-wrap>Osaka, 560-8531 Japan </aff><aff id="Aff11"><label>11</label><institution-wrap><institution-id institution-id-type="GRID">grid.11135.37</institution-id><institution-id institution-id-type="ISNI">0000 0001 2256 9319</institution-id><institution>Department of Medical Genetics, School of Basic Medical Sciences, </institution><institution>Peking University Health Science Center, </institution></institution-wrap>Beijing, 100191 China </aff><aff id="Aff12"><label>12</label><institution-wrap><institution-id institution-id-type="GRID">grid.458458.0</institution-id><institution-id institution-id-type="ISNI">0000 0004 1792 6416</institution-id><institution>State Key Laboratory of Membrane Biology, </institution><institution>Institute of Zoology, Chinese Academy of Sciences, </institution></institution-wrap>Beijing, 100101 China </aff><aff id="Aff13"><label>13</label><institution-wrap><institution-id institution-id-type="GRID">grid.11135.37</institution-id><institution-id 
institution-id-type="ISNI">0000 0001 2256 9319</institution-id><institution>Peking-Tsinghua Center for Life Sciences, Academy for Advanced Interdisciplinary Studies, </institution><institution>Peking University, </institution></institution-wrap>Beijing, 100871 China </aff></contrib-group><pub-date pub-type="epub"><day>30</day><month>4</month><year>2019</year></pub-date><pub-date pub-type="pmc-release"><day>30</day><month>4</month><year>2019</year></pub-date><pub-date pub-type="ppub"><month>1</month><year>2020</year></pub-date><volume>11</volume><issue>1</issue><fpage>1</fpage><lpage>22</lpage><history><date date-type="received"><day>19</day><month>2</month><year>2019</year></date><date date-type="accepted"><day>12</day><month>3</month><year>2019</year></date></history><permissions><copyright-statement>&#x000a9; The Author(s) 2019</copyright-statement><license><ali:license_ref specific-use="textmining" content-type="ccbylicense">https://creativecommons.org/licenses/by/4.0/</ali:license_ref><license-p><bold>Open Access</bold>This article is distributed under the terms of the Creative Commons Attribution 4.0 International License (<ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">http://creativecommons.org/licenses/by/4.0/</ext-link>), which permits unrestricted use, distribution, and reproduction in any medium, provided you give appropriate credit to the original author(s) and the source, provide a link to the Creative Commons license, and indicate if changes were made.</license-p></license></permissions><abstract id="Abs1"><p id="Par1">Cockayne syndrome (CS) is a rare autosomal recessive&#x000a0;inherited&#x000a0;disorder&#x000a0;characterized by a variety of clinical features, including&#x000a0;increased sensitivity&#x000a0;to&#x000a0;sunlight,&#x000a0;progressive neurological abnormalities, and the appearance&#x000a0;of&#x000a0;premature aging. 
However, the pathogenesis of CS remains unclear due to the limitations of current disease models. Here, we generate integration-free induced pluripotent stem cells (iPSCs) from fibroblasts from a CS patient bearing mutations in <italic>CSB</italic>/<italic>ERCC6</italic> gene and further derive isogenic gene-corrected CS-iPSCs (GC-iPSCs) using the CRISPR/Cas9 system. CS-associated phenotypic defects are recapitulated in CS-iPSC-derived mesenchymal&#x000a0;stem&#x000a0;cells (MSCs) and neural stem cells (NSCs), both of which display increased susceptibility to DNA damage stress. Premature aging defects in CS-MSCs are rescued by the targeted correction of mutant <italic>ERCC6</italic>. We next map the transcriptomic landscapes in CS-iPSCs and GC-iPSCs and their somatic stem cell derivatives (MSCs and NSCs) in the absence or presence of ultraviolet (UV)&#x000a0;and replicative stresses, revealing that defects in DNA repair account for CS pathologies. Moreover, we generate autologous GC-MSCs free of pathogenic mutation under a cGMP (Current Good Manufacturing Practice)-compliant condition, which hold potential for use as improved biomaterials for future stem cell replacement therapy for CS. 
Collectively, our models demonstrate novel disease features and molecular mechanisms and lay a foundation for the development of novel therapeutic strategies to treat CS.</p><sec><title>Electronic supplementary material</title><p>The online version of this article (10.1007/s13238-019-0623-2) contains supplementary material, which is available to authorized users.</p></sec></abstract><kwd-group xml:lang="en"><title>Keywords</title><kwd>Cockayne syndrome</kwd><kwd>CRISPR/Cas9</kwd><kwd>gene correction</kwd><kwd>disease modelling</kwd><kwd>mesenchymal stem cell</kwd><kwd>neural stem cell</kwd></kwd-group><custom-meta-group><custom-meta><meta-name>issue-copyright-statement</meta-name><meta-value>&#x000a9; The Author(s) 2020</meta-value></custom-meta></custom-meta-group></article-meta></front><body><sec id="Sec1" sec-type="introduction"><title>INTRODUCTION</title><p id="Par2">Cockayne syndrome (CS) is an autosomal recessive disorder characterized by progressive multisystem clinical features, including cachectic dwarfism, clinical photosensitivity, progressive neurological degeneration, and premature aging (Karikkineth et al., <xref ref-type="bibr" rid="CR22">2017</xref>). Two genes that are defective in Cockayne syndrome, <italic>CSA</italic>/<italic>ERCC8</italic> (ERCC excision repair 8, CSA ubiquitin ligase complex subunit) and <italic>CSB</italic>/ERCC6 (ERCC excision repair 6, chromatin remodeling factor), have been identified. To date, two-thirds of CS patients have been linked to mutations in the <italic>CSB</italic>/<italic>ERCC6</italic> gene, and one-third of CS patients have been linked to mutations in the <italic>CSA</italic>/<italic>ERCC8</italic> gene. At least 78 different mutations in <italic>ERCC6</italic>, including typical missense mutations, frameshifts, and deletions, have been identified (Cleaver et al., <xref ref-type="bibr" rid="CR9">2009</xref>; Laugel, <xref ref-type="bibr" rid="CR27">2013</xref>). 
However, the underlying molecular mechanisms linking genotype to phenotype need to be clarified.</p><p id="Par3">DNA damage caused by exogenous ultraviolet (UV) radiation-induced photoproducts or similar chemically induced products is sensed by the cellular nucleotide excision repair (NER) system (Friedberg, <xref ref-type="bibr" rid="CR13">2001</xref>, <xref ref-type="bibr" rid="CR14">2003</xref>; Cleaver et al., <xref ref-type="bibr" rid="CR9">2009</xref>; McKay and Cabrita, <xref ref-type="bibr" rid="CR36">2013</xref>). The NER system consists of two pathways: global genomic repair (GGR), in which damage to DNA regions not undergoing transcription is repaired, and transcription-coupled repair (TCR), in which damage to transcribed DNA regions is repaired. Bulky DNA adducts usually block transcription elongation by RNA polymerase II (RNAPII); then, the arrested RNAPII initiates the repair of transcription-blocking DNA lesions by TCR to permit the efficient recovery of mRNA synthesis. If TCR cannot be executed, widespread sustained transcription blockage eventually leads to apoptosis (McKay and Cabrita, <xref ref-type="bibr" rid="CR36">2013</xref>). ERCC6 is an ATP-stimulated ATPase that is required for the ubiquitylation of the carboxyterminal domain of RNAPII in TCR and the recovery of mRNA synthesis. 
In addition, ERCC6 has been reported as a member of the SWI/SNF family of proteins that contain a nucleotide-binding site and play a role in chromatin maintenance and remodelling by modulating the negative supercoiling of DNA and facilitating DNA strand exchange, possibly through the recruitment of the histone acetyltransferase p300 (Newman et al., <xref ref-type="bibr" rid="CR40">2006</xref>; Cleaver et al., <xref ref-type="bibr" rid="CR9">2009</xref>; Velez-Cruz and Egly, <xref ref-type="bibr" rid="CR63">2013</xref>).</p><p id="Par4">Mice deficient for <italic>Ercc6</italic> or <italic>Ercc8</italic> have been generated and used to mimic mild CS symptoms, including fat tissue reduction, photoreceptor cell loss, and mild but characteristic nervous system pathology (van der Horst et al., <xref ref-type="bibr" rid="CR60">1997</xref>, <xref ref-type="bibr" rid="CR61">2002</xref>; Gorgels et al., <xref ref-type="bibr" rid="CR19">2007</xref>; Jaarsma et al., <xref ref-type="bibr" rid="CR21">2011</xref>). These mild CS mouse models are converted to severe CS models with short life spans, progressive nervous system degeneration and cachectic dwarfism after synergistic complete inactivation of global genome NER. For example, previous studies have demonstrated the simultaneous deleterious effects of intercrossing xeroderma pigmentosum (XP) (<italic>Xpa</italic><sup>&#x02212;/&#x02212;</sup> or <italic>Xpc</italic><sup>&#x02212;/&#x02212;</sup>) mice with CS (<italic>Csa</italic><sup>&#x02212;/&#x02212;</sup>, <italic>Csb</italic><sup>&#x02212;/&#x02212;</sup>, <italic>Xpd</italic><sup><italic>XPCS</italic></sup>) mice, which results in double mutants with very short life spans and dramatic progeroid features (Murai et al., <xref ref-type="bibr" rid="CR38">2001</xref>; Andressoo et al., <xref ref-type="bibr" rid="CR4">2006</xref>; van der Pluijm et al., <xref ref-type="bibr" rid="CR62">2007</xref>). 
Due to the differences in genetic and anatomic features between humans and mice, a human CS model needs to be established to reveal the cellular defects and molecular mechanisms for translation into a CS treatment.</p><p id="Par5">In this study, we report the generation of induced pluripotent stem cells (iPSCs) from the fibroblasts of a CS patient bearing two novel heterogeneous mutations in the <italic>ERCC6</italic> gene: c.643G&#x0003e;T in exon 4 and c.3776C&#x0003e;A in exon 18. We further derived gene-corrected CS-iPSCs (GC-iPSCs) using the CRISPR/Cas9-mediated gene editing technique. CS-iPSCs and GC-iPSCs were further differentiated into mesenchymal&#x000a0;stem&#x000a0;cells (MSCs) and neural stem cells (NSCs). Gene correction resulted in the effective restoration of DNA repair abilities and the alleviation of apoptosis and premature senescence, especially after exposure to UV irradiation or replicative stress (Fig.&#x000a0;<xref rid="Fig1" ref-type="fig">1</xref>A). RNA sequencing analysis indicated that the compromised DNA repair and cell cycle deregulation observed in CS cells account for various CS cellular pathologies. Finally, we obtained gene-corrected CS-iPSC-derived MSCs under a cGMP (Current Good Manufacturing Practice)-compliant condition, which display promising potential in autologous stem cell therapy.<fig id="Fig1"><label>Figure&#x000a0;1</label><caption><p><bold>Generation of CS-iPSCs and gene-corrected CS-iPSCs</bold>. (A) Schematic diagram of the generation of CS-iPSCs and GC-iPSCs, as well as their adult stem cell derivatives, for modelling Cockayne&#x000a0;syndrome. &#x0201c;Mut&#x0201d; represents mutant, &#x0201c;GC&#x0201d; represents gene corrected. (B) Genotype validation of two heterozygous mutations in the <italic>ERCC6</italic> gene by genomic DNA sequencing. Fibroblasts isolated from a healthy individual were used as a control. 
(C) Strategy for correcting the <italic>ERCC6</italic><sup>+/G643T</sup> mutation by the CRISPR/Cas9 system. The sequence of the gRNA is shown with the PAM sequence. Red crosses represent mutations in exon 4 and exon 18. The single-stranded oligodeoxynucleotide (ssODN) carrying a silent mutation (marked in green) was used as a repair template. (D) The correction of the <italic>ERCC6</italic><sup>+/G643T</sup> mutation was verified by genomic DNA sequencing. The red arrow highlights the corrected base pair. The green arrow indicates the inclusion of silent mutation introduced by the exogenous ssODN template. ERCC6<sup>mut</sup> represents CS-iPSCs, ERCC6<sup>GC</sup> represents GC-iPSCs. (E) Karyotyping analysis of CS-iPSCs and GC-iPSCs indicating their normal karyotypes. (F) No residual episomal vector element EBNA-1 was observed in CS-iPSCs or GC-iPSCs by qPCR analysis. CS-fibroblasts were electroporated with pCXLE-hOCT3/4-shp53-F, pCXLE-hSK and pCXLE-hUL. The fibroblasts were cultured for 4 more days after electroporation and then collected as the positive control, and human ESCs (line H9), GM00038-iPSCs and HFF-iPSCs were used as negative controls. Data are shown as the mean &#x000b1; SEM, <italic>n</italic> = 3. (G) No off-target mutations were observed in GC-iPSCs. Whole-genome sequencing was applied to detect potential off-target mutations in the GC-iPSC sample. 
NA, not applicable</p></caption><graphic xlink:href="13238_2019_623_Fig1_HTML" id="MO1"/></fig></p></sec><sec id="Sec2" sec-type="results"><title>RESULTS</title><sec id="Sec3"><title>Generation of non-integrative iPSCs from a CS patient</title><p id="Par6">We first isolated human primary fibroblasts from a Chinese CS patient and verified the presence of two nonsense mutations, c.643G&#x0003e;T (p.E215X) in exon 4 and c.3776C&#x0003e;A (p.S1259X) in exon 18, located at different alleles of the <italic>ERCC6</italic> gene by genomic DNA sequencing analysis (Fig.&#x000a0;<xref rid="Fig1" ref-type="fig">1</xref>B). To generate patient-specific iPSCs (CS-iPSCs), a cocktail of integration-free episomal vectors expressing the reprogramming factors OCT4, SOX2, KLF4, L-MYC, LIN28, and sh-p53 was electroporated into fibroblasts according to a modified reprogramming protocol, as previously described (Hishiya and Watanabe, <xref ref-type="bibr" rid="CR20">2004</xref>; Okita et al., <xref ref-type="bibr" rid="CR41">2011</xref>; Liu et al., <xref ref-type="bibr" rid="CR34">2014</xref>; Ding et al., <xref ref-type="bibr" rid="CR11">2015</xref>; Fu et al., <xref ref-type="bibr" rid="CR15">2016</xref>; Wang et al., <xref ref-type="bibr" rid="CR68">2017</xref>; Ling et al., <xref ref-type="bibr" rid="CR30">2019</xref>). The derived iPSCs displayed normal karyotypes, and no residual episomal reprogramming vector element was detected in established CS-iPSCs (Fig.&#x000a0;<xref rid="Fig1" ref-type="fig">1</xref>E and <xref rid="Fig2" ref-type="fig">2</xref>F). In addition, CS-iPSCs expressed comparable levels of pluripotency markers, including NANOG, OCT4, and SOX2 (Fig.&#x000a0;<xref rid="Fig2" ref-type="fig">2</xref>B and <xref rid="Fig2" ref-type="fig">2</xref>C). 
After being implanted subcutaneously into immunocompromised mice, CS-iPSCs were able to form teratomas comprising cells from three germ lineages, as indicated by TUJ1, SMA and FOXA2 expression (Fig.&#x000a0;<xref rid="Fig2" ref-type="fig">2</xref>D). These observations indicated that iPSCs bearing the CS-specific <italic>ERCC6</italic> mutation display normal pluripotency.<fig id="Fig2"><label>Figure&#x000a0;2</label><caption><p><bold>Characterization of CS-iPSCs and gene-corrected CS-iPSCs</bold>. (A) Western blot analysis showing increased protein levels of ERCC6 in GC-iPSCs. &#x003b2;-Actin was used as the loading control. (B) RT-PCR analysis of the pluripotency markers <italic>SOX2</italic>, <italic>OCT4</italic>, and <italic>NANOG</italic> in the CS-iPSCs and GC-iPSCs. 18S rRNA was used as the loading control. (C) Immunostaining of CS-iPSCs and GC-iPSCs for the pluripotency markers OCT4, NANOG, and SOX2. Nuclei were stained with Hoechst 33342. Scale bar, 50 &#x003bc;m. (D) Immunostaining of TUJ1 (ectoderm), SMA (mesoderm), and FOXA2 (endoderm) in teratomas derived from CS-iPSCs and GC-iPSCs. Nuclei were stained with Hoechst 33342. Scale bar, 50 &#x003bc;m. (E) The percentages of Ki67-positive cells in CS-iPSCs and GC-iPSCs were determined and compared. Nuclei were stained with Hoechst 33342. Scale bar, 50 &#x003bc;m. Data are presented as the mean &#x000b1; SEM, <italic>n</italic> = 3, ns, not significant. (F) Cell cycle profiles showing comparable percentages of different cell cycle phases in CS-iPSCs and GC-iPSCs by PI staining. 
Data are presented as the mean &#x000b1; SEM, <italic>n</italic> = 3</p></caption><graphic xlink:href="13238_2019_623_Fig2_HTML" id="MO2"/></fig></p></sec><sec id="Sec4"><title>Targeted gene correction of the <italic>ERCC6</italic> mutation by CRISPR/Cas9 system</title><p id="Par7">To better elucidate the pathogenic mechanism underlying CS, we generated isogenic gene-corrected iPSC lines by targeted gene editing of one of the two compound heterozygous <italic>ERCC6</italic> mutations. Using the CRISPR/Cas9 system, we electroporated an expression vector encoding mCherry and a guide RNA targeting the mutation in exon 4, a plasmid for Cas9-2A-GFP, and the single-stranded oligodeoxynucleotide (ssODN) template into CS-iPSCs (Wang et al., <xref ref-type="bibr" rid="CR68">2017</xref>). After fluorescence-activated cell sorting (FACS) for mCherry (guide RNA) and GFP (Cas9) double-positive cells, gene-corrected CS-iPSC clones were successfully obtained (Fig.&#x000a0;<xref rid="Fig1" ref-type="fig">1</xref>C). Site-specific gene correction of the c.643G&#x0003e;T mutation was confirmed by genomic DNA sequencing (Fig.&#x000a0;<xref rid="Fig1" ref-type="fig">1</xref>D). As the exogenous repair template ssODN was designed to contain a silent mutation, the introduced silent mutation was also found in the GC-iPSC clones, further confirming successful gene editing at the corresponding genomic target sites (Fig.&#x000a0;<xref rid="Fig1" ref-type="fig">1</xref>D). Similar to CS-iPSCs, we did not detect any residual episomal reprogramming vectors in GC-iPSCs (Fig.&#x000a0;<xref rid="Fig1" ref-type="fig">1</xref>F). Whole-genome DNA sequencing indicated no mutations in potential off-target sites after gene editing (Fig.&#x000a0;<xref rid="Fig1" ref-type="fig">1</xref>G). GC-iPSCs also showed a normal karyotype (Fig.&#x000a0;<xref rid="Fig1" ref-type="fig">1</xref>E). 
Western blots demonstrated elevated levels of the ERCC6 protein in GC-iPSCs (Fig.&#x000a0;<xref rid="Fig2" ref-type="fig">2</xref>A), implying that the correction of the pathogenic mutation recovered the protein expression of ERCC6. Additionally, GC-iPSCs normally expressed pluripotency markers, including OCT4, NANOG, and SOX2 (Fig.&#x000a0;<xref rid="Fig2" ref-type="fig">2</xref>B and <xref rid="Fig2" ref-type="fig">2</xref>C), and formed teratomas <italic>in vivo</italic> (Fig.&#x000a0;<xref rid="Fig2" ref-type="fig">2</xref>D). CS-iPSCs and GC-iPSCs were cultured for more than 50 passages without showing abnormal growth kinetics (Fig.&#x000a0;<xref rid="Fig2" ref-type="fig">2</xref>E and <xref rid="Fig2" ref-type="fig">2</xref>F). Unlike the previous study (Andrade et al., <xref ref-type="bibr" rid="CR3">2012</xref>), we did not observe elevated cellular reactive oxygen species (ROS) in CS-iPSCs compared to GC-iPSCs (Fig. S3A). In addition, RT-qPCR demonstrated that the expression levels of genes involved in the oxidative stress response were comparable between GC-iPSCs and CS-iPSCs (Fig. S3B). Taken together, these results indicated that we successfully generated GC-iPSCs exhibiting normal pluripotency.</p></sec><sec id="Sec5"><title>Alleviation of aging defects in gene-corrected CS-MSCs</title><p id="Par8">CS patients frequently exhibit musculoskeletal abnormalities, such as kyphosis, contracture and osteoporosis (Hishiya and Watanabe, <xref ref-type="bibr" rid="CR20">2004</xref>; Karikkineth et al., <xref ref-type="bibr" rid="CR22">2017</xref>). 
MSCs&#x000a0;are&#x000a0;multipotent&#x000a0;mesodermal cells that can&#x000a0;differentiate&#x000a0;into a variety of mesodermal cell types, including&#x000a0;osteoblasts,&#x000a0;chondrocytes,&#x000a0;and&#x000a0;adipocytes, which serve as a good cell model for investigating the accelerated degeneration of&#x000a0;mesodermal tissues caused by genetic mutations (Liu et al., <xref ref-type="bibr" rid="CR34">2014</xref>; Zhang et al., <xref ref-type="bibr" rid="CR77">2015</xref>, <xref ref-type="bibr" rid="CR79">2019</xref>; Kubben et al., <xref ref-type="bibr" rid="CR26">2016</xref>; Li et al., <xref ref-type="bibr" rid="CR29">2016</xref>; Pan et al., <xref ref-type="bibr" rid="CR45">2016</xref>; Geng et al., <xref ref-type="bibr" rid="CR16">2018</xref>; Wang et al., <xref ref-type="bibr" rid="CR70">2018b</xref>; Wu et al., <xref ref-type="bibr" rid="CR72">2018</xref>; Yan et al., <xref ref-type="bibr" rid="CR74">2019</xref>). Therefore, we first differentiated CS-iPSCs and GC-iPSCs into MSCs to investigate whether <italic>ERCC6</italic> mutations could result in accelerated attrition of the MSC pool. Both CS-MSCs and GC-MSCs were positive for mesenchymal progenitor markers, including CD73, CD90 and CD105 (Fig.&#x000a0;<xref rid="Fig3" ref-type="fig">3</xref>A). Consistent with the successful correction of <italic>ERCC6</italic> gene mutation, increased ERCC6 protein content was observed in GC-MSCs (Fig.&#x000a0;<xref rid="Fig3" ref-type="fig">3</xref>B). Next, we investigated whether normal ERCC6 activity is required for maintaining the cellular homeostasis of MSCs. Compared to isogenic gene-corrected control cells, CS-MSCs displayed features characteristic of premature senescence under replicative stress, including the early onset of cell growth arrest, reduced Ki67-positive cells, and increased senescence-associated (SA)-&#x003b2;-Gal activity (Fig.&#x000a0;<xref rid="Fig3" ref-type="fig">3</xref>C&#x02013;E). 
In addition, the expression levels of senescence markers, including P16, P21 and IL-8, were upregulated, while the geroprotective proteins Lamin B1 and LAP2 were downregulated in CS-MSCs relative to GC-MSCs at late passages (Fig.&#x000a0;<xref rid="Fig3" ref-type="fig">3</xref>F&#x02013;H). In line with the essential role of ERCC6 in NER, CS-MSCs exhibited increased expression of the DNA damage marker &#x003b3;H2AX (Fig.&#x000a0;<xref rid="Fig3" ref-type="fig">3</xref>I), indicating compromised DNA repair in ERCC6-deficient MSCs. Next, we investigated whether CS-MSCs underwent accelerated attrition <italic>in vivo.</italic> Implanting CS-MSCs and GC-MSCs expressing luciferase into the tibialis anterior (TA) muscle of immunodeficient mice resulted in accelerated <italic>in vivo</italic> decay in CS-MSCs compared to GC-MSCs (Fig.&#x000a0;<xref rid="Fig3" ref-type="fig">3</xref>J). Furthermore, we compared the multipotent differentiation potential of CS-MSCs and GC-MSCs. Relative to GC-MSCs, CS-MSCs exhibited impaired differentiation abilities towards osteoblasts, chondrocytes and white adipocytes (Fig.&#x000a0;<xref rid="Fig3" ref-type="fig">3</xref>K and <xref rid="Fig3" ref-type="fig">3</xref>L). Altogether, these results showed that CS-MSCs displayed typical premature cellular senescence, which was rescued by the targeted correction of mutant <italic>ERCC6</italic>.<fig id="Fig3"><label>Figure&#x000a0;3</label><caption><p><bold>Alleviated cellular senescence in gene-corrected CS-MSCs</bold>. (A) FACS analysis indicating the expression of the cell surface markers CD73, CD90 and CD105 in CS-MSCs and GC-MSCs. ERCC6<sup>mut</sup> represents CS-MSCs, ERCC6<sup>GC</sup> represents GC-MSCs. (B) Western blot analysis showing increased protein levels of ERCC6 in GC-MSCs. &#x003b2;-Actin was used as the loading control. (C) Growth curves showing the cumulative population doublings of CS-MSCs and GC-MSCs. 
(D) Immunostaining of Ki67 showing the decreased cell proliferation of CS-MSCs compared to GC-MSCs. The percentages of Ki67-positive cells are shown in the right panel. Scale bar, 20 &#x003bc;m. Data are presented as the mean &#x000b1; SEM, <italic>n</italic> = 3, **<italic>P</italic> &#x0003c; 0.01, ***<italic>P</italic> &#x0003c; 0.001. EP, early passage (P6); LP, late passage (P28). (E) SA-&#x003b2;-Gal staining of CS-MSCs and GC-MSCs at EP (P6) and LP (P28), respectively. The percentages of SA-&#x003b2;-Gal-positive cells are shown in the right panel. Scale bar, 50 &#x003bc;m. Data are presented as the mean &#x000b1; SEM, <italic>n</italic> = 3, **<italic>P</italic> &#x0003c; 0.01, ns, not significant. (F) RT-qPCR analysis of the expression of senescence markers in CS-MSCs and GC-MSCs at passage 28. The mRNA levels were normalized to CS-MSCs. (G) Western blot analysis of P16, LAP2 and Lamin B1 in CS-MSCs and GC-MSCs. GAPDH was used as the loading control. (H) Immunostaining of LAP2 and Lamin B in CS-MSCs and GC-MSCs. The relative intensity of LAP2 was measured with ImageJ software, and the data are shown as the mean &#x000b1; SEM, ***<italic>P</italic> &#x0003c; 0.001. More than 300 nuclei for each group were used for calculations. Scale bar, 20 &#x003bc;m. a.u., arbitrary units. (I) Immunostaining of &#x003b3;H2AX in CS-MSCs and GC-MSCs. The relative intensity of &#x003b3;H2AX was measured with ImageJ software, and the data are shown as the mean &#x000b1; SEM, ***<italic>P</italic> &#x0003c; 0.001. More than 300 nuclei for each group were used for calculations. Scale bar, 20 &#x003bc;m. a.u., arbitrary units. (J) Accelerated attrition of CS-MSCs <italic>in vivo</italic> was detected by an <italic>in vivo</italic> imaging system (IVIS). CS-MSCs (1 &#x000d7; 10<sup>6</sup>, left) and GC-MSCs (1 &#x000d7; 10<sup>6</sup>, right) (passage 25) infected with luciferase lentivirus were injected into the tibialis anterior (TA) muscles of immunodeficient mice. 
Luciferase activities were imaged and quantified at days 0, 2, 4, and 6 after transplantation. Data are presented as the ratios of the luciferase intensity of CS-MSCs to that of GC-MSCs (fold), mean &#x000b1; SD, <italic>n</italic> = 3, **<italic>P</italic> &#x0003c; 0.01, ***<italic>P</italic> &#x0003c; 0.001. (K) Comparative analysis of the osteogenic, chondrogenic and adipogenic differentiation potential of CS-MSCs and GC-MSCs. Von Kossa, Alcian blue, and oil red O staining were used to characterize osteoblasts, chondrocytes, and adipocytes, respectively. Scale bar, 50 &#x003bc;m. (L) The intensity of von Kossa staining was calculated by ImageJ and compared in the left panel. Data are presented as the mean &#x000b1; SEM, <italic>n</italic> = 3, **<italic>P</italic> &#x0003c; 0.01. The cross-sectional area of chondrocyte spheres was measured and is shown in the middle panel. Data are presented as the mean &#x000b1; SD, <italic>n</italic> = 14, ***<italic>P</italic> &#x0003c; 0.001. The relative intensity of oil red O was measured and is shown in the right panel. Data are presented as the mean &#x000b1; SEM, <italic>n</italic> = 3, ***<italic>P</italic> &#x0003c; 0.001</p></caption><graphic xlink:href="13238_2019_623_Fig3_HTML" id="MO3"/></fig></p></sec><sec id="Sec6"><title>Gene-corrected CS-MSCs display recovered DNA repair ability and resistance to UV-induced apoptosis and cell cycle arrest</title><p id="Par9">Next, we investigated whether mutations in <italic>ERCC6</italic> genes lead to impaired DNA damage repair ability after UV irradiation in MSCs. UV radiation usually results in the covalent dimerization of adjacent pyrimidines, typically thymine residues (thymine dimers), including cyclobutane pyrimidine dimers (CPDs) and (6-4) photoproducts (6-4PPs), in DNA (Setlow and Setlow, <xref ref-type="bibr" rid="CR50">1962</xref>; Friedberg, <xref ref-type="bibr" rid="CR14">2003</xref>; Cadet et al., <xref ref-type="bibr" rid="CR6">2005</xref>). 
Accordingly, we treated CS-MSCs and GC-MSCs with 10 J/m<sup>2</sup> UV irradiation and examined the levels of intranuclear CPDs by immunostaining. Both CS-MSCs and GC-MSCs showed low levels of CPDs in the absence of UV irradiation; however, CS-MSCs exhibited more CPD-positive cells than GC-MSCs did at 48 h after UV irradiation (Fig.&#x000a0;<xref rid="Fig4" ref-type="fig">4</xref>A). These results demonstrated that CS-MSCs were deficient in eliminating CPD photolesions after UV-induced DNA damage, and this ability was restored by <italic>ERCC6</italic> correction. We then explored whether CS-MSCs are hypersensitive to UV-induced cellular apoptosis. CS-MSCs and GC-MSCs were cultured in the presence or absence of 10 J/m<sup>2</sup> UV irradiation. UV irradiation induced marked cellular apoptosis in CS-MSCs relative to GC-MSCs at 48 h after UV irradiation (Fig.&#x000a0;<xref rid="Fig4" ref-type="fig">4</xref>B). Western blot analysis showed increased levels of cleaved PARP (c-PARP) in CS-MSCs following UV treatment (Fig.&#x000a0;<xref rid="Fig4" ref-type="fig">4</xref>C). In addition, we treated MSCs with a lower dose (1 J/m<sup>2</sup>) of UV light at each passage starting from passage 4. In this context, relative to GC-MSCs, CS-MSCs displayed compromised self-renewal ability and increased SA-&#x003b2;-Gal-positive cells (Fig.&#x000a0;<xref rid="Fig4" ref-type="fig">4</xref>D&#x02013;F), indicating that the ERCC6 deficiency rendered MSCs sensitive to replicative stress under low-dose chronic UV irradiation. Thus, CS-specific MSCs exhibited impaired DNA repair ability and increased susceptibility to UV-induced injury, and these phenotypes were rescued by the genetic correction of the pathogenic mutation.<fig id="Fig4"><label>Figure&#x000a0;4</label><caption><p><bold>Gene-corrected CS-MSCs display recovered DNA repair ability and counteract UV-induced apoptosis and senescence</bold>. 
(A) CPD immunostaining in CS-MSCs and GC-MSCs in the absence or presence of 10 J/m<sup>2</sup> UV exposure. Nuclei were stained with Hoechst 33342. Scale bar, 50 &#x003bc;m. More than 300 nuclei for each group were used for calculation. The data are shown as the mean &#x000b1; SEM, ns, not significant, ***<italic>P</italic> &#x0003c; 0.001. a.u., arbitrary units. (B) Apoptosis analysis of CS-MSCs and GC-MSCs at 48 h after 10 J/m<sup>2</sup> UV irradiation. Quantitative data are presented as the mean &#x000b1; SEM, <italic>n</italic> = 3, **<italic>P</italic> &#x0003c; 0.01, ***<italic>P</italic> &#x0003c; 0.001. (C) Western blots showing PARP cleavage in CS-MSCs and GC-MSCs in the absence or presence of 10 J/m<sup>2</sup> UV exposure. GAPDH was used as a loading control. Quantitative data are presented as the mean &#x000b1; SD, <italic>n</italic> = 3, ns, not significant, *<italic>P</italic> &#x0003c; 0.05. (D) Growth curves showing the cumulative population doublings of CS-MSCs and GC-MSCs in the absence (control) or presence (UV) of 1 J/m<sup>2</sup> UV exposure at each passage starting from passage 4. (E) Clonal expansion assay showing the cell proliferation ability of CS-MSCs and GC-MSCs in the absence (control) or presence (UV) of 1 J/m<sup>2</sup> UV exposure at passage 10. The cells were stained with crystal violet after two weeks of culture, and the relative intensity of the crystal violet staining was quantified. Data are presented as the mean &#x000b1; SEM, <italic>n</italic> = 3, *<italic>P</italic> &#x0003c; 0.05, ***<italic>P</italic> &#x0003c; 0.001. (F) SA-&#x003b2;-Gal staining of CS-MSCs and GC-MSCs in the absence (control) or presence (UV) of 1 J/m<sup>2</sup> UV exposure at passage 10. The percentages of SA-&#x003b2;-Gal-positive cells are shown in the right panel. 
Data are presented as the mean &#x000b1; SEM, <italic>n</italic> = 3, **<italic>P</italic> &#x0003c; 0.01, ns, not significant</p></caption><graphic xlink:href="13238_2019_623_Fig4_HTML" id="MO4"/></fig></p></sec><sec id="Sec7"><title>Gene-corrected CS-NSCs display improved NER ability and reduced susceptibility to UV-induced apoptosis</title><p id="Par10">Due to the presence of obvious symptoms of neurodegeneration in CS patients (Cleaver et al., <xref ref-type="bibr" rid="CR9">2009</xref>; Natale, <xref ref-type="bibr" rid="CR39">2011</xref>; Laugel, <xref ref-type="bibr" rid="CR27">2013</xref>; Shehata et al., <xref ref-type="bibr" rid="CR52">2014</xref>), we next differentiated CS-iPSCs and GC-iPSCs into NSCs (referred to as CS-NSCs and GC-NSCs, respectively). Both CS-NSCs and GC-NSCs showed typical neural progenitor morphology and expressed the NSC markers Nestin, PAX6 and SOX2 (Fig.&#x000a0;<xref rid="Fig5" ref-type="fig">5</xref>A). Western blots confirmed the increased protein expression of ERCC6 in GC-NSCs compared to that in uncorrected CS-NSCs (Fig.&#x000a0;<xref rid="Fig5" ref-type="fig">5</xref>B). To investigate whether mutations in the <italic>ERCC6</italic> gene impair the DNA repair ability of NSCs, we treated CS-NSCs and GC-NSCs with 5 J/m<sup>2</sup> UV irradiation and then examined the levels of intranuclear CPDs. Similar to the results obtained with MSCs, higher levels of CPDs were observed in CS-NSCs than in GC-NSCs at 48 h after UV irradiation, indicating that targeted gene correction effectively rescued the hypersensitivity of CS-NSCs to UV irradiation (Fig.&#x000a0;<xref rid="Fig5" ref-type="fig">5</xref>C). Consistent with this finding, gene correction resulted in decreased cellular apoptosis in CS-NSCs in the presence of UV treatment (Fig.&#x000a0;<xref rid="Fig5" ref-type="fig">5</xref>D and <xref rid="Fig5" ref-type="fig">5</xref>E). 
Altogether, these results indicated that CS-NSCs, which are characterized by a DNA repair deficit, were prone to UV-induced apoptosis, while genetic correction resulted in the restoration of these phenotypic defects.<fig id="Fig5"><label>Figure&#x000a0;5</label><caption><p><bold>Gene-corrected CS-NSCs show increased NER ability and decreased susceptibility to UV-induced apoptosis</bold>. (A) Immunostaining of the NSC markers Nestin, PAX6, and SOX2 in the CS-NSCs and GC-NSCs. The nuclei were stained with Hoechst 33342. Scale bar, 50 &#x003bc;m. ERCC6<sup>mut</sup> represents CS-NSCs, ERCC6<sup>GC</sup> represents GC-NSCs. (B) Western blot analysis showing increased protein levels of ERCC6 in GC-NSCs. &#x003b2;-Actin was used as the loading control. (C) CPD immunostaining in CS-NSCs and GC-NSCs in the absence or presence of 5 J/m<sup>2</sup> UV exposure. Nuclei were stained with Hoechst 33342. Scale bar, 50 &#x003bc;m. Over 300 nuclei were used for calculations. The data are shown as the mean &#x000b1; SEM, ***<italic>P</italic> &#x0003c; 0.001. a.u., arbitrary units. (D) Apoptosis analysis of CS-NSCs and GC-NSCs at 48 h after 5 J/m<sup>2</sup> UV irradiation. Quantitative data are presented as the mean &#x000b1; SEM, <italic>n</italic> = 3, *<italic>P</italic> &#x0003c; 0.05, ***<italic>P</italic> &#x0003c; 0.001. (E) Western blots showing PARP cleavage in CS-NSCs and GC-NSCs in the absence or presence of 5 J/m<sup>2</sup> UV exposure. GAPDH was used as a loading control. 
Quantitative data are presented as the mean &#x000b1; SD, <italic>n</italic> = 3, *<italic>P</italic> &#x0003c; 0.05, ns, not significant</p></caption><graphic xlink:href="13238_2019_623_Fig5_HTML" id="MO5"/></fig></p></sec><sec id="Sec8"><title>The <italic>ERCC6</italic> mutation results in gene expression changes associated with impaired DNA damage repair, chromatin disorganization, and compromised cell proliferation</title><p id="Par11">To investigate whether gene expression profiles were disrupted in CS-specific iPSCs, MSCs and NSCs, we performed genome-wide RNA sequencing (RNA-seq) analysis (Figs.&#x000a0;<xref rid="Fig6" ref-type="fig">6</xref>, S1 and S2). Principal component analysis (PCA) showed that the RNA profiles of MSCs, iPSCs and NSCs were separated as three independent subgroups (Fig.&#x000a0;<xref rid="Fig6" ref-type="fig">6</xref>A), implying the existence of unique RNA expression patterns in each cell type. While there were minimal gene expression changes between CS-iPSCs and GC-iPSCs and between CS-NSCs and GC-NSCs, the mutation of <italic>ERCC6</italic> resulted in marked changes in the transcriptome of MSCs (Figs.&#x000a0;<xref rid="Fig6" ref-type="fig">6</xref>B and S1C). These observations were in line with the most striking phenotypes in CS-MSCs relative to their gene-corrected counterparts under basal culture conditions (Fig.&#x000a0;<xref rid="Fig3" ref-type="fig">3</xref>C&#x02013;E). UV treatment results in an increased difference in transcriptional profiles between GC-MSCs and CS-MSCs and between GC-NSCs and CS-NSCs (Figs.&#x000a0;<xref rid="Fig6" ref-type="fig">6</xref>B and S1C). Notably, UV treatment induced dramatic gene expression changes in CS-specific MSCs and CS-specific NSCs (Fig. 
S1E), which were associated with increased DNA damage, impaired transcription, and compromised cell growth; these changes, however, became insensitive in <italic>ERCC6</italic>-corrected MSCs and NSCs, indicating that gene correction resulted in the restoration of normal transcriptional and DNA repair activity under DNA damage stress (Fig.&#x000a0;<xref rid="Fig6" ref-type="fig">6</xref>C). After extensive passaging, we also observed a panel of upregulated genes related to cell division and DNA damage repair in <italic>ERCC6</italic>-corrected MSCs compared to diseased MSCs (Fig.&#x000a0;<xref rid="Fig6" ref-type="fig">6</xref>D), which is in line with the rescue of premature cellular senescence in gene-corrected MSCs (Fig.&#x000a0;<xref rid="Fig3" ref-type="fig">3</xref>C&#x02013;J). Collectively, these transcriptomic changes support the improved cell proliferation and increased DNA damage repair ability in <italic>ERCC6</italic>-corrected adult stem cells.<fig id="Fig6"><label>Figure&#x000a0;6</label><caption><p><bold>The global gene expression profiles of CS-iPSCs and gene-corrected CS-iPSCs and their adult stem cell derivatives</bold>. (A) PCA of CS cells and GC cells in the absence or presence of UV (Ctrl or UV), as well as under replicative senescence (RS) stress. Each point represents a sample. Data points were computed based on Log<sub>2</sub>(FPKM + 1). (B) Volcano plots showing the differentially expressed genes between CS-iPSCs and GC-iPSCs, between CS-MSCs and GC-MSCs, and between CS-NSCs and GC-NSCs in the absence of UV (the upper panel) or in the presence of UV (the lower panel, UV), or under RS stress (the lower panel, RS). Red represents upregulated genes, and blue represents downregulated genes. (C) Gene Ontology Biological Process (GO-BP) enrichment analysis of significantly upregulated/downregulated genes in GC-MSCs compared to CS-MSCs upon UV treatment. Red represents upregulated genes, and blue represents downregulated genes. 
(D) Gene Ontology Biological Process (GO-BP) enrichment analysis of significantly upregulated/downregulated genes in GC-MSCs compared to CS-MSCs under RS stress. Red represents upregulated genes, and blue represents downregulated genes</p></caption><graphic xlink:href="13238_2019_623_Fig6_HTML" id="MO6"/></fig></p></sec><sec id="Sec9"><title>Gene-corrected CS-MSCs produced in accordance with cGMP compliance guidelines show alleviated senescence and increased resistance to UV-induced apoptosis</title><p id="Par12">Human mesenchymal stem cells hold the potential to be used for the treatment of aging-related disorders (Orozco et al., <xref ref-type="bibr" rid="CR42">2011</xref>, <xref ref-type="bibr" rid="CR43">2013</xref>, <xref ref-type="bibr" rid="CR44">2014</xref>; Golpanian et al., <xref ref-type="bibr" rid="CR17">2016</xref>, <xref ref-type="bibr" rid="CR18">2017</xref>; Tompkins et al., <xref ref-type="bibr" rid="CR58">2017</xref>; Yang et al., <xref ref-type="bibr" rid="CR75">2017</xref>; Yan et al., <xref ref-type="bibr" rid="CR74">2019</xref>). We next tested whether ERCC6-corrected CS-MSCs can be produced under a cGMP-compliant condition. Accordingly, we derived MSCs from iPSCs using a serum-free, animal component-free differentiation medium. The differentiation protocol was slightly modified from the serum-containing procedure (see experimental method). FACS analysis demonstrated that the derived MSCs expressed the mesenchymal progenitor cell-specific markers CD73, CD90 and CD105 (Fig.&#x000a0;<xref rid="Fig7" ref-type="fig">7</xref>A). The absence of pluripotent stem cell contamination in the derived MSCs was verified by RT-qPCR and immunostaining assays (Fig.&#x000a0;<xref rid="Fig7" ref-type="fig">7</xref>B and <xref rid="Fig7" ref-type="fig">7</xref>C). 
Whole-genome DNA sequencing further validated the genomic integrity during somatic cell reprogramming, gene correction, and directed differentiation to MSCs (Fig.&#x000a0;<xref rid="Fig7" ref-type="fig">7</xref>D and <xref rid="Fig7" ref-type="fig">7</xref>E). Sterility and pathogen testing demonstrated that there was no endotoxin, mycoplasma, bacteria, or virus contamination in the culture medium of the GC-MSCs (Fig.&#x000a0;<xref rid="Fig7" ref-type="fig">7</xref>F). To evaluate any potential risk of tumorigenesis <italic>in vivo</italic>, immunodeficient mice were subcutaneously injected with the <italic>ERCC6</italic>-corrected MSCs. Human ESC (line H9) and U2-OS osteosarcoma cell lines were implanted independently as positive controls. We observed that the GC-MSCs failed to form tumors, even at 8 months after implantation, in contrast with the teratomas formed from hESCs and tumors formed from U2-OS cells at 2 months post-injection (Fig.&#x000a0;<xref rid="Fig7" ref-type="fig">7</xref>G).<fig id="Fig7"><label>Figure&#x000a0;7</label><caption><p><bold>Safety analysis of gene-corrected CS-MSCs obtained under a cGMP-compliant condition</bold>. (A) FACS analysis indicated the expression of the cell surface markers CD73, CD90 and CD105 in CS-MSCs and GC-MSCs. (B) RT-qPCR analysis of the expression of pluripotency markers <italic>OCT4</italic>, <italic>NANOG</italic>, and <italic>SOX2</italic> in CS-MSCs and GC-MSCs. GC-iPSCs and CS-fibroblasts were used as positive and negative controls, respectively. Data are presented as the mean &#x000b1; SEM, <italic>n</italic> = 3. (C) Immunostaining of the pluripotency marker NANOG in CS-MSCs and GC-MSCs. GC-iPSCs were used as a positive control. Scale bar, 50 &#x003bc;m. (D) Whole-genome sequencing of single-nucleotide variants (SNVs) in CS-fibroblasts, CS-iPSCs, GC-iPSCs, CS-MSCs and GC-MSCs. 
Sites with a heterozygosity percentage ranging between 0% and 30% were considered as SNV sites, and sites with a heterozygosity of &#x0003e;30% were considered as single-nucleotide polymorphisms (SNPs). (E) Whole-genome sequencing of copy number variations (CNVs) in CS-fibroblasts, CS-iPSCs, GC-iPSCs, CS-MSCs and GC-MSCs. Each point represents normalized coverage depth of each 500-kb genomic region of each chromosome. (F) Sterility and pathogen testing of the conditioned medium of GC-MSCs. <sup>a</sup> Endotoxin was identified as negative when the concentration was &#x0003c; 0.25 EU/mL. <sup>b</sup> CMV was identified as negative when the ratio of the OD<sub>450</sub> value of the sample to the cut-off value (S/Co) was &#x0003c; 1.0. <sup>c</sup> HAV was identified as negative when the ratio of the cut-off value to the OD<sub>450</sub> value of the sample (Co/S) was &#x0003c; 0.9. <sup>d</sup> HCV was identified as negative when the ratio of the OD<sub>450</sub> value of the sample to the cut-off value (S/Co) was &#x0003c; 0.9. <sup>e</sup> HIV-1 was identified as negative when the concentration was 0 pg/mL. (G) Evaluation of the potential tumorigenesis risk of GC-MSCs <italic>in vivo</italic>. A subcutaneous injection of GC-MSCs was performed in immune-deficient mice. Human ESC (line H9) and U2-OS osteosarcoma cell lines were also implanted independently as positive controls. Representative images in the lower panel show the teratoma and tumor formed from positive cells two months after transplantation. Scale bar, 0.5 cm. HE staining of a teratoma and tumor is shown in the upper panel. Scale bar, 100 &#x003bc;m. The <italic>in vivo</italic> tumor-formation incidence of each cell type was calculated. 
<italic>n</italic> = 4 for each positive cell group, <italic>n</italic> = 5 for the GC-MSC group</p></caption><graphic xlink:href="13238_2019_623_Fig7_HTML" id="MO7"/></fig></p><p id="Par13">Phenotypically, compared to diseased MSCs, gene-corrected MSCs generated following the cGMP compliance standard displayed increased cell proliferation and attenuated cellular senescence (Fig.&#x000a0;<xref rid="Fig8" ref-type="fig">8</xref>A and <xref rid="Fig8" ref-type="fig">8</xref>B). In addition, the GC-MSCs were insensitive to UV-induced apoptosis (Fig.&#x000a0;<xref rid="Fig8" ref-type="fig">8</xref>C and <xref rid="Fig8" ref-type="fig">8</xref>D). Consistent with an improved activity, these GC-MSCs exhibited better tri-lineage differentiation potential towards osteoblasts, chondrocytes and adipocytes (Fig. S3C&#x02013;D). A fat pad implantation assay further demonstrated the superior <italic>in vivo</italic> neovascularization ability of GC-MSCs (Fig.&#x000a0;<xref rid="Fig8" ref-type="fig">8</xref>E). Altogether, we successfully generated <italic>ERCC6</italic>-corrected MSCs with normal functional activity under a cGMP-compliant condition.<fig id="Fig8"><label>Figure&#x000a0;8</label><caption><p><bold>Gene-corrected CS-MSCs generated under a cGMP-compliant condition displayed alleviated aging defects and decreased susceptibility to UV-induced apoptosis</bold>. (A) Clonal expansion assay showing the cell proliferation ability of CS-MSCs and GC-MSCs. The cells were stained with crystal violet after a two-week culture, and the relative intensity of the crystal violet was quantified. Data are presented as the mean &#x000b1; SEM, <italic>n</italic> = 4, **<italic>P</italic> &#x0003c; 0.01. Scale bar, 50 &#x003bc;m. (B) SA-&#x003b2;-Gal staining of CS-MSCs and GC-MSCs. The percentages of SA-&#x003b2;-Gal-positive cells are shown in the right panel. Data are presented as the mean &#x000b1; SEM, <italic>n</italic> = 3, **<italic>P</italic> &#x0003c; 0.01. 
Scale bar, 50 &#x003bc;m. (C) Apoptosis analysis of CS-MSCs and GC-MSCs 48 h after 10 J/m<sup>2</sup> UV irradiation. Quantitative data are presented as the mean &#x000b1; SEM, <italic>n</italic> = 3, ***<italic>P</italic> &#x0003c; 0.001. (D) Western blots showing PARP cleavage of CS-MSCs and GC-MSCs in the presence of 10 J/m<sup>2</sup> UV exposure. &#x003b2;-Actin was used as a loading control. (E) Fat pad transplantation with CS-MSCs and GC-MSCs. Left: representative immunofluorescent images showing neovascularization; right: the number of hCD31-positive vessels calculated based on 24 slices from inconsecutive frozen sections. Data are presented as the mean &#x000b1; SD, <italic>n</italic> = 3 for each group, **<italic>P</italic> &#x0003c; 0.01. Scale bar, 50 &#x003bc;m</p></caption><graphic xlink:href="13238_2019_623_Fig8_HTML" id="MO8"/></fig></p></sec></sec><sec id="Sec10" sec-type="discussion"><title>DISCUSSION</title><p id="Par14">Although several mouse models exhibiting the clinical symptoms of CS have been generated and have provided valuable insights into the disease mechanism, there are still many differences in clinical features between CS patients and mouse models. For instance, in contrast to human CS patients, who do not develop skin cancer, <italic>ERCC6</italic> mutant mice show increased susceptibility to skin cancer (van der Horst et al., <xref ref-type="bibr" rid="CR60">1997</xref>, <xref ref-type="bibr" rid="CR61">2002</xref>). Thus, CS mouse models do not fully mimic the pathophysiology of CS patients, and the knowledge learned from animal models may be poorly translated to the clinic. CS patient-specific iPSCs were initially obtained by reprogramming fibroblasts from CS patients using retroviral vectors, and these cells exhibited an elevated cell death rate and increased ROS production (Andrade et al., <xref ref-type="bibr" rid="CR3">2012</xref>). 
Our study, however, did not identify increased oxidative stress or altered levels of <italic>TXNIP</italic> (Fig. S3A and S3B). These differences may be attributed to the reprogramming vectors. Andrade et al. used retroviral vectors, which may result in random genomic integration and genomic instability during the reprogramming process. In addition, the same research group recently reported that CS-iPSC-derived neurons display reduced synapse density and altered neural network synchrony (Vessoni et al., <xref ref-type="bibr" rid="CR65">2016</xref>). Again, this study was based on a retroviral vector-mediated somatic reprogramming technique. More importantly, due to the lack of an isogenic &#x0201c;disease-free&#x0201d; control iPSC line, it is hard to determine whether the phenotypic differences are caused by <italic>ERCC6</italic> gene mutations or genetic background variations between CS patients and control individuals. To faithfully recapitulate human CS pathogenesis, a reliable human iPSC-based disease model with isogenic gene-corrected cells is required. In this study, we generated transgene-free iPSCs from the fibroblasts of a CS patient bearing newly identified heterozygous disease-causing mutations in the <italic>ERCC6</italic> gene and obtained isogenic gene-corrected iPSCs using the CRISPR/Cas9 system. 
These iPSCs were further differentiated into two types of adult stem cells, MSCs and NSCs, which presented a panel of new disease phenotypes.</p><p id="Par15">Although previous studies have reported that the deficiency of functional DNA repair proteins may hinder somatic cell reprogramming and teratoma formation <italic>in vivo</italic> (i.e., WRN (Shimamoto et al., <xref ref-type="bibr" rid="CR53">2014</xref>; Wang et al., <xref ref-type="bibr" rid="CR71">2018c</xref>), p53 (Kawamura et al., <xref ref-type="bibr" rid="CR23">2009</xref>), and Fanconi genes (Muller et al., <xref ref-type="bibr" rid="CR37">2012</xref>)), we did not observe any defects in the derivation or pluripotency of CS patient-specific iPSC lines. Moreover, <italic>ERCC6</italic> gene mutations did not compromise the chromosomal integrity of iPSCs, as indicated by karyotype analysis. Our study also provides proof of concept that CRISPR/Cas9-mediated gene editing may be amenable to correcting <italic>ERCC6</italic> mutation in a therapeutic context. Whole-genome DNA sequencing demonstrated minimal mutational load in patient iPSCs after targeted gene correction.</p><p id="Par16">Although CS patients exhibit musculoskeletal abnormalities (Hishiya and Watanabe, <xref ref-type="bibr" rid="CR20">2004</xref>), there are limited reports concerning mesodermal cells. Using an iPSC-based system, we have for the first time generated CS-specific MSCs that display differentiation potential towards osteoblasts, chondrocytes and white adipocytes, and these cells serve as a good cell model to study mesodermal abnormalities in CS patients. Consistent with the premature degeneration of mesenchymal progenitor cells, CS-MSCs exhibit decreased cell proliferation, accelerated senescence, and compromised differentiation ability towards osteoblasts, chondrocytes and white adipocytes, which may constitute one of the causes of the observed defects in the musculoskeletal system. 
In addition, in agreement with previous reports showing confounding defects in the neural system in CS patients (Cleaver et al., <xref ref-type="bibr" rid="CR9">2009</xref>; Natale, <xref ref-type="bibr" rid="CR39">2011</xref>; Laugel, <xref ref-type="bibr" rid="CR27">2013</xref>; Sacco et al., <xref ref-type="bibr" rid="CR49">2013</xref>; Ciaffardini et al., <xref ref-type="bibr" rid="CR8">2014</xref>; Vessoni et al., <xref ref-type="bibr" rid="CR65">2016</xref>), our data indicated severe DNA repair defects and increased susceptibility to UV-induced apoptosis in CS-iPSC-derived NSCs, therefore providing in-depth mechanistic insights into CS-associated neurological disorders.</p><p id="Par17">Regarding the molecular mechanism, we have generated the first <italic>ERCC6</italic> mutation-associated disease transcriptome landscapes of human MSCs and NSCs using an isogenic iPSC-based research system. Under normal culture conditions, mutation of <italic>ERCC6</italic> resulted in the most dramatic gene expression changes in MSCs relative to NSCs and iPSCs. Consistent with this finding, CS-specific MSCs demonstrated cell type-specific accelerated senescence after serial passaging. These results suggest that the attrition of the MSC pool and the resulting mesodermal defects are a major syndrome of CS. UV radiation generates photoproducts in genomic DNA that promote genetic mutations that contribute to skin carcinogenesis or cellular senescence (Amaro-Ortiz et al., <xref ref-type="bibr" rid="CR1">2014</xref>; Kemp et al., <xref ref-type="bibr" rid="CR24">2017</xref>). In this study, we found that <italic>ERCC6</italic> mutant MSCs and NSCs were highly susceptible to UV radiation. 
A defect in the initiation of transcription by RNAPII in UV-treated CS and XP/CS cells has been observed in previous studies (Rockx et al., <xref ref-type="bibr" rid="CR48">2000</xref>; Yamada et al., <xref ref-type="bibr" rid="CR73">2002</xref>; Proietti-De-Santis et al., <xref ref-type="bibr" rid="CR47">2006</xref>; Velez-Cruz et al., <xref ref-type="bibr" rid="CR64">2013</xref>). In line with these results,&#x000a0;we observed that transcriptional blockage was rescued in gene-corrected CS-MSCs after UV irradiation. In addition, the presence of the <italic>ERCC6</italic> mutation is associated with defects in gene expression linked to &#x0201c;cellular response to DNA damage&#x0201d;, &#x0201c;cellular response to stress&#x0201d; and &#x0201c;cell division&#x0201d;, indicating that the defective DNA repair in CS-specific adult stem cells mediates UV-induced cell phenotypic abnormalities. In addition, the mutation of <italic>ERCC6</italic> also led to gene expression changes related to &#x0201c;regulation of chromatin organization&#x0201d; in both NSCs and MSCs. Therefore, the pathogenesis of CS may involve a complex interplay among defects in DNA damage repair, chromatin organization, and cell cycle control.</p><p id="Par18">In the context of disease therapy, stem cell-based replacement therapy holds great promise toward restoring tissue homeostasis, e.g., for premature aging disorders (Golpanian et al., <xref ref-type="bibr" rid="CR18">2017</xref>; Tompkins et al., <xref ref-type="bibr" rid="CR58">2017</xref>). 
We and others have produced adult stem cells and other terminally differentiated cells from iPSCs derived from various human aging-related disorders, including Hutchinson-Gilford progeria syndrome (HGPS), Werner syndrome (WS), Fanconi anemia (FA), XP, amyotrophic lateral sclerosis (ALS), and Parkinson&#x02019;s disease (PD) (Liu et al., <xref ref-type="bibr" rid="CR31">2011a</xref>, <xref ref-type="bibr" rid="CR33">2012</xref>, <xref ref-type="bibr" rid="CR34">2014</xref>; Zhang et al., <xref ref-type="bibr" rid="CR77">2015</xref>; Fu et al., <xref ref-type="bibr" rid="CR15">2016</xref>; Wang et al., <xref ref-type="bibr" rid="CR68">2017</xref>). Using targeted gene editing techniques, we have also edited/corrected pathogenic mutations in these patient-derived iPSCs (Liu et al., <xref ref-type="bibr" rid="CR32">2011b</xref>, <xref ref-type="bibr" rid="CR33">2012</xref>, <xref ref-type="bibr" rid="CR34">2014</xref>; Wang et al., <xref ref-type="bibr" rid="CR68">2017</xref>). MSCs can differentiate into osteoblasts,&#x000a0;chondrocytes,&#x000a0;myocytes and&#x000a0;adipocytes. Previous studies have shown that MSCs ameliorate aging frailty in clinical trials (Golpanian et al., <xref ref-type="bibr" rid="CR17">2016</xref>, <xref ref-type="bibr" rid="CR18">2017</xref>; Tompkins et al., <xref ref-type="bibr" rid="CR58">2017</xref>). Recently, the generation of allogeneic or autologous MSCs from pluripotent stem cells has emerged as a promising new strategy for stem cell-based therapy (Yang et al., <xref ref-type="bibr" rid="CR75">2017</xref>; Castro-Vinuelas et al., <xref ref-type="bibr" rid="CR7">2018</xref>; Soontararak et al., <xref ref-type="bibr" rid="CR54">2018</xref>; Yan et al., <xref ref-type="bibr" rid="CR74">2019</xref>). In the present study, we have derived MSCs from gene-corrected CS-iPSCs under a cGMP-compliant condition. 
These MSCs demonstrated superior cellular activity compared to uncorrected diseased cells, retained high genomic stability, and did not form tumors <italic>in vivo</italic>. Therefore, clinical-grade GC-MSCs may represent important biomaterials for achieving autologous stem cell treatment for CS.</p><p id="Par19">In summary, the isogenic CS stem cell models established in this study provide a valuable platform for studying CS pathogenesis, discovering innovative drugs, and the development of new cell replacement therapies. The transcriptomic profiles underlying disease phenotypes may be useful for discovering biomarkers for diagnosis and the development of new therapeutic approaches.</p></sec><sec id="Sec11" sec-type="materials|methods"><title>MATERIALS AND METHODS</title><sec id="Sec12"><title>Antibodies and reagents</title><p id="Par20">The primary antibodies used were as follows (company, catalogue number): anti-ERCC6 (Abcam, ab96098), anti-NANOG (Abcam, ab21624), anti-SOX2 (Santa Cruz, sc-17320), anti-OCT4 (Santa Cruz, sc-5279), anti-SMA (Sigma, A5228), anti-TUJ1 (Sigma, T2200), anti-FOXA2 (Cell Signaling Technology, 8186S), anti-CD90-FITC (BD Bioscience, 555595), anti-CD73-PE (BD Bioscience, 550257), anti-CD105-APC (BD Bioscience, 17-1057-42), anti-IgG-FITC (BD Biosciences, 555748), anti-IgG-PE (BD Biosciences, 555749), anti-IgG-APC (BD Biosciences, 555751), anti-Lamin B (Santa Cruz, sc-6217), anti-LAP2 (BD Bioscience, 611000), anti-Ki67 (ZSGB-BIO, ZM0166), anti-P16 (BD Bioscience, 550834), anti-&#x003b3;-H2AX (Millipore, 05-636), anti-Nestin (Millipore, MAB5326), anti-PAX6 (Covance, PRB-278P), anti-CPD (Cosmo Bio, TMD-2), anti-cleaved PARP (Cell Signaling Technology, 9541), anti-&#x003b2;-Actin (Santa Cruz, sc69879), anti-GAPDH (Santa Cruz, sc-25778), and anti-hCD31 (BD Bioscience, 555445).</p></sec><sec id="Sec13"><title>Generation and genotyping of CS-specific fibroblasts</title><p id="Par21">CS-specific fibroblasts were generated from the skin biopsy of a 
CS patient carrying two heterozygous <italic>ERCC6</italic> mutations: c.643G&#x0003e;T in exon 4 and c.3776C&#x0003e;A in exon 18. Fibroblasts were cultured with high-glucose DMEM (HyClone) containing 10% fetal bovine serum (FBS, Gemcell), 1% penicillin/streptomycin (Gibco), and 0.1 mmol/L non-essential amino acids (Gibco). Genotyping of CS-specific fibroblasts was performed using a genomic DNA PCR assay with the primers listed in Table S1. Genomic DNA from the fibroblasts of a healthy donor was used as a control, as previously described (Fu et al., <xref ref-type="bibr" rid="CR15">2016</xref>).</p></sec><sec id="Sec14"><title>iPSC generation and culture</title><p id="Par22">CS patient-specific iPSCs were generated by the electroporation of fibroblasts with episomal vectors, including pCXLE-hSK, pCXLE-hOCT3/4-shp53-F and pCXLE-hUL, as previously described (Okita et al., <xref ref-type="bibr" rid="CR41">2011</xref>; Liu et al., <xref ref-type="bibr" rid="CR33">2012</xref>, <xref ref-type="bibr" rid="CR34">2014</xref>; Fu et al., <xref ref-type="bibr" rid="CR15">2016</xref>; Wang et al., <xref ref-type="bibr" rid="CR68">2017</xref>). The derived iPSC lines were cultured on mitomycin C-treated MEF feeder cells in human ESC medium or on Matrigel (BD Biosciences)-coated plates in mTeSR medium (STEMCELL Technology). The ESC medium consisted of DMEM/F12 (Invitrogen) supplemented with 20% KnockOut Serum Replacement (Invitrogen), 0.1 mmol/L non-essential amino acids (NEAA, Invitrogen), 1% penicillin/streptomycin (Gibco), 2 mmol/L GlutaMAX (Invitrogen), 55 &#x003bc;mol/L &#x003b2;-mercaptoethanol (Invitrogen), and 10 ng/mL bFGF (Joint Protein Central).</p></sec><sec id="Sec15"><title>Plasmid construction</title><p id="Par23">Guide RNA (gRNA) was designed with <ext-link ext-link-type="uri" xlink:href="http://crispr.mit.edu">http://crispr.mit.edu</ext-link>. The gRNAs were cloned into the pCAG-mCherry-gRNA vector (Addgene #87110). 
For the expression of Cas9 and GFP (Cas9-2A-GFP), the pCAG-1BPNLS-Cas9-1BPNLS-2AGFP plasmid (Addgene #87109) was used (Suzuki et al., <xref ref-type="bibr" rid="CR55">2016</xref>). The sequences for the gRNA target and ssODN used to repair mutant alleles are as follows: Exon 4-gRNA: GGATCACGCCAGTCTGGAGTAGG. <italic>ERCC6</italic>-ssODN, 5&#x02032;-CTAAAGAGACACCCTCCACTGACTACAGGCATCAGGCATCAATTCAAGAACACAGAGAAACTGCTCCTAGCATCCTCACCTGCATCCTCtTCCAGACTGGCGTGATCTAGTTCAATTTTCACCTCTG-3&#x02032;.</p></sec><sec id="Sec16"><title>Targeted gene correction in CS-iPSCs via the CRISPR/Cas9 system</title><p id="Par24">CRISPR/Cas9-mediated gene correction of <italic>ERCC6</italic> mutation was performed as previously described with some modifications (Peters et al., <xref ref-type="bibr" rid="CR46">2008</xref>). Briefly, 5 &#x000d7; 10<sup>6</sup> iPSCs were resuspended in 100 &#x003bc;L of Opti-MEM (Gibco) supplemented with 8 &#x003bc;g of Cas9-2A-GFP, 4 &#x003bc;g of gRNA-mCherry, and 8 &#x003bc;g of ssODN. After electroporation, the cells were cultured on Matrigel-coated plates in mTeSR medium. At forty-eight hours after electroporation, mCherry<sup>+</sup>/GFP<sup>+</sup> cells were collected by FACS and replated onto MEF feeder cells. Two weeks later, the iPSC clones were picked and identified by genomic DNA PCR and sequencing. The primers used are listed in Table S1.</p></sec><sec id="Sec17"><title>MSC generation and characterization</title><p id="Par25">The differentiation of CS-iPSCs and GC-iPSCs into MSCs was performed as previously described (Zhang et al., <xref ref-type="bibr" rid="CR77">2015</xref>; Pan et al., <xref ref-type="bibr" rid="CR45">2016</xref>; Wang et al., <xref ref-type="bibr" rid="CR70">2018b</xref>). 
Briefly, embryoid bodies were plated onto Matrigel-coated plates in differentiation medium (&#x003b1;MEM (Invitrogen) supplemented with 10% FBS (Gemcell), 10 ng/mL bFGF (Joint Protein Central, JPC), 5 ng/mL TGF&#x003b2; (Human Zyme), 0.1 mmol/L NEAA (Gibco) and 1% penicillin/streptomycin (Gibco)). The differentiated cells were then subjected to FACS to purify the CD73/CD90/CD105 (MSC-specific surface markers) triple-positive MSCs. The purified MSCs were then cultured in &#x003b1;MEM medium supplemented with 10% FBS, 1 ng/mL bFGF, 1% penicillin/streptomycin, and 0.1 mmol/L NEAA.</p><p id="Par26">Clinical-grade MSC differentiation and culture were performed in the cGMP level cell culture facility (Clinical-grade Stem Cell Research Center, Peking University Third Hospital) following the cGMP compliance guidelines. First, differentiation of iPSCs into MSCs was achieved using a process similar to that used for general MSCs except prepared in a xeno-free and serum-free condition. Briefly, embryoid bodies were plated onto vitronectin (Gibco, A14700)-coated plates in differentiation medium (BM MSC medium (Dakewe, DKW34-BM20500) supplemented with 5% serum replacement (Helios, GMP grade, HPCFDCGL50), 5 ng/mL TGF&#x003b2; (Human Zyme), 6 ng/mL bFGF (Joint Protein Central, JPC), 10 ng/mL EGF (Joint Protein Central, JPC), 10 ng/mL PDGF (Joint Protein Central, JPC) and 1% penicillin/streptomycin (Gibco)). Next, the differentiated cells were subjected to FACS to purify the CD73/CD90/CD105 triple-positive MSCs. 
The purified MSCs were then cultured in BM MSC medium supplemented with 5% serum replacement and 1% penicillin/streptomycin.</p><p id="Par27">The differentiation potential of the MSCs towards chondrocytes, osteoblasts and adipocytes was evaluated by staining with Alcian blue (chondrogenesis), von Kossa (osteogenesis) and an oil red O (adipogenesis) kit (IHC World) after differentiation of the indicated lineage, as previously described (Zhang et al., <xref ref-type="bibr" rid="CR77">2015</xref>; Pan et al., <xref ref-type="bibr" rid="CR45">2016</xref>; Wang et al., <xref ref-type="bibr" rid="CR70">2018b</xref>).</p></sec><sec id="Sec18"><title>Sterility and pathogen testing of MSCs generated under a cGMP-compliant condition</title><p id="Par28">The conditioned medium of GC-MSCs was collected for the following test. Cell debris in the conditioned medium was removed by centrifugation at 12,000 rpm and 4 &#x000b0;C for 5 min. In addition, the cell culture supernatant was immediately assayed. For CMV, HAV, HCV and HIV-1 ELISA detection, the optical density (O.D.) value for each sample was determined using a microplate reader set to 450 nm (OD<sub>450</sub>). The duplicate readings for each standard, control, and experimental sample were averaged, and the average zero standard O.D. was subtracted.<list list-type="alpha-lower"><list-item><p id="Par29">Mycoplasma detection</p></list-item></list></p><p id="Par30">Mycoplasma in the supernatant of the conditioned medium was detected by PCR. The primer sequences are listed in Table S1.<list list-type="simple"><list-item><label>b.</label><p id="Par31">Endotoxin detection</p></list-item></list></p><p id="Par32">Endotoxin in the supernatant of the conditioned medium was detected with the ToxinSensor Gel Clot Endotoxin Assay Kit (GenScript, Cat. No. L00351) according to the manufacturer&#x02019;s protocol. 
Briefly, 100 &#x003bc;L of the supernatants from the positive control (PC), negative control (NC) or experimental samples was transferred to the LAL reagent. The vials were capped and mixed thoroughly. All vials were placed in the incubation rack and incubated at 37 &#x000b0;C for 60 min. Then, the vials were inverted and checked to determine whether a gel was formed. The formation of the gel was considered endotoxin positive. The endotoxin level in the positive sample was higher than 0.25 EU/mL.<list list-type="simple"><list-item><label>c.</label><p id="Par33">CMV detection</p></list-item></list></p><p id="Par34">CMV IgM in the conditioned medium was detected by ELISA (MEDSON) according to the manufacturer&#x02019;s instructions. Briefly, 100 &#x003bc;L of the supernatants from the PC, NC or experimental samples was pipetted onto the microplate. After incubation with antigen and conjugate solution, the absorbance of the samples was determined at 450 nm. The test results are interpreted as a ratio of the sample (S) OD450 nm and the cut-off (Co) value (S/Co) according to the following standard: S/Co &#x0003c; 1.0 was considered negative; S/Co &#x0003e; 1.2 was considered positive. Co = NC + 0.25.<list list-type="simple"><list-item><label>d.</label><p id="Par35">HAV detection</p></list-item></list></p><p id="Par36">HAV IgM and IgG in the conditioned medium were detected by ELISA (DIA. PRO) following the manufacturer&#x02019;s protocol. Briefly, 100 &#x003bc;L of the supernatants from the PC, NC or experimental samples was pipetted onto the microplate. After incubation with antigen and conjugate solution, the absorbance of the samples was determined at 450 nm. The test results are interpreted as the ratio of the cut-off value to the sample OD<sub>450</sub> (Co/S) according to the following standard: Co/S &#x0003c; 0.9 was considered negative; Co/S &#x0003e; 1.1 was considered positive. 
Co = (NC + PC) / 3.<list list-type="simple"><list-item><label>e.</label><p id="Par37">HCV detection</p></list-item></list></p><p id="Par38">HCV IgM and IgG in the conditioned medium were detected by ELISA (DIA. PRO) according to the manufacturer&#x02019;s guidelines. First, 100 &#x003bc;L of the supernatants from the PC, NC or experimental samples was pipetted onto the microplate. After incubation with antigen and conjugate solution, the test results are interpreted as the ratio of OD<sub>450</sub> of the sample to the cut-off value (S/Co) according to the following standard: S/Co &#x0003c; 0.9 was considered negative; S/Co &#x0003e; 1.1 was considered positive. Co = NC + 0.35.<list list-type="simple"><list-item><label>f.</label><p id="Par39">HIV-1 detection</p></list-item></list></p><p id="Par40">HIV-1 Gag p24 in the conditioned medium was detected by ELISA (R&#x00026;D SYSTEMS) according to the manufacturer&#x02019;s protocol. Briefly, 100 &#x003bc;L of the supernatants from the standard, control or experimental samples was pipetted onto the microplate. After incubation with conjugate solution, the concentration of each sample was calculated by OD<sub>450</sub>. The minimum detectable dose of HIV-1 Gag p24 ranged from 0.24&#x02013;3.25 pg/mL.<list list-type="simple"><list-item><label>g.</label><p id="Par41">Febrile pathogen detection</p></list-item></list></p><p id="Par42">Pathogens in the conditioned medium were detected by the Febrile Antigens Kit (Rapid Labs). Briefly, 80 &#x003bc;L of the supernatants from the PC, NC or experimental samples was dispensed onto a 3 cm diameter circle. One drop of the antigen suspension was added to the sample. The reaction mixture was mixed well using a stirring stick, and the slide was rocked gently by hand for 1 min. The slides were immediately observed under suitable light for any degree of agglutination. Nonreactive: smooth suspension with no visible agglutination, as shown by the NC. 
Reactive: any degree of agglutination visible macroscopically.</p></sec><sec id="Sec19"><title>NSC generation and characterization</title><p id="Par43">NSC differentiation was conducted as previously described (Liu et al., <xref ref-type="bibr" rid="CR33">2012</xref>; Duan et al., <xref ref-type="bibr" rid="CR12">2015</xref>). In brief, iPSCs cultured on MEF feeder cells were differentiated with NIM-1 medium [50% Advanced DMEM/F12 (Invitrogen), 50% Neurobasal Medium (Invitrogen), 1&#x000d7; N2 Supplement (Invitrogen), 1&#x000d7; B27 Supplement (Invitrogen), 4 &#x000b5;mol/L CHIR99021 (Cellagentech), 3 &#x000b5;mol/L SB431542 (Cellagentech), 10 ng/mL human leukemia inhibitory factor (hLIF, Millipore), 2 &#x000b5;mol/L dorsomorphin (Sigma), 0.1 &#x000b5;mol/L Compound E (EMD Chemicals Inc.) and 2 mmol/L GlutaMAX (Invitrogen)]. Two days later, the medium was changed to NIM-2 medium (50% Advanced DMEM/F12, 50% Neurobasal Medium, 1&#x000d7; N2 Supplement, 1&#x000d7; B27 Supplement, 4 &#x000b5;mol/L CHIR99021, 3 &#x000b5;mol/L SB431542, 10 ng/mL hLIF, 0.1 &#x000b5;mol/L Compound E and 2 mmol/L GlutaMAX) for five more days. The NSCs were then generated and further cultured in NSC maintenance medium containing 50% Neurobasal Medium, 50% Advanced DMEM/F12, 1&#x000d7; N2 Supplement, 1&#x000d7; B27 Supplement, 2 mmol/L GlutaMAX, 3 &#x003bc;mol/L CHIR99021, 2 &#x003bc;mol/L SB431542 and 10 ng/mL hLIF.</p></sec><sec id="Sec20"><title>Animal experiments</title><p id="Par44">All animal experiments performed in this study were approved by the Chinese Academy of Science Institutional Animal Care and Use Committee. For the teratoma formation assay, 6-week-old male NOD-SCID mice were injected subcutaneously with 3 &#x000d7; 10<sup>6</sup> CS-iPSCs or GC-iPSCs in a Matrigel/mTeSR solution, as previously described (Zhang et al., <xref ref-type="bibr" rid="CR77">2015</xref>). Teratomas with a size of approximately 10 mm in diameter were collected and subjected to immunostaining. 
For the MSC <italic>in vivo</italic> imaging assay, 10<sup>6</sup> CS-MSCs or GC-MSCs expressing luciferase were transplanted into the TA muscle of 6-week-old male nude mice. The grafted cells were imaged with an IVIS spectrum imaging system (XENOGEN, Caliper) by detecting luciferase activity. To evaluate the potential tumorigenesis risk of GC-MSCs <italic>in vivo</italic>, a subcutaneous injection of GC-MSCs was performed in NSG mice. Human ESC (line H9) and U2-OS osteosarcoma cell lines were also implanted independently as positive controls. Fat pad transplantation was performed as previously described (Yu et al., <xref ref-type="bibr" rid="CR76">2016</xref>; Geng et al., <xref ref-type="bibr" rid="CR16">2018</xref>). CS-MSCs or GC-MSCs (1.5 &#x000d7; 10<sup>5</sup>) were freshly collected and resuspended in Matrigel mixture containing 50% Matrigel, 20% FBS in PBS, and 0.01% Trypan Blue (Sigma). The mixture was then injected into the fat pads of 3-week-old female NOD-SCID mice. Four weeks later, the fat pads were harvested for measuring MSC-derived vessel regeneration by immunofluorescence staining.</p></sec><sec id="Sec21"><title><bold>Senescence</bold>-<bold>associated &#x003b2;</bold>-<bold>galactosidase (SA</bold>-<bold>&#x003b2;</bold>-<bold>Gal) staining assay</bold></title><p id="Par45">SA-&#x003b2;-Gal staining was performed according to a previously described method (Debacq-Chainiaux et al., <xref ref-type="bibr" rid="CR10">2009</xref>; Zhang et al., <xref ref-type="bibr" rid="CR77">2015</xref>; Pan et al., <xref ref-type="bibr" rid="CR45">2016</xref>; Geng et al., <xref ref-type="bibr" rid="CR16">2018</xref>; Wang et al., <xref ref-type="bibr" rid="CR70">2018b</xref>). Each experiment was performed in three independent replicates.</p></sec><sec id="Sec22"><title>Clonal expansion assay</title><p id="Par46">Approximately 2000 cells were seeded into each well of 12-well plates and cultured for 2 weeks. 
Then, the cells were stained with 0.2% crystal violet, and the intensity of the crystal violet staining was quantified by ImageJ software. Each experiment was performed in three independent replicates.</p></sec><sec id="Sec23"><title><bold>RT</bold>-<bold>qPCR</bold></title><p id="Par47">Total RNA was extracted with TRIzol reagent (Invitrogen), and 2 &#x003bc;g of total RNA was used for cDNA synthesis using a reverse transcription master mix (Promega). Quantitative real-time PCR was conducted with the iTaq Universal SYBR Green Super Mix (Bio-Rad) with the CFX384 Real-Time PCR system (Bio-Rad). All data were normalized to the 18S rRNA transcript and calculated using the &#x00394;&#x00394;Cq method. All RT-qPCR primer pairs are listed in Table S1.</p></sec><sec id="Sec24"><title>Western blot</title><p id="Par48">Western blot was performed as previously described (Wang et al., <xref ref-type="bibr" rid="CR66">2015</xref>, <xref ref-type="bibr" rid="CR67">2016</xref>). Briefly, protein quantification was conducted using a BCA Kit. Protein lysates were subjected to SDS-PAGE and subsequently electrotransferred to a polyvinylidene fluoride membrane (Millipore). The membrane was incubated with the indicated primary antibodies overnight at 4 &#x000b0;C and HRP-conjugated secondary antibodies for 1 h at room temperature (RT), followed by visualization using the ChemiDoc XRS system (Bio-Rad). Quantification was performed with ImageJ software.</p></sec><sec id="Sec25"><title>Immunofluorescence</title><p id="Par49">Immunofluorescence was conducted as previously described (Wang et al., <xref ref-type="bibr" rid="CR67">2016</xref>). Briefly, the cells were fixed with 4% paraformaldehyde for 25 min, permeabilized with Triton X-100 (0.3% in PBS) for 25 min, incubated with blocking buffer (10% donkey serum in PBS) for 1 h at RT, and stained with primary antibodies overnight at 4 &#x000b0;C. Then, the cells were incubated with secondary antibodies for 1 h at RT. 
Hoechst 33342 (Invitrogen) was used to stain nuclear DNA.</p></sec><sec id="Sec26"><title>Analysis of apoptosis by flow cytometry</title><p id="Par50">A FACS-based apoptosis analysis was performed as previously described (Fu et al., <xref ref-type="bibr" rid="CR15">2016</xref>; Pan et al., <xref ref-type="bibr" rid="CR45">2016</xref>). For ROS measurement, cells were collected and incubated with 1 &#x003bc;mol/L H2DCFDA for 30 min using ROS Detection Reagents (Molecular Probes, C6827). The cells were later analysed using the BD LSRFortessa cell analyser.</p></sec><sec id="Sec27"><title>RNA sequencing library construction</title><p id="Par51">Total RNA for each sample was extracted using the RNeasy Mini Kit (Qiagen) according to the manufacturer&#x02019;s instructions. After quantification of the RNA by a fragment analyzer (Advanced Analytical), RNA sequencing libraries were constructed using the TruSeq RNA Sample Preparation Kit (Illumina) according to the manufacturer&#x02019;s protocols. Paired-end sequencing was performed using the Illumina HiSeq X Ten platform.</p></sec><sec id="Sec28"><title>RNA sequencing data processing</title><p id="Par52">RNA-seq data processing was performed as previously described (Zhang et al., <xref ref-type="bibr" rid="CR77">2015</xref>, <xref ref-type="bibr" rid="CR79">2019</xref>; Geng et al., <xref ref-type="bibr" rid="CR16">2018</xref>; Wang et al., <xref ref-type="bibr" rid="CR69">2018a</xref>; Ling et al., <xref ref-type="bibr" rid="CR30">2019</xref>). In brief, sequencing reads were trimmed and mapped to the <italic>H. sapiens</italic> reference genome (hg19) with HISAT2 software (v2.0.4) (Kim et al., <xref ref-type="bibr" rid="CR25">2015</xref>). HTSeq (v0.10.0) was used to determine the transcriptional expression level of each gene (Anders et al., <xref ref-type="bibr" rid="CR2">2015</xref>). 
Differentially expressed genes (DEGs) were computed at a cut-off adjusted <italic>P</italic> value (Benjamini-Hochberg) less than 0.05 and |Log<sub>2</sub>(fold change)| more than 1 using DESeq2 (Love et al., <xref ref-type="bibr" rid="CR35">2014</xref>). Pearson&#x02019;s correlation coefficient (<italic>R</italic>) and the Euclidian distance were calculated using <italic>R</italic> to evaluate the correlation between the replicates of each sample, which were based on Log<sub>2</sub>(FPKM + 1). PCA was also performed using <italic>R</italic>&#x000a0;based on Log<sub>2</sub>(FPKM + 1). Gene ontology (GO) enrichment analysis was computed by Metascape (Tripathi et al., <xref ref-type="bibr" rid="CR59">2015</xref>). The enrichment networks were visualized using Cytoscape (Shannon et al., <xref ref-type="bibr" rid="CR51">2003</xref>). Protein-protein interaction networks of overlapping genes were drawn based on the search tool for the retrieval of interacting genes (STRING) database (Szklarczyk et al., <xref ref-type="bibr" rid="CR56">2017</xref>). The aging-associated genes were obtained from the human aging genomic resources (HAGR) database (Tacutu et al., <xref ref-type="bibr" rid="CR57">2013</xref>).</p></sec><sec id="Sec29"><title>DNA extraction, library construction and sequencing</title><p id="Par53">Genomic DNA was extracted from each sample using the QIAamp&#x000ae; DNA Mini Kit (Qiagen), according to the manufacturer&#x02019;s protocol. DNA was randomly fragmented into ~300 bp lengths using a Covaris ultrasonic processor. DNA libraries were prepared with the NEBNext&#x000ae; Ultra<sup>TM</sup> DNA library Prep Kit (Illumina) and quantified using a Qubit 2.0 Fluorometer (Life Technologies). The insert sizes of the fragments in the libraries were determined by the Agilent Bioanalyzer 2100. 
Paired-end sequencing was performed using the Illumina HiSeq X Ten platform.</p></sec><sec id="Sec30"><title>Bioinformatics analyses of copy number variations, single-nucleotide variants and off-target sites</title><p id="Par54">The pipeline of whole genome sequencing data processing used in this study has been described previously (Zhang et al., <xref ref-type="bibr" rid="CR78">2018</xref>). In brief, sequencing data were mapped to the <italic>H</italic>. <italic>sapiens</italic> reference genome (hg19) without repeat regions using the Burrows-Wheeler Aligner (BWA, version 0.7.17) (Li and Durbin, <xref ref-type="bibr" rid="CR28">2009</xref>). The genomic coverage for each 500 kb bin window was calculated and normalized by the average sequencing depth. The copy number variation (CNV) scatterplot was drawn by ggplot2. For the single-nucleotide variant (SNV) analysis, the read base sites with an incorrect base probability &#x0003e;0.001 were masked with N, and base distribution for each chromosomal location was calculated by pysamstats (version 1.0.1) (<ext-link ext-link-type="uri" xlink:href="https://github.com/alimanfoo/pysamstats">https://github.com/alimanfoo/pysamstats</ext-link>). The heterozygosity of each site was defined as the percentage of the second enriched base depth. SNV sites were defined by base heterozygosity (0%&#x02013;30%). Potential indel sites were extracted with pysamstats (version 1.0.1) under default setting. Then indel sites were screened with sites existing in CS-iPSC genomic sequencing datasets, repeats and low-complexity regions annotated by RepeatMasker (db20170127), indel-type SNPs in humans and homopolymers. Simultaneously, 2034 off-target sites with no more than five mismatched sites were identified by Cas-OFFinder (Bae et al., <xref ref-type="bibr" rid="CR5">2014</xref>). 
None of these regions included indel sites identified by whole genome sequencing.</p></sec><sec id="Sec31"><title>Statistical analysis</title><p id="Par55">All results are presented as the mean &#x000b1; SEM or mean &#x000b1; SD. The data were statistically analysed using a two-tailed Student&#x02019;s <italic>t</italic>-test to compare differences between treatments assuming equal variance with PRISM software (GraphPad 5 Software). <italic>P</italic> values &#x0003c;0.05, &#x0003c;0.01, and &#x0003c;0.001 were considered statistically significant (*, **, and ***, respectively).</p></sec><sec id="Sec32"><title>Accession numbers</title><p id="Par56">The sequencing data have been deposited in the NCBI Gene Expression Omnibus (GEO) under the accession number GSE124208, NCBI Sequence Read Archive under accession number SRP174074.
+</p></sec></sec><sec sec-type="supplementary-material"><title>Electronic supplementary material</title><sec id="Sec33"><p>Below is the link to the electronic supplementary material.
+<supplementary-material content-type="local-data" id="MOESM1"><media xlink:href="13238_2019_623_MOESM1_ESM.pdf"><caption><p>Supplementary material 1 (PDF 3822 kb)</p></caption></media></supplementary-material><supplementary-material content-type="local-data" id="MOESM2"><media xlink:href="13238_2019_623_MOESM2_ESM.xlsx"><caption><p>Supplementary material 2 (XLSX 13 kb)</p></caption></media></supplementary-material></p></sec></sec></body><back><fn-group><fn><p>Si Wang, Zheying Min, and Qianzhao Ji have contributed equally.</p></fn><fn><p><bold>Change history</bold></p><p>1/15/2022</p><p>A Correction to this paper has been published: 10.1007/s13238-021-00901-3</p></fn></fn-group><ack><title>Acknowledgements</title><p>The authors acknowledge L. Bai, R. Bai, Q. Chu, J. Lu, S. Ma and Y. Yang for administrative assistance and W. Li, J. Jia and X. Zhang for assistance with animal experiments. This work was supported by the National Key Research and Development Program of China (2018YFC2000100), the Strategic Priority Research Program of the Chinese Academy of Sciences (XDA16010100), the National Key Research and Development Program of China (2018YFA0107203, 2017YFA0103304, 2017YFA0102802, 2016YFC1000601, 2015CB964800, 2014CB910503, and 2018YFA0108500), the National Natural Science Foundation of China (Grant Nos. 
81625009, 81330008, 91749202, 91749123, 31671429, 81671377, 81771515, 31601109, 31601158, 81701388, 81601233, 81822018, 81801399, 31801010, 81801370, 81861168034, 81571400, and 81771580), the Program of the Beijing Municipal Science and Technology Commission (Z151100003915072), the Key Research Program of the Chinese Academy of Sciences (KJZDEWTZ-L05), the Beijing Municipal Commission of Health and Family Planning (PXM2018_026283_000002) and the Advanced Innovation Center for Human Brain Protection (117212, 3500-1192012).</p></ack><ref-list id="Bib1"><title>References</title><ref id="CR1"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Amaro-Ortiz</surname><given-names>A</given-names></name><name><surname>Yan</surname><given-names>B</given-names></name><name><surname>D&#x02019;Orazio</surname><given-names>JA</given-names></name></person-group><article-title>Ultraviolet radiation, aging and the skin: prevention of damage by topical cAMP manipulation</article-title><source>Molecules</source><year>2014</year><volume>19</volume><fpage>6202</fpage><lpage>6219</lpage><pub-id pub-id-type="pmid">24838074</pub-id></element-citation></ref><ref id="CR2"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Anders</surname><given-names>S</given-names></name><name><surname>Pyl</surname><given-names>PT</given-names></name><name><surname>Huber</surname><given-names>W</given-names></name></person-group><article-title>HTSeq: a Python framework to work with high-throughput sequencing data</article-title><source>Bioinformatics</source><year>2015</year><volume>31</volume><fpage>166</fpage><lpage>169</lpage><pub-id pub-id-type="pmid">25260700</pub-id></element-citation></ref><ref id="CR3"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Andrade</surname><given-names>LN</given-names></name><name><surname>Nathanson</surname><given-names>JL</given-names></name><name><surname>Yeo</surname><given-names>GW</given-names></name><name><surname>Menck</surname><given-names>CF</given-names></name><name><surname>Muotri</surname><given-names>AR</given-names></name></person-group><article-title>Evidence for premature aging due to oxidative stress in iPSCs from Cockayne syndrome</article-title><source>Hum Mol Genet</source><year>2012</year><volume>21</volume><fpage>3825</fpage><lpage>3834</lpage><pub-id pub-id-type="pmid">22661500</pub-id></element-citation></ref><ref id="CR4"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Andressoo</surname><given-names>JO</given-names></name><name><surname>Mitchell</surname><given-names>JR</given-names></name><name><surname>de Wit</surname><given-names>J</given-names></name><name><surname>Hoogstraten</surname><given-names>D</given-names></name><name><surname>Volker</surname><given-names>M</given-names></name><name><surname>Toussaint</surname><given-names>W</given-names></name><name><surname>Speksnijder</surname><given-names>E</given-names></name><name><surname>Beems</surname><given-names>RB</given-names></name><name><surname>van Steeg</surname><given-names>H</given-names></name><name><surname>Jans</surname><given-names>J</given-names></name><etal/></person-group><article-title>An Xpd mouse model for the combined xeroderma pigmentosum/Cockayne syndrome exhibiting both cancer predisposition and segmental progeria</article-title><source>Cancer Cell</source><year>2006</year><volume>10</volume><fpage>121</fpage><lpage>132</lpage><pub-id pub-id-type="pmid">16904611</pub-id></element-citation></ref><ref id="CR5"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Bae</surname><given-names>S</given-names></name><name><surname>Park</surname><given-names>J</given-names></name><name><surname>Kim</surname><given-names>JS</given-names></name></person-group><article-title>Cas-OFFinder: a fast and versatile algorithm that searches for potential off-target sites of Cas9 RNA-guided endonucleases</article-title><source>Bioinformatics</source><year>2014</year><volume>30</volume><fpage>1473</fpage><lpage>1475</lpage><pub-id pub-id-type="pmid">24463181</pub-id></element-citation></ref><ref id="CR6"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cadet</surname><given-names>J</given-names></name><name><surname>Sage</surname><given-names>E</given-names></name><name><surname>Douki</surname><given-names>T</given-names></name></person-group><article-title>Ultraviolet radiation-mediated damage to cellular DNA</article-title><source>Mutat Res</source><year>2005</year><volume>571</volume><fpage>3</fpage><lpage>17</lpage><pub-id pub-id-type="pmid">15748634</pub-id></element-citation></ref><ref id="CR7"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Castro-Vinuelas</surname><given-names>R</given-names></name><name><surname>Sanjurjo-Rodriguez</surname><given-names>C</given-names></name><name><surname>Pineiro-Ramil</surname><given-names>M</given-names></name><name><surname>Hermida-Gomez</surname><given-names>T</given-names></name><name><surname>Fuentes-Boquete</surname><given-names>IM</given-names></name><name><surname>de Toro-Santos</surname><given-names>FJ</given-names></name><name><surname>Blanco-Garcia</surname><given-names>FJ</given-names></name><name><surname>Diaz-Prado</surname><given-names>SM</given-names></name></person-group><article-title>Induced pluripotent stem cells for cartilage repair: current status and future perspectives</article-title><source>Eur Cell 
Mater</source><year>2018</year><volume>36</volume><fpage>96</fpage><lpage>109</lpage><pub-id pub-id-type="pmid">30204229</pub-id></element-citation></ref><ref id="CR8"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ciaffardini</surname><given-names>F</given-names></name><name><surname>Nicolai</surname><given-names>S</given-names></name><name><surname>Caputo</surname><given-names>M</given-names></name><name><surname>Canu</surname><given-names>G</given-names></name><name><surname>Paccosi</surname><given-names>E</given-names></name><name><surname>Costantino</surname><given-names>M</given-names></name><name><surname>Frontini</surname><given-names>M</given-names></name><name><surname>Balajee</surname><given-names>AS</given-names></name><name><surname>Proietti-De-Santis</surname><given-names>L</given-names></name></person-group><article-title>The cockayne syndrome B protein is essential for neuronal differentiation and neuritogenesis</article-title><source>Cell Death Dis</source><year>2014</year><volume>5</volume><fpage>e1268</fpage><pub-id pub-id-type="pmid">24874740</pub-id></element-citation></ref><ref id="CR9"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cleaver</surname><given-names>JE</given-names></name><name><surname>Lam</surname><given-names>ET</given-names></name><name><surname>Revet</surname><given-names>I</given-names></name></person-group><article-title>Disorders of nucleotide excision repair: the genetic and molecular basis of heterogeneity</article-title><source>Nat Rev Genet</source><year>2009</year><volume>10</volume><fpage>756</fpage><lpage>768</lpage><pub-id pub-id-type="pmid">19809470</pub-id></element-citation></ref><ref id="CR10"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Debacq-Chainiaux</surname><given-names>F</given-names></name><name><surname>Erusalimsky</surname><given-names>JD</given-names></name><name><surname>Campisi</surname><given-names>J</given-names></name><name><surname>Toussaint</surname><given-names>O</given-names></name></person-group><article-title>Protocols to detect senescence-associated beta-galactosidase (SA-betagal) activity, a biomarker of senescent cells in culture and in vivo</article-title><source>Nat Protoc</source><year>2009</year><volume>4</volume><fpage>1798</fpage><lpage>1806</lpage><pub-id pub-id-type="pmid">20010931</pub-id></element-citation></ref><ref id="CR11"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ding</surname><given-names>Z</given-names></name><name><surname>Sui</surname><given-names>L</given-names></name><name><surname>Ren</surname><given-names>R</given-names></name><name><surname>Liu</surname><given-names>Y</given-names></name><name><surname>Xu</surname><given-names>X</given-names></name><name><surname>Fu</surname><given-names>L</given-names></name><name><surname>Bai</surname><given-names>R</given-names></name><name><surname>Yuan</surname><given-names>T</given-names></name><name><surname>Hao</surname><given-names>Y</given-names></name><name><surname>Zhang</surname><given-names>W</given-names></name><etal/></person-group><article-title>A widely adaptable approach to generate integration-free iPSCs from non-invasively acquired human somatic cells</article-title><source>Protein Cell</source><year>2015</year><volume>6</volume><fpage>386</fpage><lpage>389</lpage><pub-id pub-id-type="pmid">25412771</pub-id></element-citation></ref><ref id="CR12"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Duan</surname><given-names>S</given-names></name><name><surname>Yuan</surname><given-names>G</given-names></name><name><surname>Liu</surname><given-names>X</given-names></name><name><surname>Ren</surname><given-names>R</given-names></name><name><surname>Li</surname><given-names>J</given-names></name><name><surname>Zhang</surname><given-names>W</given-names></name><name><surname>Wu</surname><given-names>J</given-names></name><name><surname>Xu</surname><given-names>X</given-names></name><name><surname>Fu</surname><given-names>L</given-names></name><name><surname>Li</surname><given-names>Y</given-names></name><etal/></person-group><article-title>PTEN deficiency reprogrammes human neural stem cells towards a glioblastoma stem cell-like phenotype</article-title><source>Nat Commun</source><year>2015</year><volume>6</volume><fpage>10068</fpage><pub-id pub-id-type="pmid">26632666</pub-id></element-citation></ref><ref id="CR13"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Friedberg</surname><given-names>EC</given-names></name></person-group><article-title>How nucleotide excision repair protects against cancer</article-title><source>Nat Rev Cancer</source><year>2001</year><volume>1</volume><fpage>22</fpage><lpage>33</lpage><pub-id pub-id-type="pmid">11900249</pub-id></element-citation></ref><ref id="CR14"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Friedberg</surname><given-names>EC</given-names></name></person-group><article-title>DNA damage and repair</article-title><source>Nature</source><year>2003</year><volume>421</volume><fpage>436</fpage><lpage>440</lpage><pub-id pub-id-type="pmid">12540918</pub-id></element-citation></ref><ref id="CR15"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Fu</surname><given-names>LN</given-names></name><name><surname>Xu</surname><given-names>XL</given-names></name><name><surname>Ren</surname><given-names>RT</given-names></name><name><surname>Wu</surname><given-names>J</given-names></name><name><surname>Zhang</surname><given-names>WQ</given-names></name><name><surname>Yang</surname><given-names>JP</given-names></name><name><surname>Ren</surname><given-names>XQ</given-names></name><name><surname>Wang</surname><given-names>S</given-names></name><name><surname>Zhao</surname><given-names>Y</given-names></name><name><surname>Sun</surname><given-names>L</given-names></name><etal/></person-group><article-title>Modeling xeroderma pigmentosum associated neurological pathologies with patients-derived iPSCs</article-title><source>Protein Cell</source><year>2016</year><volume>7</volume><fpage>210</fpage><lpage>221</lpage><pub-id pub-id-type="pmid">26874523</pub-id></element-citation></ref><ref id="CR16"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Geng</surname><given-names>L</given-names></name><name><surname>Liu</surname><given-names>Z</given-names></name><name><surname>Zhang</surname><given-names>W</given-names></name><name><surname>Li</surname><given-names>W</given-names></name><name><surname>Wu</surname><given-names>Z</given-names></name><name><surname>Wang</surname><given-names>W</given-names></name><name><surname>Ren</surname><given-names>R</given-names></name><name><surname>Su</surname><given-names>Y</given-names></name><name><surname>Wang</surname><given-names>P</given-names></name><name><surname>Sun</surname><given-names>L</given-names></name><etal/></person-group><article-title>Chemical screen identifies a geroprotective role of quercetin in premature aging</article-title><source>Protein Cell</source><year>2018</year><pub-id pub-id-type="doi">10.1007/s13238-018-0567-y</pub-id><pub-id 
pub-id-type="pmid">30069858</pub-id></element-citation></ref><ref id="CR17"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Golpanian</surname><given-names>S</given-names></name><name><surname>DiFede</surname><given-names>DL</given-names></name><name><surname>Pujol</surname><given-names>MV</given-names></name><name><surname>Lowery</surname><given-names>MH</given-names></name><name><surname>Levis-Dusseau</surname><given-names>S</given-names></name><name><surname>Goldstein</surname><given-names>BJ</given-names></name><name><surname>Schulman</surname><given-names>IH</given-names></name><name><surname>Longsomboon</surname><given-names>B</given-names></name><name><surname>Wolf</surname><given-names>A</given-names></name><name><surname>Khan</surname><given-names>A</given-names></name><etal/></person-group><article-title>Rationale and design of the allogeneiC human mesenchymal stem cells (hMSC) in patients with aging fRAilTy via intraveno US delivery (CRATUS) study: A phase I/II, randomized, blinded and placebo controlled trial to evaluate the safety and potential efficacy of allogeneic human mesenchymal stem cell infusion in patients with aging frailty</article-title><source>Oncotarget</source><year>2016</year><volume>7</volume><fpage>11899</fpage><lpage>11912</lpage><pub-id pub-id-type="pmid">26933813</pub-id></element-citation></ref><ref id="CR18"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Golpanian</surname><given-names>S</given-names></name><name><surname>DiFede</surname><given-names>DL</given-names></name><name><surname>Khan</surname><given-names>A</given-names></name><name><surname>Schulman</surname><given-names>IH</given-names></name><name><surname>Landin</surname><given-names>AM</given-names></name><name><surname>Tompkins</surname><given-names>BA</given-names></name><name><surname>Heldman</surname><given-names>AW</given-names></name><name><surname>Miki</surname><given-names>R</given-names></name><name><surname>Goldstein</surname><given-names>BJ</given-names></name><name><surname>Mushtaq</surname><given-names>M</given-names></name><etal/></person-group><article-title>Allogeneic human mesenchymal stem cell infusions for aging frailty</article-title><source>J Gerontol A</source><year>2017</year><volume>72</volume><fpage>1505</fpage><lpage>1512</lpage></element-citation></ref><ref id="CR19"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Gorgels</surname><given-names>TG</given-names></name><name><surname>van der Pluijm</surname><given-names>I</given-names></name><name><surname>Brandt</surname><given-names>RM</given-names></name><name><surname>Garinis</surname><given-names>GA</given-names></name><name><surname>van Steeg</surname><given-names>H</given-names></name><name><surname>van den Aardweg</surname><given-names>G</given-names></name><name><surname>Jansen</surname><given-names>GH</given-names></name><name><surname>Ruijter</surname><given-names>JM</given-names></name><name><surname>Bergen</surname><given-names>AA</given-names></name><name><surname>van Norren</surname><given-names>D</given-names></name><etal/></person-group><article-title>Retinal degeneration and ionizing radiation hypersensitivity in a mouse model for Cockayne syndrome</article-title><source>Mol Cell 
Biol</source><year>2007</year><volume>27</volume><fpage>1433</fpage><lpage>1441</lpage><pub-id pub-id-type="pmid">17145777</pub-id></element-citation></ref><ref id="CR20"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hishiya</surname><given-names>A</given-names></name><name><surname>Watanabe</surname><given-names>K</given-names></name></person-group><article-title>Progeroid syndrome as a model for impaired bone formation in senile osteoporosis</article-title><source>J Bone Miner Metab</source><year>2004</year><volume>22</volume><fpage>399</fpage><lpage>403</lpage><pub-id pub-id-type="pmid">15316860</pub-id></element-citation></ref><ref id="CR21"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Jaarsma</surname><given-names>D</given-names></name><name><surname>van der Pluijm</surname><given-names>I</given-names></name><name><surname>de Waard</surname><given-names>MC</given-names></name><name><surname>Haasdijk</surname><given-names>ED</given-names></name><name><surname>Brandt</surname><given-names>R</given-names></name><name><surname>Vermeij</surname><given-names>M</given-names></name><name><surname>Rijksen</surname><given-names>Y</given-names></name><name><surname>Maas</surname><given-names>A</given-names></name><name><surname>van Steeg</surname><given-names>H</given-names></name><name><surname>Hoeijmakers</surname><given-names>JH</given-names></name><etal/></person-group><article-title>Age-related neuronal degeneration: complementary roles of nucleotide excision repair and transcription-coupled repair in preventing neuropathology</article-title><source>PLoS Genet</source><year>2011</year><volume>7</volume><fpage>e1002405</fpage><pub-id pub-id-type="pmid">22174697</pub-id></element-citation></ref><ref id="CR22"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Karikkineth</surname><given-names>AC</given-names></name><name><surname>Scheibye-Knudsen</surname><given-names>M</given-names></name><name><surname>Fivenson</surname><given-names>E</given-names></name><name><surname>Croteau</surname><given-names>DL</given-names></name><name><surname>Bohr</surname><given-names>VA</given-names></name></person-group><article-title>Cockayne syndrome: clinical features, model systems and pathways</article-title><source>Ageing Res Rev</source><year>2017</year><volume>33</volume><fpage>3</fpage><lpage>17</lpage><pub-id pub-id-type="pmid">27507608</pub-id></element-citation></ref><ref id="CR23"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kawamura</surname><given-names>T</given-names></name><name><surname>Suzuki</surname><given-names>J</given-names></name><name><surname>Wang</surname><given-names>YV</given-names></name><name><surname>Menendez</surname><given-names>S</given-names></name><name><surname>Morera</surname><given-names>LB</given-names></name><name><surname>Raya</surname><given-names>A</given-names></name><name><surname>Wahl</surname><given-names>GM</given-names></name><name><surname>Izpisua Belmonte</surname><given-names>JC</given-names></name></person-group><article-title>Linking the p53 tumour suppressor pathway to somatic cell reprogramming</article-title><source>Nature</source><year>2009</year><volume>460</volume><fpage>1140</fpage><lpage>1144</lpage><pub-id pub-id-type="pmid">19668186</pub-id></element-citation></ref><ref id="CR24"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kemp</surname><given-names>MG</given-names></name><name><surname>Spandau</surname><given-names>DF</given-names></name><name><surname>Travers</surname><given-names>JB</given-names></name></person-group><article-title>Impact of age and insulin-like growth factor-1 on DNA damage responses in UV-irradiated 
human skin</article-title><source>Molecules</source><year>2017</year><volume>22</volume><fpage>356</fpage></element-citation></ref><ref id="CR25"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kim</surname><given-names>D</given-names></name><name><surname>Langmead</surname><given-names>B</given-names></name><name><surname>Salzberg</surname><given-names>SL</given-names></name></person-group><article-title>HISAT: a fast spliced aligner with low memory requirements</article-title><source>Nat Methods</source><year>2015</year><volume>12</volume><fpage>357</fpage><lpage>360</lpage><pub-id pub-id-type="pmid">25751142</pub-id></element-citation></ref><ref id="CR26"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kubben</surname><given-names>N</given-names></name><name><surname>Zhang</surname><given-names>W</given-names></name><name><surname>Wang</surname><given-names>L</given-names></name><name><surname>Voss</surname><given-names>TC</given-names></name><name><surname>Yang</surname><given-names>J</given-names></name><name><surname>Qu</surname><given-names>J</given-names></name><name><surname>Liu</surname><given-names>GH</given-names></name><name><surname>Misteli</surname><given-names>T</given-names></name></person-group><article-title>Repression of the antioxidant NRF2 pathway in premature aging</article-title><source>Cell</source><year>2016</year><volume>165</volume><fpage>1361</fpage><lpage>1374</lpage><pub-id pub-id-type="pmid">27259148</pub-id></element-citation></ref><ref id="CR27"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Laugel</surname><given-names>V</given-names></name></person-group><article-title>Cockayne syndrome: the expanding clinical and mutational spectrum</article-title><source>Mech Ageing Dev</source><year>2013</year><volume>134</volume><fpage>161</fpage><lpage>170</lpage><pub-id 
pub-id-type="pmid">23428416</pub-id></element-citation></ref><ref id="CR28"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Li</surname><given-names>H</given-names></name><name><surname>Durbin</surname><given-names>R</given-names></name></person-group><article-title>Fast and accurate short read alignment with Burrows&#x02013;Wheeler transform</article-title><source>Bioinformatics</source><year>2009</year><volume>25</volume><fpage>1754</fpage><lpage>1760</lpage><pub-id pub-id-type="pmid">19451168</pub-id></element-citation></ref><ref id="CR29"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Li</surname><given-names>Y</given-names></name><name><surname>Zhang</surname><given-names>W</given-names></name><name><surname>Chang</surname><given-names>L</given-names></name><name><surname>Han</surname><given-names>Y</given-names></name><name><surname>Sun</surname><given-names>L</given-names></name><name><surname>Gong</surname><given-names>X</given-names></name><name><surname>Tang</surname><given-names>H</given-names></name><name><surname>Liu</surname><given-names>Z</given-names></name><name><surname>Deng</surname><given-names>H</given-names></name><name><surname>Ye</surname><given-names>Y</given-names></name><etal/></person-group><article-title>Vitamin C alleviates aging defects in a stem cell model for Werner syndrome</article-title><source>Protein Cell</source><year>2016</year><volume>7</volume><fpage>478</fpage><lpage>488</lpage><pub-id pub-id-type="pmid">27271327</pub-id></element-citation></ref><ref id="CR30"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Ling</surname><given-names>C</given-names></name><name><surname>Liu</surname><given-names>Z</given-names></name><name><surname>Song</surname><given-names>M</given-names></name><name><surname>Zhang</surname><given-names>W</given-names></name><name><surname>Wang</surname><given-names>S</given-names></name><name><surname>Liu</surname><given-names>X</given-names></name><name><surname>Ma</surname><given-names>S</given-names></name><name><surname>Sun</surname><given-names>S</given-names></name><name><surname>Fu</surname><given-names>L</given-names></name><name><surname>Chu</surname><given-names>Q</given-names></name><etal/></person-group><article-title>Modeling CADASIL vascular pathologies with patient-derived induced pluripotent stem cells</article-title><source>Protein Cell.</source><year>2019</year><volume>10</volume><fpage>249</fpage><lpage>271</lpage><pub-id pub-id-type="pmid">30778920</pub-id></element-citation></ref><ref id="CR31"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname><given-names>GH</given-names></name><name><surname>Barkho</surname><given-names>BZ</given-names></name><name><surname>Ruiz</surname><given-names>S</given-names></name><name><surname>Diep</surname><given-names>D</given-names></name><name><surname>Qu</surname><given-names>J</given-names></name><name><surname>Yang</surname><given-names>SL</given-names></name><name><surname>Panopoulos</surname><given-names>AD</given-names></name><name><surname>Suzuki</surname><given-names>K</given-names></name><name><surname>Kurian</surname><given-names>L</given-names></name><name><surname>Walsh</surname><given-names>C</given-names></name><etal/></person-group><article-title>Recapitulation of premature ageing with iPSCs from Hutchinson&#x02013;Gilford progeria syndrome</article-title><source>Nature</source><year>2011</year><volume>472</volume><fpage>221</fpage><lpage>225</lpage><pub-id 
pub-id-type="pmid">21346760</pub-id></element-citation></ref><ref id="CR32"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname><given-names>GH</given-names></name><name><surname>Suzuki</surname><given-names>K</given-names></name><name><surname>Qu</surname><given-names>J</given-names></name><name><surname>Sancho-Martinez</surname><given-names>I</given-names></name><name><surname>Yi</surname><given-names>F</given-names></name><name><surname>Li</surname><given-names>M</given-names></name><name><surname>Kumar</surname><given-names>S</given-names></name><name><surname>Nivet</surname><given-names>E</given-names></name><name><surname>Kim</surname><given-names>J</given-names></name><name><surname>Soligalla</surname><given-names>RD</given-names></name><etal/></person-group><article-title>Targeted gene correction of laminopathy-associated LMNA mutations in patient-specific iPSCs</article-title><source>Cell Stem Cell</source><year>2011</year><volume>8</volume><fpage>688</fpage><lpage>694</lpage><pub-id pub-id-type="pmid">21596650</pub-id></element-citation></ref><ref id="CR33"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname><given-names>GH</given-names></name><name><surname>Qu</surname><given-names>J</given-names></name><name><surname>Suzuki</surname><given-names>K</given-names></name><name><surname>Nivet</surname><given-names>E</given-names></name><name><surname>Li</surname><given-names>M</given-names></name><name><surname>Montserrat</surname><given-names>N</given-names></name><name><surname>Yi</surname><given-names>F</given-names></name><name><surname>Xu</surname><given-names>X</given-names></name><name><surname>Ruiz</surname><given-names>S</given-names></name><name><surname>Zhang</surname><given-names>W</given-names></name><etal/></person-group><article-title>Progressive degeneration of human neural stem cells caused by pathogenic 
LRRK2</article-title><source>Nature</source><year>2012</year><volume>491</volume><fpage>603</fpage><lpage>607</lpage><pub-id pub-id-type="pmid">23075850</pub-id></element-citation></ref><ref id="CR34"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Liu</surname><given-names>GH</given-names></name><name><surname>Suzuki</surname><given-names>K</given-names></name><name><surname>Li</surname><given-names>M</given-names></name><name><surname>Qu</surname><given-names>J</given-names></name><name><surname>Montserrat</surname><given-names>N</given-names></name><name><surname>Tarantino</surname><given-names>C</given-names></name><name><surname>Gu</surname><given-names>Y</given-names></name><name><surname>Yi</surname><given-names>F</given-names></name><name><surname>Xu</surname><given-names>X</given-names></name><name><surname>Zhang</surname><given-names>W</given-names></name><etal/></person-group><article-title>Modelling Fanconi anemia pathogenesis and therapeutics using integration-free patient-derived iPSCs</article-title><source>Nat Commun</source><year>2014</year><volume>5</volume><fpage>4330</fpage><pub-id pub-id-type="pmid">24999918</pub-id></element-citation></ref><ref id="CR35"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Love</surname><given-names>MI</given-names></name><name><surname>Huber</surname><given-names>W</given-names></name><name><surname>Anders</surname><given-names>S</given-names></name></person-group><article-title>Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2</article-title><source>Genome Biol</source><year>2014</year><volume>15</volume><fpage>550</fpage><pub-id pub-id-type="pmid">25516281</pub-id></element-citation></ref><ref id="CR36"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>McKay</surname><given-names>BC</given-names></name><name><surname>Cabrita</surname><given-names>MA</given-names></name></person-group><article-title>Arresting transcription and sentencing the cell: the consequences of blocked transcription</article-title><source>Mech Ageing Dev</source><year>2013</year><volume>134</volume><fpage>243</fpage><lpage>252</lpage><pub-id pub-id-type="pmid">23542592</pub-id></element-citation></ref><ref id="CR37"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Muller</surname><given-names>LU</given-names></name><name><surname>Milsom</surname><given-names>MD</given-names></name><name><surname>Harris</surname><given-names>CE</given-names></name><name><surname>Vyas</surname><given-names>R</given-names></name><name><surname>Brumme</surname><given-names>KM</given-names></name><name><surname>Parmar</surname><given-names>K</given-names></name><name><surname>Moreau</surname><given-names>LA</given-names></name><name><surname>Schambach</surname><given-names>A</given-names></name><name><surname>Park</surname><given-names>IH</given-names></name><name><surname>London</surname><given-names>WB</given-names></name><etal/></person-group><article-title>Overcoming reprogramming resistance of Fanconi anemia cells</article-title><source>Blood</source><year>2012</year><volume>119</volume><fpage>5449</fpage><lpage>5457</lpage><pub-id pub-id-type="pmid">22371882</pub-id></element-citation></ref><ref id="CR38"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Murai</surname><given-names>M</given-names></name><name><surname>Enokido</surname><given-names>Y</given-names></name><name><surname>Inamura</surname><given-names>N</given-names></name><name><surname>Yoshino</surname><given-names>M</given-names></name><name><surname>Nakatsu</surname><given-names>Y</given-names></name><name><surname>van der 
Horst</surname><given-names>GT</given-names></name><name><surname>Hoeijmakers</surname><given-names>JH</given-names></name><name><surname>Tanaka</surname><given-names>K</given-names></name><name><surname>Hatanaka</surname><given-names>H</given-names></name></person-group><article-title>Early postnatal ataxia and abnormal cerebellar development in mice lacking Xeroderma pigmentosum Group A and Cockayne syndrome Group B DNA repair genes</article-title><source>Proc Natl Acad Sci USA</source><year>2001</year><volume>98</volume><fpage>13379</fpage><lpage>13384</lpage><pub-id pub-id-type="pmid">11687625</pub-id></element-citation></ref><ref id="CR39"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Natale</surname><given-names>V</given-names></name></person-group><article-title>A comprehensive description of the severity groups in Cockayne syndrome</article-title><source>Am J Med Genet A</source><year>2011</year><volume>155A</volume><fpage>1081</fpage><lpage>1095</lpage><pub-id pub-id-type="pmid">21480477</pub-id></element-citation></ref><ref id="CR40"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Newman</surname><given-names>JC</given-names></name><name><surname>Bailey</surname><given-names>AD</given-names></name><name><surname>Weiner</surname><given-names>AM</given-names></name></person-group><article-title>Cockayne syndrome group B protein (CSB) plays a general role in chromatin maintenance and remodeling</article-title><source>Proc Natl Acad Sci USA</source><year>2006</year><volume>103</volume><fpage>9613</fpage><lpage>9618</lpage><pub-id pub-id-type="pmid">16772382</pub-id></element-citation></ref><ref id="CR41"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Okita</surname><given-names>K</given-names></name><name><surname>Matsumura</surname><given-names>Y</given-names></name><name><surname>Sato</surname><given-names>Y</given-names></name><name><surname>Okada</surname><given-names>A</given-names></name><name><surname>Morizane</surname><given-names>A</given-names></name><name><surname>Okamoto</surname><given-names>S</given-names></name><name><surname>Hong</surname><given-names>H</given-names></name><name><surname>Nakagawa</surname><given-names>M</given-names></name><name><surname>Tanabe</surname><given-names>K</given-names></name><name><surname>Tezuka</surname><given-names>K</given-names></name><etal/></person-group><article-title>A more efficient method to generate integration-free human iPS cells</article-title><source>Nat Methods</source><year>2011</year><volume>8</volume><fpage>409</fpage><lpage>412</lpage><pub-id pub-id-type="pmid">21460823</pub-id></element-citation></ref><ref id="CR42"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Orozco</surname><given-names>L</given-names></name><name><surname>Soler</surname><given-names>R</given-names></name><name><surname>Morera</surname><given-names>C</given-names></name><name><surname>Alberca</surname><given-names>M</given-names></name><name><surname>Sanchez</surname><given-names>A</given-names></name><name><surname>Garcia-Sancho</surname><given-names>J</given-names></name></person-group><article-title>Intervertebral disc repair by autologous mesenchymal bone marrow cells: a pilot study</article-title><source>Transplantation</source><year>2011</year><volume>92</volume><fpage>822</fpage><lpage>828</lpage><pub-id pub-id-type="pmid">21792091</pub-id></element-citation></ref><ref id="CR43"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Orozco</surname><given-names>L</given-names></name><name><surname>Munar</surname><given-names>A</given-names></name><name><surname>Soler</surname><given-names>R</given-names></name><name><surname>Alberca</surname><given-names>M</given-names></name><name><surname>Soler</surname><given-names>F</given-names></name><name><surname>Huguet</surname><given-names>M</given-names></name><name><surname>Sentis</surname><given-names>J</given-names></name><name><surname>Sanchez</surname><given-names>A</given-names></name><name><surname>Garcia-Sancho</surname><given-names>J</given-names></name></person-group><article-title>Treatment of knee osteoarthritis with autologous mesenchymal stem cells: a pilot study</article-title><source>Transplantation</source><year>2013</year><volume>95</volume><fpage>1535</fpage><lpage>1541</lpage><pub-id pub-id-type="pmid">23680930</pub-id></element-citation></ref><ref id="CR44"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Orozco</surname><given-names>L</given-names></name><name><surname>Munar</surname><given-names>A</given-names></name><name><surname>Soler</surname><given-names>R</given-names></name><name><surname>Alberca</surname><given-names>M</given-names></name><name><surname>Soler</surname><given-names>F</given-names></name><name><surname>Huguet</surname><given-names>M</given-names></name><name><surname>Sentis</surname><given-names>J</given-names></name><name><surname>Sanchez</surname><given-names>A</given-names></name><name><surname>Garcia-Sancho</surname><given-names>J</given-names></name></person-group><article-title>Treatment of knee osteoarthritis with autologous mesenchymal stem cells: two-year follow-up results</article-title><source>Transplantation</source><year>2014</year><volume>97</volume><fpage>e66</fpage><lpage>e68</lpage><pub-id pub-id-type="pmid">24887752</pub-id></element-citation></ref><ref id="CR45"><element-citation 
publication-type="journal"><person-group person-group-type="author"><name><surname>Pan</surname><given-names>H</given-names></name><name><surname>Guan</surname><given-names>D</given-names></name><name><surname>Liu</surname><given-names>X</given-names></name><name><surname>Li</surname><given-names>J</given-names></name><name><surname>Wang</surname><given-names>L</given-names></name><name><surname>Wu</surname><given-names>J</given-names></name><name><surname>Zhou</surname><given-names>J</given-names></name><name><surname>Zhang</surname><given-names>W</given-names></name><name><surname>Ren</surname><given-names>R</given-names></name><name><surname>Li</surname><given-names>Y</given-names></name><etal/></person-group><article-title>SIRT6 safeguards human mesenchymal stem cells from oxidative stress by coactivating NRF2</article-title><source>Cell Res</source><year>2016</year><volume>26</volume><fpage>190</fpage><lpage>205</lpage><pub-id pub-id-type="pmid">26768768</pub-id></element-citation></ref><ref id="CR46"><mixed-citation publication-type="other">Peters DT, Cowan CA, Musunuru K (2008) Genome editing in human pluripotent stem cells. 
In: StemBook, Cambridge</mixed-citation></ref><ref id="CR47"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Proietti-De-Santis</surname><given-names>L</given-names></name><name><surname>Drane</surname><given-names>P</given-names></name><name><surname>Egly</surname><given-names>JM</given-names></name></person-group><article-title>Cockayne syndrome B protein regulates the transcriptional program after UV irradiation</article-title><source>EMBO J</source><year>2006</year><volume>25</volume><fpage>1915</fpage><lpage>1923</lpage><pub-id pub-id-type="pmid">16601682</pub-id></element-citation></ref><ref id="CR48"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rockx</surname><given-names>DA</given-names></name><name><surname>Mason</surname><given-names>R</given-names></name><name><surname>van Hoffen</surname><given-names>A</given-names></name><name><surname>Barton</surname><given-names>MC</given-names></name><name><surname>Citterio</surname><given-names>E</given-names></name><name><surname>Bregman</surname><given-names>DB</given-names></name><name><surname>van Zeeland</surname><given-names>AA</given-names></name><name><surname>Vrieling</surname><given-names>H</given-names></name><name><surname>Mullenders</surname><given-names>LH</given-names></name></person-group><article-title>UV-induced inhibition of transcription involves repression of transcription initiation and phosphorylation of RNA polymerase II</article-title><source>Proc Natl Acad Sci USA</source><year>2000</year><volume>97</volume><fpage>10503</fpage><lpage>10508</lpage><pub-id pub-id-type="pmid">10973477</pub-id></element-citation></ref><ref id="CR49"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Sacco</surname><given-names>R</given-names></name><name><surname>Tamblyn</surname><given-names>L</given-names></name><name><surname>Rajakulendran</surname><given-names>N</given-names></name><name><surname>Bralha</surname><given-names>FN</given-names></name><name><surname>Tropepe</surname><given-names>V</given-names></name><name><surname>Laposa</surname><given-names>RR</given-names></name></person-group><article-title>Cockayne syndrome b maintains neural precursor function</article-title><source>DNA Repair</source><year>2013</year><volume>12</volume><fpage>110</fpage><lpage>120</lpage><pub-id pub-id-type="pmid">23245699</pub-id></element-citation></ref><ref id="CR50"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Setlow</surname><given-names>RB</given-names></name><name><surname>Setlow</surname><given-names>JK</given-names></name></person-group><article-title>Evidence that ultraviolet-induced thymine dimers in DNA cause biological damage</article-title><source>Proc Natl Acad Sci USA</source><year>1962</year><volume>48</volume><fpage>1250</fpage><lpage>1257</lpage><pub-id pub-id-type="pmid">13910967</pub-id></element-citation></ref><ref id="CR51"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shannon</surname><given-names>P</given-names></name><name><surname>Markiel</surname><given-names>A</given-names></name><name><surname>Ozier</surname><given-names>O</given-names></name><name><surname>Baliga</surname><given-names>NS</given-names></name><name><surname>Wang</surname><given-names>JT</given-names></name><name><surname>Ramage</surname><given-names>D</given-names></name><name><surname>Amin</surname><given-names>N</given-names></name><name><surname>Schwikowski</surname><given-names>B</given-names></name><name><surname>Ideker</surname><given-names>T</given-names></name></person-group><article-title>Cytoscape: a software 
environment for integrated models of biomolecular interaction networks</article-title><source>Genome Res</source><year>2003</year><volume>13</volume><fpage>2498</fpage><lpage>2504</lpage><pub-id pub-id-type="pmid">14597658</pub-id></element-citation></ref><ref id="CR52"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Shehata</surname><given-names>L</given-names></name><name><surname>Simeonov</surname><given-names>DR</given-names></name><name><surname>Raams</surname><given-names>A</given-names></name><name><surname>Wolfe</surname><given-names>L</given-names></name><name><surname>Vanderver</surname><given-names>A</given-names></name><name><surname>Li</surname><given-names>X</given-names></name><name><surname>Huang</surname><given-names>Y</given-names></name><name><surname>Garner</surname><given-names>S</given-names></name><name><surname>Boerkoel</surname><given-names>CF</given-names></name><name><surname>Thurm</surname><given-names>A</given-names></name><etal/></person-group><article-title>ERCC6 dysfunction presenting as progressive neurological decline with brain hypomyelination</article-title><source>Am J Med Genet A</source><year>2014</year><volume>164A</volume><fpage>2892</fpage><lpage>2900</lpage><pub-id pub-id-type="pmid">25251875</pub-id></element-citation></ref><ref id="CR53"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Shimamoto</surname><given-names>A</given-names></name><name><surname>Kagawa</surname><given-names>H</given-names></name><name><surname>Zensho</surname><given-names>K</given-names></name><name><surname>Sera</surname><given-names>Y</given-names></name><name><surname>Kazuki</surname><given-names>Y</given-names></name><name><surname>Osaki</surname><given-names>M</given-names></name><name><surname>Oshimura</surname><given-names>M</given-names></name><name><surname>Ishigaki</surname><given-names>Y</given-names></name><name><surname>Hamasaki</surname><given-names>K</given-names></name><name><surname>Kodama</surname><given-names>Y</given-names></name><etal/></person-group><article-title>Reprogramming suppresses premature senescence phenotypes of Werner syndrome cells and maintains chromosomal stability over long-term culture</article-title><source>PLoS ONE</source><year>2014</year><volume>9</volume><fpage>e112900</fpage><pub-id pub-id-type="pmid">25390333</pub-id></element-citation></ref><ref id="CR54"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Soontararak</surname><given-names>S</given-names></name><name><surname>Chow</surname><given-names>L</given-names></name><name><surname>Johnson</surname><given-names>V</given-names></name><name><surname>Coy</surname><given-names>J</given-names></name><name><surname>Wheat</surname><given-names>W</given-names></name><name><surname>Regan</surname><given-names>D</given-names></name><name><surname>Dow</surname><given-names>S</given-names></name></person-group><article-title>Mesenchymal stem cells (MSC) derived from induced pluripotent stem cells (iPSC) equivalent to adipose-derived MSC in promoting intestinal healing and microbiome normalization in mouse inflammatory bowel disease model</article-title><source>Stem Cells Transl Med</source><year>2018</year><volume>7</volume><fpage>456</fpage><lpage>467</lpage><pub-id 
pub-id-type="pmid">29635868</pub-id></element-citation></ref><ref id="CR55"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Suzuki</surname><given-names>K</given-names></name><name><surname>Tsunekawa</surname><given-names>Y</given-names></name><name><surname>Hernandez-Benitez</surname><given-names>R</given-names></name><name><surname>Wu</surname><given-names>J</given-names></name><name><surname>Zhu</surname><given-names>J</given-names></name><name><surname>Kim</surname><given-names>EJ</given-names></name><name><surname>Hatanaka</surname><given-names>F</given-names></name><name><surname>Yamamoto</surname><given-names>M</given-names></name><name><surname>Araoka</surname><given-names>T</given-names></name><name><surname>Li</surname><given-names>Z</given-names></name><etal/></person-group><article-title>In vivo genome editing via CRISPR/Cas9 mediated homology-independent targeted integration</article-title><source>Nature</source><year>2016</year><volume>540</volume><fpage>144</fpage><lpage>149</lpage><pub-id pub-id-type="pmid">27851729</pub-id></element-citation></ref><ref id="CR56"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Szklarczyk</surname><given-names>D</given-names></name><name><surname>Morris</surname><given-names>JH</given-names></name><name><surname>Cook</surname><given-names>H</given-names></name><name><surname>Kuhn</surname><given-names>M</given-names></name><name><surname>Wyder</surname><given-names>S</given-names></name><name><surname>Simonovic</surname><given-names>M</given-names></name><name><surname>Santos</surname><given-names>A</given-names></name><name><surname>Doncheva</surname><given-names>NT</given-names></name><name><surname>Roth</surname><given-names>A</given-names></name><name><surname>Bork</surname><given-names>P</given-names></name><etal/></person-group><article-title>The STRING database in 2017: quality-controlled protein-protein 
association networks, made broadly accessible</article-title><source>Nucleic Acids Res</source><year>2017</year><volume>45</volume><fpage>D362</fpage><lpage>D368</lpage><pub-id pub-id-type="pmid">27924014</pub-id></element-citation></ref><ref id="CR57"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tacutu</surname><given-names>R</given-names></name><name><surname>Craig</surname><given-names>T</given-names></name><name><surname>Budovsky</surname><given-names>A</given-names></name><name><surname>Wuttke</surname><given-names>D</given-names></name><name><surname>Lehmann</surname><given-names>G</given-names></name><name><surname>Taranukha</surname><given-names>D</given-names></name><name><surname>Costa</surname><given-names>J</given-names></name><name><surname>Fraifeld</surname><given-names>VE</given-names></name><name><surname>de Magalhaes</surname><given-names>JP</given-names></name></person-group><article-title>Human ageing genomic resources: integrated databases and tools for the biology and genetics of ageing</article-title><source>Nucleic Acids Res</source><year>2013</year><volume>41</volume><fpage>D1027</fpage><lpage>D1033</lpage><pub-id pub-id-type="pmid">23193293</pub-id></element-citation></ref><ref id="CR58"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Tompkins</surname><given-names>BA</given-names></name><name><surname>DiFede</surname><given-names>DL</given-names></name><name><surname>Khan</surname><given-names>A</given-names></name><name><surname>Landin</surname><given-names>AM</given-names></name><name><surname>Schulman</surname><given-names>IH</given-names></name><name><surname>Pujol</surname><given-names>MV</given-names></name><name><surname>Heldman</surname><given-names>AW</given-names></name><name><surname>Miki</surname><given-names>R</given-names></name><name><surname>Goldschmidt-Clermont</surname><given-names>PJ</given-names></name><name><surname>Goldstein</surname><given-names>BJ</given-names></name><etal/></person-group><article-title>Allogeneic mesenchymal stem cells ameliorate aging frailty: a phase II randomized, double-blind, placebo-controlled clinical trial</article-title><source>J Gerontol A</source><year>2017</year><volume>72</volume><fpage>1513</fpage><lpage>1522</lpage></element-citation></ref><ref id="CR59"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Tripathi</surname><given-names>S</given-names></name><name><surname>Pohl</surname><given-names>MO</given-names></name><name><surname>Zhou</surname><given-names>Y</given-names></name><name><surname>Rodriguez-Frandsen</surname><given-names>A</given-names></name><name><surname>Wang</surname><given-names>G</given-names></name><name><surname>Stein</surname><given-names>DA</given-names></name><name><surname>Moulton</surname><given-names>HM</given-names></name><name><surname>DeJesus</surname><given-names>P</given-names></name><name><surname>Che</surname><given-names>J</given-names></name><name><surname>Mulder</surname><given-names>LC</given-names></name><etal/></person-group><article-title>Meta- and orthogonal integration of influenza &#x0201c;OMICs&#x0201d; data defines a role for UBR4 in virus budding</article-title><source>Cell Host 
Microbe</source><year>2015</year><volume>18</volume><fpage>723</fpage><lpage>735</lpage><pub-id pub-id-type="pmid">26651948</pub-id></element-citation></ref><ref id="CR60"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>van der Horst</surname><given-names>GT</given-names></name><name><surname>van Steeg</surname><given-names>H</given-names></name><name><surname>Berg</surname><given-names>RJ</given-names></name><name><surname>van Gool</surname><given-names>AJ</given-names></name><name><surname>de Wit</surname><given-names>J</given-names></name><name><surname>Weeda</surname><given-names>G</given-names></name><name><surname>Morreau</surname><given-names>H</given-names></name><name><surname>Beems</surname><given-names>RB</given-names></name><name><surname>van Kreijl</surname><given-names>CF</given-names></name><name><surname>de Gruijl</surname><given-names>FR</given-names></name><etal/></person-group><article-title>Defective transcription-coupled repair in Cockayne syndrome B mice is associated with skin cancer predisposition</article-title><source>Cell</source><year>1997</year><volume>89</volume><fpage>425</fpage><lpage>435</lpage><pub-id pub-id-type="pmid">9150142</pub-id></element-citation></ref><ref id="CR61"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>van der Horst</surname><given-names>GT</given-names></name><name><surname>Meira</surname><given-names>L</given-names></name><name><surname>Gorgels</surname><given-names>TG</given-names></name><name><surname>de 
Wit</surname><given-names>J</given-names></name><name><surname>Velasco-Miguel</surname><given-names>S</given-names></name><name><surname>Richardson</surname><given-names>JA</given-names></name><name><surname>Kamp</surname><given-names>Y</given-names></name><name><surname>Vreeswijk</surname><given-names>MP</given-names></name><name><surname>Smit</surname><given-names>B</given-names></name><name><surname>Bootsma</surname><given-names>D</given-names></name><etal/></person-group><article-title>UVB radiation-induced cancer predisposition in Cockayne syndrome group A (Csa) mutant mice</article-title><source>DNA Repair</source><year>2002</year><volume>1</volume><fpage>143</fpage><lpage>157</lpage><pub-id pub-id-type="pmid">12509261</pub-id></element-citation></ref><ref id="CR62"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>van der Pluijm</surname><given-names>I</given-names></name><name><surname>Garinis</surname><given-names>GA</given-names></name><name><surname>Brandt</surname><given-names>RM</given-names></name><name><surname>Gorgels</surname><given-names>TG</given-names></name><name><surname>Wijnhoven</surname><given-names>SW</given-names></name><name><surname>Diderich</surname><given-names>KE</given-names></name><name><surname>de Wit</surname><given-names>J</given-names></name><name><surname>Mitchell</surname><given-names>JR</given-names></name><name><surname>van Oostrom</surname><given-names>C</given-names></name><name><surname>Beems</surname><given-names>R</given-names></name><etal/></person-group><article-title>Impaired genome maintenance suppresses the growth hormone&#x02013;insulin-like growth factor 1 axis in mice with Cockayne syndrome</article-title><source>PLoS Biol</source><year>2007</year><volume>5</volume><fpage>e2</fpage><pub-id pub-id-type="pmid">17326724</pub-id></element-citation></ref><ref id="CR63"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Velez-Cruz</surname><given-names>R</given-names></name><name><surname>Egly</surname><given-names>JM</given-names></name></person-group><article-title>Cockayne syndrome group B (CSB) protein: at the crossroads of transcriptional networks</article-title><source>Mech Ageing Dev</source><year>2013</year><volume>134</volume><fpage>234</fpage><lpage>242</lpage><pub-id pub-id-type="pmid">23562425</pub-id></element-citation></ref><ref id="CR64"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Velez-Cruz</surname><given-names>R</given-names></name><name><surname>Zadorin</surname><given-names>AS</given-names></name><name><surname>Coin</surname><given-names>F</given-names></name><name><surname>Egly</surname><given-names>JM</given-names></name></person-group><article-title>Sirt1 suppresses RNA synthesis after UV irradiation in combined xeroderma pigmentosum group D/Cockayne syndrome (XP-D/CS) cells</article-title><source>Proc Natl Acad Sci USA</source><year>2013</year><volume>110</volume><fpage>E212</fpage><lpage>E220</lpage><pub-id pub-id-type="pmid">23267107</pub-id></element-citation></ref><ref id="CR65"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Vessoni</surname><given-names>AT</given-names></name><name><surname>Herai</surname><given-names>RH</given-names></name><name><surname>Karpiak</surname><given-names>JV</given-names></name><name><surname>Leal</surname><given-names>AM</given-names></name><name><surname>Trujillo</surname><given-names>CA</given-names></name><name><surname>Quinet</surname><given-names>A</given-names></name><name><surname>Agnez Lima</surname><given-names>LF</given-names></name><name><surname>Menck</surname><given-names>CF</given-names></name><name><surname>Muotri</surname><given-names>AR</given-names></name></person-group><article-title>Cockayne syndrome-derived neurons display reduced synapse density and 
altered neural network synchrony</article-title><source>Hum Mol Genet</source><year>2016</year><volume>25</volume><fpage>1271</fpage><lpage>1280</lpage><pub-id pub-id-type="pmid">26755826</pub-id></element-citation></ref><ref id="CR66"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname><given-names>S</given-names></name><name><surname>Wang</surname><given-names>X</given-names></name><name><surname>Wu</surname><given-names>Y</given-names></name><name><surname>Han</surname><given-names>C</given-names></name></person-group><article-title>IGF-1R signaling is essential for the proliferation of cultured mouse spermatogonial stem cells by promoting the G2/M progression of the cell cycle</article-title><source>Stem Cells Dev</source><year>2015</year><volume>24</volume><fpage>471</fpage><lpage>483</lpage><pub-id pub-id-type="pmid">25356638</pub-id></element-citation></ref><ref id="CR67"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname><given-names>S</given-names></name><name><surname>Wang</surname><given-names>X</given-names></name><name><surname>Ma</surname><given-names>L</given-names></name><name><surname>Lin</surname><given-names>X</given-names></name><name><surname>Zhang</surname><given-names>D</given-names></name><name><surname>Li</surname><given-names>Z</given-names></name><name><surname>Wu</surname><given-names>Y</given-names></name><name><surname>Zheng</surname><given-names>C</given-names></name><name><surname>Feng</surname><given-names>X</given-names></name><name><surname>Liao</surname><given-names>S</given-names></name><etal/></person-group><article-title>Retinoic acid is sufficient for the in vitro induction of mouse spermatocytes</article-title><source>Stem Cell Rep</source><year>2016</year><volume>7</volume><fpage>80</fpage><lpage>94</lpage></element-citation></ref><ref id="CR68"><element-citation 
publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname><given-names>LX</given-names></name><name><surname>Yi</surname><given-names>F</given-names></name><name><surname>Fu</surname><given-names>LN</given-names></name><name><surname>Yang</surname><given-names>JP</given-names></name><name><surname>Wang</surname><given-names>S</given-names></name><name><surname>Wang</surname><given-names>ZX</given-names></name><name><surname>Suzuki</surname><given-names>K</given-names></name><name><surname>Sun</surname><given-names>L</given-names></name><name><surname>Xu</surname><given-names>XL</given-names></name><name><surname>Yu</surname><given-names>Y</given-names></name><etal/></person-group><article-title>CRISPR/Cas9-mediated targeted gene correction in amyotrophic lateral sclerosis patient iPSCs</article-title><source>Protein Cell</source><year>2017</year><volume>8</volume><fpage>365</fpage><lpage>378</lpage><pub-id pub-id-type="pmid">28401346</pub-id></element-citation></ref><ref id="CR69"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname><given-names>P</given-names></name><name><surname>Liu</surname><given-names>Z</given-names></name><name><surname>Zhang</surname><given-names>X</given-names></name><name><surname>Li</surname><given-names>J</given-names></name><name><surname>Sun</surname><given-names>L</given-names></name><name><surname>Ju</surname><given-names>Z</given-names></name><name><surname>Li</surname><given-names>J</given-names></name><name><surname>Chan</surname><given-names>P</given-names></name><name><surname>Liu</surname><given-names>GH</given-names></name><name><surname>Zhang</surname><given-names>W</given-names></name><etal/></person-group><article-title>CRISPR/Cas9-mediated gene knockout reveals a guardian role of NF-kappaB/RelA in maintaining the homeostasis of human vascular cells</article-title><source>Protein 
Cell</source><year>2018</year><volume>9</volume><fpage>945</fpage><lpage>965</lpage><pub-id pub-id-type="pmid">29968158</pub-id></element-citation></ref><ref id="CR70"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname><given-names>S</given-names></name><name><surname>Hu</surname><given-names>B</given-names></name><name><surname>Ding</surname><given-names>Z</given-names></name><name><surname>Dang</surname><given-names>Y</given-names></name><name><surname>Wu</surname><given-names>J</given-names></name><name><surname>Li</surname><given-names>D</given-names></name><name><surname>Liu</surname><given-names>X</given-names></name><name><surname>Xiao</surname><given-names>B</given-names></name><name><surname>Zhang</surname><given-names>W</given-names></name><name><surname>Ren</surname><given-names>R</given-names></name><etal/></person-group><article-title>ATF6 safeguards organelle homeostasis and cellular aging in human mesenchymal stem cells</article-title><source>Cell Discov</source><year>2018</year><volume>4</volume><fpage>2</fpage><pub-id pub-id-type="pmid">29423270</pub-id></element-citation></ref><ref id="CR71"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wang</surname><given-names>S</given-names></name><name><surname>Liu</surname><given-names>Z</given-names></name><name><surname>Ye</surname><given-names>Y</given-names></name><name><surname>Li</surname><given-names>B</given-names></name><name><surname>Liu</surname><given-names>T</given-names></name><name><surname>Zhang</surname><given-names>W</given-names></name><name><surname>Liu</surname><given-names>GH</given-names></name><name><surname>Zhang</surname><given-names>YA</given-names></name><name><surname>Qu</surname><given-names>J</given-names></name><name><surname>Xu</surname><given-names>D</given-names></name><etal/></person-group><article-title>Ectopic hTERT expression facilitates reprograming of 
fibroblasts derived from patients with Werner syndrome as a WS cellular model</article-title><source>Cell Death Dis</source><year>2018</year><volume>9</volume><fpage>923</fpage><pub-id pub-id-type="pmid">30206203</pub-id></element-citation></ref><ref id="CR72"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Wu</surname><given-names>Z</given-names></name><name><surname>Zhang</surname><given-names>W</given-names></name><name><surname>Song</surname><given-names>M</given-names></name><name><surname>Wang</surname><given-names>W</given-names></name><name><surname>Wei</surname><given-names>G</given-names></name><name><surname>Li</surname><given-names>W</given-names></name><name><surname>Lei</surname><given-names>J</given-names></name><name><surname>Huang</surname><given-names>Y</given-names></name><name><surname>Sang</surname><given-names>Y</given-names></name><name><surname>Chan</surname><given-names>P</given-names></name><etal/></person-group><article-title>Differential stem cell aging kinetics in Hutchinson&#x02013;Gilford progeria syndrome and Werner syndrome</article-title><source>Protein Cell</source><year>2018</year><volume>9</volume><fpage>333</fpage><lpage>350</lpage><pub-id pub-id-type="pmid">29476423</pub-id></element-citation></ref><ref id="CR73"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yamada</surname><given-names>A</given-names></name><name><surname>Masutani</surname><given-names>C</given-names></name><name><surname>Hanaoka</surname><given-names>F</given-names></name></person-group><article-title>Detection of reduced RNA synthesis in UV-irradiated Cockayne syndrome group B cells using an isolated nuclear system</article-title><source>Biochim Biophys Acta</source><year>2002</year><volume>1592</volume><fpage>129</fpage><lpage>134</lpage><pub-id pub-id-type="pmid">12379475</pub-id></element-citation></ref><ref id="CR74"><element-citation 
publication-type="journal"><person-group person-group-type="author"><name><surname>Yan</surname><given-names>P</given-names></name><name><surname>Li</surname><given-names>Q</given-names></name><name><surname>Wang</surname><given-names>L</given-names></name><name><surname>Lu</surname><given-names>P</given-names></name><name><surname>Suzuki</surname><given-names>K</given-names></name><name><surname>Liu</surname><given-names>Z</given-names></name><name><surname>Lei</surname><given-names>J</given-names></name><name><surname>Li</surname><given-names>W</given-names></name><name><surname>He</surname><given-names>X</given-names></name><name><surname>Wang</surname><given-names>S</given-names></name><etal/></person-group><article-title>FOXO3-engineered human ESC-derived vascular cells Promote vascular protection and regeneration</article-title><source>Cell Stem Cell</source><year>2019</year><pub-id pub-id-type="doi">10.1016/j.stem.2018.12.002</pub-id><pub-id pub-id-type="pmid">31173712</pub-id></element-citation></ref><ref id="CR75"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yang</surname><given-names>J</given-names></name><name><surname>Li</surname><given-names>J</given-names></name><name><surname>Suzuki</surname><given-names>K</given-names></name><name><surname>Liu</surname><given-names>X</given-names></name><name><surname>Wu</surname><given-names>J</given-names></name><name><surname>Zhang</surname><given-names>W</given-names></name><name><surname>Ren</surname><given-names>R</given-names></name><name><surname>Zhang</surname><given-names>W</given-names></name><name><surname>Chan</surname><given-names>P</given-names></name><name><surname>Izpisua Belmonte</surname><given-names>JC</given-names></name><etal/></person-group><article-title>Genetic enhancement in cultured human adult stem cells conferred by a single nucleotide recoding</article-title><source>Cell 
Res</source><year>2017</year><volume>27</volume><fpage>1178</fpage><lpage>1181</lpage><pub-id pub-id-type="pmid">28685772</pub-id></element-citation></ref><ref id="CR76"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Yu</surname><given-names>QC</given-names></name><name><surname>Song</surname><given-names>W</given-names></name><name><surname>Wang</surname><given-names>D</given-names></name><name><surname>Zeng</surname><given-names>YA</given-names></name></person-group><article-title>Identification of blood vascular endothelial stem cells by the expression of protein C receptor</article-title><source>Cell Res</source><year>2016</year><volume>26</volume><fpage>1079</fpage><lpage>1098</lpage><pub-id pub-id-type="pmid">27364685</pub-id></element-citation></ref><ref id="CR77"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname><given-names>W</given-names></name><name><surname>Li</surname><given-names>J</given-names></name><name><surname>Suzuki</surname><given-names>K</given-names></name><name><surname>Qu</surname><given-names>J</given-names></name><name><surname>Wang</surname><given-names>P</given-names></name><name><surname>Zhou</surname><given-names>J</given-names></name><name><surname>Liu</surname><given-names>X</given-names></name><name><surname>Ren</surname><given-names>R</given-names></name><name><surname>Xu</surname><given-names>X</given-names></name><name><surname>Ocampo</surname><given-names>A</given-names></name><etal/></person-group><article-title>Aging stem cells. 
A Werner syndrome stem cell model unveils heterochromatin alterations as a driver of human aging</article-title><source>Science</source><year>2015</year><volume>348</volume><fpage>1160</fpage><lpage>1163</lpage><pub-id pub-id-type="pmid">25931448</pub-id></element-citation></ref><ref id="CR78"><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname><given-names>W</given-names></name><name><surname>Wan</surname><given-names>H</given-names></name><name><surname>Feng</surname><given-names>G</given-names></name><name><surname>Qu</surname><given-names>J</given-names></name><name><surname>Wang</surname><given-names>J</given-names></name><name><surname>Jing</surname><given-names>Y</given-names></name><name><surname>Ren</surname><given-names>R</given-names></name><name><surname>Liu</surname><given-names>Z</given-names></name><name><surname>Zhang</surname><given-names>L</given-names></name><name><surname>Chen</surname><given-names>Z</given-names></name><etal/></person-group><article-title>SIRT6 deficiency results in developmental retardation in cynomolgus monkeys</article-title><source>Nature</source><year>2018</year><volume>560</volume><fpage>661</fpage><lpage>665</lpage><pub-id pub-id-type="pmid">30135584</pub-id></element-citation></ref><ref id="CR79"><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Zhang</surname><given-names>X</given-names></name><name><surname>Liu</surname><given-names>Z</given-names></name><name><surname>Liu</surname><given-names>X</given-names></name><name><surname>Wang</surname><given-names>S</given-names></name><name><surname>Zhang</surname><given-names>Y</given-names></name><name><surname>He</surname><given-names>X</given-names></name><name><surname>Sun</surname><given-names>S</given-names></name><name><surname>Ma</surname><given-names>S</given-names></name><name><surname>Shyh-Chang</surname><given-names>N</given-names></name><name><surname>Liu</surname><given-names>F</given-names></name><etal/></person-group><article-title>Telomere-dependent and telomere-independent roles of RAP1 in regulating human stem cell homeostasis</article-title><source>Protein Cell</source><year>2019</year><pub-id pub-id-type="doi">10.1007/s13238-019-0610-7</pub-id><pub-id pub-id-type="pmid">31781970</pub-id></element-citation></ref></ref-list></back></article>
diff --git a/jcore-pmc-db-reader/src/test/resources/testdocs/PMC7511315.xml b/jcore-pmc-db-reader/src/test/resources/testdocs/PMC7511315.xml
new file mode 100644
index 000000000..b28626ba1
--- /dev/null
+++ b/jcore-pmc-db-reader/src/test/resources/testdocs/PMC7511315.xml
@@ -0,0 +1,28 @@
+<!DOCTYPE article
+PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Archiving and Interchange DTD with MathML3 v1.3 20210610//EN" "JATS-archivearticle1-3-mathml3.dtd">
+<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.3"><?properties open_access?><processing-meta base-tagset="archiving" mathml-version="3.0" table-model="xhtml" tagset-family="jats"><restricted-by>pmc</restricted-by></processing-meta><front><journal-meta><journal-id journal-id-type="nlm-ta">Nat Commun</journal-id><journal-id journal-id-type="iso-abbrev">Nat Commun</journal-id><journal-title-group><journal-title>Nature Communications</journal-title></journal-title-group><issn pub-type="epub">2041-1723</issn><publisher><publisher-name>Nature Publishing Group UK</publisher-name><publisher-loc>London</publisher-loc></publisher></journal-meta><article-meta><article-id pub-id-type="pmid">32968055</article-id><article-id pub-id-type="pmc">PMC7511315</article-id><article-id pub-id-type="publisher-id">18396</article-id><article-id pub-id-type="doi">10.1038/s41467-020-18396-7</article-id><article-categories><subj-group subj-group-type="heading"><subject>Article</subject></subj-group></article-categories><title-group><article-title>Transcriptomic profiling of human cardiac cells predicts protein kinase inhibitor-associated cardiotoxicity</article-title></title-group><contrib-group><contrib contrib-type="author" equal-contrib="yes"><contrib-id contrib-id-type="orcid">http://orcid.org/0000-0002-1664-7314</contrib-id><name><surname>van Hasselt</surname><given-names>J. G. 
Coen</given-names></name><xref ref-type="aff" rid="Aff1">1</xref><xref ref-type="aff" rid="Aff2">2</xref></contrib><contrib contrib-type="author" equal-contrib="yes"><name><surname>Rahman</surname><given-names>Rayees</given-names></name><xref ref-type="aff" rid="Aff1">1</xref></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">http://orcid.org/0000-0002-1362-6534</contrib-id><name><surname>Hansen</surname><given-names>Jens</given-names></name><xref ref-type="aff" rid="Aff1">1</xref></contrib><contrib contrib-type="author"><name><surname>Stern</surname><given-names>Alan</given-names></name><xref ref-type="aff" rid="Aff1">1</xref></contrib><contrib contrib-type="author"><name><surname>Shim</surname><given-names>Jaehee V.</given-names></name><xref ref-type="aff" rid="Aff1">1</xref></contrib><contrib contrib-type="author"><name><surname>Xiong</surname><given-names>Yuguang</given-names></name><xref ref-type="aff" rid="Aff1">1</xref></contrib><contrib contrib-type="author"><name><surname>Pickard</surname><given-names>Amanda</given-names></name><xref ref-type="aff" rid="Aff1">1</xref></contrib><contrib contrib-type="author"><name><surname>Jayaraman</surname><given-names>Gomathi</given-names></name><xref ref-type="aff" rid="Aff1">1</xref></contrib><contrib contrib-type="author"><name><surname>Hu</surname><given-names>Bin</given-names></name><xref ref-type="aff" rid="Aff1">1</xref></contrib><contrib contrib-type="author"><name><surname>Mahajan</surname><given-names>Milind</given-names></name><xref ref-type="aff" rid="Aff3">3</xref></contrib><contrib contrib-type="author"><name><surname>Gallo</surname><given-names>James M.</given-names></name><xref ref-type="aff" rid="Aff1">1</xref><xref ref-type="aff" rid="Aff4">4</xref></contrib><contrib contrib-type="author"><name><surname>Goldfarb</surname><given-names>Joseph</given-names></name><xref ref-type="aff" rid="Aff1">1</xref></contrib><contrib 
contrib-type="author"><name><surname>Sobie</surname><given-names>Eric A.</given-names></name><xref ref-type="aff" rid="Aff1">1</xref></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">http://orcid.org/0000-0002-0341-0705</contrib-id><name><surname>Birtwistle</surname><given-names>Marc R.</given-names></name><xref ref-type="aff" rid="Aff1">1</xref><xref ref-type="aff" rid="Aff5">5</xref></contrib><contrib contrib-type="author" corresp="yes"><contrib-id contrib-id-type="orcid">http://orcid.org/0000-0003-4007-7814</contrib-id><name><surname>Schlessinger</surname><given-names>Avner</given-names></name><address><email>avner.schlessinger@mssm.edu</email></address><xref ref-type="aff" rid="Aff1">1</xref></contrib><contrib contrib-type="author" corresp="yes"><contrib-id contrib-id-type="orcid">http://orcid.org/0000-0001-6137-109X</contrib-id><name><surname>Azeloglu</surname><given-names>Evren U.</given-names></name><address><email>evren.azeloglu@mssm.edu</email></address><xref ref-type="aff" rid="Aff1">1</xref><xref ref-type="aff" rid="Aff6">6</xref></contrib><contrib contrib-type="author" corresp="yes"><contrib-id contrib-id-type="orcid">http://orcid.org/0000-0002-7814-0180</contrib-id><name><surname>Iyengar</surname><given-names>Ravi</given-names></name><address><email>ravi.iyengar@mssm.edu</email></address><xref ref-type="aff" rid="Aff1">1</xref></contrib><aff id="Aff1"><label>1</label><institution-wrap><institution-id institution-id-type="GRID">grid.59734.3c</institution-id><institution-id institution-id-type="ISNI">0000 0001 0670 2351</institution-id><institution>Department of Pharmacological Sciences and Systems Biology Center New York, </institution><institution>Icahn School of Medicine at Mount Sinai, </institution></institution-wrap>New York, NY USA </aff><aff id="Aff2"><label>2</label><institution-wrap><institution-id institution-id-type="GRID">grid.5132.5</institution-id><institution-id institution-id-type="ISNI">0000 0001 2312 
1970</institution-id><institution>Division of Systems Biomedicine and Pharmacology, Leiden Academic Centre for Drug Research, Leiden University, </institution></institution-wrap>Leiden, Netherlands </aff><aff id="Aff3"><label>3</label><institution-wrap><institution-id institution-id-type="GRID">grid.59734.3c</institution-id><institution-id institution-id-type="ISNI">0000 0001 0670 2351</institution-id><institution>Department of Genetics and Genomic Sciences, and Icahn Institute for Genomic Sciences and Multiscale Biology, </institution><institution>Icahn School of Medicine at Mount Sinai, </institution></institution-wrap>New York, NY USA </aff><aff id="Aff4"><label>4</label><institution-wrap><institution-id institution-id-type="GRID">grid.273335.3</institution-id><institution-id institution-id-type="ISNI">0000 0004 1936 9887</institution-id><institution>Department of Pharmaceutical Sciences, </institution><institution>School of Pharmacy and Pharmaceutical Sciences, University at Buffalo, </institution></institution-wrap>Buffalo, NY USA </aff><aff id="Aff5"><label>5</label><institution-wrap><institution-id institution-id-type="GRID">grid.26090.3d</institution-id><institution-id institution-id-type="ISNI">0000 0001 0665 0280</institution-id><institution>Department of Chemical and Biomolecular Engineering, </institution><institution>Clemson University, </institution></institution-wrap>Clemson, SC USA </aff><aff id="Aff6"><label>6</label><institution-wrap><institution-id institution-id-type="GRID">grid.59734.3c</institution-id><institution-id institution-id-type="ISNI">0000 0001 0670 2351</institution-id><institution>Deparment of Medicine, </institution><institution>Division of Nephrology, Icahn School of Medicine at Mount Sinai, </institution></institution-wrap>New York, NY USA </aff></contrib-group><pub-date pub-type="epub"><day>23</day><month>9</month><year>2020</year></pub-date><pub-date 
pub-type="pmc-release"><day>23</day><month>9</month><year>2020</year></pub-date><pub-date pub-type="collection"><year>2020</year></pub-date><volume>11</volume><elocation-id>4809</elocation-id><history><date date-type="received"><day>1</day><month>2</month><year>2017</year></date><date date-type="accepted"><day>18</day><month>8</month><year>2020</year></date></history><permissions><copyright-statement>&#x000a9; The Author(s) 2020</copyright-statement><license><ali:license_ref specific-use="textmining" content-type="ccbylicense">https://creativecommons.org/licenses/by/4.0/</ali:license_ref><license-p><bold>Open Access</bold> This article is licensed under a Creative Commons Attribution 4.0 International License, which permits use, sharing, adaptation, distribution and reproduction in any medium or format, as long as you give appropriate credit to the original author(s) and the source, provide a link to the Creative Commons license, and indicate if changes were made. The images or other third party material in this article are included in the article&#x02019;s Creative Commons license, unless indicated otherwise in a credit line to the material. If material is not included in the article&#x02019;s Creative Commons license and your intended use is not permitted by statutory regulation or exceeds the permitted use, you will need to obtain permission directly from the copyright holder. To view a copy of this license, visit <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">http://creativecommons.org/licenses/by/4.0/</ext-link>.</license-p></license></permissions><abstract id="Abs1"><p id="Par1">Kinase inhibitors (KIs) represent an important class of anti-cancer drugs. Although cardiotoxicity is a serious adverse event associated with several KIs, the reasons remain poorly understood, and its prediction remains challenging. 
We obtain transcriptional profiles of human heart-derived primary cardiomyocyte like cell lines treated with a panel of 26 FDA-approved KIs and classify their effects on subcellular pathways and processes. Individual cardiotoxicity patient reports for these KIs, obtained from the FDA Adverse Event Reporting System, are used to compute relative risk scores. These are then combined with the cell line-derived transcriptomic datasets through elastic net regression analysis to identify a gene signature that can predict risk of cardiotoxicity. We also identify relationships between cardiotoxicity risk and structural/binding profiles of individual KIs. We conclude that acute transcriptomic changes in cell-based assays combined with drug substructures are predictive of KI-induced cardiotoxicity risk, and that they can be informative for future drug discovery.</p></abstract><abstract id="Abs2" abstract-type="web-summary"><p id="Par2">Cardiotoxic adverse events associated with kinase inhibitors are a growing concern in clinical oncology. Here the authors use cellular transcriptomic responses of human cardiomyocytes treated with protein kinase inhibitors and the associated drug structural signatures to determine an integrated predictive signature of cardiotoxicity.</p></abstract><kwd-group kwd-group-type="npg-subject"><title>Subject terms</title><kwd>Toxicology</kwd><kwd>Predictive markers</kwd><kwd>Cardiology</kwd></kwd-group><funding-group><award-group><funding-source><institution-wrap><institution-id institution-id-type="FundRef">https://doi.org/10.13039/100000051</institution-id><institution>U.S. 
Department of Health &#x00026; Human Services | NIH | National Human Genome Research Institute (NHGRI)</institution></institution-wrap></funding-source><award-id>U54HG008098</award-id><principal-award-recipient><name><surname>Iyengar</surname><given-names>Ravi</given-names></name></principal-award-recipient></award-group></funding-group><custom-meta-group><custom-meta><meta-name>issue-copyright-statement</meta-name><meta-value>&#x000a9; The Author(s) 2020</meta-value></custom-meta></custom-meta-group></article-meta></front><body><sec id="Sec1" sec-type="introduction"><title>Introduction</title><p id="Par3">Protein kinase inhibitors (KIs) are an important class of therapeutics used for the treatment of various forms of cancer<sup><xref ref-type="bibr" rid="CR1">1</xref>,<xref ref-type="bibr" rid="CR2">2</xref></sup> and other diseases. There are currently more than 48 KIs approved for clinical use by the U.S. Food and Drug Administration (FDA)&#x000a0;and other regulatory agencies<sup><xref ref-type="bibr" rid="CR3">3</xref></sup>, and more than 250 KIs are undergoing clinical trials or are in development<sup><xref ref-type="bibr" rid="CR4">4</xref>&#x02013;<xref ref-type="bibr" rid="CR6">6</xref></sup>. The clinical effectiveness of KIs as cancer drugs has led to a broad effort to develop drugs that are more efficacious and have reduced the propensity for adverse events. Cardiotoxicity (CT) is a clinically important adverse event associated with several KIs<sup><xref ref-type="bibr" rid="CR7">7</xref>&#x02013;<xref ref-type="bibr" rid="CR10">10</xref></sup>. KI-associated CT manifests as loss of cardiomyocyte function, which can lead to heart failure<sup><xref ref-type="bibr" rid="CR11">11</xref></sup>. 
Given the extensive therapeutic potential of KIs, approaches to identify and subsequently mitigate the risk for CT during early development of novel KIs and during clinical administration are urgently required.</p><p id="Par4">We do not yet sufficiently understand the mechanisms underlying KI-associated CT. The human kinome consists of more than 500 protein kinases<sup><xref ref-type="bibr" rid="CR12">12</xref></sup>. Given that many KIs exhibit multitarget pharmacology<sup><xref ref-type="bibr" rid="CR13">13</xref></sup>, inhibition of multiple protein kinases in cardiomyocytes may lead to adverse drug effects such as CT<sup><xref ref-type="bibr" rid="CR14">14</xref></sup>. For individual KIs, pathways involved in mitochondrial function<sup><xref ref-type="bibr" rid="CR8">8</xref>,<xref ref-type="bibr" rid="CR15">15</xref>,<xref ref-type="bibr" rid="CR16">16</xref></sup>, endoplasmic reticulum stress response<sup><xref ref-type="bibr" rid="CR16">16</xref></sup>, and AMPK inhibition<sup><xref ref-type="bibr" rid="CR17">17</xref></sup>, have been shown to be associated with KI-induced CT<sup><xref ref-type="bibr" rid="CR18">18</xref></sup>. Overall, however, the general mechanisms of KI-induced CT are still poorly understood<sup><xref ref-type="bibr" rid="CR18">18</xref></sup>.</p><p id="Par5">Obtaining quantitative clinical risk scores for KI-associated CT is also challenging, as the risk for KI-associated CT has not been systematically studied. The FDA adverse event report system (FAERS) database has been previously applied to quantify the risk of ADRs<sup><xref ref-type="bibr" rid="CR19">19</xref>&#x02013;<xref ref-type="bibr" rid="CR21">21</xref></sup>. The FAERS database contains over 9 million individual drug-associated adverse-event reports reported by industry and physicians. Through statistical analyses of the FAERS database, relatively unbiased estimates for the relative risk for specific ADRs can be computed. 
Such risk scores are clinically relevant as they are based on real-life patient population, and they are not solely based on selected patient cohorts. We previously used such analyses of the FAERS database in combination with systems&#x02019; pharmacology-based approaches to obtain mechanistic insights into adverse-event mechanisms<sup><xref ref-type="bibr" rid="CR21">21</xref>,<xref ref-type="bibr" rid="CR22">22</xref></sup>.</p><p id="Par6">In the current study, generated as part of the NIH-funded Library of Integrated Network Based Cellular Signatures (LINCS) Drug Toxicity Signature Generation Center (DToxS), we take a top&#x02013;down global approach to determine if a comprehensive profiling of gene expression changes in human cardiomyocytes can provide insight into pathways associated with KI-induced CT, and to potentially predict the risk of CT. The rationale for this approach is based on the central assumption that CT largely originates from cardiomyocytes where one or more protein kinases contribute to the pathophysiology. Since progression to heart failure takes several months to manifest, it is not immediately obvious if gene expression changes measured after drug treatment for a few days would have predictive value. Thus, a second important assumption is that early changes in gene expression upon drug treatment of cardiomyocytes are indicative of later physiological events. We test the validity of our assumptions by experimentally obtaining gene-expression patterns for the different KIs, and if these patterns could be selectively associated with the clinical risk of CT for each KI, thereby providing gene-expression signatures for KI-associated CT.</p><p id="Par7">We report the generation of transcriptomic profiles from four human primary cardiomyocyte-like cell lines. 
These profiles are generated using 23 KIs that were FDA-approved and used extensively at the time of experimental design, such that an adequate number of clinical reports have been collected. Drugs are used at their imputed therapeutic concentrations. Through this pan-KI transcriptomic profiling, we obtained insights into the affected pathways that may be related to KI-associated CT. We show that selective patterns of gene expression can be associated with the FAERS-derived clinical risk for KI-associated CT, which may be highly relevant to identify KI drug candidates at risk for showing clinical CT. We also describe the relationships between KI CT risk and structural properties of KIs, highlighting the potential for re-engineering small molecules that exhibit a high risk for CT.</p></sec><sec id="Sec2" sec-type="results"><title>Results</title><sec id="Sec3"><title>Differences in CT risk of kinase inhibitors</title><p id="Par8">In order to obtain unbiased estimates of clinical risk of KI-associated CT, we analyzed individual adverse-event reporting data from FAERS (Fig.&#x000a0;<xref rid="Fig1" ref-type="fig">1a</xref>). Reporting odds ratios (RORs) were derived based on the relative frequencies of AE occurrence of each KI compared to all KIs. These risk scores provide a relative ranking of KI-associated toxicity. Kinase inhibitors were shown to have pronounced differences in the relative risk of CT (Fig.&#x000a0;<xref rid="Fig1" ref-type="fig">1b</xref>). When comparing the ranking of risk scores derived from FAERS with adverse drug-reaction (ADR) reporting data from the World Health Organization (WHO) ADR reporting database, we find that the ranking from these databases largely agrees (Fig.&#x000a0;<xref rid="Fig1" ref-type="fig">1c</xref>), indicating the general consistency of the clinical risk scores across databases.<fig id="Fig1"><label>Fig. 
1</label><caption><title>Cardiotoxicity of protein kinase inhibitors.</title><p><bold>a</bold> Approach to quantify relative clinical cardiotoxicity risk scores for kinase inhibitors from the FDA Adverse Event Reporting System (FAERS) database. <bold>b</bold> Reporting odds ratio (mean and 95% confidence interval of computed odds ratio) for cardiotoxicity across kinase inhibitors from FAERS. <bold>c</bold> Comparison of ranking derived from FAERS and WHO Pharmacovigilance data shows agreement. <bold>d</bold> Literature-reported in vitro and in vivo preclinical assays to predict KI-associated cardiotoxicity poorly correlated with clinical FAERS-derived risk scores for cardiotoxicity at clinical drug concentrations. <bold>e</bold> In vitro dose&#x02013;response experiments for selected KIs for viability and mitochondrial stress poorly correlate with clinical FAERS-derived risk scores for cardiotoxicity. Source data are provided in source data file.</p></caption><graphic xlink:href="41467_2020_18396_Fig1_HTML" id="d32e544"/></fig></p></sec><sec id="Sec4"><title>Phenotypic assays poorly correlate with CT</title><p id="Par9">We performed a literature review for in vitro and in vivo experimental datasets that aimed to predict CT risk based on phenotypic readouts, such as cell viability or beating rate from in vitro cardiomyocyte or animal models, to determine if such phenotypic experiments can predict the clinical risk scores for CT. Studies in which drugs at the clinical concentration induced more than a 20% change in various phenotypic readouts compared to control experiments were classified as predicting potential CT (Fig.&#x000a0;<xref rid="Fig1" ref-type="fig">1d</xref>). 
Across these studies, it was apparent that there was no identifiable relationship between apparent experimental toxicity in comparison to the relative incidence of CT in patients as derived from FAERS.</p><p id="Par10">We conducted dose&#x02013;response experiments with selected KIs that had varying risks for CT using the cardiomyocyte cell lines that were used in the current study for transcriptomic profiling, quantifying cell viability, and mitochondrial stress after 48&#x02009;h of exposure to the selected KIs. We again assessed if drugs caused more than a 20% change in cell viability and mitochondrial stress at the typical clinically used concentration (Supplementary Table&#x000a0;<xref rid="MOESM1" ref-type="media">1</xref>). These studies showed a similar lack of correlation with clinical risk (Fig.&#x000a0;<xref rid="Fig1" ref-type="fig">1e</xref>, Supplementary Fig.&#x000a0;<xref rid="MOESM1" ref-type="media">1</xref>). These findings underscore the need for alternative approaches such as early molecular signatures for CT. This identified lack of the predictiveness of preclinical in vitro and in vivo phenotypic assays, as has been noted by others<sup><xref ref-type="bibr" rid="CR7">7</xref></sup>.</p></sec><sec id="Sec5"><title>Transcriptomic profiling of human primary cardiomyocyte-like cell lines</title><p id="Par11">To study the transcriptomic response to KIs associated with CT, we obtained four primary cardiomyocyte lines that were isolated from ventricles of healthy adult human heart (two male and two female, PromoCell GmbH, Germany). 
Culture conditions, detailed phenotypic characterization of each cell line, including gene and protein expression, morphology, and functional assays, can be found on the DToxS Center website (<ext-link ext-link-type="uri" xlink:href="https://www.dtoxs.org">www.dtoxs.org</ext-link>) under the &#x0201c;Cellular Metadata&#x0201d; section.</p><p id="Par12">Confluent cardiomyocyte-like cells were treated with drugs for 48&#x02009;h at concentrations similar to their clinical concentration (Supplementary Table&#x000a0;<xref rid="MOESM1" ref-type="media">1</xref>) with 3&#x02013;4 replicates and 3&#x02013;4 cell lines (Supplementary Table&#x000a0;<xref rid="MOESM1" ref-type="media">2</xref>), after which RNA was extracted and sequenced using the 3&#x02032; digital gene-expression method<sup><xref ref-type="bibr" rid="CR23">23</xref></sup> (Fig.&#x000a0;<xref rid="Fig2" ref-type="fig">2a</xref>).<fig id="Fig2"><label>Fig. 2</label><caption><title>Overview of pan-KI transcriptomic profiling in human primary cardiomyocyte-like cells.</title><p><bold>a</bold> Overview of experimental approach to generate transcriptomic data. For each drug, genes were ranked by absolute mean fold-change gene-expression value across replicates (&#x0003e;3 biological replicates) and cell lines (a total of 1309 experiments), and the top 250 genes for each KI were kept. Information about the total number of replicates can be found in the source data file. <bold>b</bold> Jaccard similarity of gene-expression signature of PromoCell cardiomyocyte cell lines (102 samples) to gene-expression signatures of tissues available in the GTeX database (17,382 total samples). Boxplot whiskers refer to the upper and lower quartile of all pairwise Jaccard coefficients between each sample, within each tissue type. Information about each boxplot&#x02019;s sample size, minima, maxima, and center is provided in the source data file. 
<bold>c</bold> Heatmap depicting the Jaccard index that indicates the magnitude of similarity in top-ranking differentially expressed genes for all KI pairs. <bold>d</bold> First three principal components (PCs) based on full mean fold-change gene-expression profiles across KIs. Source data are provided in source data file.</p></caption><graphic xlink:href="41467_2020_18396_Fig2_HTML" id="d32e614"/></fig></p><p id="Par13">We investigated if transcriptomic profiles of PromoCell cardiomyocytes are related to human heart tissue and hence a good model to study CT. We compared the gene-expression similarity of untreated PromoCell cardiomyocytes against tissues available in the Genotype-Tissue Expression (GTEx) project, which contains gene-expression data from many human tissues, including the heart (Fig.&#x000a0;<xref rid="Fig2" ref-type="fig">2b</xref>)<sup><xref ref-type="bibr" rid="CR24">24</xref></sup>. Using the Jaccard distance for the top expressed 250 genes (based on transcript per million counts) for both untreated PromoCell and GTEx tissues, we observe that PromoCell cardiomyocytes&#x02019; expression exhibits a gene expression similar to blood (rank 2), muscle (rank 4), and heart (rank 10) tissue. Based on these results, we conclude that the PromoCell cardiomyocytes can offer comparable gene-expression changes to that of cardiomyocytes.</p></sec><sec id="Sec6"><title>Limited overlap in differentially expressed genes across KIs</title><p id="Par14">Differential gene-expression fold-change values were computed across the four cell lines. Initial analyses showed that the DEGs generally clustered more strongly by drugs than by cells. We calculated median fold-change values for each KI across cell lines, resulting in a single gene- expression profile for each KI. Ranked gene lists for each KI were generated by ranking by differential gene-expression <italic>p</italic> value and keeping the top 250 genes. 
To assess the similarity between genes present in the top 250 genes for each KI, the Jaccard index was calculated for each ranked list of KI-specific genes, which indicated a limited overlap (&#x0003c;0.25) between the top 250 genes across KIs (Fig.&#x000a0;<xref rid="Fig2" ref-type="fig">2c</xref>). Principal component analysis showed variable gene-expression patterns for nine KIs, while for the remaining KIs, little variation in gene expression was seen (Fig.&#x000a0;<xref rid="Fig2" ref-type="fig">2d</xref>), even though these remaining KIs included drugs for which CT is well established. We concluded that ranked differential gene-expression values would not be sufficient to provide clear insights into gene-expression profiles associated with CT.</p></sec><sec id="Sec7"><title>Pathways correlated with KI-associated CT</title><p id="Par15">To identify pathways and subcellular processes across KIs and their potential involvement with CT, we performed enrichment analysis for protein kinases and KEGG terms using the top 250 differentially expressed genes ranked by <italic>p</italic> value across cell lines and KIs. We then correlated <italic>p</italic> values of enriched terms with clinical FAERS-derived risk scores to identify potential kinases and pathways associated with CT risk (Fig.&#x000a0;<xref rid="Fig3" ref-type="fig">3a</xref>). The protein kinase LIMK2, which is involved in actin cytoskeleton reorganization pathways, ranked the highest in its correlation specifically enriched for KIs with a higher risk score (Fig.&#x000a0;<xref rid="Fig3" ref-type="fig">3b</xref>). Sucrose- and pyruvate-metabolism pathways were the most strongly enriched pathways correlating with high risk scores (Fig.&#x000a0;<xref rid="Fig3" ref-type="fig">3c</xref>). However, since no directionality in pathways is considered in these enrichment analyses, both the positively and negatively correlated processes may play a role in the development of CT. 
When considering enriched protein kinases and KEGG processes across all KIs without considering correlation to CT risk, multiple pathways were identified (Supplementary Fig.&#x000a0;<xref rid="MOESM1" ref-type="media">2</xref>). These findings indicate that there is likely substantial complexity underlying the action of KI in cardiomyocytes, although currently these analyses remain correlational and do not offer proof of causal relationships.<fig id="Fig3"><label>Fig. 3</label><caption><title>Analysis of transcriptomic profiling data in relation to cardiotoxicity risk.</title><p><bold>a</bold> Flowchart indicating ranked lists of top 250 differentially expressed genes ranked by p value for each kinase inhibitor across cell lines from the transcriptomic cardiomyocyte profiling, which were then enriched and subsequently related to clinical cardiotoxicity risk scores. Enriched kinases (<bold>b</bold>) and enriched KEGG pathways (<bold>c</bold>) (<italic>p</italic>&#x02009;&#x0003c;&#x02009;0.05) that show a correlation coefficient&#x02009;&#x0003e;&#x02009;|0.25| with cardiotoxicity risk scores and the associated enrichment <italic>p</italic> values. Source data are provided in source data file.</p></caption><graphic xlink:href="41467_2020_18396_Fig3_HTML" id="d32e686"/></fig></p></sec><sec id="Sec8"><title>Transcriptomic signature to predict CT risk</title><p id="Par16">We tested if our KI-wide fold-change gene-expression profiles correlated with the KI-specific clinical risk scores for CT to identify a predictive transcriptomic signature for CT risk. Given the limited similarity between top-ranking gene-expression profiles across KIs, the entirety of the gene- expression profiles for different KIs were considered as potential predictors for KI-associated CT risk. KI-specific expression profiles of 10,749 genes were available as potential predictors for KI-specific CT risk scores. 
To identify genes most strongly associated with CT risk, we used an elastic net-penalized regression approach, which aims to select the most predictive variables while avoiding overfitting<sup><xref ref-type="bibr" rid="CR25">25</xref></sup>.</p><p id="Par17">A two-stage regression analysis was performed (Fig.&#x000a0;<xref rid="Fig4" ref-type="fig">4a</xref>). From the available 23 KIs with the associated clinical CT risk scores, we randomly left out 2 KIs for external validation of the model (test set, 10% of data). The differential gene-expression profiles of 21 remaining KIs were then used to train the model. Given the limited number of available drugs, small changes in expression patterns for drug were expected to affect the identity of the overall set of predictor genes. Therefore, we generated bootstrap datasets by random resampling of KI risk and the associated gene-expression profiles. These bootstrapped datasets were then fit using elastic net models. This first step was performed to identify gene-based predictors that could consistently predict CT risk and contributed significantly to the prediction of this risk. The bootstrap analysis resulted in stable selection of potential predictors. Predictors to be included in the final elastic net regression model were selected based on their minimal root-mean-squared prediction error (RMSE) after cross- validation. Based on this cross-validation, the gene-expression-based predictors in the final elastic net models consisted of 26 genes with the associated variable importance values (Fig.&#x000a0;<xref rid="Fig4" ref-type="fig">4b</xref>).<fig id="Fig4"><label>Fig. 4</label><caption><title>Regression analysis for transcriptomic signatures to predict clinical risk.</title><p><bold>a</bold> Overview of processing and elastic net regression analysis of transcriptomic data in combination with FAERS-derived clinical risk scores. 
<bold>b</bold> Transcriptomic signature genes selected to predict cardiotoxicity risk score indicating their variable importance. <bold>c</bold> Observed and predicted risk scores from the elastic net cross- validation analysis (mean and standard deviation). <bold>d</bold> External validation of the signature for six kinase inhibitors: regorafenib (REG), sunitinib (SUN), ibrutinib (IBR), lenvatinib (LEN), nintendinib (NIN), and ceritinib (CER).</p></caption><graphic xlink:href="41467_2020_18396_Fig4_HTML" id="d32e725"/></fig></p><p id="Par18">Repeated cross-validation analyses indicated good predictive performance of the model for left-out KIs (Fig.&#x000a0;<xref rid="Fig4" ref-type="fig">4c</xref>). We evaluated our 26-gene signature for predicting CT risk on an independent validation set of six KIs, of which three KIs were previously untested (Fig.&#x000a0;<xref rid="Fig4" ref-type="fig">4d</xref>). We note that the independent validation set was performed 1 year after the original signatures were generated, using a different experimental protocol for the transcriptomic assay that was based on mRNA detection using random primers. We observed accurate predictive performance for five out of six KIs tested. The outlier, ibrutinib, had the lowest, albeit acceptable, predictive performance, with an error of 0.493 between the predicted and observed risk scores. Taken together, the developed signature can be of relevance to support risk prioritization of newly developed KIs. When we tested which of the 21 KIs drove the prediction strength of the model, we found that excluding any of four low-CT risk drugs (cabozantinib, tofacitinib, pazopanib, and erlotinib) increased the error substantially, indicating that these KIs contribute distinct information to the signature. 
In contrast, several of the high-ranking CT drugs could be excluded without sacrificing accuracy (Supplementary Fig.&#x000a0;<xref rid="MOESM1" ref-type="media">3</xref>).</p><p id="Par19">We then used the 26-gene signature to construct a protein&#x02013;protein interaction network analysis to identify protein kinases and transcription factors associated with the signature (Supplementary Fig.&#x000a0;<xref rid="MOESM1" ref-type="media">4</xref>). Several protein kinases were retrieved that are both known targets of the studied KIs, and which may be associated with the occurrence of KI-induced CT.</p></sec><sec id="Sec9"><title>Chemical structures of KIs inform CT risk</title><p id="Par20">Off-target binding or polypharmacology is commonly observed in KIs<sup><xref ref-type="bibr" rid="CR23">23</xref></sup>. Since off-target binding is dependent on the structure of the drug, we investigated the relationship between kinase inhibitor chemical structure, binding target profile, and CT risk. To do this, we generated a structure&#x02013;activity&#x02013;similarity (SAS) map of the 26 tested inhibitors (in both the training and validation set) and their CT-risk score (Fig.&#x000a0;<xref rid="Fig5" ref-type="fig">5A</xref>)<sup><xref ref-type="bibr" rid="CR26">26</xref></sup>. SAS maps can be divided into four quadrants: the upper-left quadrant shows KI pairs with low chemical similarity and large changes in CT risk. The lower-left quadrant describes largely dissimilar KI pairs with small changes in CT risk. The lower-right quadrant describes KI pairs that exhibit a &#x0201c;smooth&#x0201d; structure&#x02013;activity relationship, that is, small changes in chemical similarity are associated with small changes in CT risk. Finally, the upper-right quadrant indicates highly chemically similar compounds with large changes in CT risk.<fig id="Fig5"><label>Fig. 
5</label><caption><title>Structure&#x02013;activity&#x02013;similarity (SAS) maps of kinase inhibitor activity and cardiotoxicity.</title><p><bold>a</bold> A SAS map relating pairwise chemical similarity measured by Tanimoto coefficient (Tc) derived from a weighted average of 4 chemical fingerprints (ECFP4, ECFP2, Daylight, and MACCS), between pairs of 26 kinase inhibitors (Table&#x000a0;<xref rid="Tab1" ref-type="table">1</xref>) and their difference in cardiotoxicity scores (DCS). The threshold for chemical similarity was the top 10% value in the distribution of Tc values: 0.38. The threshold value for DCS was half of the maximum DCS score: 0.82. <bold>b</bold> Highlighted chemical scaffolds for distinct kinase inhibitors observed in the upper- and lower-right regions. <bold>c</bold> Binding profile of kinase inhibitors based on data from Klaeger et al.<sup><xref ref-type="bibr" rid="CR5">5</xref></sup>. Kinase inhibitors were hierarchically clustered based on chemical similarity, and kinase inhibitors are annotated by their binding mode (e.g., type I, type I1/2, type II, type III, type IV, or type VI)<sup><xref ref-type="bibr" rid="CR6">6</xref></sup>. <bold>d</bold> Kinase inhibitor selectivity scores at 500&#x02009;nM <italic>K</italic><sub>d</sub>. <bold>e</bold> Observed cardiotoxicity risk scores were normalized to zero and ordered based on hierarchical clustering of the kinase inhibitors. <bold>f</bold> Predicted cardiotoxicity risk scores were normalized to zero and ordered based on hierarchical clustering of the kinase inhibitors. <bold>g</bold> Absolute error from observed and predicted cardiotoxicity risk scores. Source data are provided in source data file.</p></caption><graphic xlink:href="41467_2020_18396_Fig5_HTML" id="d32e805"/></fig></p><p id="Par21">KI pairs in the upper-right region represent activity cliffs, that is, that small changes in chemical structure are associated with large changes in CT risk. 
In this region, we find several KI pairs, in particular, we observe large activity cliffs between afatinib and bosutinib as well as bosutinib and erlotinib. Here, all four compounds have the same chemical core (Fig.&#x000a0;<xref rid="Fig5" ref-type="fig">5</xref>b); however, both afatinib and erlotinib show respectively lower CT risk scores compared to bosutinib. We hypothesized that harmonization of drug substructure, similarity, and promiscuity in the context of kinase inhibitor type may inform on our ability to predict CT risk (Fig.&#x000a0;<xref rid="Fig5" ref-type="fig">5c</xref>).</p><p id="Par22">By investigating their KI target profiles, we observe that both afatinib and erlotinib are less promiscuous KIs compared to bosutinib (which is one of the most promiscuous KIs in this set, Fig.&#x000a0;<xref rid="Fig5" ref-type="fig">5d</xref>), and they both inhibit EGFR at nanomolar concentrations. On the other hand, less promiscuous KIs, such as lapatinib and gefitinib, exhibit a comparably lower CT risk score (Fig.&#x000a0;<xref rid="Fig5" ref-type="fig">5e</xref>). Indeed, we observe a correlation between kinase inhibitor promiscuity and the observed CT risk score (Supplementary Fig.&#x000a0;<xref rid="MOESM1" ref-type="media">5</xref>). However, KI promiscuity may not be the sole determinant of CT risk. For example, KIs such as imatinib and nilotinib are not as promiscuous as bosutinib; however, both exhibit relatively high CT risk scores. In this case, both imatinib and nilotinib CT may be explained due to their similar chemical structure and high specificity for protein kinases such as DDR1 and ABL.</p><p id="Par23">Finally, kinase inhibitors have distinct binding modes against their targets<sup><xref ref-type="bibr" rid="CR6">6</xref>,<xref ref-type="bibr" rid="CR27">27</xref>,<xref ref-type="bibr" rid="CR28">28</xref></sup>. 
Kinase inhibitors that bind their kinase targets can be classified based on their binding mode, including the kinase conformation they bind and/or type of interactions they make with their kinase targets (e.g., covalent vs. noncovalent)<sup><xref ref-type="bibr" rid="CR6">6</xref>,<xref ref-type="bibr" rid="CR27">27</xref>,<xref ref-type="bibr" rid="CR29">29</xref></sup>. For example, type I inhibitors bind an active kinase conformation, while Type I1/2, II&#x02013;V bind distinct inactive states (Methods); type VI KI binds the kinase target covalently. We do not observe a clear relationship between kinase inhibitor-binding mode and CT. For example, the type II inhibitors imatinib and nilotinib are observed to have a high CT risk, while the type II inhibitors sorafenib and regorafenib have comparatively lower observed CT risk. However, both pairs of inhibitors are highly chemically similar and have similar binding targets. Taken together, the observed CT risk of a KI may be related to both a kinase inhibitor&#x02019;s selectivity and its chemical structure. Furthermore, we observe a relationship between chemical structure and binding target similarity to the predictive performance of our signature (Fig.&#x000a0;<xref rid="Fig5" ref-type="fig">5e&#x02013;g</xref>).</p></sec></sec><sec id="Sec10" sec-type="discussion"><title>Discussion</title><p id="Par24">The occurrence of drug treatment-associated CT, leading to decreased cardiac function, follows the therapeutic effects of the drugs, and is only observed in a subset of the patients using the drug. This raises the question of whether it would be possible to obtain early cell-based signatures predictive for drug toxicity. 
Here we addressed this question by attempting to associate drug treatment-induced gene-expression patterns with the clinical risk for the adverse events of interest.</p><p id="Par25">By estimating clinical risk from the FAERS database, our method utilizes a relevant and unbiased approach for the quantification of CT risk. As a result, our CT risk scores lack notable pitfalls such as selection bias associated with tightly controlled clinical trials, which underestimate adverse-event risks due to cohort size, trial duration, and selective inclusion criteria for subjects. Nevertheless, there are limitations to the FAERS database as well, which we have discussed and addressed in previous work<sup><xref ref-type="bibr" rid="CR22">22</xref></sup>. Specifically, use of the FAERS resource may confound demographics information such as age and sex, which was observed not to vary across different KIs. Moreover, CT risk score does not reflect absolute risk for developing CT. Rather, it reflects the relative risk for a subset of patients for which drug-associated adverse events were reported. In addition, there may be some systematic biases based on the sampling frequency of drugs by institution.</p><p id="Par26">It remains unclear if all KIs induce CT through similar mechanisms, and to what extent ultimate clinical pathologies are similar. While the FAERS database allows us to distinguish between different types of CT, the annotation is not uniform and may either refer to distinct pathophysiological descriptions or rather more general clinical presentations of heart failure. 
To this end, we chose to lump all forms of heart failure, while excluding cardiac AEs that have known and unrelated origin such as coronary artery disease and arrhythmias.</p><p id="Par27">We compared KI-associated transcriptomic response profiles generated from cultured human primary cardiomyocyte-like cells with clinical CT risk scores to obtain a reduced set of genes that may predict the relative risk for KI-associated CT. Using the clinically weighted signatures and the associated regression coefficients identified in the elastic net model, the relative risk for CT can be predicted. The risks predicted by our signatures and the associated regression model can be used in drug development to rank the potential risk of novel KIs with respect to existing KIs with better characterized clinical risks for CT.</p><p id="Par28">The signatures generally showed good prediction of CT risk during cross-validation as well as on an independent set of KIs (Fig.&#x000a0;<xref rid="Fig4" ref-type="fig">4</xref>), while the only poorly performant KI, ibrutinib, inhibitor of Bruton nonreceptor protein-tyrosine kinase, represents a unique KI in terms of binding mode (i.e., type VI inhibitor) and high promiscuity (Fig.&#x000a0;<xref rid="Fig5" ref-type="fig">5</xref>). Specifically, it is a member of an emerging class of kinase inhibitor drugs that bind their targets covalently (type VI KIs). These drugs are highly underrepresented in the databases used in this analysis, explaining the misclassification of ibrutinib<sup><xref ref-type="bibr" rid="CR30">30</xref></sup>.</p><p id="Par29">The four cell lines we studied are insufficient to fully capture such human variability to KIs. Therefore, in our analysis, we used median fold-change gene-expression profiles across multiple cell lines. 
The resulting averaged gene-expression profiles thus reflect relatively consistent changes in gene expression across cell lines, i.e., changes in gene expression that are less likely to be highly variable across cell lines, yet may also reflect a set of predictors that may be more consistent in the population. Given that the FAERS CT risk scores also reflect a population-level CT risk, the use of these median values in fold-change gene-expression values is a reasonable starting point for our analyses.</p><p id="Par30">The experimental underpinning of the transcriptomic profiles generated in this study makes them likely to be of value in selecting drug candidates with a low risk for CT as an adverse event. Our analysis is based on primary human heart-derived cardiomyocyte-like cells. Although these cell lines do have phenotypic limitations due to dedifferentiation, the signatures obtained from the cells could be relevant for prediction of clinical drug effects. These cell lines may be reflective of human cardiac pharmacology, i.e., in comparison with animal-derived cardiomyocytes, even though further characterization and standardization are still needed. Detailed characterization of these cell lines is available as metadata to the RNAseq datasets at <ext-link ext-link-type="uri" xlink:href="https://www.dtoxs.org">www.dtoxs.org</ext-link>. Our analyses used drug exposures similar to clinically reported maximum plasma concentrations of the individual KIs, rather than using the same concentrations for all KIs, even though we did not correct for protein binding. 
We expect that the duration of 48-h exposure may reflect transcriptomic changes that are likely related to early changes in subcellular processes associated with the adverse event of interest.</p><p id="Par31">Unfortunately, in this&#x000a0;study, it is not feasible or ethically possible, due to lack of prior informed consent, to compare cardiac gene-expression signatures with gene-expression profiles from patients who receive KI-therapy and/or who developed KI-associated CT. We considered whether we could compare our gene-expression signatures to cardiac gene-expression data from patients with heart failure who undergo surgery. Typically these are patients with advanced disease, and the gene expression in tissue from advanced disease is not likely to be of relevance to acute drug-induced CT.</p><p id="Par32">By investigating the chemical structure and binding profile similarity of KIs, we are able to observe that chemical components and scaffolds that lead to promiscuous binding of KIs to multiple binding targets are correlated with higher CT values. This is consistent with the notion that a portion of CT risk of KIs can be attributed to higher levels of off-target interactions. Indeed, when we investigate the binding profile of three chemically similar KIs: afatinib, erlotinib, and gefitinib, we find that their binding profiles are fairly specific compared to other KIs, and they have a lower normalized CT risk score. One limitation we have observed with our approach is that chemically distinct KIs (e.g., in terms of binding profile, substructural similarity, and type), such as the type VI inhibitor ibrutinib, exhibit diminished predictive performance. 
However, we think that using the guidelines we provide herein, this signature could still assist in the development and prioritization of KIs with lower toxicity risks.</p><p id="Par33">We cautiously anticipate that clinically weighted transcriptomic signatures such as those developed in this study may be of relevance to guide safety assessment in early drug development. Unlike the relatively well-established assessment of electrophysiological safety issues such as QT prolongation, the assessment of non-QT type of CT associated with KI<sup><xref ref-type="bibr" rid="CR16">16</xref></sup> and other novel drugs<sup><xref ref-type="bibr" rid="CR31">31</xref></sup>, lacks reliable biomarkers. The transcriptomic signature for CT identified in this study may help fill this gap, especially if its structure and binding profiles are closely represented within the inhibitors in this study. One could anticipate that after initial selection of promising KIs with apparent efficacy in preclinical screens, transcriptomic profiling using the signatures developed here may possibly be used to rank drugs for the expected CT risk and exclude those with high CT risk scores (Supplementary Fig.&#x000a0;<xref rid="MOESM1" ref-type="media">6</xref>).</p><p id="Par34">While beyond the scope of this study, future extension of our studies could explore the idea of studying individualized risk scores for CT. That is, do baseline gene-expression profiles of larger libraries of patient-derived cardiomyocyte cell lines predict the difference in risk for CT between individual patients? Ideally, such an analysis would be conducted using induced pluripotent stem cell-derived cardiomyocytes from patients, who have received KIs and experienced different levels of CT, such as was recently described for anthracycline chemotherapeutics<sup><xref ref-type="bibr" rid="CR32">32</xref></sup>. 
This would then further enable the development of precision medicine approaches to KI therapy that could minimize the risk for CT.</p></sec><sec id="Sec11"><title>Methods</title><sec id="Sec12"><title>Cell culture and drug treatment</title><p id="Par35">Adult human cardiomyocytes (Cat #: C12810) were purchased from PromoCell GmbH (Heidelberg, Germany) and grown in culture as per the manufacturer&#x02019;s instructions. Four different cell line lots (Lot #: 3042901.2, 4031101.3, 2082801.2, and 2120301.2) isolated from two male and two female subjects were cultured under serum-free differentiation conditions for at least 28 days prior to drug treatment. Details regarding metadata information, including cell line metadata and the quality control and assurance metrics, can be found on <ext-link ext-link-type="uri" xlink:href="https://www.dtoxs.org">www.dtoxs.org</ext-link>.</p></sec><sec id="Sec13"><title>Dose&#x02013;response experiments</title><p id="Par36">For two of the four cell lines, dose&#x02013;response experiments were conducted treating cells for 48&#x02009;h with eight increasing perturbagen concentrations (5&#x02009;nM, 50&#x02009;nM, 100&#x02009;nM, 500&#x02009;nM, 1&#x02009;&#x000b5;M, 5&#x02009;&#x000b5;M, 10&#x02009;&#x000b5;M, and 100&#x02009;&#x000b5;M) and vehicle-treated control, in quadruplicates. We assayed for viability through image-based analysis of nuclear counts with Hoechst 33342 (Thermo Fisher, Cat #: H3570) and MitoTracker Red (Thermo Fisher, Cat #: M22426) for mitochondrial toxicity. 
Details of the experimental protocols for cell culture, drug treatment, and transcriptomics have been described as step-by-step standard operating procedures for the various experiments available on <ext-link ext-link-type="uri" xlink:href="https://www.dtoxs.org">www.dtoxs.org</ext-link>.</p></sec><sec id="Sec14"><title>Transcriptomics</title><p id="Par37">Cells were treated for 48&#x02009;h with a single perturbagen concentration around the maximal concentration (Supplementary Table&#x000a0;<xref rid="MOESM1" ref-type="media">1</xref>). After drug treatment, the cells were lysed, RNA was collected using TRIzol, and gene-expression profiles were measured using the 3&#x02032; digital gene-expression method<sup><xref ref-type="bibr" rid="CR33">33</xref>,<xref ref-type="bibr" rid="CR34">34</xref></sup>.</p></sec><sec id="Sec15"><title>Sequence alignment and processing of gene-expression data</title><p id="Par38">The raw sequences were demultiplexed. Combined standard RNAseq files were aligned to the reference human genome hg38 using the STAR software suite<sup><xref ref-type="bibr" rid="CR35">35</xref></sup>. The resulting alignment files were parsed to identify the fragments with acceptable alignment quality, to remove duplicate fragments, and to assign accepted fragments to the corresponding genes. The resulting read-count (i.e., transcript count) table was then subjected to correlation analysis at each treatment condition, to identify and remove outlier samples, determined by predefined thresholds. The gene read-count tables were then subjected to differential gene-expression analysis using the R package EdgeR<sup><xref ref-type="bibr" rid="CR36">36</xref></sup>. Details of these computational procedures are described elsewhere<sup><xref ref-type="bibr" rid="CR23">23</xref></sup>, and step-by-step protocols are available on <ext-link ext-link-type="uri" xlink:href="https://www.dtoxs.org">www.dtoxs.org</ext-link>. 
The resulting normalized and log-transformed fold-change gene-expression values for each sample are also deposited for public access to the DToxS data repository (<ext-link ext-link-type="uri" xlink:href="https://www.dtoxs.org">www.dtoxs.org</ext-link>).</p></sec><sec id="Sec16"><title>Processing and exploratory analysis of gene-expression data</title><p id="Par39">The median log-transformed gene-expression fold-change value was calculated across all cell lines for each individual KI. The resulting matrix of gene fold-change values by KIs was used for the regression analysis. To obtain insight into the general patterns present in this KI-perturbed transcriptomics dataset, we generated rankings of the top 500 genes for each drug, by their absolute mean fold-change value, i.e., whether positive or negative. For each of these KI-associated rankings, we determined the frequency of these changes being also present in the ranking of other drugs, e.g., the similarity in genes present in the top 250 gene lists for each KI. This was visualized using the Jaccard index, and by plotting the most highly drug-connected genes against the associated drugs. Principal component analysis for the first three principal components on the absolute mean fold-change values for each drug was performed to further assess similarity between drugs in their gene-expression values.</p></sec><sec id="Sec17"><title>Calculation of tissue cell line expression similarity</title><p id="Par40">Pairwise expression similarity scores were computed based on the Jaccard coefficient of a binary matrix based on RNA sequencing data from PromoCell cardiomyocyte exposures to kinase inhibitors. 
The top 500 genes for a KI were set as 1, while genes that were not in the top 500 were set as 0.</p></sec><sec id="Sec18"><title>Calculation of clinical risk RORs</title><p id="Par41">Adverse-event frequencies from the FDA Adverse Event Reporting System (FAERS) were obtained from the AERSmine resource<sup><xref ref-type="bibr" rid="CR37">37</xref></sup>, which contains a curated version of the FAERS database. ADRs in the FAERS database are organized according to MedDRA<sup><xref ref-type="bibr" rid="CR38">38</xref></sup>, which is a hierarchical ontology to classify ADRs from high-level organs associated with the pathology to reported low-level specific pathological conditions. We downloaded the frequencies of the occurrence of ADRs for all protein KIs available in FAERS, together with all other frequencies of ADRs reported for these KIs. A time-stamped record of this download to reproduce this analysis was retained. RORs were then computed for each KI using the frequency <italic>f</italic><sub><italic>dt</italic></sub> of the ADR of interest, the frequency <italic>f</italic><sub><italic>dn</italic></sub> of any other ADR occurring, the frequencies <italic>f</italic><sub>nt</sub> of occurrence of the ADR of interest for any other protein kinase inhibitor, and the frequency <italic>f</italic><sub><italic>nn</italic></sub> for all other ADRs and KIs. The ROR was calculated using Eq. (<xref rid="Equ1" ref-type="">1</xref>)<disp-formula id="Equ1"><label>1</label><alternatives><tex-math id="M1">\documentclass[12pt]{minimal}
+				\usepackage{amsmath}
+				\usepackage{wasysym} 
+				\usepackage{amsfonts} 
+				\usepackage{amssymb} 
+				\usepackage{amsbsy}
+				\usepackage{mathrsfs}
+				\usepackage{upgreek}
+				\setlength{\oddsidemargin}{-69pt}
+				\begin{document}$${\rm{ROR}} = \frac{{f_{dt}/f_{dn}}}{{f_{nt}/f_{nn}}},$$\end{document}</tex-math><mml:math id="M2"><mml:mi mathvariant="normal">ROR</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>/</mml:mo><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub><mml:mo>/</mml:mo><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>,</mml:mo></mml:math><graphic xlink:href="41467_2020_18396_Article_Equ1.gif" position="anchor"/></alternatives></disp-formula>whereas the standard error (SE) of the log ROR was calculated using Eq. (<xref rid="Equ2" ref-type="">2</xref>)<disp-formula id="Equ2"><label>2</label><alternatives><tex-math id="M3">\documentclass[12pt]{minimal}
+				\usepackage{amsmath}
+				\usepackage{wasysym} 
+				\usepackage{amsfonts} 
+				\usepackage{amssymb} 
+				\usepackage{amsbsy}
+				\usepackage{mathrsfs}
+				\usepackage{upgreek}
+				\setlength{\oddsidemargin}{-69pt}
+				\begin{document}$${\rm{SE}}_{{\rm{logROR}}} = \sqrt {\frac{1}{{f_{dt}}} + \frac{1}{{f_{dn}}} + \frac{1}{{f_{nt}}} + \frac{1}{{f_{nn}}}},$$\end{document}</tex-math><mml:math id="M4"><mml:msub><mml:mrow><mml:mi mathvariant="normal">SE</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">logROR</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:msqrt><mml:mrow><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>+</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>+</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac><mml:mo>+</mml:mo><mml:mfrac><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:msub><mml:mrow><mml:mi>f</mml:mi></mml:mrow><mml:mrow><mml:mi>n</mml:mi><mml:mi>n</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mfrac></mml:mrow></mml:msqrt><mml:mo>,</mml:mo></mml:math><graphic xlink:href="41467_2020_18396_Article_Equ2.gif" position="anchor"/></alternatives></disp-formula>with the log-transformed confidence interval (CI) being calculated as follows: CI&#x02009;=&#x02009;log(ROR)&#x02009;&#x000b1;&#x02009;1.96*SE<sub>logROR</sub>.</p><p id="Par42">Adverse events in FAERS are mapped to the MEDDRA dictionary<sup><xref ref-type="bibr" rid="CR38">38</xref></sup>. CT events related to heart failures and cardiomyopathies, excluding arrhythmogenic ADRs and coronary artery disorders, were selected from the main MEDDRA cardiac ADR group. 
The selected ADRs primarily reflected different stages of heart failure, which were grouped together.</p></sec><sec id="Sec19"><title>Elastic net regression analysis</title><p id="Par43">The FAERS-derived risk RORs for CT were regressed against the KI-associated vectors of mean fold-change values across the four cell lines. A two-step regression procedure was then used to select predictor genes reducing the sensitivity to changes in dataset composition. For this, we first generated 1000 bootstrap datasets with replacements for gene expression&#x02013;KI risk score pairs. Each of these bootstrap datasets was fit using an elastic net regression model (R version 3.4.3, package glmnet, version 2.0-16). The genes that were selected as predictors (i.e., nonzero regression coefficient) and the scaled values of the gene-associated coefficients were saved for each bootstrap dataset. Across all bootstrap datasets, the relative frequency of the selection of gene-based predictors, and the mean-scaled coefficient value was computed. We then calculated the product of the mean frequency and scaled coefficient value, rank predictors by their importance with respect to robustness (selection frequency). A large number of percentiles of these rankings were evaluated using leave-one-out cross-validation. The selection percentile (99.755%) resulting in optimal prediction errors (RMSE) was then used to select a subset of gene-based predictors, and the model that generated the final gene-expression signatures. The selected predictor genes were then ranked by their relative importance, and by their median fold-change values, and displayed as clustered heatmaps. 
We finally evaluated the predictive value of the resulting regression model to predict CT risk scores for the two left-out KIs.</p><p id="Par44">When using this approach to analyze similar datasets of cardiomyocyte transcriptomes together with risk scores, it is possible that potentially different genes are identified than those described in the current report. This difference is associated with the intrinsic property of penalized regression approaches that select predictors from potentially highly correlated sets of predictor candidates. Hence, small changes in either risk scores or gene-expression datasets may affect correlation structures of the data and thereby the list of genes for a signature.</p></sec><sec id="Sec20"><title>Enrichment and network analyses</title><p id="Par45">Enrichment analysis was performed based on a one-tailed Fisher&#x02019;s exact test using R (package stats), in order to identify enrichment of specific genes in predefined gene lists. For enrichment of pathways and biological processes, we used the KEGG database (2016), and for enrichment of protein kinases, we used the KEA database (2015). Diseases were excluded from the KEGG list of processes (e.g., diabetes, depression, and cancer), in order to only evaluate general biological processes or pathways. We used the top 250 DEGs ranked by <italic>p</italic> value for each KI to perform enrichment analysis. 
Subsequently enriched term <italic>p</italic> values were correlated with CT risk scores to identify kinases and pathways associated with CT risk.</p><p id="Par46">The gene part of the signature for CT identified in the regression analysis was used as seed nodes to perform a protein&#x02013;protein interaction network (PPI) analysis, conducted using the web application X2K<sup><xref ref-type="bibr" rid="CR39">39</xref></sup>, which aims to identify associated kinases and transcription factors based on multiple PPI databases.</p></sec><sec id="Sec21"><title>Calculation of chemical similarity</title><p id="Par47">RDkit (<ext-link ext-link-type="uri" xlink:href="https://www.rdkit.org">www.rdkit.org</ext-link>)<sup><xref ref-type="bibr" rid="CR40">40</xref></sup> was used to generate chemical fingerprints and compute pairwise Tanimoto coefficients (Tc) between the 26 tested kinase inhibitors. For each pair of inhibitors, we first calculated the Tc using four chemical fingerprints, including Morgan_2 2,048-bit (ECFP4)<sup><xref ref-type="bibr" rid="CR41">41</xref></sup>, Morgan_1 2,048-bit (ECFP2)<sup><xref ref-type="bibr" rid="CR41">41</xref></sup>, Daylight-like<sup><xref ref-type="bibr" rid="CR42">42</xref></sup>, and MACCS<sup><xref ref-type="bibr" rid="CR43">43</xref></sup>. Because each of these fingerprints captures distinct chemical properties, we computed a weighted Tc average of the four fingerprints: 30% ECFP4, 30% ECFP2, 30% Daylight-like, and 10% MACCS, which exhibited the most optimal spread of the distribution of the pairwise distances. To generate the SAS maps (Fig.&#x000a0;<xref rid="Fig5" ref-type="fig">5a</xref>), we plotted the pairwise-weighted Tc values with their difference in CT scores (DCS). Finally, 0.35 was set as the threshold for chemical similarity, while half of the maximum difference was set as the threshold for DCS. 
Chemical structures were drawn using Marvin (<ext-link ext-link-type="uri" xlink:href="https://www.chemaxon.com">www.chemaxon.com</ext-link>)<sup><xref ref-type="bibr" rid="CR44">44</xref></sup> based on SMILES strings obtained from PubChem.</p></sec><sec id="Sec22"><title>Calculation of KI-binding target similarity</title><p id="Par48">Kinome-wide kinase inhibitor-binding (K<sub>d</sub>) profiling data were obtained from Klaeger et al.<sup><xref ref-type="bibr" rid="CR5">5</xref></sup>, which consisted of kinome-binding (Kd) profiling data for all of the tested kinase inhibitors across 242 kinases. A heatmap was generated for selected kinase inhibitors based on the negative log of the K<sub>d</sub> values from Klaeger et al. (Fig.&#x000a0;<xref rid="Fig5" ref-type="fig">5c</xref>)<sup><xref ref-type="bibr" rid="CR5">5</xref></sup>. Notably, the <italic>K</italic><sub>d</sub> values were scaled by 100,000 to avoid negative log values.<table-wrap id="Tab1"><label>Table 1</label><caption><p>Overview of KIs included in this analysis.</p></caption><table frame="hsides" rules="groups"><thead><tr><th>Drug</th><th>Three-letter code</th><th>Approval year<sup>a</sup></th><th>Therapeutic targets</th><th>Concentration (&#x000b5;M)<sup>b</sup></th></tr></thead><tbody><tr><td>Afatinib</td><td>AFA</td><td>2013</td><td>ErbB2 and EGFR</td><td>0.05</td></tr><tr><td>Axitinib</td><td>AXI</td><td>2012</td><td>VEGFR1/VEGFR2/VEGFR3/PDGFRB/c-KIT</td><td>0.2</td></tr><tr><td>Bosutinib</td><td>BOS</td><td>2012</td><td>Bcr-Abl and SRC</td><td>0.1</td></tr><tr><td>Cabozantinib</td><td>CAB</td><td>2012</td><td>c-Met and VEGFR2</td><td>2</td></tr><tr><td>Ceritinib</td><td>CER</td><td>2014</td><td>ALK</td><td>1</td></tr><tr><td>Crizotinib</td><td>CRI</td><td>2011</td><td>ALK and HGFR</td><td>0.25</td></tr><tr><td>Dabrafenib</td><td>DAB</td><td>2013</td><td>BRAF</td><td>2.5</td></tr><tr><td>Dasatinib</td><td>DAS</td><td>2006</td><td>ABL, ARG, KIT, PDGFR&#x003b1;/&#x003b2;, and 
SRC</td><td>0.1</td></tr><tr><td>Erlotinib</td><td>ERL</td><td>2004</td><td>ErbB1</td><td>3</td></tr><tr><td>Gefitinib</td><td>GEF</td><td>2003</td><td>ErbB1</td><td>1</td></tr><tr><td>Imatinib</td><td>IMA</td><td>2001</td><td>Bcr-Abl</td><td>5</td></tr><tr><td>Lapatinib</td><td>LAP</td><td>2007</td><td>ErbB1</td><td>2</td></tr><tr><td>Nilotinib</td><td>NIL</td><td>2007</td><td>Bcr-Abl</td><td>3</td></tr><tr><td>Pazopanib</td><td>PAZ</td><td>2009</td><td>VEGFR2, PDGFR&#x003b1;/&#x003b2;, and KIT</td><td>10</td></tr><tr><td>Ponatinib</td><td>PON</td><td>2012</td><td>Bcr-Abl, VEGFR, PDGFR, FGFR, EPH, SRC, c-KIT, RET, TIE2, and FLT3</td><td>0.1</td></tr><tr><td>Regorafenib</td><td>REG</td><td>2012</td><td>RET, VEGFR, and PDGFR</td><td>1</td></tr><tr><td>Ruxolitinib</td><td>RUX</td><td>2011</td><td>JAK</td><td>1</td></tr><tr><td>Sorafenib</td><td>SOR</td><td>2005</td><td>BRAF, VEGFRs, PDGFR&#x003b1;/&#x003b2;, FLT3, and KIT</td><td>0.5</td></tr><tr><td>Sunitinib</td><td>SUN</td><td>2006</td><td>VEGFR, PDGFR, CSF1R, FLT3, and KIT</td><td>1</td></tr><tr><td>Trametinib</td><td>TRA</td><td>2013</td><td>MEK1 and MEK2</td><td>0.1</td></tr><tr><td>Tofacitinib</td><td>TOF</td><td>2012</td><td>JAK</td><td>1</td></tr><tr><td>Vandetanib</td><td>VAN</td><td>2011</td><td>RET, VEGFR, and EGFR</td><td>0.33</td></tr><tr><td>Vemurafenib</td><td>VEM</td><td>2011</td><td>BRAF</td><td>2</td></tr></tbody></table><table-wrap-foot><p><sup>a</sup>US approval date, first indication.</p><p><sup>b</sup>Derived from maximum total (bound&#x02009;&#x02009;+&#x02009;&#x02009;free) plasma concentrations in humans as reported in the literature.</p><p>Table <xref rid="MOESM1" ref-type="media">S3</xref> lists the purity and literature references to clinical concentrations.</p></table-wrap-foot></table-wrap></p></sec><sec id="Sec23"><title>Reporting summary</title><p id="Par49">Further information on research design is available in the&#x000a0;<xref rid="MOESM3" ref-type="media">Nature Research Reporting 
Summary</xref> linked to this article.</p></sec></sec><sec sec-type="supplementary-material"><title>Supplementary information</title><sec id="Sec24"><p>
+<supplementary-material content-type="local-data" id="MOESM1"><media xlink:href="41467_2020_18396_MOESM1_ESM.pdf"><caption><p>Supplementary Information</p></caption></media></supplementary-material>
+<supplementary-material content-type="local-data" id="MOESM2"><media xlink:href="41467_2020_18396_MOESM2_ESM.pdf"><caption><p>Peer Review File</p></caption></media></supplementary-material>
+<supplementary-material content-type="local-data" id="MOESM3"><media xlink:href="41467_2020_18396_MOESM3_ESM.pdf"><caption><p>Reporting Summary</p></caption></media></supplementary-material>
+</p></sec></sec></body><back><app-group><app id="App1"><sec id="Sec25"><title>Source data</title><p id="Par52">
+<media position="anchor" xlink:href="41467_2020_18396_MOESM4_ESM.xlsx" id="MOESM4"><caption><p>Source Data</p></caption></media>
+</p></sec></app></app-group><fn-group><fn><p><bold>Peer review information</bold>
+<italic>Nature Communications</italic> thanks the anonymous reviewers for their contribution to the peer review of this work. Peer reviewer reports are available.</p></fn><fn><p><bold>Publisher&#x02019;s note</bold> Springer Nature remains neutral with regard to jurisdictional claims in published maps and institutional affiliations.</p></fn><fn><p>These authors contributed equally: J. G. Coen van Hasselt, Rayees Rahman.</p></fn><fn><p>These authors jointly supervised this work: Avner Schlessinger, Evren U. Azeloglu, Ravi Iyengar.</p></fn></fn-group><sec><title>Supplementary information</title><p>Supplementary information is available for this paper at 10.1038/s41467-020-18396-7.</p></sec><ack><title>Acknowledgements</title><p>This project was supported in part by the NIH LINCS center grant (U54 HG008098) and the Systems Biology Center grant (P50 GM071558). J.G.C.H. received funding from the European Union MSCA program (Project ID 661588). This work was partially carried out using the Dutch national e-infrastructure with the support of SURF Foundation.</p></ack><notes notes-type="author-contribution"><title>Author contributions</title><p>J.G.C.H. and R.R. performed the data analysis; J.G.C.H., R.R., J.H., M.R.B.,&#x000a0;E.S.,&#x000a0;A. Sc., E.U.A., and R.I. wrote the paper; Y.X. performed RNAseq data processing; A.P. and J.M.G. performed the mass spectrometry drug purity analyses; A.St., B.H., G.J., and J.V.S. performed the cell culture, drug perturbation, and the RNA isolation; E.U.A. supervised the experimental efforts; E.U.A. and J.M.G. determined the experimental drug concentrations and purity; M.M. supervised the RNA sequencing; J.G. supervised the quality assurance and assay reproducibility; A.Sc. supervised the cheminformatics analysis;&#x000a0;R.I. 
conceived the project; all authors reviewed the paper.</p></notes><notes notes-type="data-availability"><title>Data availability</title><p>All processed RNAseq data and the curated version-controlled standard operating procedures featured in this study can be downloaded freely at (<ext-link ext-link-type="uri" xlink:href="https://www.dtoxs.org">www.dtoxs.org</ext-link>)<sup><xref ref-type="bibr" rid="CR22">22</xref></sup> or the LINCS Data Portal (<ext-link ext-link-type="uri" xlink:href="http://lincsportal.ccs.miami.edu/dcic-portal/">http://lincsportal.ccs.miami.edu/dcic-portal/</ext-link>). Raw transcriptomics data can be accessed through the Gene Expression Omnibus (GEO) repository with accession numbers GSE146096 and GSE146097. Source data for each figure are provided with this paper. All remaining data will be available from the corresponding author upon reasonable request.&#x000a0;Source data are provided with this paper.</p></notes><notes notes-type="data-availability"><title>Code availability</title><p>All scripts are open-source and available from the DToxS GitHub repository (<ext-link ext-link-type="uri" xlink:href="https://github.com/dtoxs">https://github.com/dtoxs</ext-link>).</p></notes><notes id="FPar1" notes-type="COI-statement"><title>Competing interests</title><p id="Par50">R.R. and A.S. are co-founders of Aichemy Inc. The remaining authors declare no competing interests.</p></notes><ref-list id="Bib1"><title>References</title><ref id="CR1"><label>1.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Cohen</surname><given-names>P</given-names></name></person-group><article-title>The role of protein phosphorylation in human health and disease: delivered on June 30th 2001 at the FEBS meeting in Lisbon</article-title><source>Eur. J. 
Biochem.</source><year>2001</year><volume>268</volume><fpage>5001</fpage><lpage>5010</lpage><pub-id pub-id-type="doi">10.1046/j.0014-2956.2001.02473.x</pub-id><pub-id pub-id-type="pmid">11589691</pub-id></element-citation></ref><ref id="CR2"><label>2.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Giamas</surname><given-names>G</given-names></name><etal/></person-group><article-title>Kinases as targets in the treatment of solid tumors</article-title><source>Cell. Signal.</source><year>2010</year><volume>22</volume><fpage>984</fpage><lpage>1002</lpage><pub-id pub-id-type="doi">10.1016/j.cellsig.2010.01.011</pub-id><pub-id pub-id-type="pmid">20096351</pub-id></element-citation></ref><ref id="CR3"><label>3.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Knapp</surname><given-names>S</given-names></name><name><surname>Sundstr&#x000f6;m</surname><given-names>M</given-names></name></person-group><article-title>Recently targeted kinases and their inhibitors-the path to clinical trials</article-title><source>Curr. Opin. Pharmacol.</source><year>2014</year><volume>17C</volume><fpage>58</fpage><lpage>63</lpage><pub-id pub-id-type="doi">10.1016/j.coph.2014.07.015</pub-id></element-citation></ref><ref id="CR4"><label>4.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Fabbro</surname><given-names>D</given-names></name><name><surname>Cowan-Jacob</surname><given-names>SW</given-names></name><name><surname>M&#x000f6;bitz</surname><given-names>H</given-names></name><name><surname>Martiny-Baron</surname><given-names>G</given-names></name></person-group><article-title>Targeting cancer with small-molecular-weight kinase inhibitors</article-title><source>Methods Mol. 
Biol.</source><year>2012</year><volume>795</volume><fpage>1</fpage><lpage>34</lpage><pub-id pub-id-type="doi">10.1007/978-1-61779-337-0_1</pub-id><pub-id pub-id-type="pmid">21960212</pub-id></element-citation></ref><ref id="CR5"><label>5.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Klaeger</surname><given-names>S</given-names></name><etal/></person-group><article-title>The target landscape of clinical kinase drugs</article-title><source>Science</source><year>2017</year><volume>358</volume><fpage>eaan4368</fpage><pub-id pub-id-type="doi">10.1126/science.aan4368</pub-id><pub-id pub-id-type="pmid">29191878</pub-id></element-citation></ref><ref id="CR6"><label>6.</label><mixed-citation publication-type="other">Roskoski, R. Properties of FDA-approved small molecule protein kinase inhibitors. <italic>Pharmacol. Res.</italic>10.1016/j.phrs.2019.03.006 (2019).</mixed-citation></ref><ref id="CR7"><label>7.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Force</surname><given-names>T</given-names></name><name><surname>Kolaja</surname><given-names>KL</given-names></name></person-group><article-title>Cardiotoxicity of kinase inhibitors: the prediction and translation of preclinical models to clinical outcomes</article-title><source>Nat. Rev. 
Drug Discov.</source><year>2011</year><volume>10</volume><fpage>111</fpage><lpage>26</lpage><pub-id pub-id-type="doi">10.1038/nrd3252</pub-id><pub-id pub-id-type="pmid">21283106</pub-id></element-citation></ref><ref id="CR8"><label>8.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Chu</surname><given-names>TF</given-names></name><etal/></person-group><article-title>Cardiotoxicity associated with tyrosine kinase inhibitor sunitinib</article-title><source>Lancet</source><year>2007</year><volume>370</volume><fpage>2011</fpage><lpage>2019</lpage><pub-id pub-id-type="doi">10.1016/S0140-6736(07)61865-0</pub-id><pub-id pub-id-type="pmid">18083403</pub-id></element-citation></ref><ref id="CR9"><label>9.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Orphanos</surname><given-names>GS</given-names></name><name><surname>Ioannidis</surname><given-names>GN</given-names></name><name><surname>Ardavanis</surname><given-names>AG</given-names></name></person-group><article-title>Cardiotoxicity induced by tyrosine kinase inhibitors</article-title><source>Acta Oncol.</source><year>2009</year><volume>48</volume><fpage>964</fpage><lpage>970</lpage><pub-id pub-id-type="doi">10.1080/02841860903229124</pub-id><pub-id pub-id-type="pmid">19734999</pub-id></element-citation></ref><ref id="CR10"><label>10.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Moslehi</surname><given-names>JJ</given-names></name></person-group><article-title>Cardiovascular toxic effects of targeted cancer therapies</article-title><source>N. Engl. J. 
Med.</source><year>2016</year><volume>375</volume><fpage>1457</fpage><lpage>1467</lpage><pub-id pub-id-type="doi">10.1056/NEJMra1100265</pub-id><pub-id pub-id-type="pmid">27732808</pub-id></element-citation></ref><ref id="CR11"><label>11.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Force</surname><given-names>T</given-names></name><name><surname>Kerkel&#x000e4;</surname><given-names>R</given-names></name></person-group><article-title>Cardiotoxicity of the new cancer therapeutics&#x02014;mechanisms of, and approaches to, the problem</article-title><source>Drug Discov. Today</source><year>2008</year><volume>13</volume><fpage>778</fpage><lpage>84</lpage><pub-id pub-id-type="doi">10.1016/j.drudis.2008.05.011</pub-id><pub-id pub-id-type="pmid">18617014</pub-id></element-citation></ref><ref id="CR12"><label>12.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Davis</surname><given-names>MI</given-names></name><etal/></person-group><article-title>Comprehensive analysis of kinase inhibitor selectivity</article-title><source>Nat. Biotechnol.</source><year>2011</year><volume>29</volume><fpage>1046</fpage><lpage>51</lpage><pub-id pub-id-type="doi">10.1038/nbt.1990</pub-id><pub-id pub-id-type="pmid">22037378</pub-id></element-citation></ref><ref id="CR13"><label>13.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Elkins</surname><given-names>JM</given-names></name><etal/></person-group><article-title>Comprehensive characterization of the Published Kinase Inhibitor Set</article-title><source>Nat. 
Biotechnol.</source><year>2016</year><volume>34</volume><fpage>95</fpage><lpage>103</lpage><pub-id pub-id-type="doi">10.1038/nbt.3374</pub-id><pub-id pub-id-type="pmid">26501955</pub-id></element-citation></ref><ref id="CR14"><label>14.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Hasinoff</surname><given-names>BB</given-names></name><name><surname>Patel</surname><given-names>D</given-names></name></person-group><article-title>The lack of target specificity of small molecule anticancer kinase inhibitors is correlated with their ability to damage myocytes in vitro</article-title><source>Toxicol. Appl. Pharmacol.</source><year>2010</year><volume>249</volume><fpage>132</fpage><lpage>139</lpage><pub-id pub-id-type="doi">10.1016/j.taap.2010.08.026</pub-id><pub-id pub-id-type="pmid">20832415</pub-id></element-citation></ref><ref id="CR15"><label>15.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Will</surname><given-names>Y</given-names></name><etal/></person-group><article-title>Effect of the multitargeted tyrosine kinase inhibitors imatinib, dasatinib, sunitinib, and sorafenib on mitochondrial function in isolated rat heart mitochondria and H9c2 cells</article-title><source>Toxicol. Sci.</source><year>2008</year><volume>106</volume><fpage>153</fpage><lpage>161</lpage><pub-id pub-id-type="doi">10.1093/toxsci/kfn157</pub-id><pub-id pub-id-type="pmid">18664550</pub-id></element-citation></ref><ref id="CR16"><label>16.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kerkel&#x000e4;</surname><given-names>R</given-names></name><etal/></person-group><article-title>Cardiotoxicity of the cancer therapeutic agent imatinib mesylate</article-title><source>Nat. 
Med.</source><year>2006</year><volume>12</volume><fpage>908</fpage><lpage>916</lpage><pub-id pub-id-type="doi">10.1038/nm1446</pub-id><pub-id pub-id-type="pmid">16862153</pub-id></element-citation></ref><ref id="CR17"><label>17.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Doherty</surname><given-names>KR</given-names></name><etal/></person-group><article-title>Multi-parameter in vitro toxicity testing of crizotinib, sunitinib, erlotinib, and nilotinib in human cardiomyocytes</article-title><source>Toxicol. Appl. Pharmacol.</source><year>2013</year><volume>272</volume><fpage>245</fpage><lpage>55</lpage><pub-id pub-id-type="doi">10.1016/j.taap.2013.04.027</pub-id><pub-id pub-id-type="pmid">23707608</pub-id></element-citation></ref><ref id="CR18"><label>18.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Force</surname><given-names>T</given-names></name><name><surname>Krause</surname><given-names>DS</given-names></name><name><surname>Van Etten</surname><given-names>RA</given-names></name></person-group><article-title>Molecular mechanisms of cardiotoxicity of tyrosine kinase inhibition</article-title><source>Nat. Rev. Cancer</source><year>2007</year><volume>7</volume><fpage>332</fpage><lpage>344</lpage><pub-id pub-id-type="doi">10.1038/nrc2106</pub-id><pub-id pub-id-type="pmid">17457301</pub-id></element-citation></ref><ref id="CR19"><label>19.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Bai</surname><given-names>JPF</given-names></name><name><surname>Abernethy</surname><given-names>DR</given-names></name></person-group><article-title>Systems pharmacology to predict drug toxicity: integration across levels of biological organization</article-title><source>Annu. Rev. Pharmacol. 
Toxicol.</source><year>2013</year><volume>53</volume><fpage>451</fpage><lpage>73</lpage><pub-id pub-id-type="doi">10.1146/annurev-pharmtox-011112-140248</pub-id><pub-id pub-id-type="pmid">23140241</pub-id></element-citation></ref><ref id="CR20"><label>20.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Berger</surname><given-names>SI</given-names></name><name><surname>Iyengar</surname><given-names>R</given-names></name></person-group><article-title>Role of systems pharmacology in understanding drug adverse events</article-title><source>Wiley Interdiscip. Rev.</source><year>2011</year><volume>3</volume><fpage>129</fpage><lpage>135</lpage></element-citation></ref><ref id="CR21"><label>21.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Berger</surname><given-names>SI</given-names></name><name><surname>Ma&#x02019;ayan</surname><given-names>A</given-names></name><name><surname>Iyengar</surname><given-names>R</given-names></name></person-group><article-title>Systems pharmacology of arrhythmias</article-title><source>Sci. Signal.</source><year>2010</year><volume>3</volume><fpage>ra30</fpage><pub-id pub-id-type="pmid">20407125</pub-id></element-citation></ref><ref id="CR22"><label>22.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhao</surname><given-names>S</given-names></name><etal/></person-group><article-title>Systems pharmacology of adverse event mitigation by drug combinations</article-title><source>Sci. Transl. 
Med.</source><year>2013</year><volume>5</volume><fpage>206ra140</fpage><pub-id pub-id-type="doi">10.1126/scitranslmed.3006548</pub-id></element-citation></ref><ref id="CR23"><label>23.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Xiong</surname><given-names>Y</given-names></name><etal/></person-group><article-title>A comparison of mRNA sequencing with random primed and 3&#x02032;-directed libraries</article-title><source>Sci. Rep.</source><year>2017</year><volume>7</volume><fpage>14626</fpage><pub-id pub-id-type="doi">10.1038/s41598-017-14892-x</pub-id><pub-id pub-id-type="pmid">29116112</pub-id></element-citation></ref><ref id="CR24"><label>24.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Lonsdale</surname><given-names>J</given-names></name><etal/></person-group><article-title>The genotype-tissue expression (GTEx) project</article-title><source>Nat. Genet.</source><year>2013</year><volume>45</volume><fpage>580</fpage><lpage>585</lpage><pub-id pub-id-type="doi">10.1038/ng.2653</pub-id><pub-id pub-id-type="pmid">23715323</pub-id></element-citation></ref><ref id="CR25"><label>25.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zou</surname><given-names>H</given-names></name><name><surname>Hastie</surname><given-names>T</given-names></name></person-group><article-title>Regularization and variable selection via the elastic net.</article-title><source>Journal of the Royal Statistical Society</source><year>2005</year><volume>67</volume><fpage>301</fpage><lpage>320</lpage><pub-id pub-id-type="doi">10.1111/j.1467-9868.2005.00503.x</pub-id></element-citation></ref><ref id="CR26"><label>26.</label><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Giulianotti</surname><given-names>MA</given-names></name><name><surname>Welmaker</surname><given-names>GS</given-names></name><name><surname>Houghten</surname><given-names>RA</given-names></name></person-group><article-title>Shifting from the single to the multitarget paradigm in drug discovery</article-title><source>Drug Discov. Today</source><year>2013</year><volume>18</volume><fpage>495</fpage><lpage>501</lpage><pub-id pub-id-type="doi">10.1016/j.drudis.2013.01.008</pub-id><pub-id pub-id-type="pmid">23340113</pub-id></element-citation></ref><ref id="CR27"><label>27.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Ung</surname><given-names>P. M.-U.</given-names></name><name><surname>Rahman</surname><given-names>R</given-names></name><name><surname>Schlessinger</surname><given-names>A</given-names></name></person-group><article-title>Redefining the protein kinase conformational space with machine learning</article-title><source>Cell Chem. 
Biol.</source><year>2018</year><volume>25</volume><fpage>916</fpage><lpage>924.e2</lpage><pub-id pub-id-type="doi">10.1016/j.chembiol.2018.05.002</pub-id><pub-id pub-id-type="pmid">29861272</pub-id></element-citation></ref><ref id="CR28"><label>28.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rahman</surname><given-names>R</given-names></name><name><surname>Ung</surname><given-names>PM-U</given-names></name><name><surname>Schlessinger</surname><given-names>A</given-names></name></person-group><article-title>KinaMetrix: a web resource to investigate kinase conformations and inhibitor space</article-title><source>Nucleic Acids Res.</source><year>2019</year><volume>47</volume><fpage>D361</fpage><lpage>D366</lpage><pub-id pub-id-type="doi">10.1093/nar/gky916</pub-id><pub-id pub-id-type="pmid">30321373</pub-id></element-citation></ref><ref id="CR29"><label>29.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Dar</surname><given-names>AC</given-names></name><name><surname>Shokat</surname><given-names>KM</given-names></name></person-group><article-title>The evolution of protein kinase inhibitors from antagonists to agonists of cellular signaling</article-title><source>Annu. Rev. 
Biochem.</source><year>2011</year><volume>80</volume><fpage>769</fpage><lpage>795</lpage><pub-id pub-id-type="doi">10.1146/annurev-biochem-090308-173656</pub-id><pub-id pub-id-type="pmid">21548788</pub-id></element-citation></ref><ref id="CR30"><label>30.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Zhang</surname><given-names>T</given-names></name><name><surname>Hatcher</surname><given-names>JM</given-names></name><name><surname>Teng</surname><given-names>M</given-names></name><name><surname>Gray</surname><given-names>NS</given-names></name><name><surname>Kostic</surname><given-names>M</given-names></name></person-group><article-title>Recent advances in selective and irreversible covalent ligand development and validation</article-title><source>Cell Chem. Biol.</source><year>2019</year><volume>26</volume><fpage>1486</fpage><lpage>1500</lpage><pub-id pub-id-type="doi">10.1016/j.chembiol.2019.09.012</pub-id><pub-id pub-id-type="pmid">31631011</pub-id></element-citation></ref><ref id="CR31"><label>31.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Schnell</surname><given-names>D</given-names></name><etal/></person-group><article-title>Pharmacokinetics of afatinib in subjects with mild or moderate hepatic impairment</article-title><source>Cancer Chemother. Pharm.</source><year>2014</year><volume>74</volume><fpage>267</fpage><lpage>275</lpage><pub-id pub-id-type="doi">10.1007/s00280-014-2484-y</pub-id></element-citation></ref><ref id="CR32"><label>32.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Burridge</surname><given-names>PW</given-names></name><etal/></person-group><article-title>Human induced pluripotent stem cell-derived cardiomyocytes recapitulate the predilection of breast cancer patients to doxorubicin-induced cardiotoxicity</article-title><source>Nat. 
Med.</source><year>2016</year><volume>22</volume><fpage>547</fpage><lpage>56</lpage><pub-id pub-id-type="doi">10.1038/nm.4087</pub-id><pub-id pub-id-type="pmid">27089514</pub-id></element-citation></ref><ref id="CR33"><label>33.</label><mixed-citation publication-type="other">Soumillon, M., Cacchiarelli, D., Semrau, S., van Oudenaarden, A. &#x00026; Mikkelsen, T. S. Characterization of directed differentiation by high-throughput single-cell RNA-Seq. Preprint at <ext-link ext-link-type="uri" xlink:href="https://www.biorxiv.org/content/10.1101/003236v1">https://www.biorxiv.org/content/10.1101/003236v1</ext-link> (2014).</mixed-citation></ref><ref id="CR34"><label>34.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Kivioja</surname><given-names>T</given-names></name><etal/></person-group><article-title>Counting absolute numbers of molecules using unique molecular identifiers</article-title><source>Nat. Methods</source><year>2011</year><volume>9</volume><fpage>72</fpage><lpage>74</lpage><pub-id pub-id-type="doi">10.1038/nmeth.1778</pub-id><pub-id pub-id-type="pmid">22101854</pub-id></element-citation></ref><ref id="CR35"><label>35.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Dobin</surname><given-names>A</given-names></name><etal/></person-group><article-title>STAR: ultrafast universal RNA-seq aligner</article-title><source>Bioinformatics</source><year>2013</year><volume>29</volume><fpage>15</fpage><lpage>21</lpage><pub-id pub-id-type="doi">10.1093/bioinformatics/bts635</pub-id><pub-id pub-id-type="pmid">23104886</pub-id></element-citation></ref><ref id="CR36"><label>36.</label><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Robinson</surname><given-names>MD</given-names></name><name><surname>McCarthy</surname><given-names>DJ</given-names></name><name><surname>Smyth</surname><given-names>GK</given-names></name></person-group><article-title>edgeR: a Bioconductor package for differential expression analysis of digital gene expression data</article-title><source>Bioinformatics</source><year>2010</year><volume>26</volume><fpage>139</fpage><lpage>40</lpage><pub-id pub-id-type="doi">10.1093/bioinformatics/btp616</pub-id><pub-id pub-id-type="pmid">19910308</pub-id></element-citation></ref><ref id="CR37"><label>37.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Sarangdhar</surname><given-names>M</given-names></name><etal/></person-group><article-title>Data mining differential clinical outcomes associated with drug regimens using adverse event reporting data</article-title><source>Nat. Biotechnol.</source><year>2016</year><volume>34</volume><fpage>697</fpage><lpage>700</lpage><pub-id pub-id-type="doi">10.1038/nbt.3623</pub-id><pub-id pub-id-type="pmid">27404875</pub-id></element-citation></ref><ref id="CR38"><label>38.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Brown</surname><given-names>EG</given-names></name><name><surname>Wood</surname><given-names>L</given-names></name><name><surname>Wood</surname><given-names>S</given-names></name></person-group><article-title>The Medical Dictionary for Regulatory Activities (MedDRA)</article-title><source>Drug Saf.</source><year>1999</year><volume>20</volume><fpage>109</fpage><lpage>117</lpage><pub-id pub-id-type="doi">10.2165/00002018-199920020-00002</pub-id><pub-id pub-id-type="pmid">10082069</pub-id></element-citation></ref><ref id="CR39"><label>39.</label><element-citation publication-type="journal"><person-group 
person-group-type="author"><name><surname>Clarke</surname><given-names>DJB</given-names></name><etal/></person-group><article-title>EXpression2Kinases (X2K) Web: linking expression signatures to upstream cell signaling networks</article-title><source>Nucleic Acids Res.</source><year>2018</year><volume>46</volume><fpage>W171</fpage><lpage>W179</lpage><pub-id pub-id-type="doi">10.1093/nar/gky458</pub-id><pub-id pub-id-type="pmid">29800326</pub-id></element-citation></ref><ref id="CR40"><label>40.</label><mixed-citation publication-type="other">RDKit. <ext-link ext-link-type="uri" xlink:href="http://www.rdkit.org/">http://www.rdkit.org/</ext-link>.</mixed-citation></ref><ref id="CR41"><label>41.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Rogers</surname><given-names>D</given-names></name><name><surname>Hahn</surname><given-names>M</given-names></name></person-group><article-title>Extended-connectivity fingerprints</article-title><source>J. Chem. Inf. Model.</source><year>2010</year><volume>50</volume><fpage>742</fpage><lpage>754</lpage><pub-id pub-id-type="doi">10.1021/ci100050t</pub-id><pub-id pub-id-type="pmid">20426451</pub-id></element-citation></ref><ref id="CR42"><label>42.</label><mixed-citation publication-type="other">Daylight. <ext-link ext-link-type="uri" xlink:href="https://www.daylight.com/">https://www.daylight.com/</ext-link>.</mixed-citation></ref><ref id="CR43"><label>43.</label><element-citation publication-type="journal"><person-group person-group-type="author"><name><surname>Durant</surname><given-names>JL</given-names></name><name><surname>Leland</surname><given-names>BA</given-names></name><name><surname>Henry</surname><given-names>DR</given-names></name><name><surname>Nourse</surname><given-names>JG</given-names></name></person-group><article-title>Reoptimization of MDL keys for use in drug discovery</article-title><source>J. Chem. Inf. Comput. 
Sci.</source><year>2002</year><volume>42</volume><fpage>1273</fpage><lpage>1280</lpage><pub-id pub-id-type="doi">10.1021/ci010132r</pub-id><pub-id pub-id-type="pmid">12444722</pub-id></element-citation></ref><ref id="CR44"><label>44.</label><mixed-citation publication-type="other">ChemAxon - Software Solutions and Services for Chemistry &#x00026; Biology. <ext-link ext-link-type="uri" xlink:href="https://chemaxon.com/">https://chemaxon.com/</ext-link>.</mixed-citation></ref></ref-list></back></article>
diff --git a/jcore-pmc-reader/LICENSE b/jcore-pmc-reader/LICENSE
index fbbd41e05..d0f946a29 100644
--- a/jcore-pmc-reader/LICENSE
+++ b/jcore-pmc-reader/LICENSE
@@ -1,6 +1,6 @@
 BSD 2-Clause License
 
-Copyright (c) 2017, JULIE Lab
+Copyright (c) 2022, JULIE Lab
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
index d3b402b36..5eedd46fa 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
@@ -7,6 +7,7 @@
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
+import java.io.InputStream;
 import java.net.URI;
 import java.util.Iterator;
 
@@ -22,6 +23,10 @@ public CasPopulator(Iterator<URI> nxmlIterator, Boolean omitBibReferences) throw
         nxmlDocumentParser.loadElementPropertyFile(settings);
     }
 
+    public CasPopulator(Boolean omitBibReferences) throws IOException {
+        this(null, omitBibReferences);
+    }
+
     public void populateCas(URI nxmlUri, JCas cas) throws ElementParsingException, NoDataAvailableException {
         ElementParsingResult result = null;
         URI currentUri = nxmlUri;
@@ -44,6 +49,18 @@ public void populateCas(URI nxmlUri, JCas cas) throws ElementParsingException, N
         cas.setDocumentText(sb.toString());
     }
 
+    public void populateCas(InputStream is, JCas cas) throws ElementParsingException, NoDataAvailableException {
+        ElementParsingResult result;
+        try {
+            nxmlDocumentParser.reset(is, cas);
+            result = nxmlDocumentParser.parse();
+        } catch (DocumentParsingException e) {
+            throw new NoDataAvailableException(e);
+        }
+        StringBuilder sb = populateCas(result, new StringBuilder());
+        cas.setDocumentText(sb.toString());
+    }
+
     /**
      * This is the actual method that reads the parsing results, created the CAS document text and adds
      * the annotations from the parsing results.
diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/NXMLURIIterator.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/NXMLURIIterator.java
index 7aa245057..4aec4f9a2 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/NXMLURIIterator.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/NXMLURIIterator.java
@@ -22,6 +22,11 @@
 
 import static java.nio.charset.StandardCharsets.UTF_8;
 
+/**
+ * Searches over directories and, optionally, the contents of ZIP archives for files with an (n)xml extension.
+ * Returns URIs that either point to single files or to entries into ZIP archives. Both can equally be accessed via
+ * "uri.toURL().openStream()" which is done in the NxmlDocumentParser.
+ */
 public class NXMLURIIterator implements Iterator<URI> {
     private final static Logger log = LoggerFactory.getLogger(NXMLURIIterator.class);
     private final static Logger logFileSearch = LoggerFactory.getLogger(NXMLURIIterator.class.getCanonicalName() + ".FileSearch");
@@ -48,7 +53,7 @@ public boolean hasNext() {
             // The beginning: The currentDirectory is null and we start at
             // the given path (which actually might be a single file to
             // read).
-            log.debug("Starting background thread to search for PMC (.nxml) files at {}", basePath);
+            log.debug("Starting background thread to search for PMC (.xml) files at {}", basePath);
             CompletableFuture.runAsync(() -> setFilesAndSubDirectories(basePath, false));
             fileSearchRunning = true;
         }
@@ -78,7 +83,7 @@ private void setFilesAndSubDirectories(File directory, boolean recursiveCall) {
             if ((searchRecursively || directory.equals(basePath)) && !isZipFile(directory)) {
                 logFileSearch.debug("Identified {} as a directory, reading files and subdirectories", directory);
                 // set the files in the directory
-                for (File file : directory.listFiles(f -> f.isFile() && f.getName().contains(".nxml") && !isZipFile(f) && isInWhitelist(f))) {
+                for (File file : directory.listFiles(f -> f.isFile() && f.getName().endsWith("xml") && !isZipFile(f) && isInWhitelist(f))) {
                     URI toURI = file.toURI();
                     try {
                         uris.put(toURI);
@@ -101,7 +106,7 @@ private void setFilesAndSubDirectories(File directory, boolean recursiveCall) {
                     while (entries.hasMoreElements()) {
                         final ZipEntry e = entries.nextElement();
                         if (!e.isDirectory() && e.getName().contains(".nxml") && isInWhitelist(new File(e.getName()))) {
-                            final String urlStr = "jar:" + directory.toURI().toString() + "!/" + e.getName();
+                            final String urlStr = "jar:" + directory.toURI() + "!/" + e.getName();
                             int exclamationIndex = urlStr.indexOf('!');
                             final String urlEncodedStr = urlStr.substring(0, exclamationIndex + 2) + Stream.of(urlStr.substring(exclamationIndex + 2).split("/")).map(x -> URLEncoder.encode(x, UTF_8)).collect(Collectors.joining("/"));
                             URL url = new URL(urlEncodedStr);
diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/NxmlDocumentParser.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/NxmlDocumentParser.java
index 9f75ba8db..5285ee138 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/NxmlDocumentParser.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/NxmlDocumentParser.java
@@ -110,7 +110,7 @@ else if (docType.contains("JATS")) {
                 return;
             }
         }
-        throw new DocTypeNotFoundException("Could not find a doctype.");
+        throw new DocTypeNotFoundException("Could not find a known doctype.");
     }
 
     private void setupParserRegistry() {
diff --git a/jcore-pmc-reader/src/main/resources/LICENSE.txt b/jcore-pmc-reader/src/main/resources/LICENSE.txt
index fbbd41e05..d0f946a29 100644
--- a/jcore-pmc-reader/src/main/resources/LICENSE.txt
+++ b/jcore-pmc-reader/src/main/resources/LICENSE.txt
@@ -1,6 +1,6 @@
 BSD 2-Clause License
 
-Copyright (c) 2017, JULIE Lab
+Copyright (c) 2022, JULIE Lab
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without

From 8c7c12010ec19ce3b71b2580ae70f73663e83b00 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 14 Feb 2022 17:19:04 +0100
Subject: [PATCH 121/269] Fix the setDBProcessingMetaData method to actually
 return the PK string.

---
 .../julielab/jcore/reader/db/DBMultiplierReader.java  |  7 ++++---
 .../java/de/julielab/jcore/reader/db/DBReader.java    | 11 +++++++----
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplierReader.java b/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplierReader.java
index bfe474de8..992c64a00 100644
--- a/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplierReader.java
+++ b/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplierReader.java
@@ -19,6 +19,7 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
@@ -65,7 +66,7 @@ public void initialize(UimaContext context) throws ResourceInitializationExcepti
     }
 
     @Override
-    public void getNext(JCas jCas) throws CollectionException {
+    public void getNext(JCas jCas) throws CollectionException, IOException {
         log.trace("Requesting next batch of document IDs from the database.");
         List<Object[]> idList = getNextDocumentIdBatch();
         if (idList.isEmpty())
@@ -119,7 +120,7 @@ public void getNext(JCas jCas) throws CollectionException {
      *
      * @see org.apache.uima.collection.base_cpm.BaseCollectionReader#hasNext()
      */
-    public boolean hasNext() {
+    public boolean hasNext() throws IOException, CollectionException {
         boolean hasNext = this.hasNext;
         if (retriever != null)
             hasNext = !retriever.getDocumentIds().isEmpty();
@@ -187,7 +188,7 @@ public Progress[] getProgress() {
     }
 
     @Override
-    public void close() {
+    public void close() throws IOException {
         if (dbc != null)
             dbc.close();
         dbc = null;
diff --git a/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBReader.java b/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBReader.java
index 5a21db4be..e580fa2fa 100644
--- a/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBReader.java
+++ b/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBReader.java
@@ -118,17 +118,20 @@ public abstract class DBReader extends DBSubsetReader {
     private DBCIterator<byte[][]> xmlBytes;
 
     public static String setDBProcessingMetaData(DataBaseConnector dbc, boolean readDataTable, String tableName, byte[][] data, JCas cas) {
-        String pkString = null;
         // remove previously added dbMetaData
         JCasUtil.select(cas, DBProcessingMetaData.class).forEach(x -> x.removeFromIndexes());
 
         DBProcessingMetaData dbMetaData = new DBProcessingMetaData(cas);
         List<Integer> pkIndices = dbc.getPrimaryKeyIndices();
         StringArray pkArray = new StringArray(cas, pkIndices.size());
+        StringBuilder pkBuilder = new StringBuilder();
         for (int i = 0; i < pkIndices.size(); ++i) {
             Integer index = pkIndices.get(i);
             String pkElementValue = new String(data[index], Charset.forName("UTF-8"));
             pkArray.set(i, pkElementValue);
+            pkBuilder.append(pkElementValue);
+            if (i < pkIndices.size() - 1)
+                pkBuilder.append(",");
         }
         if (log.isDebugEnabled())
             log.trace("Setting primary key for DBProcessingMetaData to {}", Arrays.toString(pkArray.toArray()));
@@ -142,10 +145,9 @@ public static String setDBProcessingMetaData(DataBaseConnector dbc, boolean read
         } else {
             log.trace("Not setting the subset to DBProcessingMetaData because reading the data table is set to {}", readDataTable);
         }
-
-
         dbMetaData.addToIndexes();
-        return pkString;
+
+        return pkBuilder.toString();
     }
 
     @Override
@@ -257,6 +259,7 @@ public void close() {
      * pipeline status field
      */
     protected abstract String getReaderComponentName();
+
     /**
      * <p>
      * This class is charged with retrieving batches of document IDs and documents while previously fetched documents

From 11b406bd5d6fc6b6d0b0edd140bf1ff936b2c78d Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 14 Feb 2022 17:19:36 +0100
Subject: [PATCH 122/269] Fix a typo in the UIMA type imports.

---
 .../jcore/ae/eventflattener/desc/jcore-event-flattener-ae.xml   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jcore-event-flattener-ae/src/main/resources/de/julielab/jcore/ae/eventflattener/desc/jcore-event-flattener-ae.xml b/jcore-event-flattener-ae/src/main/resources/de/julielab/jcore/ae/eventflattener/desc/jcore-event-flattener-ae.xml
index 7e3a1f520..296872b61 100644
--- a/jcore-event-flattener-ae/src/main/resources/de/julielab/jcore/ae/eventflattener/desc/jcore-event-flattener-ae.xml
+++ b/jcore-event-flattener-ae/src/main/resources/de/julielab/jcore/ae/eventflattener/desc/jcore-event-flattener-ae.xml
@@ -12,7 +12,7 @@
         <configurationParameterSettings />
         <typeSystemDescription>
       <imports>
-        <import name="de.julielab.jcore.types.jcore-semantics-biology-types" />
+        <import name="de.julielab.jcore.types.jcore-semdantics-biology-types" />
           <import name="de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types" />
       </imports>
     </typeSystemDescription>

From 473d4ac43a0b137bde4ffa08c93aa6473b39500e Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 14 Feb 2022 17:20:02 +0100
Subject: [PATCH 123/269] Add the PMCDBReader as a Maven module.

---
 .gitignore |   1 +
 pom.xml    | 321 ++++++++++++++++++++++++++++++++++-------------------
 2 files changed, 209 insertions(+), 113 deletions(-)

diff --git a/.gitignore b/.gitignore
index 247d87c61..6da01ef44 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,3 +11,4 @@ target
 **/*.iml
 /julie-xml-tools.jar
 
+/jcore-pmc-db-reader/src/test/resources/hiddenConfig
diff --git a/pom.xml b/pom.xml
index 6db724ae5..e4a6477fa 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,228 +1,323 @@
 <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
-      
+        
+  
   <modelVersion>4.0.0</modelVersion>
-      
+        
+  
   <parent>
-            
+                
+    
     <groupId>de.julielab</groupId>
-            
+                
+    
     <artifactId>jcore-parent</artifactId>
-            
+                
+    
     <version>2.5.2-SNAPSHOT</version>
-        
+            
+  
   </parent>
-      
+        
+  
   <artifactId>jcore-base</artifactId>
-      
+        
+  
   <packaging>pom</packaging>
-      
+        
+  
   <name>JCoRe Base</name>
-      
+        
+  
   <description>The POM for the JCoRe Base projects.</description>
-      
+        
+  
   <version>2.6.0-SNAPSHOT</version>
-      
+        
+  
   <organization>
-            
+                
+    
     <name>JULIE Lab, Germany</name>
-            
+                
+    
     <url>http://www.julielab.de</url>
-        
+            
+  
   </organization>
-      
+        
+  
   <licenses>
-            
+                
+    
     <license>
-                  
+                        
+      
       <name>BSD-2-Clause</name>
-                  
+                        
+      
       <url>https://opensource.org/licenses/BSD-2-Clause</url>
-              
+                    
+    
     </license>
-        
+            
+  
   </licenses>
-      
+        
+  
   <url>https://github.com/JULIELab/jcore-base</url>
-      
+        
+  
   <dependencies>
-            
+                
+    
     <dependency>
-                  
+                        
+      
       <groupId>org.apache.uima</groupId>
-                  
+                        
+      
       <artifactId>uimaj-core</artifactId>
-                  
+                        
+      
       <version>${uima-version}</version>
-              
+                    
+    
     </dependency>
-            
+                
+    
     <dependency>
-                  
+                        
+      
       <groupId>org.apache.uima</groupId>
-                  
+                        
+      
       <artifactId>uimafit-core</artifactId>
-                  
+                        
+      
       <version>${uimafit-version}</version>
-              
+                    
+    
     </dependency>
-        
+            
+  
   </dependencies>
-      
+        
+  
   <modules>
-
+        
     <module>jcore-annotation-adder-ae</module>
-
+        
     <module>jcore-ace-reader</module>
-            
+                
+    
     <module>jcore-acronym-ae</module>
-
+        
     <module>jcore-acronym-writer</module>
-            
+                
+    
     <module>jcore-banner-ae</module>
-
+        
     <module>jcore-bc2gm-reader</module>
-
+        
     <module>jcore-bc2gmformat-writer</module>
-
+        
     <module>jcore-biolemmatizer-ae</module>
-            
+                
+    
     <module>jcore-bionlpformat-consumer</module>
-            
+                
+    
     <module>jcore-bionlpformat-reader</module>
-            
+                
+    
     <module>jcore-biosem-ae</module>
-            
+                
+    
     <module>jcore-conll-consumer</module>
-            
+                
+    
     <module>jcore-coordination-baseline-ae</module>
-
+        
     <module>jcore-cord19-reader</module>
-
+        
     <module>jcore-coreference-writer</module>
-
+        
     <module>jcore-ct-reader</module>
-
+        
     <module>jcore-db-checkpoint-ae</module>
-
+        
     <module>jcore-descriptor-creator</module>
-
+        
     <module>jcore-dta-reader</module>
-            
+                
+    
     <module>jcore-ec-code-ae</module>
-            
+                
+    
     <module>jcore-elasticsearch-consumer</module>
-            
+                
+    
     <module>jcore-embedding-writer</module>
-            
+                
+    
     <module>jcore-event-flattener-ae</module>
-            
+                
+    
     <module>jcore-feature-value-replacement-ae</module>
-            
+                
+    
     <module>jcore-file-reader</module>
-            
+                
+    
     <module>jcore-flair-ner-ae</module>
-
+        
     <module>jcore-flair-token-embedding-ae</module>
-
+        
     <module>jcore-flow-controllers</module>
-            
+                
+    
     <module>jcore-iexml-consumer</module>
-            
+                
+    
     <module>jcore-iexml-reader</module>
-            
+                
+    
     <module>jcore-ign-reader</module>
-            
+                
+    
     <module>jcore-iob-consumer</module>
-            
+                
+    
     <module>jcore-jnet-ae</module>
-            
+                
+    
     <module>jcore-jpos-ae</module>
-            
+                
+    
     <module>jcore-jsbd-ae</module>
-            
+                
+    
     <module>jcore-jtbd-ae</module>
-            
+                
+    
     <module>jcore-julielab-entity-evaluator-consumer</module>
-            
+                
+    
     <module>jcore-likelihood-assignment-ae</module>
-            
+                
+    
     <module>jcore-likelihood-detection-ae</module>
-
+        
     <module>jcore-line-multiplier</module>
-
+        
     <module>jcore-lingpipegazetteer-ae</module>
-            
+                
+    
     <module>jcore-lingpipe-porterstemmer-ae</module>
-            
+                
+    
     <module>jcore-lingscope-ae</module>
-            
+                
+    
     <module>jcore-linnaeus-species-ae</module>
-            
+                
+    
     <module>jcore-mantra-xml-types</module>
-            
+                
+    
     <module>jcore-medxn-ae</module>
-            
+                
+    
     <module>jcore-msdoc-reader</module>
-            
+                
+    
     <module>jcore-mstparser-ae</module>
-            
+                
+    
     <module>jcore-muc7-reader</module>
-            
+                
+    
     <module>jcore-mutationfinder-ae</module>
-
+        
     <module>jcore-neo4j-relations-consumer</module>
-            
+                
+    
     <module>jcore-opennlp-chunk-ae</module>
-            
+                
+    
     <module>jcore-opennlp-parser-ae</module>
-            
+                
+    
     <module>jcore-opennlp-postag-ae</module>
-            
+                
+    
     <module>jcore-opennlp-sentence-ae</module>
-            
+                
+    
     <module>jcore-opennlp-token-ae</module>
-
+        
     <module>jcore-ppd-writer</module>
-
+        
     <module>jcore-pmc-reader</module>
-            
+                
+    
     <module>jcore-pubtator-reader</module>
-            
+                
+    
     <module>jcore-stanford-lemmatizer-ae</module>
-            
+                
+    
     <module>jcore-topic-indexing-ae</module>
-            
+                
+    
     <module>jcore-topics-writer</module>
-            
+                
+    
     <module>jcore-txt-consumer</module>
-            
+                
+    
     <module>jcore-types</module>
-            
+                
+    
     <module>jcore-utilities</module>
-            
+                
+    
     <module>jcore-xml-mapper</module>
-            
+                
+    
     <module>jcore-xml-reader</module>
-            
+                
+    
     <module>jcore-xmi-reader</module>
-            
+                
+    
     <module>jcore-xmi-writer</module>
-            
+                
+    
     <module>jedis-parent</module>
-      <module>jcore-jedis-integration-tests</module>
-
-  </modules>
+          
+    <module>jcore-jedis-integration-tests</module>
+        
+    <module>jcore-pmc-db-reader</module>
       
+  </modules>
+        
+  
   <scm>
-            
+                
+    
     <connection>scm:git:https://github.com/JULIELab/jcore-base
         </connection>
-            
+                
+    
     <developerConnection>scm:git:https://github.com/JULIELab/jcore-base</developerConnection>
-            
+                
+    
     <url>scm:git:https://github.com/JULIELab/jcore-base</url>
-        
-  </scm>
+            
   
+  </scm>
+    
+
 </project>

From 8bcd76fda0576841b26c7fc81317e37cf5ccc4e9 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 14 Feb 2022 17:21:00 +0100
Subject: [PATCH 124/269] Formatting.

---
 .../jcore/ae/flairner/FlairNerAnnotator.java  | 42 ++++++++++---------
 1 file changed, 23 insertions(+), 19 deletions(-)

diff --git a/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java b/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java
index de2382319..04d65d3cf 100644
--- a/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java
+++ b/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java
@@ -63,9 +63,9 @@ public class FlairNerAnnotator extends JCasAnnotator_ImplBase {
     private String pythonExecutable;
     @ConfigurationParameter(name = PARAM_STORE_EMBEDDINGS, mandatory = false, description = "Optional. Possible values: ALL, ENTITIES, NONE. The FLAIR SequenceTagger first computes the embeddings for each sentence and uses those as input for the actual NER algorithm. By default, the embeddings are not stored. By setting this parameter to ALL, the embeddings of all tokens of the sentence are retrieved from flair and stored in the embeddingVectors feature of each token. Setting the parameter to ENTITIES will restrict the embedding storage to those tokens which overlap with an entity recognized by FLAIR.")
     private StoreEmbeddings storeEmbeddings;
-    @ConfigurationParameter(name = PARAM_GPU_NUM, mandatory = false, defaultValue="0", description = "Specifies the GPU device number to be used for FLAIR. This setting can be overwritten by the Java system property 'flairner.device'.")
+    @ConfigurationParameter(name = PARAM_GPU_NUM, mandatory = false, defaultValue = "0", description = "Specifies the GPU device number to be used for FLAIR. This setting can be overwritten by the Java system property 'flairner.device'.")
     private int gpuNum;
-    @ConfigurationParameter(name=PARAM_COMPONENT_ID, mandatory = false, description = "Specifies the componentId feature value given to the created annotations. Defaults to 'FlairNerAnnotator'.")
+    @ConfigurationParameter(name = PARAM_COMPONENT_ID, mandatory = false, description = "Specifies the componentId feature value given to the created annotations. Defaults to 'FlairNerAnnotator'.")
     private String componentId;
     private AnnotationAdderConfiguration adderConfig;
 
@@ -78,7 +78,7 @@ public void initialize(final UimaContext aContext) throws ResourceInitialization
         entityClass = (String) aContext.getConfigParameterValue(PARAM_ANNOTATION_TYPE);
         flairModel = (String) aContext.getConfigParameterValue(PARAM_FLAIR_MODEL);
         storeEmbeddings = StoreEmbeddings.valueOf(Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_STORE_EMBEDDINGS)).orElse(StoreEmbeddings.NONE.name()));
-        gpuNum = Optional.ofNullable((Integer)aContext.getConfigParameterValue(PARAM_GPU_NUM)).orElse(0);
+        gpuNum = Optional.ofNullable((Integer) aContext.getConfigParameterValue(PARAM_GPU_NUM)).orElse(0);
         componentId = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_COMPONENT_ID)).orElse(getClass().getSimpleName());
         if (System.getProperty(GPU_NUM_SYS_PROP) != null) {
             try {
@@ -157,21 +157,21 @@ public void initialize(final UimaContext aContext) throws ResourceInitialization
      */
     @Override
     public void process(final JCas aJCas) throws AnalysisEngineProcessException {
-        int i = 0;
-        final AnnotationIndex<Sentence> sentIndex = aJCas.getAnnotationIndex(Sentence.class);
-        Map<String, Sentence> sentenceMap = new HashMap<>();
-        for (Sentence sentence : sentIndex) {
-            if (sentence.getId() == null)
-                sentence.setId("s" + i++);
-            sentenceMap.put(sentence.getId(), sentence);
-        }
-        if ( log.isDebugEnabled()) {
-            if (sentenceMap.isEmpty())
-                log.debug("Document {} does not have any sentences.", JCoReTools.getDocId(aJCas));
-            if (!aJCas.getAnnotationIndex(Token.class).iterator().hasNext())
-                log.debug("Document {} does not have any tokens", JCoReTools.getDocId(aJCas));
-        }
         try {
+            int i = 0;
+            final AnnotationIndex<Sentence> sentIndex = aJCas.getAnnotationIndex(Sentence.class);
+            Map<String, Sentence> sentenceMap = new HashMap<>();
+            for (Sentence sentence : sentIndex) {
+                if (sentence.getId() == null)
+                    sentence.setId("s" + i++);
+                sentenceMap.put(sentence.getId(), sentence);
+            }
+            if (log.isDebugEnabled()) {
+                if (sentenceMap.isEmpty())
+                    log.debug("Document {} does not have any sentences.", JCoReTools.getDocId(aJCas));
+                if (!aJCas.getAnnotationIndex(Token.class).iterator().hasNext())
+                    log.debug("Document {} does not have any tokens", JCoReTools.getDocId(aJCas));
+            }
             JCoReOverlapAnnotationIndex<InternalReference> intRefIndex = new JCoReOverlapAnnotationIndex<>(aJCas, InternalReference.type);
             final AnnotationAdderHelper helper = new AnnotationAdderHelper();
             log.trace("Sending document sentences to flair for entity tagging.");
@@ -206,6 +206,9 @@ public void process(final JCas aJCas) throws AnalysisEngineProcessException {
             final String docId = JCoReTools.getDocId(aJCas);
             log.error("Could not set the offsets of an annotation in document {}", docId);
             throw new AnalysisEngineProcessException(e);
+        } catch (Throwable t) {
+            log.error("Error in {}", this.getClass().getSimpleName(), t);
+            throw new AnalysisEngineProcessException(t);
         }
     }
 
@@ -213,7 +216,7 @@ private void addTokenEmbeddings(JCas aJCas, Map<String, Sentence> sentenceMap, A
         final List<TokenEmbedding> tokenEmbeddings = taggingResponse.getTokenEmbeddings();
         JCoReTreeMapAnnotationIndex<Long, Token> tokenIndex = null;
         if (!tokenEmbeddings.isEmpty())
-            tokenIndex = new JCoReTreeMapAnnotationIndex<>(Comparators.longOverlapComparator(),TermGenerators.longOffsetTermGenerator(), TermGenerators.longOffsetTermGenerator(), aJCas, Token.type);
+            tokenIndex = new JCoReTreeMapAnnotationIndex<>(Comparators.longOverlapComparator(), TermGenerators.longOffsetTermGenerator(), TermGenerators.longOffsetTermGenerator(), aJCas, Token.type);
         Map<Token, List<double[]>> originalTokenEmbeddings = new HashMap<>();
         for (TokenEmbedding tokenEmbedding : tokenEmbeddings) {
             final Sentence sentence = sentenceMap.get(tokenEmbedding.getSentenceId());
@@ -262,7 +265,8 @@ private void addTokenEmbeddings(JCas aJCas, Map<String, Sentence> sentenceMap, A
     /**
      * Internal references can actually look like a part of a gene, e.g. "filament19" where "19" is a reference.
      * Exclude those spans from the gene mentions.
-     * @param a The gene annotation.
+     *
+     * @param a           The gene annotation.
      * @param intRefIndex The reference index.
      */
     private void excludeReferenceAnnotationSpans(Annotation a, JCoReOverlapAnnotationIndex<? extends Annotation> intRefIndex) {

From a7576266641b46c46d909f067d4a1bf73d623cf1 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 14 Feb 2022 17:21:17 +0100
Subject: [PATCH 125/269] Typo.

---
 .../annotationdefined/AnnotationDefinedFlowController.java    | 4 ++--
 .../desc/jcore-annotation-defined-flowcontroller.xml          | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlowController.java b/jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlowController.java
index 4158059a3..c6c016e45 100644
--- a/jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlowController.java
+++ b/jcore-flow-controllers/src/main/java/de/julielab/jcore/flow/annotationdefined/AnnotationDefinedFlowController.java
@@ -10,10 +10,10 @@
 
 /**
  * <p>Routes CASes through an aggregate analysis engine according to the {@link ToVisit} annotation present in the CAS.</p>
- * <p>If there is not <tt>ToVisit</tt> annotation, the default (fixed) flow will be used. Thus, the fixed flow constraint
+ * <p>If there is no <tt>ToVisit</tt> annotation, the default (fixed) flow will be used. Thus, the fixed flow constraint
  * must be set on the aggregate engine.</p>
  */
-@ResourceMetaData(name = "JCoRe Annotation Defined Flow Controller", description = "This flow controller relies on an annotation of type ToVisit to be present in the CAS. If there is no such annotation, the default fixed flow of the aggregate engine using this flow controller is used. Otherwise, die names of the components to pass the CAS to are taken from the annotation. If the annotation exists but defines to components to be visited by the CAS, no components are visited at all.", vendor = "JULIE Lab, Germany", version = "placeholder")
+@ResourceMetaData(name = "JCoRe Annotation Defined Flow Controller", description = "This flow controller relies on an annotation of type ToVisit to be present in the CAS. If there is no such annotation, the default fixed flow of the aggregate engine using this flow controller is used. Otherwise, the names of the components to pass the CAS to are taken from the annotation. If the annotation exists but defines to components to be visited by the CAS, no components are visited at all.", vendor = "JULIE Lab, Germany", version = "placeholder")
 public class AnnotationDefinedFlowController extends JCasFlowController_ImplBase {
     @Override
     public Flow computeFlow(JCas jCas) throws AnalysisEngineProcessException {
diff --git a/jcore-flow-controllers/src/main/resources/de/julielab/jcore/flow/annotationdefined/desc/jcore-annotation-defined-flowcontroller.xml b/jcore-flow-controllers/src/main/resources/de/julielab/jcore/flow/annotationdefined/desc/jcore-annotation-defined-flowcontroller.xml
index 2babe5cd5..78ea9b35d 100644
--- a/jcore-flow-controllers/src/main/resources/de/julielab/jcore/flow/annotationdefined/desc/jcore-annotation-defined-flowcontroller.xml
+++ b/jcore-flow-controllers/src/main/resources/de/julielab/jcore/flow/annotationdefined/desc/jcore-annotation-defined-flowcontroller.xml
@@ -4,7 +4,7 @@
     <implementationName>de.julielab.jcore.flow.annotationdefined.AnnotationDefinedFlowController</implementationName>
     <processingResourceMetaData>
         <name>JCoRe Annotation Defined Flow Controller</name>
-        <description>This flow controller relies on an annotation of type ToVisit to be present in the CAS. If there is no such annotation, the default fixed flow of the aggregate engine using this flow controller is used. Otherwise, die names of the components to pass the CAS to are taken from the annotation. If the annotation exists but defines to components to be visited by the CAS, no components are visited at all.</description>
+        <description>This flow controller relies on an annotation of type ToVisit to be present in the CAS. If there is no such annotation, the default fixed flow of the aggregate engine using this flow controller is used. Otherwise, the names of the components to pass the CAS to are taken from the annotation. If the annotation exists but defines to components to be visited by the CAS, no components are visited at all.</description>
         <version>placeholder</version>
         <vendor>JULIE Lab, Germany</vendor>
         <configurationParameters/>

From 9794d62b40cead5c7426be8f03e4a4fc7b2b8db5 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 14 Feb 2022 17:21:35 +0100
Subject: [PATCH 126/269] Formatting, comment correction.

---
 .../julielab/jcore/reader/xml/XMLDBMultiplier.java | 14 ++++++++------
 .../jcore/reader/xml/XMLDBMultiplierTest.java      |  2 --
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java b/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java
index 03c2b1160..ae7de1cef 100644
--- a/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java
+++ b/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java
@@ -62,7 +62,7 @@ public class XMLDBMultiplier extends DBMultiplier {
     @ConfigurationParameter(name = PARAM_TABLE_DOCUMENT, mandatory = false, description = "For use with AnnotationDefinedFlowController. String parameter indicating the name of the " +
             "table where the XMI data and, thus, the hash is stored. The name must be schema qualified. Note that in this component, only the ToVisit annotation is created that determines which components to apply to a CAS with matching (unchanged) hash. The logic to actually control the CAS flow is contained in the AnnotationDefinedFlowController.")
     private String xmiStorageDataTable;
-    @ConfigurationParameter(name= PARAM_TABLE_DOCUMENT_SCHEMA, mandatory = false, description = "For use with AnnotationDefinedFlowController. The name of the schema that the document table - given with the "+PARAM_TABLE_DOCUMENT+" parameter - adheres to. Only the primary key part is required for hash value retrieval.")
+    @ConfigurationParameter(name = PARAM_TABLE_DOCUMENT_SCHEMA, mandatory = false, description = "For use with AnnotationDefinedFlowController. The name of the schema that the document table - given with the " + PARAM_TABLE_DOCUMENT + " parameter - adheres to. Only the primary key part is required for hash value retrieval.")
     private String xmiStorageDataTableSchema;
     @ConfigurationParameter(name = PARAM_TO_VISIT_KEYS, mandatory = false, description = "For use with AnnotationDefinedFlowController. The delegate AE keys of the AEs this CAS should still applied on although the hash has not changed. Can be null or empty indicating that no component should be applied to the CAS. This is, however, the task of the AnnotationDefinedFlowController.")
     private String[] toVisitKeys;
@@ -147,7 +147,7 @@ private void setToVisitAnnotation(JCas jCas) {
                 String newHash = getHash(jCas);
                 if (existingHash.equals(newHash)) {
                     if (log.isTraceEnabled())
-                    log.trace("Document {} has a document text hash that equals the one present in the database. Creating a ToVisit annotation routing it only to the components with delegate keys {}.", pkString, toVisitKeys);
+                        log.trace("Document {} has a document text hash that equals the one present in the database. Creating a ToVisit annotation routing it only to the components with delegate keys {}.", pkString, toVisitKeys);
                     ToVisit toVisit = new ToVisit(jCas);
                     if (toVisitKeys != null && toVisitKeys.length != 0) {
                         StringArray keysArray = new StringArray(jCas, toVisitKeys.length);
@@ -156,6 +156,8 @@ private void setToVisitAnnotation(JCas jCas) {
                     }
                     toVisit.addToIndexes();
                 }
+            } else {
+                log.trace("No existing hash was found for document {}", pkString);
             }
         }
     }
@@ -212,10 +214,10 @@ private Map<String, String> fetchCurrentHashesFromDatabase(RowBatch rowBatch) th
                 while (rs.next()) {
                     StringBuilder pkSb = new StringBuilder();
                     for (int i = 0; i < xmiTableSchema.getPrimaryKey().length; i++)
-                        pkSb.append(rs.getString(i+1)).append(',');
-                    // Remove training comma
-                    pkSb.deleteCharAt(pkSb.length()-1);
-                    String hash = rs.getString(xmiTableSchema.getPrimaryKey().length+1);
+                        pkSb.append(rs.getString(i + 1)).append(',');
+                    // Remove trailing comma
+                    pkSb.deleteCharAt(pkSb.length() - 1);
+                    String hash = rs.getString(xmiTableSchema.getPrimaryKey().length + 1);
                     id2hash.put(pkSb.toString(), hash);
                 }
             } catch (SQLException e) {
diff --git a/jcore-xml-db-reader/src/test/java/de/julielab/jcore/reader/xml/XMLDBMultiplierTest.java b/jcore-xml-db-reader/src/test/java/de/julielab/jcore/reader/xml/XMLDBMultiplierTest.java
index 3e2cd9f79..86009735d 100644
--- a/jcore-xml-db-reader/src/test/java/de/julielab/jcore/reader/xml/XMLDBMultiplierTest.java
+++ b/jcore-xml-db-reader/src/test/java/de/julielab/jcore/reader/xml/XMLDBMultiplierTest.java
@@ -95,7 +95,6 @@ private static void prepareSourceXMLTable(DataBaseConnector dbc, CoStoSysConnect
     }
 
     private static void prepareTargetXMITable(DataBaseConnector dbc, CoStoSysConnection conn) throws SQLException {
-        // Note that the root is "xmi" and not "xml"
         String documentTextFmt = "This is document text number %d";
         dbc.createTable(TARGET_XMI_TABLE, "xmi_text", "Test table for hash comparison test.");
         dbc.assureColumnsExist(TARGET_XMI_TABLE, List.of(HASH_FIELD_NAME), "text");
@@ -138,7 +137,6 @@ public void testMultiplier() throws Exception {
         while (jCasIterator.hasNext()) {
             JCas newCas = jCasIterator.next();
             documentTexts.add(newCas.getDocumentText());
-            System.out.println(newCas.getDocumentText());
             newCas.release();
         }
         assertThat(documentTexts).containsExactly("This is document text number 0", "This is document text number 1", "This is document text number 2", "This is document text number 3", "This is document text number 4", "This is document text number 5", "This is document text number 6", "This is document text number 7", "This is document text number 8", "This is document text number 9");

From 296da6b944d0fec542111ff8994e48f36f13025c Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 14 Feb 2022 17:22:02 +0100
Subject: [PATCH 127/269] Remove unused throw directives.

---
 .../multiplier/pmc/PMCDBMultiplierHashComparisonTest.java      | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/jcore-pmc-db-reader/src/test/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplierHashComparisonTest.java b/jcore-pmc-db-reader/src/test/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplierHashComparisonTest.java
index a4f02e11a..a36155dfa 100644
--- a/jcore-pmc-db-reader/src/test/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplierHashComparisonTest.java
+++ b/jcore-pmc-db-reader/src/test/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplierHashComparisonTest.java
@@ -27,7 +27,6 @@
 import org.testcontainers.containers.PostgreSQLContainer;
 
 import java.io.File;
-import java.io.IOException;
 import java.nio.file.Path;
 import java.sql.PreparedStatement;
 import java.sql.SQLException;
@@ -57,7 +56,7 @@ public class PMCDBMultiplierHashComparisonTest {
     private static String costosysConfig;
 
     @BeforeAll
-    public static void setup() throws SQLException, UIMAException, IOException, ConfigurationException {
+    public static void setup() throws SQLException, ConfigurationException {
         postgres.start();
         DBTestUtils.createAndSetHiddenConfig(Path.of("src", "test", "resources", "hiddenConfig").toString(), postgres);
 

From 961b884e34e471ee787b91815d9eb8bfe6a4ec78 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 14 Feb 2022 17:53:00 +0100
Subject: [PATCH 128/269] Fix a bug where .nxml files were not read any more.

Introduced in the course of adapting to the new PMC bulk download format. All files in there have a plain .xml extension instead of the old .nxml.
---
 .../java/de/julielab/jcore/reader/pmc/NXMLURIIterator.java    | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/NXMLURIIterator.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/NXMLURIIterator.java
index 4aec4f9a2..1a4010576 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/NXMLURIIterator.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/NXMLURIIterator.java
@@ -83,7 +83,7 @@ private void setFilesAndSubDirectories(File directory, boolean recursiveCall) {
             if ((searchRecursively || directory.equals(basePath)) && !isZipFile(directory)) {
                 logFileSearch.debug("Identified {} as a directory, reading files and subdirectories", directory);
                 // set the files in the directory
-                for (File file : directory.listFiles(f -> f.isFile() && f.getName().endsWith("xml") && !isZipFile(f) && isInWhitelist(f))) {
+                for (File file : directory.listFiles(f -> f.isFile() && (f.getName().contains(".xml") || f.getName().contains(".nxml")) && !isZipFile(f) && isInWhitelist(f))) {
                     URI toURI = file.toURI();
                     try {
                         uris.put(toURI);
@@ -105,7 +105,7 @@ private void setFilesAndSubDirectories(File directory, boolean recursiveCall) {
                     int numEntries = 0;
                     while (entries.hasMoreElements()) {
                         final ZipEntry e = entries.nextElement();
-                        if (!e.isDirectory() && e.getName().contains(".nxml") && isInWhitelist(new File(e.getName()))) {
+                        if (!e.isDirectory() && (e.getName().contains(".xml") || e.getName().contains(".nxml")) && isInWhitelist(new File(e.getName()))) {
                             final String urlStr = "jar:" + directory.toURI() + "!/" + e.getName();
                             int exclamationIndex = urlStr.indexOf('!');
                             final String urlEncodedStr = urlStr.substring(0, exclamationIndex + 2) + Stream.of(urlStr.substring(exclamationIndex + 2).split("/")).map(x -> URLEncoder.encode(x, UTF_8)).collect(Collectors.joining("/"));

From 2c80ed2c209587f3e4923e6e4aa79f0cbdc636e6 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 14 Feb 2022 17:58:07 +0100
Subject: [PATCH 129/269] Add a PMC DB multiplier reader descriptor. Resolves
 #128.

The descriptor is just a rebranding for the DBMultiplierReader. It's the same basic JeDIS process of reading data from the database, marking them as being in process etc.
---
 .../desc/jcore-pmc-db-multiplier-reader.xml   | 191 ++++++++++++++++++
 1 file changed, 191 insertions(+)
 create mode 100644 jcore-pmc-db-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-db-multiplier-reader.xml

diff --git a/jcore-pmc-db-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-db-multiplier-reader.xml b/jcore-pmc-db-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-db-multiplier-reader.xml
new file mode 100644
index 000000000..7cbc31dcf
--- /dev/null
+++ b/jcore-pmc-db-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-db-multiplier-reader.xml
@@ -0,0 +1,191 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<collectionReaderDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+    <implementationName>de.julielab.jcore.reader.db.DBMultiplierReader</implementationName>
+    <processingResourceMetaData>
+        <name>JCoRe Database PMC Multiplier Reader</name>
+        <description>A collection reader that receives the IDs of documents from a database table. Additional tables may
+            be specified which will, together with the IDs, be sent to a CAS multiplier extending the DBMultiplierReader.
+            The multiplier will read documents and the joined additional tables according to the list of document IDs
+            sent by this reader. The component leverages the corpus storage system (CoStoSys) for this purpose and is
+            part of the Jena Document Information System, JeDIS.
+        </description>
+        <version>2.6.0-SNAPSHOT</version>
+        <vendor>JULIE Lab Jena, Germany</vendor>
+        <configurationParameters>
+            <configurationParameter>
+                <name>ResetTable</name>
+                <description>If set to true and the parameter 'Table' is set to a subset table, the subset table will be
+                    reset at the initialization of the reader to be ready for processing of the whole subset. Do not use
+                    when multiple readers read the same subset table.
+                </description>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>Timestamp</name>
+                <description>PostgreSQL timestamp expression that is evaluated against the data table. The data table
+                    schema, which must be the active data table schema in the CoStoSys configuration as always, must
+                    specify a single timestamp field for this parameter to work. Only data rows with a timestamp value
+                    larger than the given timestamp expression will be processed. Note that when reading from a subset
+                    table, there may be subset rows indicated to be in process which are finally not read from the data
+                    table. This is an implementational shortcoming and might be addressed if respective feature requests
+                    are given through the JULIE Lab GitHub page or JCoRe issues.
+                </description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>FetchIdsProactively</name>
+                <description>If set to true and when reading from a subset table, batches of document IDs will be
+                    retrieved in a background thread while the previous batch is already in process. This is meant to
+                    minimize waiting time for the database. Deactivate this feature if you encounter issues with
+                    database connections.
+                </description>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
+                <mandatory>true</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>AdditionalTables</name>
+                <description>An array of table names. By default, the table names will be resolved against the active
+                    data postgres schema configured in the CoStoSys configuration file. If a name is already schema
+                    qualified, i.e. contains a dot, the active data schema will be ignored. When reading documents from
+                    the document data table, the additional tables will be joined onto the data table using the primary
+                    keys of the queried documents. Using the table schema for the additional documents defined by the
+                    'AdditionalTableSchema' parameter, the columns that are marked as 'retrieve=true' in the table
+                    schema, are returned together with the main document data. This mechanism is most prominently used
+                    to retrieve annotation table data together with the original document text in XMI format for the
+                    JeDIS system.
+                </description>
+                <type>String</type>
+                <multiValued>true</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>AdditionalTableSchemas</name>
+                <description>The table schemas that correspond to the additional tables given with the
+                    'AdditionalTables' parameter. If only one schema name is given, that schema must apply to all
+                    additional tables.
+                </description>
+                <type>String</type>
+                <multiValued>true</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>BatchSize</name>
+                <description />
+                <type>Integer</type>
+                <multiValued>false</multiValued>
+                <mandatory>true</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>DBDriver</name>
+                <description>Currently unused because the Hikari JDBC library should recognize the correct driver.
+                    However, there seem to be cases where this doesn't work (HSQLDB). So we keep the parameter for
+                    later. When this issue comes up, the driver would have to be set manually. This isn't done right
+                    now.
+                </description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>Table</name>
+                <description>The data or subset database table to read from. The name will be resolved against the
+                    active Postgres schema defined in the CoStoSys configuration file. However, if the name contains a
+                    schema qualification (i.e. 'schemaname.tablename'), the configuration file will be ignored in this
+                    point.
+                </description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>true</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>SelectionOrder</name>
+                <description>WARNING: Potential SQL injection vulnerability. Do not let unknown users interact with your
+                    database with this component. An SQL ORDER clause specifying in which order the documents in the
+                    target database table should be processed. Only the clause itself must be specified, the ORDER
+                    keyword is automatically added.
+                </description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>WhereCondition</name>
+                <description>WARNING: Potential SQL injection vulnerability. Do not let unknown users interact with your
+                    database with this component. Only used when reading data tables directly. No effect when the
+                    'tableName' parameter specifies a subset table. The parameter value should be an SQL WHERE clause
+                    restricting the documents to be read. Only the clause itself must be specified, the WHERE keyword is
+                    added automatically.
+                </description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>Limit</name>
+                <description />
+                <type>Integer</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>CostosysConfigFile</name>
+                <description>File path or classpath resource location to the CoStoSys XML configuration. This
+                    configuration must specify the table schema of the table referred to by the 'Table' parameter as
+                    active table schema. The active table schema is always the schema of the data table that is either
+                    queried directly for documents or, if 'tableName' points to a subset table, indirectly through the
+                    subset table. Make also sure that the active database connection in the configuration points to the
+                    correct database.
+                </description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>true</mandatory>
+            </configurationParameter>
+        </configurationParameters>
+        <configurationParameterSettings>
+            <nameValuePair>
+                <name>ResetTable</name>
+                <value>
+                    <boolean>false</boolean>
+                </value>
+            </nameValuePair>
+            <nameValuePair>
+                <name>FetchIdsProactively</name>
+                <value>
+                    <boolean>true</boolean>
+                </value>
+            </nameValuePair>
+            <nameValuePair>
+                <name>BatchSize</name>
+                <value>
+                    <integer>50</integer>
+                </value>
+            </nameValuePair>
+            <nameValuePair>
+                <name>SelectionOrder</name>
+                <value>
+                    <string />
+                </value>
+            </nameValuePair>
+        </configurationParameterSettings>
+        <typeSystemDescription>
+            <imports>
+                <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types" />
+                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types" />
+                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types" />
+            </imports>
+        </typeSystemDescription>
+        <fsIndexCollection />
+        <capabilities />
+        <operationalProperties>
+            <modifiesCas>true</modifiesCas>
+            <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
+            <outputsNewCASes>true</outputsNewCASes>
+        </operationalProperties>
+    </processingResourceMetaData>
+</collectionReaderDescription>
\ No newline at end of file

From 5fdf9ac06a70e3e8eeee5747f852ad1e6637bcb7 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 14 Feb 2022 18:05:11 +0100
Subject: [PATCH 130/269] Update the meta descriptor of the PMC DB reader to
 include the reader descriptor.

---
 jcore-pmc-db-reader/component.meta                        | 8 ++++++--
 .../julielab/jcore/reader/xmi/XmiDBMultiplierReader.java  | 2 +-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/jcore-pmc-db-reader/component.meta b/jcore-pmc-db-reader/component.meta
index c57c78fa7..0310e53ee 100644
--- a/jcore-pmc-db-reader/component.meta
+++ b/jcore-pmc-db-reader/component.meta
@@ -1,13 +1,17 @@
 {
     "categories": [
-        "multiplier",
-        "reader"
+        "reader",
+        "multiplier"
     ],
     "description": "JeDIS database reader for PMC base documents.",
     "descriptors": [
         {
             "category": "multiplier",
             "location": "de.julielab.jcore.multiplier.pmc.desc.jcore-pmc-db-multiplier"
+        },
+        {
+            "category": "reader",
+            "location": "de.julielab.jcore.multiplier.pmc.desc.jcore-pmc-db-multiplier-reader"
         }
     ],
     "exposable": true,
diff --git a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierReader.java b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierReader.java
index 185bdd1d4..60c405b2f 100644
--- a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierReader.java
+++ b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/XmiDBMultiplierReader.java
@@ -94,7 +94,7 @@ public void initialize(UimaContext context) throws ResourceInitializationExcepti
     }
 
     @Override
-    public void getNext(JCas jCas) throws CollectionException {
+    public void getNext(JCas jCas) throws CollectionException, IOException {
         try {
             super.getNext(jCas);
             // The above call to super.getNext has created a RowBatch annotation which we retrieve here.

From 3aa6cb1bf28629a735d716801342a17ebc58f610 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 14 Feb 2022 18:24:25 +0100
Subject: [PATCH 131/269] Avoid the addition of the "PMC" prefix if it is
 already there.

---
 .../java/de/julielab/jcore/reader/pmc/parser/FrontParser.java   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FrontParser.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FrontParser.java
index af4a2b944..124e47bef 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FrontParser.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FrontParser.java
@@ -110,7 +110,7 @@ else if (xPathExists(String.format(pubDateFmt, "pmc-release")))
 			header.setSource("PubMed Central");
 			header.setComponentId(PMCReader.class.getName());
 
-			pmcid.ifPresent(id -> header.setDocId("PMC" + id));
+			pmcid.ifPresent(id -> header.setDocId(id.startsWith("PMC") ? id : "PMC" + id));
 			pmid.ifPresent(p -> {
 				OtherID otherID = new OtherID(nxmlDocumentParser.cas);
 				otherID.setComponentId(PMCReader.class.getName());

From b0d95fc08f07be76c9e8ee69389c9e648b8e6113 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 15 Feb 2022 11:27:07 +0100
Subject: [PATCH 132/269] BANNER still shows concurrency issues. Added more
 synchronization.

---
 .../java/banner/tagging/pipe/LemmaPOS.java    | 19 +++++++++++++------
 .../jcore/ae/banner/BANNERAnnotator.java      |  5 ++++-
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/jcore-banner-ae/src/main/java/banner/tagging/pipe/LemmaPOS.java b/jcore-banner-ae/src/main/java/banner/tagging/pipe/LemmaPOS.java
index 36e8a7cd5..41a0a8e5c 100644
--- a/jcore-banner-ae/src/main/java/banner/tagging/pipe/LemmaPOS.java
+++ b/jcore-banner-ae/src/main/java/banner/tagging/pipe/LemmaPOS.java
@@ -43,11 +43,13 @@ public LemmaPOS(Lemmatiser lemmatiser, Tagger posTagger) {
     public void setLemmatiser(Lemmatiser lemmatiser) {
         initResourcesMap();
         getResources().lemmatiser = lemmatiser;
+//        System.out.println("Setting lemmatiser to " + Thread.currentThread());
     }
 
     public void setPosTagger(Tagger posTagger) {
         initResourcesMap();
         getResources().posTagger = posTagger;
+//        System.out.println("Setting PoS Tagger to " + Thread.currentThread());
     }
 
     synchronized private void initResourcesMap() {
@@ -56,12 +58,16 @@ synchronized private void initResourcesMap() {
     }
 
     private Resources getResources() {
-        return resourcesByThread.compute(Thread.currentThread(), (t, r) -> {
-            Resources ret = r;
-            if (ret == null)
-                ret = new Resources();
-            return ret;
-        });
+        Thread currentThread = Thread.currentThread();
+        Resources resources = resourcesByThread.get(currentThread);
+        if (resources == null) {
+            resources = new Resources();
+            synchronized (resourcesByThread) {
+//                System.out.println("Creating resources for thread " + currentThread);
+                resourcesByThread.put(currentThread, resources);
+            }
+        }
+        return resources;
     }
 
     @Override
@@ -118,6 +124,7 @@ public String toString() {
             return "Resources{" +
                     "lemmatiser=" + lemmatiser +
                     ", posTagger=" + posTagger +
+                    ", idHashCode= " + System.identityHashCode(this) +
                     '}';
         }
     }
diff --git a/jcore-banner-ae/src/main/java/de/julielab/jcore/ae/banner/BANNERAnnotator.java b/jcore-banner-ae/src/main/java/de/julielab/jcore/ae/banner/BANNERAnnotator.java
index b5c7e816e..9241d430f 100644
--- a/jcore-banner-ae/src/main/java/de/julielab/jcore/ae/banner/BANNERAnnotator.java
+++ b/jcore-banner-ae/src/main/java/de/julielab/jcore/ae/banner/BANNERAnnotator.java
@@ -139,7 +139,10 @@ public void process(JCas jcas) throws AnalysisEngineProcessException {
                 // model is deserialized multiple times, the FeatureSet#pipe field seems to be always the
                 // exact same instance, containing a single instance of LemmaPOS (again, despite reading the model
                 // file and deserializing it multiple times). This is why the Thread -> resources map was added.
-                tagger = CRFTagger.load(modelIs, lemmatiser, posTagger, dictionary);
+//                System.out.println("Initializing BANNER: " + Thread.currentThread() + " with lemmatiser " + lemmatiser + " and POS tagger " + posTagger);
+                synchronized (BANNERAnnotator.class) {
+                    tagger = CRFTagger.load(modelIs, lemmatiser, posTagger, dictionary);
+                }
             } catch (IOException e) {
                 log.error("Could not load the BANNER model at {}", modelFilename, e);
                 throw new AnalysisEngineProcessException(e);

From f44d24e78d5f7b626ea2f7244d9f99808e02fffa Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 15 Feb 2022 11:27:32 +0100
Subject: [PATCH 133/269] Adapt PMC component names to better variants.

---
 jcore-pmc-db-reader/pom.xml                                     | 2 +-
 .../jcore/multiplier/pmc/desc/jcore-pmc-db-multiplier.xml       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/jcore-pmc-db-reader/pom.xml b/jcore-pmc-db-reader/pom.xml
index 21d363909..5efb1a8b2 100644
--- a/jcore-pmc-db-reader/pom.xml
+++ b/jcore-pmc-db-reader/pom.xml
@@ -61,7 +61,7 @@
             <version>${jcore-utilities-version}</version>
         </dependency>
     </dependencies>
-    <name>JCoRe Pubmed Central DB Reader</name>
+    <name>JCoRe PubMed Central DB Reader</name>
     <organization>
         <name>JULIE Lab Jena, Germany</name>
         <url>http://www.julielab.de</url>
diff --git a/jcore-pmc-db-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-db-multiplier.xml b/jcore-pmc-db-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-db-multiplier.xml
index 1bf858c07..3193805bf 100644
--- a/jcore-pmc-db-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-db-multiplier.xml
+++ b/jcore-pmc-db-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-db-multiplier.xml
@@ -4,7 +4,7 @@
     <primitive>true</primitive>
     <annotatorImplementationName>de.julielab.jcore.multiplier.pmc.PMCDBMultiplier</annotatorImplementationName>
     <analysisEngineMetaData>
-        <name>JCoRe Abstract Database Multiplier</name>
+        <name>JCoRe PMC Database Multiplier</name>
         <description>A multiplier that receives document IDs to read from a database table from the DBMultiplierReader. The reader also delivers the path to the corpus storage system (CoStoSys) configuration and additional tables for joining with the main data table. This multiplier class is abstract and cannot be used directly.Extending classes must implement the next() method to actually read documents from the database and populate CASes with them. This component is a part of the Jena Document Information System, JeDIS.</description>
         <vendor>JULIE Lab Jena, Germany</vendor>
         <copyright>JULIE Lab Jena, Germany</copyright>

From 76d5190397b2e2cfdea82f7b0e11bc0079e8edcc Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 15 Feb 2022 11:31:37 +0100
Subject: [PATCH 134/269] Adapt the PMC DB reader name in component.meta, too.

---
 jcore-pmc-db-reader/component.meta | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jcore-pmc-db-reader/component.meta b/jcore-pmc-db-reader/component.meta
index 0310e53ee..667465029 100644
--- a/jcore-pmc-db-reader/component.meta
+++ b/jcore-pmc-db-reader/component.meta
@@ -21,5 +21,5 @@
         "groupId": "de.julielab",
         "version": "2.6.0-SNAPSHOT"
     },
-    "name": "JCoRe Pubmed Central DB Reader"
+    "name": "JCoRe PubMed Central DB Reader"
 }

From 42906eabee103ef37a09efba0612c1bd980a1ee9 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 15 Feb 2022 12:03:07 +0100
Subject: [PATCH 135/269] Correcting the Maven module structure: The PMC DB
 reader belongs to JeDIS.

---
 jedis-parent/pom.xml | 1 +
 pom.xml              | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/jedis-parent/pom.xml b/jedis-parent/pom.xml
index 0b8807ef9..b5cbf4f94 100644
--- a/jedis-parent/pom.xml
+++ b/jedis-parent/pom.xml
@@ -36,6 +36,7 @@
         <module>../jcore-xml-db-reader</module>
         <module>../jcore-xmi-db-reader</module>
         <module>../jcore-xmi-db-writer</module>
+        <module>../jcore-pmc-db-reader</module>
     </modules>
     <licenses>
         <license>
diff --git a/pom.xml b/pom.xml
index e4a6477fa..8ebd5e10c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -299,8 +299,6 @@
           
     <module>jcore-jedis-integration-tests</module>
         
-    <module>jcore-pmc-db-reader</module>
-      
   </modules>
         
   
From f265bd22f3c39406701d1c788a8af0290d6aa4b3 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 15 Feb 2022 12:36:42 +0100
Subject: [PATCH 136/269] Import the flow controller types in the annotation
 defined flow controller descriptor.

---
 .../desc/jcore-annotation-defined-flowcontroller.xml         | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/jcore-flow-controllers/src/main/resources/de/julielab/jcore/flow/annotationdefined/desc/jcore-annotation-defined-flowcontroller.xml b/jcore-flow-controllers/src/main/resources/de/julielab/jcore/flow/annotationdefined/desc/jcore-annotation-defined-flowcontroller.xml
index 78ea9b35d..b64a02723 100644
--- a/jcore-flow-controllers/src/main/resources/de/julielab/jcore/flow/annotationdefined/desc/jcore-annotation-defined-flowcontroller.xml
+++ b/jcore-flow-controllers/src/main/resources/de/julielab/jcore/flow/annotationdefined/desc/jcore-annotation-defined-flowcontroller.xml
@@ -9,6 +9,11 @@
         <vendor>JULIE Lab, Germany</vendor>
         <configurationParameters/>
         <configurationParameterSettings/>
+        <typeSystemDescription>
+            <imports>
+                <import name="de.julielab.jcore.types.jcore-casflow-types" />
+            </imports>
+        </typeSystemDescription>
         <capabilities/>
         <operationalProperties>
             <modifiesCas>false</modifiesCas>

From b9f1ad189be2ab7a307f00ef4a7435c33d9be752 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 15 Feb 2022 12:37:22 +0100
Subject: [PATCH 137/269] Clarify the SHA hash checking parameter description
 for the XML database multipliers.

---
 .../java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplier.java  | 2 +-
 .../multiplier/pmc/desc/jcore-pmc-db-multiplier-reader.xml      | 2 +-
 .../main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/jcore-pmc-db-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplier.java b/jcore-pmc-db-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplier.java
index 551b8dacb..447e95929 100644
--- a/jcore-pmc-db-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplier.java
+++ b/jcore-pmc-db-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplier.java
@@ -42,7 +42,7 @@ public class PMCDBMultiplier extends DBMultiplier {
     private final static Logger log = LoggerFactory.getLogger(PMCDBMultiplier.class);
     @ConfigurationParameter(name = PARAM_OMIT_BIB_REFERENCES, mandatory = false, defaultValue = "false", description = "If set to true, references to the bibliography are omitted from the CAS text.")
     protected boolean omitBibReferences;
-    @ConfigurationParameter(name = PARAM_ADD_SHA_HASH, mandatory = false, description = "For use with AnnotationDefinedFlowController. Possible values: document_text, defaults to 'document_text' and thus doesn't need to be specified manually at the moment. This parameter needs to match the value for the same parameter given to the XMIDBWriter in this pipeline. Then, a comparison between the existing hash in the database and the new hash of the CAS read in this pipeline can be made. In case the hashes match, the CAS skips all component except the DBCheckpointAE to mark the document as processed.")
+    @ConfigurationParameter(name = PARAM_ADD_SHA_HASH, mandatory = false, description = "For use with AnnotationDefinedFlowController. Possible values: document_text, defaults to 'document_text' and thus doesn't need to be specified manually at the moment. This parameter needs to match the value for the same parameter given to the XMIDBWriter in this pipeline. Then, a comparison between the existing hash in the database and the new hash of the CAS read in this pipeline can be made. In case the hashes match, the CAS is directly routed to the components specified in the " + PARAM_TO_VISIT_KEYS + " parameter, skipping all other components. Note that this only works with AAEs where the first component is an 'AnnotationControlledFlow'.")
     private String documentItemToHash;
     @ConfigurationParameter(name = PARAM_TABLE_DOCUMENT, mandatory = false, description = "For use with AnnotationDefinedFlowController. String parameter indicating the name of the " +
             "table where the XMI data and, thus, the hash is stored. The name must be schema qualified. Note that in this component, only the ToVisit annotation is created that determines which components to apply to a CAS with matching (unchanged) hash. The logic to actually control the CAS flow is contained in the AnnotationDefinedFlowController.")
diff --git a/jcore-pmc-db-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-db-multiplier-reader.xml b/jcore-pmc-db-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-db-multiplier-reader.xml
index 7cbc31dcf..6bfd2a7c3 100644
--- a/jcore-pmc-db-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-db-multiplier-reader.xml
+++ b/jcore-pmc-db-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-db-multiplier-reader.xml
@@ -3,7 +3,7 @@
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <implementationName>de.julielab.jcore.reader.db.DBMultiplierReader</implementationName>
     <processingResourceMetaData>
-        <name>JCoRe Database PMC Multiplier Reader</name>
+        <name>JCoRe PMC Database Multiplier Reader</name>
         <description>A collection reader that receives the IDs of documents from a database table. Additional tables may
             be specified which will, together with the IDs, be sent to a CAS multiplier extending the DBMultiplierReader.
             The multiplier will read documents and the joined additional tables according to the list of document IDs
diff --git a/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java b/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java
index ae7de1cef..b429470c2 100644
--- a/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java
+++ b/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java
@@ -57,7 +57,7 @@ public class XMLDBMultiplier extends DBMultiplier {
     protected String[] rowMappingArray;
     @ConfigurationParameter(name = PARAM_MAPPING_FILE, description = XMLDBReader.DESC_MAPPING_FILE)
     protected String mappingFileStr;
-    @ConfigurationParameter(name = PARAM_ADD_SHA_HASH, mandatory = false, description = "For use with AnnotationDefinedFlowController. Possible values: document_text, defaults to 'document_text' and thus doesn't need to be specified manually at the moment. This parameter needs to match the value for the same parameter given to the XMIDBWriter in this pipeline. Then, a comparison between the existing hash in the database and the new hash of the CAS read in this pipeline can be made. In case the hashes match, the CAS skips all component except the DBCheckpointAE to mark the document as processed.")
+    @ConfigurationParameter(name = PARAM_ADD_SHA_HASH, mandatory = false, description = "For use with AnnotationDefinedFlowController. Possible values: document_text, defaults to 'document_text' and thus doesn't need to be specified manually at the moment. This parameter needs to match the value for the same parameter given to the XMIDBWriter in this pipeline. Then, a comparison between the existing hash in the database and the new hash of the CAS read in this pipeline can be made. In case the hashes match, the CAS is directly routed to the components specified in the " + PARAM_TO_VISIT_KEYS + " parameter, skipping all other components. Note that this only works with AAEs where the first component is an 'AnnotationControlledFlow'.")
     private String documentItemToHash;
     @ConfigurationParameter(name = PARAM_TABLE_DOCUMENT, mandatory = false, description = "For use with AnnotationDefinedFlowController. String parameter indicating the name of the " +
             "table where the XMI data and, thus, the hash is stored. The name must be schema qualified. Note that in this component, only the ToVisit annotation is created that determines which components to apply to a CAS with matching (unchanged) hash. The logic to actually control the CAS flow is contained in the AnnotationDefinedFlowController.")

From d62512a9560253a4b58b2ce5b6b855de06e9fcbb Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 15 Feb 2022 17:03:24 +0100
Subject: [PATCH 138/269] Start writing a consumer for GNormPlus compatible
 BioC documents.

Regards #129.
---
 jcore-gnp-bioc-writer/LICENSE                 |  26 ++
 jcore-gnp-bioc-writer/README.md               |  34 ++
 jcore-gnp-bioc-writer/pom.xml                 |  54 +++
 .../consumer/gnp/BioCCollectionWriter.java    |  55 +++
 .../consumer/gnp/BioCDocumentPopulator.java   |  29 ++
 .../consumer/gnp/GNormPlusFormatWriter.java   |  75 ++++
 .../jcore/consumer/gnp/desc/PLACEHOLDER       |   1 +
 .../gnp/desc/jcore-gnp-bioc-writer.xml        |  21 ++
 .../gnp/GNormPlusFormatWriterTest.java        |  13 +
 pom.xml                                       | 341 ++++++++++++------
 10 files changed, 536 insertions(+), 113 deletions(-)
 create mode 100644 jcore-gnp-bioc-writer/LICENSE
 create mode 100644 jcore-gnp-bioc-writer/README.md
 create mode 100644 jcore-gnp-bioc-writer/pom.xml
 create mode 100644 jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCCollectionWriter.java
 create mode 100644 jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
 create mode 100644 jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriter.java
 create mode 100644 jcore-gnp-bioc-writer/src/main/resources/de/julielab/jcore/consumer/gnp/desc/PLACEHOLDER
 create mode 100644 jcore-gnp-bioc-writer/src/main/resources/de/julielab/jcore/consumer/gnp/desc/jcore-gnp-bioc-writer.xml
 create mode 100644 jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriterTest.java

diff --git a/jcore-gnp-bioc-writer/LICENSE b/jcore-gnp-bioc-writer/LICENSE
new file mode 100644
index 000000000..fbbd41e05
--- /dev/null
+++ b/jcore-gnp-bioc-writer/LICENSE
@@ -0,0 +1,26 @@
+BSD 2-Clause License
+
+Copyright (c) 2017, JULIE Lab
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/jcore-gnp-bioc-writer/README.md b/jcore-gnp-bioc-writer/README.md
new file mode 100644
index 000000000..6b6af0a20
--- /dev/null
+++ b/jcore-gnp-bioc-writer/README.md
@@ -0,0 +1,34 @@
+# JCoRe GNormPlus BioC Writer
+
+**Descriptor Path**:
+```
+de.julielab.jcore.consumer.gnp.desc.jcore-gnp-bioc-writer
+```
+
+Writes CAS documents into the BioC XML format used by the gene tagger and normalizer GNormPlus.
+
+
+
+**1. Parameters**
+
+| Parameter Name | Parameter Type | Mandatory | Multivalued | Description |
+|----------------|----------------|-----------|-------------|-------------|
+| param1 | UIMA-Type | Boolean | Boolean | Description |
+| param2 | UIMA-Type | Boolean | Boolean | Description |
+
+**2. Predefined Settings**
+
+| Parameter Name | Parameter Syntax | Example |
+|----------------|------------------|---------|
+| param1 | Syntax-Description | `Example` |
+| param2 | Syntax-Description | `Example` |
+
+**3. Capabilities**
+
+| Type | Input | Output |
+|------|:-----:|:------:|
+| de.julielab.jcore.types.TYPE |  | `+` |
+| de.julielab.jcore.types.ace.TYPE | `+` |  |
+
+
+[1] Some Literature?
diff --git a/jcore-gnp-bioc-writer/pom.xml b/jcore-gnp-bioc-writer/pom.xml
new file mode 100644
index 000000000..465d66c2e
--- /dev/null
+++ b/jcore-gnp-bioc-writer/pom.xml
@@ -0,0 +1,54 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>jcore-gnp-bioc-writer</artifactId>
+    <packaging>jar</packaging>
+    <groupId>de.julielab</groupId>
+
+    <parent>
+        <groupId>de.julielab</groupId>
+        <artifactId>jcore-base</artifactId>
+        <version>2.6.0-SNAPSHOT</version>
+    </parent>
+
+    <dependencies>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-types</artifactId>
+            <version>${jcore-types-version}</version>
+        </dependency>
+        <dependency>
+            <groupId>com.pengyifan.bioc</groupId>
+            <artifactId>pengyifan-bioc</artifactId>
+            <version>1.0.3</version>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-utilities</artifactId>
+            <version>${jcore-utilities-version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
+        </dependency>
+    </dependencies>
+    <name>JCoRe GNormPlus BioC Writer</name>
+    <organization>
+        <name>JULIE Lab Jena, Germany</name>
+        <url>http://www.julielab.de</url>
+    </organization>
+    <url>https://github.com/JULIELab/jcore-base/tree/master/jcore-gnp-bioc-writer</url>
+    <description>Writes CAS documents into the BioC XML format used by the gene tagger and normalizer GNormPlus.
+    </description>
+</project>
diff --git a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCCollectionWriter.java b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCCollectionWriter.java
new file mode 100644
index 000000000..a0e03880a
--- /dev/null
+++ b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCCollectionWriter.java
@@ -0,0 +1,55 @@
+package de.julielab.jcore.consumer.gnp;
+
+import com.pengyifan.bioc.BioCCollection;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.xml.stream.XMLStreamException;
+import java.io.File;
+import java.io.IOException;
+
+/**
+ * Writes a collection of BioC documents into a single file. That file is created within a subdirectory of
+ * some base directory and changes over time to avoid overflowing directories.
+ */
+public class BioCCollectionWriter {
+    private final static Logger log = LoggerFactory.getLogger(BioCCollectionWriter.class);
+    private int numFilesPerDir;
+    private File baseDir;
+    private File currentDir;
+    private int numWrittenIntoCurrentDir;
+
+    public BioCCollectionWriter(int numFilesPerDir, File baseDir) {
+        this.numFilesPerDir = numFilesPerDir;
+        this.baseDir = baseDir;
+    }
+
+    public void writeBioCCollection(BioCCollection collection) throws XMLStreamException, IOException {
+        File collectionFile = null;
+        synchronized (BioCCollectionWriter.class) {
+            if (!baseDir.exists()) {
+                log.debug("Creating base BioC collection directory {}", baseDir);
+                baseDir.mkdirs();
+            }
+            if (currentDir == null) {
+                int i = 0;
+                do {
+                    currentDir = new File(baseDir, "bioc_collections_" + i++);
+                } while (currentDir.exists());
+                i = 0;
+                do {
+                    collectionFile = new File(currentDir, "bioc_collection_" + i++ + ".xml");
+                } while (collectionFile.exists());
+            }
+        }
+
+        com.pengyifan.bioc.io.BioCCollectionWriter writer = new com.pengyifan.bioc.io.BioCCollectionWriter(collectionFile);
+        writer.writeCollection(collection);
+        ++numWrittenIntoCurrentDir;
+        // "close" the current directory if the number of files for it has been reached
+        if (numWrittenIntoCurrentDir >= numFilesPerDir) {
+            currentDir = null;
+            numWrittenIntoCurrentDir = 0;
+        }
+    }
+}
diff --git a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
new file mode 100644
index 000000000..e161ee4ad
--- /dev/null
+++ b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
@@ -0,0 +1,29 @@
+package de.julielab.jcore.consumer.gnp;
+
+import com.pengyifan.bioc.BioCDocument;
+import de.julielab.jcore.types.AbstractText;
+import de.julielab.jcore.types.Section;
+import de.julielab.jcore.types.Title;
+import de.julielab.jcore.types.Zone;
+import org.apache.uima.cas.text.AnnotationIndex;
+import org.apache.uima.jcas.JCas;
+
+/**
+ * Extracts text passages from the CAS and adds them to a new BioCDocument.
+ */
+public class BioCDocumentPopulator {
+    public BioCDocument populate(JCas jCas) {
+        AnnotationIndex<Zone> zoneIndex = jCas.getAnnotationIndex(Zone.type);
+        for (Zone z : zoneIndex) {
+            if (z instanceof Title) {
+                // only document title; other titles should be accessed via features of the zone body
+            }
+            else if (z instanceof AbstractText) {
+                // don't check for structured parts; for GNormPlus the only important thing is title, abstract, body
+            } else if (z instanceof Section) {
+                // handle headings
+            }
+        }
+        return null;
+    }
+}
diff --git a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriter.java b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriter.java
new file mode 100644
index 000000000..cde7c209b
--- /dev/null
+++ b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriter.java
@@ -0,0 +1,75 @@
+package de.julielab.jcore.consumer.gnp;
+
+import com.pengyifan.bioc.BioCCollection;
+import com.pengyifan.bioc.BioCDocument;
+import de.julielab.jcore.utility.JCoReTools;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.descriptor.ResourceMetaData;
+import org.apache.uima.fit.descriptor.TypeCapability;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.util.Date;
+
+@ResourceMetaData(name = "JCoRe GNormPlus BioC Writer", description = "Writes CAS documents into the BioC XML format used by the gene tagger and normalizer GNormPlus.", vendor = "JULIE Lab Jena, Germany")
+@TypeCapability(inputs = {}, outputs = {})
+public class GNormPlusFormatWriter extends JCasAnnotator_ImplBase {
+
+    public static final String PARAM_NUM_DOCS_PER_FILE = "NumDocsPerFile";
+    public static final String PARAM_NUM_FILES_PER_DIR = "NumFilesPerDir";
+    public static final String PARAM_BASE_DIR = "BaseDirectory";
+    private final static Logger log = LoggerFactory.getLogger(GNormPlusFormatWriter.class);
+    @ConfigurationParameter(name = PARAM_NUM_DOCS_PER_FILE, description = "The number of documents (i.e. CASes) that should be written into a single BioC XML file.")
+    private int numDocsPerFile;
+    @ConfigurationParameter(name = PARAM_NUM_FILES_PER_DIR, description = "The number of files that should be put in a directory before a new one is created.")
+    private int numDocsPerDir;
+    @ConfigurationParameter(name = PARAM_BASE_DIR, description = "The base directory into which to create new directories that contain the actual BioC collection files.")
+    private String baseDirectory;
+
+    private BioCDocumentPopulator bioCDocumentPopulator;
+    private BioCCollectionWriter bioCCollectionWriter;
+    private BioCCollection currentCollection;
+
+    /**
+     * This method is called a single time by the framework at component
+     * creation. Here, descriptor parameters are read and initial setup is done.
+     */
+    @Override
+    public void initialize(final UimaContext aContext) throws ResourceInitializationException {
+        numDocsPerFile = (int) aContext.getConfigParameterValue(PARAM_NUM_DOCS_PER_FILE);
+        numDocsPerDir = (int) aContext.getConfigParameterValue(PARAM_NUM_FILES_PER_DIR);
+        baseDirectory = (String) aContext.getConfigParameterValue(PARAM_BASE_DIR);
+
+        bioCDocumentPopulator = new BioCDocumentPopulator();
+        bioCCollectionWriter = new BioCCollectionWriter(numDocsPerDir, new File(baseDirectory));
+
+        currentCollection = new BioCCollection("UTF-8", "1.0", new Date().toString(), true, "JCoRe GNormPlus BioC Writer", "PubTator.key");
+    }
+
+    /**
+     * This method is called for each document going through the component. This
+     * is where the actual work happens.
+     */
+    @Override
+    public void process(final JCas jCas) throws AnalysisEngineProcessException {
+        try {
+            BioCDocument doc = bioCDocumentPopulator.populate(jCas);
+            currentCollection.addDocument(doc);
+            if (currentCollection.getDocmentCount() >= numDocsPerFile) {
+                bioCCollectionWriter.writeBioCCollection(currentCollection);
+                currentCollection.clearDocuments();
+                currentCollection.clearInfons();
+            }
+        } catch (Exception e) {
+            log.error("Exception was raised for document {}", JCoReTools.getDocId(jCas));
+            throw new AnalysisEngineProcessException(e);
+        }
+    }
+}
+
diff --git a/jcore-gnp-bioc-writer/src/main/resources/de/julielab/jcore/consumer/gnp/desc/PLACEHOLDER b/jcore-gnp-bioc-writer/src/main/resources/de/julielab/jcore/consumer/gnp/desc/PLACEHOLDER
new file mode 100644
index 000000000..9f6c6ddb5
--- /dev/null
+++ b/jcore-gnp-bioc-writer/src/main/resources/de/julielab/jcore/consumer/gnp/desc/PLACEHOLDER
@@ -0,0 +1 @@
+The actual descriptor must be created by UIMA fit.
diff --git a/jcore-gnp-bioc-writer/src/main/resources/de/julielab/jcore/consumer/gnp/desc/jcore-gnp-bioc-writer.xml b/jcore-gnp-bioc-writer/src/main/resources/de/julielab/jcore/consumer/gnp/desc/jcore-gnp-bioc-writer.xml
new file mode 100644
index 000000000..47d89e355
--- /dev/null
+++ b/jcore-gnp-bioc-writer/src/main/resources/de/julielab/jcore/consumer/gnp/desc/jcore-gnp-bioc-writer.xml
@@ -0,0 +1,21 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>true</primitive>
+  <annotatorImplementationName>GNormPlusFormatWriter</annotatorImplementationName>
+  <analysisEngineMetaData>
+    <name>JCoRe GNormPlus BioC Writer</name>
+    <description/>
+    <version>2.3.0-SNAPSHOT</version>
+    <vendor>JULIE Lab Jena, Germany</vendor>
+    <configurationParameters/>
+    <configurationParameterSettings/>
+    <typeSystemDescription/>
+    <capabilities/>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </analysisEngineMetaData>
+</analysisEngineDescription>
diff --git a/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriterTest.java b/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriterTest.java
new file mode 100644
index 000000000..1f6b31b3e
--- /dev/null
+++ b/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriterTest.java
@@ -0,0 +1,13 @@
+
+package de.julielab.jcore.consumer.gnp;
+
+
+
+
+/**
+ * Unit tests for jcore-gnp-bioc-writer.
+ *
+ */
+public class GNormPlusFormatWriterTest{
+// TODO
+}
diff --git a/pom.xml b/pom.xml
index 8ebd5e10c..995fec529 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,321 +1,436 @@
 <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
-        
+          
+  
   
   <modelVersion>4.0.0</modelVersion>
-        
+          
+  
   
   <parent>
-                
+                    
+    
     
     <groupId>de.julielab</groupId>
-                
+                    
+    
     
     <artifactId>jcore-parent</artifactId>
-                
+                    
+    
     
     <version>2.5.2-SNAPSHOT</version>
-            
+                
+  
   
   </parent>
-        
+          
+  
   
   <artifactId>jcore-base</artifactId>
-        
+          
+  
   
   <packaging>pom</packaging>
-        
+          
+  
   
   <name>JCoRe Base</name>
-        
+          
+  
   
   <description>The POM for the JCoRe Base projects.</description>
-        
+          
+  
   
   <version>2.6.0-SNAPSHOT</version>
-        
+          
+  
   
   <organization>
-                
+                    
+    
     
     <name>JULIE Lab, Germany</name>
-                
+                    
+    
     
     <url>http://www.julielab.de</url>
-            
+                
+  
   
   </organization>
-        
+          
+  
   
   <licenses>
-                
+                    
+    
     
     <license>
-                        
+                              
+      
       
       <name>BSD-2-Clause</name>
-                        
+                              
+      
       
       <url>https://opensource.org/licenses/BSD-2-Clause</url>
-                    
+                          
+    
     
     </license>
-            
+                
+  
   
   </licenses>
-        
+          
+  
   
   <url>https://github.com/JULIELab/jcore-base</url>
-        
+          
+  
   
   <dependencies>
-                
+                    
+    
     
     <dependency>
-                        
+                              
+      
       
       <groupId>org.apache.uima</groupId>
-                        
+                              
+      
       
       <artifactId>uimaj-core</artifactId>
-                        
+                              
+      
       
       <version>${uima-version}</version>
-                    
+                          
+    
     
     </dependency>
-                
+                    
+    
     
     <dependency>
-                        
+                              
+      
       
       <groupId>org.apache.uima</groupId>
-                        
+                              
+      
       
       <artifactId>uimafit-core</artifactId>
-                        
+                              
+      
       
       <version>${uimafit-version}</version>
-                    
+                          
+    
     
     </dependency>
-            
+                
+  
   
   </dependencies>
-        
+          
+  
   
   <modules>
-        
+            
+    
     <module>jcore-annotation-adder-ae</module>
-        
+            
+    
     <module>jcore-ace-reader</module>
-                
+                    
+    
     
     <module>jcore-acronym-ae</module>
-        
+            
+    
     <module>jcore-acronym-writer</module>
-                
+                    
+    
     
     <module>jcore-banner-ae</module>
-        
+            
+    
     <module>jcore-bc2gm-reader</module>
-        
+            
+    
     <module>jcore-bc2gmformat-writer</module>
-        
+            
+    
     <module>jcore-biolemmatizer-ae</module>
-                
+                    
+    
     
     <module>jcore-bionlpformat-consumer</module>
-                
+                    
+    
     
     <module>jcore-bionlpformat-reader</module>
-                
+                    
+    
     
     <module>jcore-biosem-ae</module>
-                
+                    
+    
     
     <module>jcore-conll-consumer</module>
-                
+                    
+    
     
     <module>jcore-coordination-baseline-ae</module>
-        
+            
+    
     <module>jcore-cord19-reader</module>
-        
+            
+    
     <module>jcore-coreference-writer</module>
-        
+            
+    
     <module>jcore-ct-reader</module>
-        
+            
+    
     <module>jcore-db-checkpoint-ae</module>
-        
+            
+    
     <module>jcore-descriptor-creator</module>
-        
+            
+    
     <module>jcore-dta-reader</module>
-                
+                    
+    
     
     <module>jcore-ec-code-ae</module>
-                
+                    
+    
     
     <module>jcore-elasticsearch-consumer</module>
-                
+                    
+    
     
     <module>jcore-embedding-writer</module>
-                
+                    
+    
     
     <module>jcore-event-flattener-ae</module>
-                
+                    
+    
     
     <module>jcore-feature-value-replacement-ae</module>
-                
+                    
+    
     
     <module>jcore-file-reader</module>
-                
+                    
+    
     
     <module>jcore-flair-ner-ae</module>
-        
+            
+    
     <module>jcore-flair-token-embedding-ae</module>
-        
+            
+    
     <module>jcore-flow-controllers</module>
-                
+                    
+    <module>jcore-gnp-bioc-writer</module>
     
     <module>jcore-iexml-consumer</module>
-                
+                    
+    
     
     <module>jcore-iexml-reader</module>
-                
+                    
+    
     
     <module>jcore-ign-reader</module>
-                
+                    
+    
     
     <module>jcore-iob-consumer</module>
-                
+                    
+    
     
     <module>jcore-jnet-ae</module>
-                
+                    
+    
     
     <module>jcore-jpos-ae</module>
-                
+                    
+    
     
     <module>jcore-jsbd-ae</module>
-                
+                    
+    
     
     <module>jcore-jtbd-ae</module>
-                
+                    
+    
     
     <module>jcore-julielab-entity-evaluator-consumer</module>
-                
+                    
+    
     
     <module>jcore-likelihood-assignment-ae</module>
-                
+                    
+    
     
     <module>jcore-likelihood-detection-ae</module>
-        
+            
+    
     <module>jcore-line-multiplier</module>
-        
+            
+    
     <module>jcore-lingpipegazetteer-ae</module>
-                
+                    
+    
     
     <module>jcore-lingpipe-porterstemmer-ae</module>
-                
+                    
+    
     
     <module>jcore-lingscope-ae</module>
-                
+                    
+    
     
     <module>jcore-linnaeus-species-ae</module>
-                
+                    
+    
     
     <module>jcore-mantra-xml-types</module>
-                
+                    
+    
     
     <module>jcore-medxn-ae</module>
-                
+                    
+    
     
     <module>jcore-msdoc-reader</module>
-                
+                    
+    
     
     <module>jcore-mstparser-ae</module>
-                
+                    
+    
     
     <module>jcore-muc7-reader</module>
-                
+                    
+    
     
     <module>jcore-mutationfinder-ae</module>
-        
+            
+    
     <module>jcore-neo4j-relations-consumer</module>
-                
+                    
+    
     
     <module>jcore-opennlp-chunk-ae</module>
-                
+                    
+    
     
     <module>jcore-opennlp-parser-ae</module>
-                
+                    
+    
     
     <module>jcore-opennlp-postag-ae</module>
-                
+                    
+    
     
     <module>jcore-opennlp-sentence-ae</module>
-                
+                    
+    
     
     <module>jcore-opennlp-token-ae</module>
-        
+            
+    
     <module>jcore-ppd-writer</module>
-        
+            
+    
     <module>jcore-pmc-reader</module>
-                
+                    
+    
     
     <module>jcore-pubtator-reader</module>
-                
+                    
+    
     
     <module>jcore-stanford-lemmatizer-ae</module>
-                
+                    
+    
     
     <module>jcore-topic-indexing-ae</module>
-                
+                    
+    
     
     <module>jcore-topics-writer</module>
-                
+                    
+    
     
     <module>jcore-txt-consumer</module>
-                
+                    
+    
     
     <module>jcore-types</module>
-                
+                    
+    
     
     <module>jcore-utilities</module>
-                
+                    
+    
     
     <module>jcore-xml-mapper</module>
-                
+                    
+    
     
     <module>jcore-xml-reader</module>
-                
+                    
+    
     
     <module>jcore-xmi-reader</module>
-                
+                    
+    
     
     <module>jcore-xmi-writer</module>
-                
+                    
+    
     
     <module>jedis-parent</module>
-          
+              
+    
     <module>jcore-jedis-integration-tests</module>
-        
+            
+    
+    <module>jcore-gnp-bioc-writer</module>
+      
   </modules>
-        
+          
+  
   
   <scm>
-                
+                    
+    
     
     <connection>scm:git:https://github.com/JULIELab/jcore-base
         </connection>
-                
+                    
+    
     
     <developerConnection>scm:git:https://github.com/JULIELab/jcore-base</developerConnection>
-                
+                    
+    
     
     <url>scm:git:https://github.com/JULIELab/jcore-base</url>
-            
+                
+  
   
   </scm>
-    
+      
+
 
 </project>

From 1af3f7223c2454aeb398701082fe0e5d3c1fdeef Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 16 Feb 2022 10:41:40 +0100
Subject: [PATCH 139/269] Implement the BioCDocumentPopulator.

---
 .../consumer/gnp/BioCDocumentPopulator.java   | 67 ++++++++++++++++---
 .../consumer/gnp/GNormPlusFormatWriter.java   | 15 ++++-
 2 files changed, 70 insertions(+), 12 deletions(-)

diff --git a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
index e161ee4ad..e1a096c1c 100644
--- a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
+++ b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
@@ -1,29 +1,76 @@
 package de.julielab.jcore.consumer.gnp;
 
 import com.pengyifan.bioc.BioCDocument;
-import de.julielab.jcore.types.AbstractText;
-import de.julielab.jcore.types.Section;
-import de.julielab.jcore.types.Title;
-import de.julielab.jcore.types.Zone;
+import com.pengyifan.bioc.BioCPassage;
+import de.julielab.jcore.types.*;
+import de.julielab.jcore.utility.JCoReTools;
 import org.apache.uima.cas.text.AnnotationIndex;
 import org.apache.uima.jcas.JCas;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Extracts text passages from the CAS and adds them to a new BioCDocument.
  */
 public class BioCDocumentPopulator {
+    private final static Logger log = LoggerFactory.getLogger(BioCDocumentPopulator.class);
+
     public BioCDocument populate(JCas jCas) {
+        BioCDocument doc = new BioCDocument(JCoReTools.getDocId(jCas));
         AnnotationIndex<Zone> zoneIndex = jCas.getAnnotationIndex(Zone.type);
         for (Zone z : zoneIndex) {
             if (z instanceof Title) {
-                // only document title; other titles should be accessed via features of the zone body
-            }
-            else if (z instanceof AbstractText) {
+                Title t = (Title) z;
+                String titleType;
+                switch (t.getTitleType()) {
+                    case "document":
+                        titleType = "title";
+                        break;
+                    case "section":
+                        titleType = "section_title";
+                        break;
+                    case "figure":
+                        titleType = "figure_title";
+                        break;
+                    case "table":
+                        titleType = "table_title";
+                        break;
+                    default:
+                        log.debug("Unhandled title type {}", t.getTitleType());
+                        titleType = "other_title";
+                        break;
+                }
+                BioCPassage p = getPassageForAnnotation(t);
+                p.putInfon("type", titleType);
+                doc.addPassage(p);
+            } else if (z instanceof AbstractText) {
                 // don't check for structured parts; for GNormPlus the only important thing is title, abstract, body
-            } else if (z instanceof Section) {
-                // handle headings
+                AbstractText at = (AbstractText) z;
+                BioCPassage p = getPassageForAnnotation(at);
+                p.putInfon("type", "abstract");
+                doc.addPassage(p);
+            } else if (z instanceof Paragraph) {
+                Paragraph pa = (Paragraph) z;
+                BioCPassage p = getPassageForAnnotation(pa);
+                p.putInfon("type", "paragraph");
+                doc.addPassage(p);
+            } else {
+                log.debug("Unhandled Zone: {}", z);
             }
         }
-        return null;
+        return doc;
+    }
+
+    /**
+     * Creates a BioCPassage with offset and text corresponding to the passed annotation <tt>a</tt>.
+     *
+     * @param a The annotation to create a BioCPassage for.
+     * @return A BioCPassage corresponding to <tt>a</tt> in offset and text.
+     */
+    private BioCPassage getPassageForAnnotation(Annotation a) {
+        BioCPassage p = new BioCPassage();
+        p.setOffset(a.getBegin());
+        p.setText(a.getCoveredText());
+        return p;
     }
 }
diff --git a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriter.java b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriter.java
index cde7c209b..0aa125cc0 100644
--- a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriter.java
+++ b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriter.java
@@ -10,7 +10,6 @@
 import org.apache.uima.fit.descriptor.ResourceMetaData;
 import org.apache.uima.fit.descriptor.TypeCapability;
 import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.ResourceInitializationException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -41,7 +40,7 @@ public class GNormPlusFormatWriter extends JCasAnnotator_ImplBase {
      * creation. Here, descriptor parameters are read and initial setup is done.
      */
     @Override
-    public void initialize(final UimaContext aContext) throws ResourceInitializationException {
+    public void initialize(final UimaContext aContext) {
         numDocsPerFile = (int) aContext.getConfigParameterValue(PARAM_NUM_DOCS_PER_FILE);
         numDocsPerDir = (int) aContext.getConfigParameterValue(PARAM_NUM_FILES_PER_DIR);
         baseDirectory = (String) aContext.getConfigParameterValue(PARAM_BASE_DIR);
@@ -71,5 +70,17 @@ public void process(final JCas jCas) throws AnalysisEngineProcessException {
             throw new AnalysisEngineProcessException(e);
         }
     }
+
+    @Override
+    public void collectionProcessComplete() throws AnalysisEngineProcessException {
+        super.collectionProcessComplete();
+        try {
+            if (currentCollection.getDocmentCount() != 0)
+                bioCCollectionWriter.writeBioCCollection(currentCollection);
+        } catch (Exception e) {
+            log.error("Could not write final batch of BioCDocuments.", e);
+            throw new AnalysisEngineProcessException(e);
+        }
+    }
 }
 

From bbc96482a72e0990238476f9d41017a7cc2f8065 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 16 Feb 2022 12:48:43 +0100
Subject: [PATCH 140/269] Add the title type `abstract` to abstract titles.

---
 .../julielab/jcore/reader/pmc/resources/elementproperties.yml  | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/resources/elementproperties.yml b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/resources/elementproperties.yml
index 230bbf929..321ddf287 100644
--- a/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/resources/elementproperties.yml
+++ b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/resources/elementproperties.yml
@@ -7,6 +7,9 @@ title:
     default-feature-values:
         titleType: other
     paths:
+        - path: abstract/title
+          default-feature-values:
+            titleType: abstract
         - path: sec/title
           type: de.julielab.jcore.types.SectionTitle
           default-feature-values:

From 6be8f9c6257db550788281562b09769e211f8782 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 16 Feb 2022 16:08:22 +0100
Subject: [PATCH 141/269] Implement tests for the GNormPlus BioC writer.

Writing into BioC documents works. The documents are also distributed into files and directories as intended.
---
 jcore-gnp-bioc-writer/pom.xml                 |  4 +
 .../consumer/gnp/BioCCollectionWriter.java    | 35 ++++----
 .../consumer/gnp/BioCDocumentPopulator.java   | 11 ++-
 .../consumer/gnp/GNormPlusFormatWriter.java   |  4 +-
 .../gnp/BioCDocumentPopulatorTest.java        | 41 +++++++++
 .../gnp/GNormPlusFormatWriterTest.java        | 77 ++++++++++++++++-
 .../consumer/gnp/TestDocumentGenerator.java   | 85 +++++++++++++++++++
 7 files changed, 233 insertions(+), 24 deletions(-)
 create mode 100644 jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulatorTest.java
 create mode 100644 jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/TestDocumentGenerator.java

diff --git a/jcore-gnp-bioc-writer/pom.xml b/jcore-gnp-bioc-writer/pom.xml
index 465d66c2e..4381dfd93 100644
--- a/jcore-gnp-bioc-writer/pom.xml
+++ b/jcore-gnp-bioc-writer/pom.xml
@@ -42,6 +42,10 @@
             <groupId>org.junit.jupiter</groupId>
             <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
+        <dependency>
+            <groupId>org.assertj</groupId>
+            <artifactId>assertj-core</artifactId>
+        </dependency>
     </dependencies>
     <name>JCoRe GNormPlus BioC Writer</name>
     <organization>
diff --git a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCCollectionWriter.java b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCCollectionWriter.java
index a0e03880a..785976d1a 100644
--- a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCCollectionWriter.java
+++ b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCCollectionWriter.java
@@ -5,8 +5,9 @@
 import org.slf4j.LoggerFactory;
 
 import javax.xml.stream.XMLStreamException;
-import java.io.File;
 import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
 
 /**
  * Writes a collection of BioC documents into a single file. That file is created within a subdirectory of
@@ -15,34 +16,36 @@
 public class BioCCollectionWriter {
     private final static Logger log = LoggerFactory.getLogger(BioCCollectionWriter.class);
     private int numFilesPerDir;
-    private File baseDir;
-    private File currentDir;
+    private Path baseDir;
+    private Path currentDir;
     private int numWrittenIntoCurrentDir;
 
-    public BioCCollectionWriter(int numFilesPerDir, File baseDir) {
+    public BioCCollectionWriter(int numFilesPerDir, Path baseDir) {
         this.numFilesPerDir = numFilesPerDir;
         this.baseDir = baseDir;
     }
 
     public void writeBioCCollection(BioCCollection collection) throws XMLStreamException, IOException {
-        File collectionFile = null;
+        Path collectionFile = null;
         synchronized (BioCCollectionWriter.class) {
-            if (!baseDir.exists()) {
-                log.debug("Creating base BioC collection directory {}", baseDir);
-                baseDir.mkdirs();
-            }
+            // currentDir is either null at the very beginning or after a batch of documents have been written
             if (currentDir == null) {
                 int i = 0;
                 do {
-                    currentDir = new File(baseDir, "bioc_collections_" + i++);
-                } while (currentDir.exists());
-                i = 0;
-                do {
-                    collectionFile = new File(currentDir, "bioc_collection_" + i++ + ".xml");
-                } while (collectionFile.exists());
+                    currentDir = Path.of(baseDir.toString(), "bioc_collections_" + i++);
+                } while (Files.exists(currentDir));
+            }
+            int i = 0;
+            do {
+                collectionFile = Path.of(currentDir.toString(), "bioc_collection_" + i++ + ".xml");
+            } while (Files.exists(collectionFile));
+            if (!Files.exists(collectionFile.getParent())) {
+                log.debug("Creating base BioC collection directory {}", baseDir);
+                Files.createDirectories(collectionFile.getParent());
             }
         }
-
+        if (collectionFile == null)
+            throw new IllegalStateException("No file for the next collection was constructed. This is a programming error.");
         com.pengyifan.bioc.io.BioCCollectionWriter writer = new com.pengyifan.bioc.io.BioCCollectionWriter(collectionFile);
         writer.writeCollection(collection);
         ++numWrittenIntoCurrentDir;
diff --git a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
index e1a096c1c..7dd246876 100644
--- a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
+++ b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
@@ -22,6 +22,8 @@ public BioCDocument populate(JCas jCas) {
             if (z instanceof Title) {
                 Title t = (Title) z;
                 String titleType;
+                if (t.getTitleType() == null)
+                    throw new IllegalArgumentException("The titleType feature was not set for " + t);
                 switch (t.getTitleType()) {
                     case "document":
                         titleType = "title";
@@ -54,8 +56,13 @@ public BioCDocument populate(JCas jCas) {
                 BioCPassage p = getPassageForAnnotation(pa);
                 p.putInfon("type", "paragraph");
                 doc.addPassage(p);
-            } else {
-                log.debug("Unhandled Zone: {}", z);
+            } else if (z instanceof Caption) {
+                Caption c = (Caption) z;
+                BioCPassage p = getPassageForAnnotation(c);
+                if (c.getCaptionType() == null)
+                    throw new IllegalArgumentException("The captionType feature is null for " + c);
+                p.putInfon("type", c.getCaptionType());
+                doc.addPassage(p);
             }
         }
         return doc;
diff --git a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriter.java b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriter.java
index 0aa125cc0..24f016a69 100644
--- a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriter.java
+++ b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriter.java
@@ -13,7 +13,7 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.File;
+import java.nio.file.Path;
 import java.util.Date;
 
 @ResourceMetaData(name = "JCoRe GNormPlus BioC Writer", description = "Writes CAS documents into the BioC XML format used by the gene tagger and normalizer GNormPlus.", vendor = "JULIE Lab Jena, Germany")
@@ -46,7 +46,7 @@ public void initialize(final UimaContext aContext) {
         baseDirectory = (String) aContext.getConfigParameterValue(PARAM_BASE_DIR);
 
         bioCDocumentPopulator = new BioCDocumentPopulator();
-        bioCCollectionWriter = new BioCCollectionWriter(numDocsPerDir, new File(baseDirectory));
+        bioCCollectionWriter = new BioCCollectionWriter(numDocsPerDir, Path.of(baseDirectory));
 
         currentCollection = new BioCCollection("UTF-8", "1.0", new Date().toString(), true, "JCoRe GNormPlus BioC Writer", "PubTator.key");
     }
diff --git a/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulatorTest.java b/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulatorTest.java
new file mode 100644
index 000000000..8f831bbf0
--- /dev/null
+++ b/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulatorTest.java
@@ -0,0 +1,41 @@
+package de.julielab.jcore.consumer.gnp;
+
+import com.pengyifan.bioc.BioCCollection;
+import com.pengyifan.bioc.BioCDocument;
+import com.pengyifan.bioc.io.BioCCollectionWriter;
+import org.apache.uima.jcas.JCas;
+import org.junit.jupiter.api.Test;
+
+import java.io.ByteArrayOutputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.Date;
+
+import static org.assertj.core.api.Assertions.assertThat;
+class BioCDocumentPopulatorTest {
+    @Test
+    public void populate() throws Exception {
+        BioCDocumentPopulator populator = new BioCDocumentPopulator();
+        JCas jCas = TestDocumentGenerator.prepareCas(1);
+        BioCDocument biocDoc = populator.populate(jCas);
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        BioCCollection collection = new BioCCollection("UTF-8", "1.0", (new Date()).toString(), true, "jUnit Test", "PubTator.key");
+        collection.addDocument(biocDoc);
+        BioCCollectionWriter collectionWriter = new BioCCollectionWriter(baos);
+        collectionWriter.writeCollection(collection);
+        String resultXml = baos.toString(StandardCharsets.UTF_8);
+        // Just check that the test text contents are there that are used in TestDocumentGenerator and that
+        // there are no duplicates
+        assertThat(resultXml).containsOnlyOnce("<text>This is the title of document 1.</text>");
+        assertThat(resultXml).containsOnlyOnce("<infon key=\"type\">title</infon>");
+        // The abstract should be one single string
+        assertThat(resultXml).containsOnlyOnce("<text>BACKGROUND This abstract section belongs to document 1.\nRESULTS There are certainly some results reported by document 1.</text>");
+        assertThat(resultXml).containsOnlyOnce("INTRODUCTION");
+        assertThat(resultXml).containsOnlyOnce("<infon key=\"type\">section_title</infon>");
+        assertThat(resultXml).contains("<infon key=\"type\">paragraph</infon>");
+        assertThat(resultXml).containsOnlyOnce("This is section 1, paragraph 1 of document 1.");
+        assertThat(resultXml).containsOnlyOnce("This is a second paragraph in the first section.");
+        assertThat(resultXml).containsOnlyOnce("<infon key=\"type\">table_title</infon>");
+        assertThat(resultXml).containsOnlyOnce("Tab1.");
+        assertThat(resultXml).containsOnlyOnce("This is the table1 caption.");
+    }
+}
\ No newline at end of file
diff --git a/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriterTest.java b/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriterTest.java
index 1f6b31b3e..16a3ec233 100644
--- a/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriterTest.java
+++ b/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriterTest.java
@@ -1,13 +1,82 @@
-
 package de.julielab.jcore.consumer.gnp;
 
 
+import com.pengyifan.bioc.BioCCollection;
+import com.pengyifan.bioc.io.BioCCollectionReader;
+import org.apache.commons.io.FileUtils;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.junit.jupiter.api.AfterAll;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
 
+import static org.assertj.core.api.Assertions.assertThat;
 
 /**
  * Unit tests for jcore-gnp-bioc-writer.
- *
  */
-public class GNormPlusFormatWriterTest{
-// TODO
+public class GNormPlusFormatWriterTest {
+
+    private static final Path BASEDIR = Path.of("src", "test", "resources", "testoutput");
+
+    @AfterAll
+    public static void cleanFinally() {
+        FileUtils.deleteQuietly(BASEDIR.toFile());
+    }
+
+    @BeforeEach
+    public void cleanOutput() {
+        FileUtils.deleteQuietly(BASEDIR.toFile());
+    }
+
+    private AnalysisEngine getWriterInstance(int docsPerFile, int filesPerDir) throws ResourceInitializationException {
+        return AnalysisEngineFactory.createEngine(GNormPlusFormatWriter.class, GNormPlusFormatWriter.PARAM_BASE_DIR, BASEDIR.toString(), GNormPlusFormatWriter.PARAM_NUM_DOCS_PER_FILE, docsPerFile, GNormPlusFormatWriter.PARAM_NUM_FILES_PER_DIR, filesPerDir);
+    }
+
+    @Test
+    public void process1() throws Exception {
+        // write a single document
+        JCas jCas = TestDocumentGenerator.prepareCas(1);
+        AnalysisEngine writer = getWriterInstance(1, 1);
+        writer.process(jCas);
+        writer.collectionProcessComplete();
+
+        assertThat(Path.of(BASEDIR.toString(), "bioc_collections_0", "bioc_collection_0.xml")).exists().isNotEmptyFile();
+    }
+
+    @Test
+    public void process2() throws Exception {
+        // write 15 documents with 2 documents per file and 3 files per directory
+        JCas jCas = TestDocumentGenerator.createTestJCas();
+        AnalysisEngine writer = getWriterInstance(2, 3);
+        for (int i = 0; i < 15; ++i) {
+            TestDocumentGenerator.prepareCas(jCas, i);
+            writer.process(jCas);
+            jCas.reset();
+        }
+        writer.collectionProcessComplete();
+        // 15 documents at 2 per file yield 8 files (the last one holding 1 document), i.e. directories with 3, 3 and 2 files
+        assertThat(Files.list(BASEDIR)).hasSize(3);
+        for (int i : List.of(0, 1, 2)) {
+            List<Integer> fileIndices = i < 2 ? List.of(0, 1, 2) : List.of(0,1);
+            for (int j : fileIndices) {
+                assertThat(Path.of(BASEDIR.toString(), "bioc_collections_"+i, "bioc_collection_"+j+".xml")).exists().isNotEmptyFile();
+            }
+        }
+        // there should only be two files in the last directory, so a third one must not exist
+        assertThat(Path.of(BASEDIR.toString(), "bioc_collections_2", "bioc_collection_2.xml")).doesNotExist();
+
+        // the last file should only contain a single document (the 15th, written by collectionProcessComplete)
+        BioCCollectionReader reader = new BioCCollectionReader(Path.of(BASEDIR.toString(), "bioc_collections_2", "bioc_collection_1.xml"));
+        BioCCollection lastCollection = reader.readCollection();
+        assertThat(lastCollection.getDocmentCount()).isEqualTo(1);
+
+    }
+
 }
diff --git a/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/TestDocumentGenerator.java b/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/TestDocumentGenerator.java
new file mode 100644
index 000000000..da5e83a6f
--- /dev/null
+++ b/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/TestDocumentGenerator.java
@@ -0,0 +1,85 @@
+package de.julielab.jcore.consumer.gnp;
+
+import de.julielab.jcore.types.*;
+import de.julielab.jcore.utility.JCoReTools;
+import org.apache.uima.UIMAException;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.jcas.JCas;
+
+public class TestDocumentGenerator {
+
+    public static JCas createTestJCas() throws UIMAException {
+        return JCasFactory.createJCas("de.julielab.jcore.types.jcore-document-meta-pubmed-types", "de.julielab.jcore.types.jcore-document-structure-pubmed-types");
+    }
+
+    public static JCas prepareCas(int docId) throws UIMAException {
+        JCas jCas = createTestJCas();
+        return prepareCas(jCas, docId);
+    }
+
+    public static JCas prepareCas(JCas jCas, int docId) {
+        Header h = new de.julielab.jcore.types.pubmed.Header(jCas);
+        h.setDocId(String.valueOf(docId));
+        h.addToIndexes();
+        // The text is built piecewise; every annotation takes its offsets from the current builder length.
+        StringBuilder sb = new StringBuilder();
+        String ls = "\n"; // fixed separator: the tests assert on a literal "\n", independent of the platform
+        int currentBegin = sb.length();
+        sb.append("This is the title of document ").append(docId).append(".");
+        Title t = new Title(jCas, currentBegin, sb.length());
+        t.setTitleType("document");
+        t.addToIndexes();
+        currentBegin = sb.length();
+        sb.append("BACKGROUND This abstract section belongs to document ").append(docId).append(".");
+        AbstractSectionHeading ash1 = new AbstractSectionHeading(jCas, currentBegin, currentBegin + 10);
+        ash1.setTitleType("abstract");
+        AbstractSection as1 = new AbstractSection(jCas, currentBegin, sb.length());
+        as1.setAbstractSectionHeading(ash1);
+        sb.append(ls);
+        currentBegin = sb.length(); // taken after the separator so the heading covers exactly "RESULTS"
+        sb.append("RESULTS There are certainly some results reported by document ").append(docId).append(".");
+        AbstractSectionHeading ash2 = new AbstractSectionHeading(jCas, currentBegin, currentBegin + 7);
+        ash2.setTitleType("abstract");
+        AbstractSection as2 = new AbstractSection(jCas, currentBegin, sb.length());
+        as2.setAbstractSectionHeading(ash2);
+        AbstractText at = new AbstractText(jCas, as1.getBegin(), as2.getEnd());
+        at.setStructuredAbstractParts(JCoReTools.addToFSArray(JCoReTools.addToFSArray(null, as1), as2));
+        at.addToIndexes();
+        sb.append(ls);
+        currentBegin = sb.length();
+        sb.append("INTRODUCTION This is section 1, paragraph 1 of document ").append(docId).append(".");
+        SectionTitle st1 = new SectionTitle(jCas, currentBegin, currentBegin + 12);
+        st1.setTitleType("section");
+        Section s1 = new Section(jCas, currentBegin, sb.length());
+        st1.addToIndexes();
+        s1.setSectionHeading(st1);
+        s1.addToIndexes();
+        // paragraphs do not include the heading
+        Paragraph p11 = new Paragraph(jCas, s1.getBegin() + 13, s1.getEnd());
+        p11.addToIndexes();
+        currentBegin = sb.length();
+        sb.append("This is a second paragraph in the first section.");
+        Paragraph p12 = new Paragraph(jCas, currentBegin, sb.length());
+        p12.addToIndexes();
+        // objectBegin marks the start of the table object, which spans content, title and caption
+        int objectBegin = sb.length();
+        sb.append("Let this be table content.");
+        currentBegin = sb.length();
+        sb.append("Tab1.");
+        Title tabTitle = new Title(jCas, currentBegin, sb.length());
+        tabTitle.setTitleType("table");
+        tabTitle.addToIndexes();
+        currentBegin = sb.length();
+        sb.append("This is the table1 caption.");
+        Caption tCap = new Caption(jCas, currentBegin, sb.length());
+        tCap.setCaptionType("table");
+        tCap.addToIndexes();
+        Table tab = new Table(jCas, objectBegin, sb.length());
+        tab.setObjectTitle(tabTitle);
+        tab.setObjectCaption(tCap);
+        // index the table exactly once
+        tab.addToIndexes();
+        jCas.setDocumentText(sb.toString());
+        return jCas;
+    }
+}

From 2b6a9c2c015b9f9d145371a8865e45b46cd2d352 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 16 Feb 2022 16:17:33 +0100
Subject: [PATCH 142/269] Add descriptor and component.meta to the GNP BioC
 writer. Resolves #129.

---
 jcore-gnp-bioc-writer/component.meta          | 20 ++++++
 jcore-gnp-bioc-writer/pom.xml                 |  4 ++
 .../jcore/consumer/gnp/desc/PLACEHOLDER       |  1 -
 .../gnp/desc/jcore-gnp-bioc-writer.xml        | 71 ++++++++++++++-----
 4 files changed, 76 insertions(+), 20 deletions(-)
 create mode 100644 jcore-gnp-bioc-writer/component.meta
 delete mode 100644 jcore-gnp-bioc-writer/src/main/resources/de/julielab/jcore/consumer/gnp/desc/PLACEHOLDER

diff --git a/jcore-gnp-bioc-writer/component.meta b/jcore-gnp-bioc-writer/component.meta
new file mode 100644
index 000000000..78c499835
--- /dev/null
+++ b/jcore-gnp-bioc-writer/component.meta
@@ -0,0 +1,20 @@
+{
+    "categories": [
+        "consumer"
+    ],
+    "description": "Writes CAS documents into the BioC XML format used by the gene tagger and normalizer GNormPlus.",
+    "descriptors": [
+        {
+            "category": "consumer",
+            "location": "de.julielab.jcore.consumer.gnp.desc.jcore-gnp-bioc-writer"
+        }
+    ],
+    "exposable": true,
+    "group": "general",
+    "maven-artifact": {
+        "artifactId": "jcore-gnp-bioc-writer",
+        "groupId": "de.julielab",
+        "version": "2.6.0-SNAPSHOT"
+    },
+    "name": "JCoRe GNormPlus BioC Writer"
+}
diff --git a/jcore-gnp-bioc-writer/pom.xml b/jcore-gnp-bioc-writer/pom.xml
index 4381dfd93..93aa158ea 100644
--- a/jcore-gnp-bioc-writer/pom.xml
+++ b/jcore-gnp-bioc-writer/pom.xml
@@ -46,6 +46,10 @@
             <groupId>org.assertj</groupId>
             <artifactId>assertj-core</artifactId>
         </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-descriptor-creator</artifactId>
+        </dependency>
     </dependencies>
     <name>JCoRe GNormPlus BioC Writer</name>
     <organization>
diff --git a/jcore-gnp-bioc-writer/src/main/resources/de/julielab/jcore/consumer/gnp/desc/PLACEHOLDER b/jcore-gnp-bioc-writer/src/main/resources/de/julielab/jcore/consumer/gnp/desc/PLACEHOLDER
deleted file mode 100644
index 9f6c6ddb5..000000000
--- a/jcore-gnp-bioc-writer/src/main/resources/de/julielab/jcore/consumer/gnp/desc/PLACEHOLDER
+++ /dev/null
@@ -1 +0,0 @@
-The actual descriptor must be created by UIMA fit.
diff --git a/jcore-gnp-bioc-writer/src/main/resources/de/julielab/jcore/consumer/gnp/desc/jcore-gnp-bioc-writer.xml b/jcore-gnp-bioc-writer/src/main/resources/de/julielab/jcore/consumer/gnp/desc/jcore-gnp-bioc-writer.xml
index 47d89e355..524f590ea 100644
--- a/jcore-gnp-bioc-writer/src/main/resources/de/julielab/jcore/consumer/gnp/desc/jcore-gnp-bioc-writer.xml
+++ b/jcore-gnp-bioc-writer/src/main/resources/de/julielab/jcore/consumer/gnp/desc/jcore-gnp-bioc-writer.xml
@@ -1,21 +1,54 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
-  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
-  <primitive>true</primitive>
-  <annotatorImplementationName>GNormPlusFormatWriter</annotatorImplementationName>
-  <analysisEngineMetaData>
-    <name>JCoRe GNormPlus BioC Writer</name>
-    <description/>
-    <version>2.3.0-SNAPSHOT</version>
-    <vendor>JULIE Lab Jena, Germany</vendor>
-    <configurationParameters/>
-    <configurationParameterSettings/>
-    <typeSystemDescription/>
-    <capabilities/>
-    <operationalProperties>
-      <modifiesCas>true</modifiesCas>
-      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
-      <outputsNewCASes>false</outputsNewCASes>
-    </operationalProperties>
-  </analysisEngineMetaData>
-</analysisEngineDescription>
+    <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+    <primitive>true</primitive>
+    <annotatorImplementationName>de.julielab.jcore.consumer.gnp.GNormPlusFormatWriter</annotatorImplementationName>
+    <analysisEngineMetaData>
+        <name>JCoRe GNormPlus BioC Writer</name>
+        <description>Writes CAS documents into the BioC XML format used by the gene tagger and normalizer GNormPlus.</description>
+        <vendor>JULIE Lab Jena, Germany</vendor>
+        <configurationParameters>
+            <configurationParameter>
+                <name>NumDocsPerFile</name>
+                <description>The number of documents (i.e. CASes) that should be written into a single BioC XML file.</description>
+                <type>Integer</type>
+                <multiValued>false</multiValued>
+                <mandatory>true</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>NumFilesPerDir</name>
+                <description>The number of files that should be put in a directory before a new one is created.</description>
+                <type>Integer</type>
+                <multiValued>false</multiValued>
+                <mandatory>true</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>BaseDirectory</name>
+                <description>The base directory into which to create new directories that contain the actual BioC collection files.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>true</mandatory>
+            </configurationParameter>
+        </configurationParameters>
+        <configurationParameterSettings/>
+        <typeSystemDescription>
+            <imports>
+                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
+            </imports>
+        </typeSystemDescription>
+        <fsIndexCollection/>
+        <capabilities>
+            <capability>
+                <inputs/>
+                <outputs/>
+                <languagesSupported/>
+            </capability>
+        </capabilities>
+        <operationalProperties>
+            <modifiesCas>true</modifiesCas>
+            <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+            <outputsNewCASes>false</outputsNewCASes>
+        </operationalProperties>
+    </analysisEngineMetaData>
+</analysisEngineDescription>
\ No newline at end of file

From c535e78199407a35d084989bfa8d1a63a2c88831 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 16 Feb 2022 16:21:27 +0100
Subject: [PATCH 143/269] Remove duplicate module entry for GNP BioC Writer.

---
 pom.xml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pom.xml b/pom.xml
index 995fec529..662cf49ef 100644
--- a/pom.xml
+++ b/pom.xml
@@ -406,8 +406,7 @@
     <module>jcore-jedis-integration-tests</module>
             
     
-    <module>jcore-gnp-bioc-writer</module>
-      
+
   </modules>
           
   
From b0e1be9d3d7a4e15fb78b61c84ae8f6df06d6f84 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 17 Feb 2022 10:06:52 +0100
Subject: [PATCH 144/269] Adapt output more to the GNormPlus corpus format.

This currently means putting the structured abstract section labels (BACKGROUND, RESULTS, etc.) into the text.
Also, filter out empty titles (which are the structured abstract headings in our current handling of PubMed).
---
 .../consumer/gnp/BioCCollectionWriter.java    |  2 +-
 .../consumer/gnp/BioCDocumentPopulator.java   | 26 ++++++++++++++++---
 .../gnp/BioCDocumentPopulatorTest.java        |  2 +-
 .../consumer/gnp/TestDocumentGenerator.java   | 12 +++++----
 .../xmi/desc/jcore-xmi-db-multiplier.xml      |  1 +
 5 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCCollectionWriter.java b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCCollectionWriter.java
index 785976d1a..df5b12587 100644
--- a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCCollectionWriter.java
+++ b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCCollectionWriter.java
@@ -26,7 +26,7 @@ public BioCCollectionWriter(int numFilesPerDir, Path baseDir) {
     }
 
     public void writeBioCCollection(BioCCollection collection) throws XMLStreamException, IOException {
-        Path collectionFile = null;
+        Path collectionFile;
         synchronized (BioCCollectionWriter.class) {
             // currentDir is either null at the very beginning or after a batch of documents have been written
             if (currentDir == null) {
diff --git a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
index 7dd246876..96120276c 100644
--- a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
+++ b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
@@ -19,6 +19,8 @@ public BioCDocument populate(JCas jCas) {
         BioCDocument doc = new BioCDocument(JCoReTools.getDocId(jCas));
         AnnotationIndex<Zone> zoneIndex = jCas.getAnnotationIndex(Zone.type);
         for (Zone z : zoneIndex) {
+            if (z.getEnd() - z.getBegin() <= 0)
+                continue;
             if (z instanceof Title) {
                 Title t = (Title) z;
                 String titleType;
@@ -46,11 +48,27 @@ public BioCDocument populate(JCas jCas) {
                 p.putInfon("type", titleType);
                 doc.addPassage(p);
             } else if (z instanceof AbstractText) {
-                // don't check for structured parts; for GNormPlus the only important thing is title, abstract, body
                 AbstractText at = (AbstractText) z;
-                BioCPassage p = getPassageForAnnotation(at);
-                p.putInfon("type", "abstract");
-                doc.addPassage(p);
+                if (at.getStructuredAbstractParts() != null && at.getStructuredAbstractParts().size() > 0) {
+                    StringBuilder sb = new StringBuilder();
+                    for (int i = 0; i < at.getStructuredAbstractParts().size() && at.getStructuredAbstractParts(i) != null; ++i) {
+                        AbstractSection abstractPart = at.getStructuredAbstractParts(i);
+                        String sectionLabel = ((AbstractSectionHeading) abstractPart.getAbstractSectionHeading()).getLabel();
+                        sb.append(sectionLabel).append(": ");
+                        sb.append(abstractPart.getCoveredText());
+                        if (i < at.getStructuredAbstractParts().size() - 1 && at.getStructuredAbstractParts(i+1) != null)
+                            sb.append(" ");
+                    }
+                    BioCPassage p = new BioCPassage();
+                    p.setOffset(at.getBegin());
+                    p.setText(sb.toString());
+                    p.putInfon("type", "abstract");
+                    doc.addPassage(p);
+                } else {
+                    BioCPassage p = getPassageForAnnotation(at);
+                    p.putInfon("type", "abstract");
+                    doc.addPassage(p);
+                }
             } else if (z instanceof Paragraph) {
                 Paragraph pa = (Paragraph) z;
                 BioCPassage p = getPassageForAnnotation(pa);
diff --git a/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulatorTest.java b/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulatorTest.java
index 8f831bbf0..9f085bc0b 100644
--- a/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulatorTest.java
+++ b/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulatorTest.java
@@ -28,7 +28,7 @@ public void populate() throws Exception {
         assertThat(resultXml).containsOnlyOnce("<text>This is the title of document 1.</text>");
         assertThat(resultXml).containsOnlyOnce("<infon key=\"type\">title</infon>");
         // The abstract should be one single string
-        assertThat(resultXml).containsOnlyOnce("<text>BACKGROUND This abstract section belongs to document 1.\nRESULTS There are certainly some results reported by document 1.</text>");
+        assertThat(resultXml).containsOnlyOnce("<text>BACKGROUND: This abstract section belongs to document 1. RESULTS: There are certainly some results reported by document 1.</text>");
         assertThat(resultXml).containsOnlyOnce("INTRODUCTION");
         assertThat(resultXml).containsOnlyOnce("<infon key=\"type\">section_title</infon>");
         assertThat(resultXml).contains("<infon key=\"type\">paragraph</infon>");
diff --git a/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/TestDocumentGenerator.java b/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/TestDocumentGenerator.java
index da5e83a6f..55ca81a02 100644
--- a/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/TestDocumentGenerator.java
+++ b/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/TestDocumentGenerator.java
@@ -30,15 +30,17 @@ public static JCas prepareCas(JCas jCas, int docId) {
         t.setTitleType("document");
         t.addToIndexes();
         currentBegin = sb.length();
-        sb.append("BACKGROUND This abstract section belongs to document ").append(docId).append(".");
-        AbstractSectionHeading ash1 = new AbstractSectionHeading(jCas, currentBegin, currentBegin + 10);
+        sb.append("This abstract section belongs to document ").append(docId).append(".");
+        AbstractSectionHeading ash1 = new AbstractSectionHeading(jCas);
+        ash1.setLabel("BACKGROUND");
         ash1.setTitleType("abstract");
         AbstractSection as1 = new AbstractSection(jCas, currentBegin, sb.length());
         as1.setAbstractSectionHeading(ash1);
-        currentBegin = sb.length();
         sb.append(ls);
-        sb.append("RESULTS There are certainly some results reported by document ").append(docId).append(".");
-        AbstractSectionHeading ash2 = new AbstractSectionHeading(jCas, currentBegin, currentBegin + 7);
+        currentBegin = sb.length();
+        sb.append("There are certainly some results reported by document ").append(docId).append(".");
+        AbstractSectionHeading ash2 = new AbstractSectionHeading(jCas);
+        ash2.setLabel("RESULTS");
         ash2.setTitleType("abstract");
         AbstractSection as2 = new AbstractSection(jCas, currentBegin, sb.length());
         as2.setAbstractSectionHeading(ash2);
diff --git a/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml b/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml
index c124b4804..bd4929ad1 100644
--- a/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml
+++ b/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml
@@ -29,6 +29,7 @@
         <typeSystemDescription>
             <imports>
                 <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types" />
             </imports>
         </typeSystemDescription>
         <fsIndexCollection />

From 3a6b74ed4552c346e454e3f7f47573f0a1eefc2c Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 17 Feb 2022 11:46:00 +0100
Subject: [PATCH 145/269] Revoke the last change regarding structured
 abstracts.

The issue was actually the newlines in the abstract text, not the omitted headings.
---
 .../consumer/gnp/BioCDocumentPopulator.java   | 25 +++----------------
 1 file changed, 4 insertions(+), 21 deletions(-)

diff --git a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
index 96120276c..488f42613 100644
--- a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
+++ b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
@@ -49,26 +49,9 @@ public BioCDocument populate(JCas jCas) {
                 doc.addPassage(p);
             } else if (z instanceof AbstractText) {
                 AbstractText at = (AbstractText) z;
-                if (at.getStructuredAbstractParts() != null && at.getStructuredAbstractParts().size() > 0) {
-                    StringBuilder sb = new StringBuilder();
-                    for (int i = 0; i < at.getStructuredAbstractParts().size() && at.getStructuredAbstractParts(i) != null; ++i) {
-                        AbstractSection abstractPart = at.getStructuredAbstractParts(i);
-                        String sectionLabel = ((AbstractSectionHeading) abstractPart.getAbstractSectionHeading()).getLabel();
-                        sb.append(sectionLabel).append(": ");
-                        sb.append(abstractPart.getCoveredText());
-                        if (i < at.getStructuredAbstractParts().size() - 1 && at.getStructuredAbstractParts(i+1) != null)
-                            sb.append(" ");
-                    }
-                    BioCPassage p = new BioCPassage();
-                    p.setOffset(at.getBegin());
-                    p.setText(sb.toString());
-                    p.putInfon("type", "abstract");
-                    doc.addPassage(p);
-                } else {
-                    BioCPassage p = getPassageForAnnotation(at);
-                    p.putInfon("type", "abstract");
-                    doc.addPassage(p);
-                }
+                BioCPassage p = getPassageForAnnotation(at);
+                p.putInfon("type", "abstract");
+                doc.addPassage(p);
             } else if (z instanceof Paragraph) {
                 Paragraph pa = (Paragraph) z;
                 BioCPassage p = getPassageForAnnotation(pa);
@@ -95,7 +78,7 @@ public BioCDocument populate(JCas jCas) {
     private BioCPassage getPassageForAnnotation(Annotation a) {
         BioCPassage p = new BioCPassage();
         p.setOffset(a.getBegin());
-        p.setText(a.getCoveredText());
+        p.setText(a.getCoveredText().replaceAll("\n", " "));
         return p;
     }
 }

From ef27b1d5b174e091c2ff8a4c75317a296f812eac Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 17 Feb 2022 18:19:37 +0100
Subject: [PATCH 146/269] Add a mechanism to avoid mirror subset reset for
 updated JeDIS documents whose text hasn't changed; resolves #130.

The XMLDBMultiplier already had the "ToVisit" mechanism for the AnnotationDefinedFlowController that could skip the majority of a pipeline when the document hash in the database was the same as the hash for the updated document text. However, we needed to update the base documents and, thus, needed to not skip the XMI writer. But by default, that would cause the mirror subsets to reset.
Now, the XMLDBMultiplier sets the new feature "isDocumentHashUnchanged" of the DBProcessingMetaData annotation. If this feature indicates that the document hash is unchanged, the XMIDBWriter will not reset the mirror subsets for that document.
---
 .../jcore-document-meta-extension-types.xml   |   5 +
 .../jcore/consumer/xmi/XMIDBWriter.java       |  53 ++--
 .../jcore/consumer/xmi/XmiDataInserter.java   |  28 ++-
 .../jcore/consumer/xmi/XmiDBWriterTest.java   | 229 ++++++++++++++----
 .../jcore/reader/xml/XMLDBMultiplier.java     |   3 +-
 5 files changed, 245 insertions(+), 73 deletions(-)

diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-document-meta-extension-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-document-meta-extension-types.xml
index 115927024..200ff0383 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-document-meta-extension-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-document-meta-extension-types.xml
@@ -25,6 +25,11 @@
           <description>This feature is used by the DBCheckpointAE. It allows components in the pipeline to prevent a document to be marked as being finished with processing. This can be used to indicate issues with specific documents which will require reprocessing.</description>
           <rangeTypeName>uima.cas.Boolean</rangeTypeName>
         </featureDescription>
+        <featureDescription>
+          <name>isDocumentHashUnchanged</name>
+          <description>For use by the XMIDBWriter. Used to prohibit that mirror subsets reset to 'not processed' for this document when there was no change in the document text. That allows to update the base document without indicating that a reprocessing is required. This is useful when the document is updated by the distributor (e.g. PubMed) but the text contents have not changed.</description>
+          <rangeTypeName>uima.cas.Boolean</rangeTypeName>
+        </featureDescription>
       </features>
     </typeDescription>
   </types>
diff --git a/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XMIDBWriter.java b/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XMIDBWriter.java
index b9594dda3..8a085cf8b 100644
--- a/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XMIDBWriter.java
+++ b/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XMIDBWriter.java
@@ -29,6 +29,7 @@
 import de.julielab.jcore.types.Header;
 import de.julielab.jcore.types.XmiMetaData;
 import de.julielab.jcore.types.ext.DBProcessingMetaData;
+import de.julielab.jcore.utility.JCoReTools;
 import de.julielab.xml.*;
 import de.julielab.xml.binary.BinaryJeDISNodeEncoder;
 import de.julielab.xml.binary.BinaryStorageAnalysisResult;
@@ -250,6 +251,7 @@ public class XMIDBWriter extends JCasAnnotator_ImplBase {
     @ConfigurationParameter(name = PARAM_ADD_SHA_HASH, mandatory = false, description = "Possible values: document_text. If this parameter is set to a valid value, the SHA256 hash for the given value will be calculated, base64 encoded and added to each document as a new column in the document table. The column will be named after the parameter value, suffixed by '_sha256'.")
     private String documentItemToHash;
     private Map<DocumentId, String> shaMap;
+    private Set<DocumentId> mirrorResetIds;
     private String mappingCacheKey;
     private DocumentReleaseCheckpoint docReleaseCheckpoint;
     private List<DocumentId> currentDocumentIdBatch;
@@ -288,8 +290,8 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
         // The deletion of obsolete annotations should only be active when the base document is stored because then, old annotations won't be valid any more.
         deleteObsolete &= storeBaseDocument;
         baseDocumentAnnotationTypes = Arrays.stream(
-                Optional.ofNullable((String[]) aContext.getConfigParameterValue(PARAM_BASE_DOCUMENT_ANNOTATION_TYPES))
-                        .orElse(new String[0]))
+                        Optional.ofNullable((String[]) aContext.getConfigParameterValue(PARAM_BASE_DOCUMENT_ANNOTATION_TYPES))
+                                .orElse(new String[0]))
                 .collect(Collectors.toSet());
         attributeSize = (Integer) aContext.getConfigParameterValue(PARAM_ATTRIBUTE_SIZE);
         writeBatchSize = Optional.ofNullable((Integer) aContext.getConfigParameterValue(PARAM_WRITE_BATCH_SIZE)).orElse(50);
@@ -423,6 +425,7 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
         if (useBinaryFormat) {
             this.binaryEncoder = new BinaryJeDISNodeEncoder();
         }
+        mirrorResetIds = new HashSet<>();
 
         log.info(XMIDBWriter.class.getName() + " initialized.");
         log.info("Effective document table name: {}", effectiveDocTableName);
@@ -509,7 +512,13 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException {
             } catch (IllegalArgumentException e) {
                 // Do nothing; this is not the work item CAS
             }
-            DocumentId docId = getDocumentId(aJCas);
+            Collection<DBProcessingMetaData> metaDatas = JCasUtil.select(aJCas, DBProcessingMetaData.class);
+            if (metaDatas.size() > 1)
+                throw new AnalysisEngineProcessException(new IllegalArgumentException(
+                        "There is more than one type of DBProcessingMetaData in document " + JCoReTools.getDocId(aJCas)));
+            Optional<DBProcessingMetaData> metaData = metaDatas.stream().findAny();
+            DocumentId docId = getDocumentId(aJCas, metaData);
+            setMirrorResetStateForDocId(docId, metaData);
             if (docId == null) {
                 log.warn("The current document does not have a document ID. It is omitted from database import.");
                 return;
@@ -518,12 +527,8 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException {
             currentDocumentIdBatch.add(docId);
 
             if (subsetTable == null) {
-                Collection<DBProcessingMetaData> metaData = JCasUtil.select(aJCas, DBProcessingMetaData.class);
                 if (!metaData.isEmpty()) {
-                    if (metaData.size() > 1)
-                        throw new AnalysisEngineProcessException(new IllegalArgumentException(
-                                "There is more than one type of DBProcessingMetaData in document " + docId));
-                    subsetTable = metaData.stream().findAny().get().getSubsetTable();
+                    subsetTable = metaData.get().getSubsetTable();
 
                     if (subsetTable != null && storeBaseDocument) {
                         // Check if we are about to read from a mirror subset and to update the base document. This is not allowed
@@ -563,6 +568,19 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException {
         }
     }
 
+    private void setMirrorResetStateForDocId(DocumentId docId, Optional<DBProcessingMetaData> metaData) {
+        if (metaData.isPresent()) {
+            // mirror subset reset is only necessary if we store the base document in any way;
+            // additionally, we check if the document text hash key is reported to by different to its already
+            // existing database entry. Only then the mirror subsets should be reset for this document.
+            if (storeBaseDocument && !metaData.get().getIsDocumentHashUnchanged())
+                mirrorResetIds.add(docId);
+        } else {
+            // default: reset the mirror tables
+            mirrorResetIds.add(docId);
+        }
+    }
+
     private void handleAddhash(JCas aJCas, DocumentId docId) {
         if (documentItemToHash != null) {
             final String documentText = aJCas.getDocumentText();
@@ -836,15 +854,14 @@ private Map<String, ByteArrayOutputStream> convertModuleLabelsToColumnNames(Map<
         return convertedMap;
     }
 
-    private DocumentId getDocumentId(JCas aJCas) {
+    private DocumentId getDocumentId(JCas aJCas, Optional<DBProcessingMetaData> metaData) {
         DocumentId docId = null;
-        try {
-            DBProcessingMetaData dbProcessingMetaData = JCasUtil.selectSingle(aJCas, DBProcessingMetaData.class);
-            docId = new DocumentId(dbProcessingMetaData);
-        } catch (IllegalArgumentException e) {
-            // it seems there is not DBProcessingMetaData we could get a complex primary key from. The document ID
+        if (metaData.isPresent()) {
+            docId = new DocumentId(metaData.get());
+        } else {
+            // it seems there is no DBProcessingMetaData we could get a complex primary key from. The document ID
             // will have to do.
-            log.trace("Could not find the primary key in the DBProcessingMetaData due to exception: {}. Using the document ID as primary key.", e.getMessage());
+            log.trace("Could not find the primary key in the DBProcessingMetaData because no meta data annotation is set. Using the document ID as primary key.");
         }
         if (docId == null) {
             AnnotationIndex<Annotation> headerIndex = aJCas.getAnnotationIndex(Header.type);
@@ -1005,7 +1022,7 @@ public void batchProcessComplete() throws AnalysisEngineProcessException {
             final boolean readyToSendData = processXmiBuffer();
             if (readyToSendData) {
                 if (!(featuresToMapDryRun && useBinaryFormat))
-                    annotationInserter.sendXmiDataToDatabase(effectiveDocTableName, annotationModules, subsetTable, storeBaseDocument, deleteObsolete, shaMap);
+                    annotationInserter.sendXmiDataToDatabase(effectiveDocTableName, annotationModules, subsetTable, mirrorResetIds, deleteObsolete, shaMap);
                 else
                     log.info("The dry run to see details about features to be mapped in the binary format is activated. No contents are written into the database.");
                 log.trace("Clearing {} annotation modules", annotationModules.size());
@@ -1015,6 +1032,7 @@ public void batchProcessComplete() throws AnalysisEngineProcessException {
                 if (docReleaseCheckpoint != null)
                     docReleaseCheckpoint.release(jedisSyncKey, currentDocumentIdBatch.stream());
                 currentDocumentIdBatch.clear();
+                mirrorResetIds.clear();
             }
         } catch (XmiDataInsertionException e) {
             throw new AnalysisEngineProcessException(e);
@@ -1034,7 +1052,7 @@ public void collectionProcessComplete() throws AnalysisEngineProcessException {
         try {
             processXmiBuffer();
             if (!(featuresToMapDryRun && useBinaryFormat))
-                annotationInserter.sendXmiDataToDatabase(effectiveDocTableName, annotationModules, subsetTable, storeBaseDocument, deleteObsolete, shaMap);
+                annotationInserter.sendXmiDataToDatabase(effectiveDocTableName, annotationModules, subsetTable, mirrorResetIds, deleteObsolete, shaMap);
             else
                 log.info("The dry run to see details about features to be mapped in the binary format is activated. No contents are written into the database.");
             annotationModules.clear();
@@ -1043,6 +1061,7 @@ public void collectionProcessComplete() throws AnalysisEngineProcessException {
             if (docReleaseCheckpoint != null)
                 docReleaseCheckpoint.release(jedisSyncKey, currentDocumentIdBatch.stream());
             currentDocumentIdBatch.clear();
+            mirrorResetIds.clear();
         } catch (XmiDataInsertionException e) {
             throw new AnalysisEngineProcessException(e);
         }
diff --git a/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XmiDataInserter.java b/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XmiDataInserter.java
index 1a75f474e..390e27e67 100644
--- a/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XmiDataInserter.java
+++ b/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XmiDataInserter.java
@@ -18,6 +18,7 @@
 import java.sql.SQLException;
 import java.util.*;
 import java.util.function.Function;
+import java.util.function.Predicate;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
@@ -59,23 +60,33 @@ public XmiDataInserter(Set<String> annotationModuleColumnNames,
      * will be a primary key constraint violation, i.e. duplicates).
      *
      * @param annotationModules
-     * @param storeBaseDocument
+     * @param mirrorResetIds
      * @param deleteObsolete
      * @param shaMap
      * @throws XmiDataInsertionException
      * @throws AnalysisEngineProcessException
      */
-    public void sendXmiDataToDatabase(String xmiTableName, List<XmiData> annotationModules, String subsetTableName, Boolean storeBaseDocument, Boolean deleteObsolete, Map<DocumentId, String> shaMap) throws XmiDataInsertionException {
+    public void sendXmiDataToDatabase(String xmiTableName, List<XmiData> annotationModules, String subsetTableName, Set<DocumentId> mirrorResetIds, Boolean deleteObsolete, Map<DocumentId, String> shaMap) throws XmiDataInsertionException {
         log.trace("Sending {} XMI data items", annotationModules.size());
         final Map<DocumentId, List<XmiData>> dataByDoc = annotationModules.stream().collect(Collectors.groupingBy(XmiData::getDocId));
         // Collect all document IDs we want to add something for into the database. This can be annotations or the hash.
          final Set<DocumentId> documentIdsWithData = shaMap != null ? Sets.union(dataByDoc.keySet(), shaMap.keySet()) : dataByDoc.keySet();
         log.trace("There are {} documents with values to be updated in the database.", documentIdsWithData.size());
         class RowIterator implements Iterator<Map<String, Object>> {
+            /**
+             * An iterator that always returns only rows for a subset of document IDs. Either the ones that need mirror subsets to be reset or those for which mirror subsets should not be reset.
+             * @param returnDocumentsWithMirrorReset
+             */
+            public RowIterator(boolean returnDocumentsWithMirrorReset) {
+                Predicate<DocumentId> mirrorResetFilterPredicate = docId -> mirrorResetIds.contains(docId);
+                if (!returnDocumentsWithMirrorReset)
+                    mirrorResetFilterPredicate = Predicate.not(mirrorResetFilterPredicate);
+                docIdIterator = Stream.concat(documentIdsWithData.stream(), processedDocumentIds.stream()).filter(mirrorResetFilterPredicate).distinct().iterator();
+            }
 
             // Add documents that have been processed but no data. We need to do this to override potentially existing
             // annotation values with null to remove them.
-            private Iterator<DocumentId> docIdIterator = Stream.concat(documentIdsWithData.stream(), processedDocumentIds.stream()).distinct().iterator();
+            private Iterator<DocumentId> docIdIterator;
             private FieldConfig fieldConfig = dbc.getFieldConfiguration(schemaDocument);
             private List<Map<String, String>> fields = fieldConfig.getFields();
 
@@ -169,12 +180,15 @@ public void remove() {
 
             // This is the private in-line defined class from above. All values are already contained in the class
             // definition.
-            RowIterator iterator = new RowIterator();
+            RowIterator iterator = new RowIterator(true);
             try {
                 if (updateMode) {
-                    log.debug("Updating {} XMI CAS data in database table '{}'.",
-                            annotationModules.size(), xmiTableName);
-                    dbc.updateFromRowIterator(iterator, xmiTableName, false, storeBaseDocument, schemaDocument);
+                    log.debug("Updating {} XMI CAS data in database table '{}' for documents with mirror subset resets.",
+                            mirrorResetIds.size(), xmiTableName);
+                    dbc.updateFromRowIterator(iterator, xmiTableName, false, true, schemaDocument);
+                    log.debug("Updating {} XMI CAS data in database table '{}' for documents without mirror subset resets.",
+                            annotationModules.size()-mirrorResetIds.size(), xmiTableName);
+                    dbc.updateFromRowIterator(new RowIterator(false), xmiTableName, false, false, schemaDocument);
                 } else {
                     log.debug("Inserting {} XMI CAS data into database table '{}'.",
                             annotationModules.size(), xmiTableName);
diff --git a/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterTest.java b/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterTest.java
index 68150ad75..fc93a2138 100644
--- a/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterTest.java
+++ b/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterTest.java
@@ -4,6 +4,7 @@
 import de.julielab.costosys.dbconnection.DataBaseConnector;
 import de.julielab.jcore.db.test.DBTestUtils;
 import de.julielab.jcore.types.*;
+import de.julielab.jcore.types.ext.DBProcessingMetaData;
 import de.julielab.xml.XmiSplitConstants;
 import org.apache.commons.configuration2.ex.ConfigurationException;
 import org.apache.uima.UIMAException;
@@ -11,19 +12,23 @@
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.StringArray;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.InvalidXMLException;
 import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Nested;
 import org.junit.jupiter.api.Test;
 import org.testcontainers.containers.PostgreSQLContainer;
 import org.testcontainers.junit.jupiter.Container;
 import org.testcontainers.junit.jupiter.Testcontainers;
 
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 import java.sql.ResultSet;
 import java.sql.SQLException;
 import java.util.List;
-import java.util.Map;
-import java.util.Set;
+import java.util.*;
 import java.util.stream.Collectors;
 
 import static org.assertj.core.api.Assertions.assertThat;
@@ -32,7 +37,7 @@
 @Testcontainers
 public class XmiDBWriterTest {
     @Container
-    public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:"+DataBaseConnector.POSTGRES_VERSION);
+    public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:" + DataBaseConnector.POSTGRES_VERSION);
     private static String costosysConfig;
     private static DataBaseConnector dbc;
 
@@ -58,48 +63,6 @@ public static JCas getJCasWithRequiredTypes() throws UIMAException {
                 "de.julielab.jcore.types.jcore-xmi-splitter-types");
     }
 
-    @Test
-    public void testXmiDBWriterSplitAnnotations() throws Exception {
-
-        AnalysisEngine xmiWriter = AnalysisEngineFactory.createEngine("de.julielab.jcore.consumer.xmi.desc.jcore-xmi-db-writer",
-                XMIDBWriter.PARAM_ANNOS_TO_STORE, new String[]{Token.class.getCanonicalName(), Sentence.class.getCanonicalName()},
-                XMIDBWriter.PARAM_COSTOSYS_CONFIG, costosysConfig,
-                XMIDBWriter.PARAM_STORE_ALL, false,
-                XMIDBWriter.PARAM_STORE_BASE_DOCUMENT, true,
-                XMIDBWriter.PARAM_TABLE_DOCUMENT, "_data.documents2",
-                XMIDBWriter.PARAM_DO_GZIP, false,
-                XMIDBWriter.PARAM_STORE_RECURSIVELY, true,
-                XMIDBWriter.PARAM_UPDATE_MODE, true,
-                XMIDBWriter.PARAM_BASE_DOCUMENT_ANNOTATION_TYPES, new String[]{MeshHeading.class.getCanonicalName(), AbstractText.class.getCanonicalName(), Title.class.getCanonicalName(), de.julielab.jcore.types.pubmed.Header.class.getCanonicalName()}
-        );
-        JCas jCas = getJCasWithRequiredTypes();
-        final Header header = new Header(jCas);
-        header.setDocId("789");
-        header.addToIndexes();
-        jCas.setDocumentText("This is a sentence. This is another one.");
-        new Sentence(jCas, 0, 19).addToIndexes();
-        new Sentence(jCas, 20, 40).addToIndexes();
-        // Of course, these token offsets are wrong, but it doesn't matter to the test
-        new Token(jCas, 0, 19).addToIndexes();
-        new Token(jCas, 20, 40).addToIndexes();
-        assertThatCode(() -> xmiWriter.process(jCas)).doesNotThrowAnyException();
-        jCas.reset();
-        xmiWriter.collectionProcessComplete();
-
-        dbc = DBTestUtils.getDataBaseConnector(postgres);
-        try (CoStoSysConnection ignored = dbc.obtainOrReserveConnection()) {
-            assertThat(dbc.tableExists("_data.documents2")).isTrue();
-
-            assertThat(dbc.getTableColumnNames("_data.documents2")).contains("de_julielab_jcore_types_token", "de_julielab_jcore_types_sentence");
-            assertThat(dbc.isEmpty("_data.documents2", XmiSplitConstants.BASE_DOC_COLUMN)).isFalse();
-            assertThat(dbc.isEmpty("_data.documents2", XmiDataInserter.FIELD_MAX_XMI_ID)).isFalse();
-            assertThat(dbc.isEmpty("_data.documents2", "sofa_mapping")).isFalse();
-            assertThat(dbc.isEmpty("_data.documents2", "de_julielab_jcore_types_token")).isFalse();
-            assertThat(dbc.isEmpty("_data.documents2", "de_julielab_jcore_types_sentence")).isFalse();
-
-        }
-    }
-
     @Test
     public void testXmiDBWriterSplitAnnotationsSpecifyAnnotationSchemas() throws Exception {
 
@@ -148,7 +111,7 @@ public void testXmiDBWriterSplitAnnotationsSpecifyAnnotationSchemas() throws Exc
     public void testXmiDBWriterSplitAnnotationsDefaultAnnotationSchemas() throws Exception {
 
         AnalysisEngine xmiWriter = AnalysisEngineFactory.createEngine("de.julielab.jcore.consumer.xmi.desc.jcore-xmi-db-writer",
-                XMIDBWriter.PARAM_ANNOS_TO_STORE, new String[]{ Token.class.getCanonicalName(), Sentence.class.getCanonicalName()},
+                XMIDBWriter.PARAM_ANNOS_TO_STORE, new String[]{Token.class.getCanonicalName(), Sentence.class.getCanonicalName()},
                 XMIDBWriter.PARAM_ANNO_DEFAULT_QUALIFIER, "testschema",
                 XMIDBWriter.PARAM_COSTOSYS_CONFIG, costosysConfig,
                 XMIDBWriter.PARAM_STORE_ALL, false,
@@ -216,9 +179,179 @@ public void testXmiSubtypeStorage() throws Exception {
             assertThat(dbc.tableExists("_data.documents3")).isTrue();
             ResultSet rs = ignored.createStatement().executeQuery("SELECT " + XmiSplitConstants.BASE_DOC_COLUMN + " FROM " + "_data.documents3");
             assertThat(rs.next()).isTrue();
-            String documentString = rs.getString(1);
-            System.out.println(documentString);
+//            String documentString = rs.getString(1);
+//            System.out.println(documentString);
+
+        }
+    }
+
+    @Nested
+    class WriteWithMirrorSubsets {
+        /**
+         * This test checks that the XMI is split as intended and distributed into database table columns as annotation modules.
+         * @throws Exception
+         */
+        @Test
+        public void testXmiDBWriterSplitAnnotations() throws Exception {
+
+            AnalysisEngine xmiWriter = getXmiWriterForDocuments2();
+            JCas jCas = getJCasWithRequiredTypes();
+            prepareDocument1(jCas);
+            assertThatCode(() -> xmiWriter.process(jCas)).doesNotThrowAnyException();
+            jCas.reset();
+            prepareDocument2(jCas);
+            assertThatCode(() -> xmiWriter.process(jCas)).doesNotThrowAnyException();
+            xmiWriter.collectionProcessComplete();
+
+            dbc = DBTestUtils.getDataBaseConnector(postgres);
+            dbc.setActiveTableSchema("medline_2017");
+            try (CoStoSysConnection ignored = dbc.obtainOrReserveConnection()) {
+                assertThat(dbc.tableExists("_data.documents2")).isTrue();
+
+                assertThat(dbc.getTableColumnNames("_data.documents2")).contains("de_julielab_jcore_types_token", "de_julielab_jcore_types_sentence");
+                assertThat(dbc.isEmpty("_data.documents2", XmiSplitConstants.BASE_DOC_COLUMN)).isFalse();
+                assertThat(dbc.isEmpty("_data.documents2", XmiDataInserter.FIELD_MAX_XMI_ID)).isFalse();
+                assertThat(dbc.isEmpty("_data.documents2", "sofa_mapping")).isFalse();
+                assertThat(dbc.isEmpty("_data.documents2", "de_julielab_jcore_types_token")).isFalse();
+                assertThat(dbc.isEmpty("_data.documents2", "de_julielab_jcore_types_sentence")).isFalse();
+
+            }
+
+            // create a subset for nested tests and set its only entry to "processed"
+            try (CoStoSysConnection ignored = dbc.obtainOrReserveConnection()) {
+                dbc.createSubsetTable("_data._data_mirror", "_data.documents2", 1, "Test subset", "medline_2017");
+                dbc.initMirrorSubset("_data._data_mirror", "_data.documents2", true, "medline_2017");
+                List<byte[][]> idsList = new ArrayList<>();
+                idsList.add(new byte[][]{"789".getBytes(StandardCharsets.UTF_8)});
+                idsList.add(new byte[][]{"890".getBytes(StandardCharsets.UTF_8)});
+                dbc.setProcessed("_data._data_mirror", idsList);
+                assertThat(dbc.status("_data._data_mirror", EnumSet.of(DataBaseConnector.StatusElement.IS_PROCESSED)).isProcessed).isEqualTo(2);
+            }
+        }
+
+        /**
+         * Produces the test XMI writer for this nested test group. It stores the base document which should cause
+         * mirror subsets to reset the "is processed" status to false for the written documents.
+         * @return The XMI writer for testing.
+         * @throws InvalidXMLException
+         * @throws IOException
+         * @throws ResourceInitializationException
+         */
+        private AnalysisEngine getXmiWriterForDocuments2() throws InvalidXMLException, IOException, ResourceInitializationException {
+            return AnalysisEngineFactory.createEngine("de.julielab.jcore.consumer.xmi.desc.jcore-xmi-db-writer",
+                    XMIDBWriter.PARAM_ANNOS_TO_STORE, new String[]{Token.class.getCanonicalName(), Sentence.class.getCanonicalName()},
+                    XMIDBWriter.PARAM_COSTOSYS_CONFIG, costosysConfig,
+                    XMIDBWriter.PARAM_STORE_ALL, false,
+                    XMIDBWriter.PARAM_STORE_BASE_DOCUMENT, true,
+                    XMIDBWriter.PARAM_TABLE_DOCUMENT, "_data.documents2",
+                    XMIDBWriter.PARAM_DO_GZIP, false,
+                    XMIDBWriter.PARAM_STORE_RECURSIVELY, true,
+                    XMIDBWriter.PARAM_UPDATE_MODE, true,
+                    XMIDBWriter.PARAM_BASE_DOCUMENT_ANNOTATION_TYPES, new String[]{MeshHeading.class.getCanonicalName(), AbstractText.class.getCanonicalName(), Title.class.getCanonicalName(), de.julielab.jcore.types.pubmed.Header.class.getCanonicalName()}
+            );
+        }
+
+        /**
+         * Prepares the first of two documents used in these nested tests.
+         * @param jCas The CAS to populate with the test data.
+         */
+        private void prepareDocument1(JCas jCas) {
+            final Header header = new Header(jCas);
+            header.setDocId("789");
+            header.addToIndexes();
+            jCas.setDocumentText("This is a sentence. This is another one.");
+            new Sentence(jCas, 0, 19).addToIndexes();
+            new Sentence(jCas, 20, 40).addToIndexes();
+            // Of course, these token offsets are wrong, but it doesn't matter to the test
+            new Token(jCas, 0, 19).addToIndexes();
+            new Token(jCas, 20, 40).addToIndexes();
+        }
+
+        /**
+         * Prepares the second of two documents used in these nested tests.
+         * @param jCas The CAS to populate with the test data.
+         */
+        private void prepareDocument2(JCas jCas) {
+            final Header header2 = new Header(jCas);
+            header2.setDocId("890");
+            header2.addToIndexes();
+            jCas.setDocumentText("Sentence of document 2.");
+            new Sentence(jCas, 0, jCas.getDocumentText().length()).addToIndexes();
+        }
+
+        /**
+         * Default case: mirror subsets should be reset after writing the base document
+         */
+        @Nested
+        class CheckMirrorSubsetIsReset {
+            @Test
+            public void testMirrorSubsetReset() throws Exception {
+                AnalysisEngine xmiWriter = getXmiWriterForDocuments2();
+                JCas jCas = getJCasWithRequiredTypes();
+                prepareDocument1(jCas);
+                assertThatCode(() -> xmiWriter.process(jCas)).doesNotThrowAnyException();
+                jCas.reset();
+                prepareDocument2(jCas);
+                assertThatCode(() -> xmiWriter.process(jCas)).doesNotThrowAnyException();
+                xmiWriter.collectionProcessComplete();
+
+                dbc = DBTestUtils.getDataBaseConnector(postgres);
+                dbc.setActiveTableSchema("medline_2017");
+
+                // check that the subset table has been reset
+                try (CoStoSysConnection ignored = dbc.obtainOrReserveConnection()) {
+                    assertThat(dbc.status("_data._data_mirror", EnumSet.of(DataBaseConnector.StatusElement.IS_PROCESSED)).isProcessed).isEqualTo(0);
+                    assertThat(dbc.status("_data._data_mirror", EnumSet.of(DataBaseConnector.StatusElement.TOTAL)).total).isEqualTo(2);
+                    // set it again to processed for the next test
+                    List<byte[][]> idsList = new ArrayList<>();
+                    idsList.add(new byte[][]{"789".getBytes(StandardCharsets.UTF_8)});
+                    idsList.add(new byte[][]{"890".getBytes(StandardCharsets.UTF_8)});
+                    dbc.setProcessed("_data._data_mirror", idsList);
+                    assertThat(dbc.status("_data._data_mirror", EnumSet.of(DataBaseConnector.StatusElement.IS_PROCESSED)).isProcessed).isEqualTo(2);
+                }
+            }
+        }
+
+        /**
+         * The interesting test case: Given a DBProcessingMetaData annotation that specifies that the document
+         * text hasn't changed between a former document version in the database and the newly written version,
+         * the mirror subsets should not be reset to "is not processed" for the given document.
+         */
+        @Nested
+        class CheckMirrorSubsetIsNotReset {
+            @Test
+            public void testMirrorSubsetNotReset() throws Exception {
+                // precondition check: the mirror subset is currently processed
+                // this main test will be to ensure that the mirror subset stays this way
+                try (CoStoSysConnection ignored = dbc.obtainOrReserveConnection()) {
+                    assertThat(dbc.status("_data._data_mirror", EnumSet.of(DataBaseConnector.StatusElement.IS_PROCESSED)).isProcessed).isEqualTo(2);
+                    assertThat(dbc.status("_data._data_mirror", EnumSet.of(DataBaseConnector.StatusElement.TOTAL)).total).isEqualTo(2);
+                }
+                AnalysisEngine xmiWriter = getXmiWriterForDocuments2();
+                JCas jCas = getJCasWithRequiredTypes();
+                prepareDocument1(jCas);
+                // This is the important part: tell the writer not to reset mirror subsets for this document
+                DBProcessingMetaData processingMetaData = new DBProcessingMetaData(jCas);
+                processingMetaData.setIsDocumentHashUnchanged(true);
+                StringArray pk = new StringArray(jCas, 1);
+                pk.set(0, "789");
+                processingMetaData.setPrimaryKey(pk);
+                processingMetaData.addToIndexes();
+                assertThatCode(() -> xmiWriter.process(jCas)).doesNotThrowAnyException();
+                jCas.reset();
+                prepareDocument2(jCas);
+                assertThatCode(() -> xmiWriter.process(jCas)).doesNotThrowAnyException();
+                xmiWriter.collectionProcessComplete();
+
+                dbc = DBTestUtils.getDataBaseConnector(postgres);
+                dbc.setActiveTableSchema("medline_2017");
 
+                // check that the subset table has NOT been reset for document 789 but for the other
+                try (CoStoSysConnection ignored = dbc.obtainOrReserveConnection()) {
+                    assertThat(dbc.status("_data._data_mirror", EnumSet.of(DataBaseConnector.StatusElement.IS_PROCESSED)).isProcessed).isEqualTo(1);
+                    assertThat(dbc.status("_data._data_mirror", EnumSet.of(DataBaseConnector.StatusElement.TOTAL)).total).isEqualTo(2);
+                }
+            }
         }
     }
 }
diff --git a/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java b/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java
index b429470c2..f3c3d7790 100644
--- a/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java
+++ b/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java
@@ -57,7 +57,7 @@ public class XMLDBMultiplier extends DBMultiplier {
     protected String[] rowMappingArray;
     @ConfigurationParameter(name = PARAM_MAPPING_FILE, description = XMLDBReader.DESC_MAPPING_FILE)
     protected String mappingFileStr;
-    @ConfigurationParameter(name = PARAM_ADD_SHA_HASH, mandatory = false, description = "For use with AnnotationDefinedFlowController. Possible values: document_text, defaults to 'document_text' and thus doesn't need to be specified manually at the moment. This parameter needs to match the value for the same parameter given to the XMIDBWriter in this pipeline. Then, a comparison between the existing hash in the database and the new hash of the CAS read in this pipeline can be made. In case the hashes match, the CAS is directly routed to the components specified in the " + PARAM_TO_VISIT_KEYS + " parameter, skipping all other components. Note that this only works with AAEs where the first component is an 'AnnotationControlledFlow'.")
+    @ConfigurationParameter(name = PARAM_ADD_SHA_HASH, mandatory = false, description = "For use with AnnotationDefinedFlowController and XMIDBWriter. Possible values: document_text, defaults to 'document_text' and thus doesn't need to be specified manually at the moment. This parameter needs to match the value for the same parameter given to the XMIDBWriter in this pipeline. Then, a comparison between the existing hash in the database and the new hash of the CAS read in this pipeline can be made. In case the hashes match, the CAS is directly routed to the components specified in the " + PARAM_TO_VISIT_KEYS + " parameter, skipping all other components. Note that this only works with AAEs where the first component is an 'AnnotationControlledFlow'. Additionally, the DBProcessingMetaData#hasDocumentHashChanged is set. This can be used by the XMIDBWriter to omit the reset of mirror subsets when updating the base document when the actual CAS text stayed the same.")
     private String documentItemToHash;
     @ConfigurationParameter(name = PARAM_TABLE_DOCUMENT, mandatory = false, description = "For use with AnnotationDefinedFlowController. String parameter indicating the name of the " +
             "table where the XMI data and, thus, the hash is stored. The name must be schema qualified. Note that in this component, only the ToVisit annotation is created that determines which components to apply to a CAS with matching (unchanged) hash. The logic to actually control the CAS flow is contained in the AnnotationDefinedFlowController.")
@@ -148,6 +148,7 @@ private void setToVisitAnnotation(JCas jCas) {
                 if (existingHash.equals(newHash)) {
                     if (log.isTraceEnabled())
                         log.trace("Document {} has a document text hash that equals the one present in the database. Creating a ToVisit annotation routing it only to the components with delegate keys {}.", pkString, toVisitKeys);
+                    dbProcessingMetaData.setIsDocumentHashUnchanged(true);
                     ToVisit toVisit = new ToVisit(jCas);
                     if (toVisitKeys != null && toVisitKeys.length != 0) {
                         StringArray keysArray = new StringArray(jCas, toVisitKeys.length);

From cf3f2e01b22e22a7c7eaa9e1405dc36e171b2a47 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 18 Feb 2022 16:19:36 +0100
Subject: [PATCH 147/269] Write GNormPlus BioC XML reader (#131).

In progress; already written base classes and tests.
---
 jcore-gnp-bioc-reader/BioC.dtd                | 158 ++++++++
 jcore-gnp-bioc-reader/LICENSE                 |  26 ++
 jcore-gnp-bioc-reader/README.md               |  34 ++
 jcore-gnp-bioc-reader/pom.xml                 |  58 +++
 .../jcore/reader/BioCCasPopulator.java        | 153 ++++++++
 .../GNormPlusFormatMultiplierReader.java      |  94 +++++
 .../jcore/reader/MissingInfonException.java   |  22 ++
 .../de/julielab/jcore/reader/desc/PLACEHOLDER |   4 +
 .../reader/desc/jcore-bnp-bioc-reader.xml     |  20 +
 .../jcore/reader/BioCCasPopulatorTest.java    |  62 ++++
 .../GNormPlusFormatMultiplierReaderTest.java  |  69 ++++
 .../test-input-path/bioc_collection_3.xml     |   1 +
 .../subdir1/bioc_collection_0.xml             |   2 +
 .../subdir1/bioc_collection_1.xml             |   2 +
 .../subdir2/bioc_collection_2.xml             |   2 +
 pom.xml                                       | 342 ++++++++++++------
 16 files changed, 936 insertions(+), 113 deletions(-)
 create mode 100644 jcore-gnp-bioc-reader/BioC.dtd
 create mode 100644 jcore-gnp-bioc-reader/LICENSE
 create mode 100644 jcore-gnp-bioc-reader/README.md
 create mode 100644 jcore-gnp-bioc-reader/pom.xml
 create mode 100644 jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
 create mode 100644 jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/GNormPlusFormatMultiplierReader.java
 create mode 100644 jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/MissingInfonException.java
 create mode 100644 jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/PLACEHOLDER
 create mode 100644 jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-reader.xml
 create mode 100644 jcore-gnp-bioc-reader/src/test/java/de/julielab/jcore/reader/BioCCasPopulatorTest.java
 create mode 100644 jcore-gnp-bioc-reader/src/test/java/de/julielab/jcore/reader/GNormPlusFormatMultiplierReaderTest.java
 create mode 100644 jcore-gnp-bioc-reader/src/test/resources/test-input-path/bioc_collection_3.xml
 create mode 100644 jcore-gnp-bioc-reader/src/test/resources/test-input-path/subdir1/bioc_collection_0.xml
 create mode 100644 jcore-gnp-bioc-reader/src/test/resources/test-input-path/subdir1/bioc_collection_1.xml
 create mode 100644 jcore-gnp-bioc-reader/src/test/resources/test-input-path/subdir2/bioc_collection_2.xml

diff --git a/jcore-gnp-bioc-reader/BioC.dtd b/jcore-gnp-bioc-reader/BioC.dtd
new file mode 100644
index 000000000..8bd0d55ca
--- /dev/null
+++ b/jcore-gnp-bioc-reader/BioC.dtd
@@ -0,0 +1,158 @@
+<!-- BioC.dtd -->
+
+        <!--
+
+            BioC is designed to allow programs that process text and
+            annotations on that text to easily share data and work
+            together. This DTD describes how that data is represented in XML
+            files.
+
+            Some believe XML is easily read by humans and that should be
+            supported by clearly formatting the elements. In the long run,
+            this is distracting. While the only meaningful spaces are in text
+            elements and the other spaces can be ignored, current tools add no
+            additional space.  Formatters and editors may be used to make the
+            XML file appear more readable.
+
+            The possible variety of annotations that one might want to produce
+            or use is nearly countless. There is no guarantee that these are
+            organized in the nice nested structure required for XML
+            elements. Even if they were, it would be nice to easily ignore
+            unwanted annotations.  So annotations are recorded in a stand off
+            manner, external to the annotated text. The exceptions are
+            passages and sentences because of their fundamental place in text.
+
+            The text is expected to be encoded in Unicode, specifically
+            UTF-8. This is one of the encodings required to be implemented by
+            XML tools, is portable between big-endian and little-endian
+            machines and is a superset of 7-bit ASCII. Code points beyond 127
+            may be expressed directly in UTF-8 or indirectly using numeric
+            entities.  Since many tools today still only directly process
+            ASCII characters, conversion should be available and
+            standardized.  Offsets should be in 8 bit code units (bytes) for
+            easier processing by naive programs.
+
+            collection:  Group of documents, usually from a larger corpus. If
+            a group of documents is from several corpora, use several
+            collections.
+
+            source:  Name of the source corpus from which the documents were selected
+
+            date:  Date documents extracted from original source. Can be as
+            simple as yyyymmdd or an ISO timestamp.
+
+            key: Separate file describing the infons used and any other useful
+            information about the data in the file. For example, if a file
+            includes part-of-speech tags, this file should describe the set of
+            part-of-speech tags used.
+
+            infon: key-value pairs. Can record essentially arbitrary
+            information. "type" will be a particular common key in the major
+            sub elements below. For PubMed references, passage "type" might
+            signal "title" or "abstract". For annotations, it might indicate
+            "noun phrase", "gene", or "disease". In the programming language
+            data structures, infons are typically represented as a map from a
+            string to a string.  This means keys should be unique within each
+            parent element.
+
+            document: A document in the collection. A single, complete
+            stand-alone document as described by its parent source.
+
+            id:  Typically, the id of the document in the parent
+            source. Should at least be unique in the collection.
+
+            passage: One portion of the document.  In the sample collection of
+            PubMed documents, each document has a title and frequently an
+            abstract. Structured abstracts could have additional passages. For
+            a full text document, passages could be sections such as
+            Introduction, Materials and Methods, or Conclusion. Another option
+            would be paragraphs. Passages impose a linear structure on the
+            document. Further structure in the document can be described by
+            infon values.
+
+            offset: Where the passage occurs in the parent document. Depending
+            on the source corpus, this might be a very relevant number.  They
+            should be sequential and identify a passage's position in the
+            document.  Since the sample PubMed collection is extracted from an
+            XML file, literal offsets have little value. The title is given an
+            offset of zero, while the abstract is assumed to begin after the
+            title and one space.
+
+            text: The original text of the passage.
+
+            sentence:  One sentence of the passage.
+
+            offset: A document offset to where the sentence begins in the
+            passage. This value is the sum of the passage offset and the local
+            offset within the passage.
+
+            text: The original text of the sentence.
+
+            annotation:  Stand-off annotation
+
+            id: Used to refer to this annotation in relations. Should be
+            unique at whatever level relations appear. If relations appear
+            at the sentence level, annotation ids need to be unique within
+            each sentence. Similarly, if relations appear at the passage
+            level, annotation ids need to be unique within each passage.
+
+            location: Location of the annotated text. Multiple locations
+            indicate a multi-span annotation.
+
+            offset: Document offset to where the annotated text begins in
+            the passage or sentence. The value is the sum of the passage or
+            sentence offset and the local offset within the passage or
+            sentence.
+
+            length: Length of the annotated text. While unlikely, this could
+            be zero to describe an annotation that belongs between two
+            characters.
+
+            text:  Typically the annotated text.
+
+            relation: Relation between multiple annotations and / or other
+            relations. Relations are allowed to appear at several levels
+            (document, passage, and sentence). Typically they will all appear
+            at one level, the level at which they are determined.
+            Significantly different types of relations might appear at
+            different levels.
+
+            id: Used to refer to this relation in other relations. This id
+            needs to be unique at whatever level relations appear. (See
+            discussion of annotation ids.)
+
+            refid: Id of an annotation or another relation.
+
+            role: Describes how the referenced annotation or other relation
+            participates in the current relation. Has a default value so it
+            can be left out if there is no meaningful value.
+
+        -->
+
+        <!ELEMENT collection ( source, date, key, infon*, document+ ) >
+        <!ELEMENT source (#PCDATA)>
+        <!ELEMENT date (#PCDATA)>
+        <!ELEMENT key (#PCDATA)>
+        <!ELEMENT infon (#PCDATA)>
+        <!ATTLIST infon key CDATA #REQUIRED >
+
+        <!ELEMENT document ( id, infon*, passage+, relation* ) >
+        <!ELEMENT id (#PCDATA)>
+
+        <!ELEMENT passage ( infon*, offset, ( ( text?, annotation* ) | sentence* ), relation* ) >
+        <!ELEMENT offset (#PCDATA)>
+        <!ELEMENT text (#PCDATA)>
+
+        <!ELEMENT sentence ( infon*, offset, text?, annotation*, relation* ) >
+
+        <!ELEMENT annotation ( infon*, location*, text ) >
+        <!ATTLIST annotation id CDATA #IMPLIED >
+        <!ELEMENT location EMPTY>
+        <!ATTLIST location offset CDATA #REQUIRED >
+        <!ATTLIST location length CDATA #REQUIRED >
+
+        <!ELEMENT relation ( infon*, node* ) >
+        <!ATTLIST relation id CDATA #IMPLIED >
+        <!ELEMENT node EMPTY>
+        <!ATTLIST node refid CDATA #REQUIRED >
+        <!ATTLIST node role CDATA "" >
diff --git a/jcore-gnp-bioc-reader/LICENSE b/jcore-gnp-bioc-reader/LICENSE
new file mode 100644
index 000000000..fbbd41e05
--- /dev/null
+++ b/jcore-gnp-bioc-reader/LICENSE
@@ -0,0 +1,26 @@
+BSD 2-Clause License
+
+Copyright (c) 2017, JULIE Lab
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/jcore-gnp-bioc-reader/README.md b/jcore-gnp-bioc-reader/README.md
new file mode 100644
index 000000000..7947f772a
--- /dev/null
+++ b/jcore-gnp-bioc-reader/README.md
@@ -0,0 +1,34 @@
+# JCoRe GNormPlus BioC Reader
+
+**Descriptor Path**:
+```
+de.julielab.jcore.reader.desc.jcore-bnp-bioc-reader
+```
+
+A reader for the BioC format used by GNormPlus. Reads the text and the annotations, both species and genes.
+
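+
+**1. Parameters**
+
+| Parameter Name | Parameter Type | Mandatory | Multivalued | Description |
+|----------------|----------------|-----------|-------------|-------------|
+| InputPath | String | true | false | Path to a directory or file to be read. In case of a directory, all files ending in .xml will be read. |
+| Recursive | Boolean | false | false | Whether to read also the subdirectories of the input directory, if the input path points to a directory. Defaults to `true`. |
+| BatchSize | Integer | false | false | The number of XML file URI references to send to the CAS multipliers in each work assignment. Defaults to 20. |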
+
+**2. Predefined Settings**
+
+| Parameter Name | Parameter Syntax | Example |
+|----------------|------------------|---------|
+| param1 | Syntax-Description | `Example` |
+| param2 | Syntax-Description | `Example` |
+
+**3. Capabilities**
+
+| Type | Input | Output |
+|------|:-----:|:------:|
+| de.julielab.jcore.types.TYPE |  | `+` |
+| de.julielab.jcore.types.ace.TYPE | `+` |  |
+
+
+[1] Some Literature?
diff --git a/jcore-gnp-bioc-reader/pom.xml b/jcore-gnp-bioc-reader/pom.xml
new file mode 100644
index 000000000..86008eabd
--- /dev/null
+++ b/jcore-gnp-bioc-reader/pom.xml
@@ -0,0 +1,58 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>jcore-bnp-bioc-reader</artifactId>
+    <packaging>jar</packaging>
+    <groupId>de.julielab</groupId>
+
+    <parent>
+        <groupId>de.julielab</groupId>
+        <artifactId>jcore-base</artifactId>
+        <version>2.6.0-SNAPSHOT</version>
+    </parent>
+
+    <dependencies>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-descriptor-creator</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>com.pengyifan.bioc</groupId>
+            <artifactId>pengyifan-bioc</artifactId>
+            <version>1.0.3</version>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-types</artifactId>
+            <version>${jcore-types-version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.assertj</groupId>
+            <artifactId>assertj-core</artifactId>
+        </dependency>
+    </dependencies>
+    <name>JCoRe GNormPlus BioC Reader</name>
+    <organization>
+        <name>JULIE Lab Jena, Germany</name>
+        <url>http://www.julielab.de</url>
+    </organization>
+    <url>https://github.com/JULIELab/jcore-base/tree/master/jcore-bnp-bioc-reader</url>
+    <description>A reader for the BioC format used by GNormPlus. Reads the text and the annotations, both species and
+        genes.
+    </description>
+</project>
diff --git a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
new file mode 100644
index 000000000..4af6d0342
--- /dev/null
+++ b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
@@ -0,0 +1,153 @@
+package de.julielab.jcore.reader;
+
+import com.pengyifan.bioc.*;
+import com.pengyifan.bioc.io.BioCCollectionReader;
+import de.julielab.jcore.types.*;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSArray;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.xml.stream.XMLStreamException;
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.Optional;
+import java.util.stream.Stream;
+
+/**
+ * Reads a BioC collection from file and adds the species and gene annotations from its documents to JCas instances.
+ */
+public class BioCCasPopulator {
+
+    private final static Logger log = LoggerFactory.getLogger(BioCCasPopulator.class);
+    private final BioCCollection bioCCollection;
+    private int pos;
+
+    public BioCCasPopulator(Path biocCollectionPath) throws XMLStreamException, IOException {
+        try (BioCCollectionReader bioCCollectionReader = new BioCCollectionReader(biocCollectionPath)) {
+            bioCCollection = bioCCollectionReader.readCollection();
+        }
+        pos = 0;
+    }
+
+    public void populateWithNextDocument(JCas jCas) throws XMLStreamException, IOException {
+        BioCDocument document = bioCCollection.getDocument(pos++);
+        setDocumentText(jCas, document);
+        Iterator<BioCAnnotation> allAnnotations = Stream.concat(document.getAnnotations().stream(), document.getPassages().stream().map(BioCPassage::getAnnotations).flatMap(Collection::stream)).iterator();
+        for (BioCAnnotation annotation : (Iterable<BioCAnnotation>)() ->allAnnotations) {
+            Optional<String> type = annotation.getInfon("type");
+            if (!type.isPresent())
+                throw new IllegalArgumentException("BioCDocument " + document.getID() + " has an annotation that does not specify its type: " + annotation);
+            try {
+                switch (type.get()) {
+                    case "Gene":
+                        addGeneAnnotation(annotation, jCas);
+                        break;
+                    case "Species":
+                        addSpeciesAnnotation(annotation, jCas);
+                        break;
+                }
+            } catch (MissingInfonException e) {
+                throw new IllegalArgumentException("BioCDocument " + document.getID() + " has an annotation issue; see cause exception.", e);
+            }
+        }
+    }
+
+    /** Concatenates the passage texts at their given offsets into the document text and adds one structure annotation (Title, AbstractText, Paragraph, Caption or Zone) for each passage that has text and a "type" infon. */
+    private void setDocumentText(JCas jCas, BioCDocument document) {
+        StringBuilder sb = new StringBuilder();
+        for (BioCPassage passage : document.getPassages()) {
+            int offset = passage.getOffset();
+            // The offset of the passage must match its starting position in the StringBuilder or the annotation
+            // offsets won't match. We might need to fill up the StringBuilder to reach the given offset.
+            while (sb.length() < offset)
+                sb.append(" ");
+            if (passage.getText().isPresent()) {
+                sb.append(passage.getText().get());
+                Optional<String> type = passage.getInfon("type");
+                if (type.isPresent()) {
+                    int passageEnd = offset + passage.getText().get().length();
+                    Zone passageAnnotation;
+                    // The values in this switch are basically determined by the values created in the BioCDocumentPopulator in the jcore-gnp-bioc-writer project.
+                    switch (type.get()) {
+                        case "title":
+                            passageAnnotation = new Title(jCas, offset, passageEnd);
+                            ((Title) passageAnnotation).setTitleType("document");
+                            break;
+                        case "section_title":
+                            passageAnnotation = new Title(jCas, offset, passageEnd);
+                            ((Title) passageAnnotation).setTitleType("section");
+                            break;
+                        case "figure_title":
+                            passageAnnotation = new Title(jCas, offset, passageEnd);
+                            ((Title) passageAnnotation).setTitleType("figure");
+                            break;
+                        case "table_title":
+                            passageAnnotation = new Title(jCas, offset, passageEnd);
+                            ((Title) passageAnnotation).setTitleType("table");
+                            break;
+                        case "abstract":
+                            passageAnnotation = new AbstractText(jCas, offset, passageEnd);
+                            break;
+                        case "paragraph":
+                            passageAnnotation = new Paragraph(jCas, offset, passageEnd);
+                            break;
+                        case "figure":
+                        case "table":
+                            // for figures and tables we have actually no means to distinguish between captions and the actual object; mainly because the actual objects have so far not been part of the CAS documents; thus, this can only be a caption until the objects themselves are added
+                            passageAnnotation = new Caption(jCas, offset, passageEnd);
+                            ((Caption) passageAnnotation).setCaptionType(type.get());
+                            break; // must not fall through to default, which would replace the caption by a plain Zone
+                        default:
+                            log.debug("Unhandled passage type {}", type.get());
+                            passageAnnotation = new Zone(jCas, offset, passageEnd);
+                    }
+                    passageAnnotation.addToIndexes();
+                }
+            }
+        }
+        jCas.setDocumentText(sb.toString());
+    }
+
+    private void addSpeciesAnnotation(BioCAnnotation annotation, JCas jCas) throws MissingInfonException {
+        Optional<String> taxId = annotation.getInfon("NCBI Taxonomy");
+        if (!taxId.isPresent())
+            throw new MissingInfonException("Species annotation does not specify its taxonomy ID: " + annotation);
+        // the "total location" is the span from the minimum location value to the maximum location value;
+        // for GNormPlus, there are no discontinuing annotations anyway
+        BioCLocation location = annotation.getTotalLocation();
+        Organism organism = new Organism(jCas, location.getOffset(), location.getOffset() + location.getLength());
+        ResourceEntry resourceEntry = new ResourceEntry(jCas, organism.getBegin(), organism.getEnd());
+        resourceEntry.setSource("NCBI Taxonomy");
+        resourceEntry.setComponentId(GNormPlusFormatMultiplierReader.class.getCanonicalName());
+        resourceEntry.setEntryId(taxId.get());
+        FSArray resourceEntryList = new FSArray(jCas, 1);
+        resourceEntryList.set(0, resourceEntry);
+        organism.setResourceEntryList(resourceEntryList);
+        organism.addToIndexes();
+    }
+
+    private void addGeneAnnotation(BioCAnnotation annotation, JCas jCas) throws MissingInfonException {
+        Optional<String> geneId = annotation.getInfon("NCBI Gene");
+        if (!geneId.isPresent())
+            throw new MissingInfonException("Gene annotation does not specify its gene ID: " + annotation);
+        // the "total location" is the span from the minimum location value to the maximum location value;
+        // for GNormPlus, there are no discontinuing annotations anyway
+        BioCLocation location = annotation.getTotalLocation();
+        Gene gene = new Gene(jCas, location.getOffset(), location.getOffset() + location.getLength());
+        ResourceEntry resourceEntry = new ResourceEntry(jCas, gene.getBegin(), gene.getEnd());
+        resourceEntry.setSource("NCBI Gene");
+        resourceEntry.setComponentId(GNormPlusFormatMultiplierReader.class.getCanonicalName());
+        resourceEntry.setEntryId(geneId.get());
+        FSArray resourceEntryList = new FSArray(jCas, 1);
+        resourceEntryList.set(0, resourceEntry);
+        gene.setResourceEntryList(resourceEntryList);
+        gene.addToIndexes();
+    }
+
+    public int documentsLeftInCollection() {
+        return bioCCollection.getDocmentCount() - pos;
+    }
+}
diff --git a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/GNormPlusFormatMultiplierReader.java b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/GNormPlusFormatMultiplierReader.java
new file mode 100644
index 000000000..dc04596e4
--- /dev/null
+++ b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/GNormPlusFormatMultiplierReader.java
@@ -0,0 +1,94 @@
+package de.julielab.jcore.reader;
+
+import de.julielab.jcore.types.casmultiplier.JCoReURI;
+import org.apache.uima.UimaContext;
+import org.apache.uima.collection.CollectionException;
+import org.apache.uima.fit.component.JCasCollectionReader_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.descriptor.ResourceMetaData;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.Progress;
+import org.apache.uima.util.ProgressImpl;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.net.URI;
+import java.nio.file.FileVisitOption;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Iterator;
+import java.util.Optional;
+import java.util.stream.Stream;
+
+@ResourceMetaData(name = "JCoRe GNormPlus Format Multiplier Reader", description = "A reader for the BioC XML format used by GNormPlus. Requires the matching multiplier.")
+public class GNormPlusFormatMultiplierReader extends JCasCollectionReader_ImplBase {
+
+    public static final String PARAM_INPUT_PATH = "InputPath";
+    public static final String PARAM_RECURSIVE = "Recursive";
+    public static final String PARAM_BATCH_SIZE = "BatchSize";
+    private final static Logger log = LoggerFactory.getLogger(GNormPlusFormatMultiplierReader.class);
+    @ConfigurationParameter(name = PARAM_INPUT_PATH, description = "Path to a directory or file to be read. In case of a directory, all files ending in .xml will be read.")
+    private String inputPathString;
+    @ConfigurationParameter(name = PARAM_RECURSIVE, mandatory = false, defaultValue = "true", description = "Whether to read also the subdirectories of the input directory, if the input path points to a directory.")
+    private boolean recursive;
+    @ConfigurationParameter(name = PARAM_BATCH_SIZE, mandatory = false, defaultValue = "20", description = "The number of XML file URI references to send to the CAS multipliers in each work assignment. Defaults to 20.")
+    private int batchSize;
+    private Iterator<Path> fileIterator;
+    private int completed;
+
+
+    /**
+     * Called once by the framework at component creation. Reads the descriptor parameters and opens a lazy
+     * stream over the <code>.xml</code> files found at the input path, which may be a directory or a single file.
+     */
+    @Override
+    public void initialize(UimaContext context) throws ResourceInitializationException {
+        super.initialize(context);
+        inputPathString = (String) context.getConfigParameterValue(PARAM_INPUT_PATH);
+        // ofNullable with a boxed cast: the optional parameter value may be null, which must lead to the default instead of a NullPointerException
+        recursive = Optional.ofNullable((Boolean) context.getConfigParameterValue(PARAM_RECURSIVE)).orElse(true);
+        try {
+            Path inputPath = Path.of(inputPathString);
+            // Files.walk accepts a directory as well as a single file, whereas Files.list fails for a file;
+            // a maximum depth of 1 restricts the traversal to the direct children in the non-recursive mode.
+            Stream<Path> pathStream = Files.walk(inputPath, recursive ? Integer.MAX_VALUE : 1, FileVisitOption.FOLLOW_LINKS);
+            // only regular files qualify; this also excludes the start directory that Files.walk includes in its result
+            pathStream = pathStream.filter(p -> Files.isRegularFile(p) && p.toString().endsWith(".xml"));
+            fileIterator = pathStream.iterator();
+        } catch (IOException e) {
+            log.error("Could not read the files of inputPath {}", inputPathString, e);
+            throw new ResourceInitializationException(e);
+        }
+        completed = 0;
+    }
+
+    @Override
+    public void getNext(JCas jCas) throws CollectionException {
+        for (int i = 0; i < batchSize && fileIterator.hasNext(); i++) {
+            URI uri = fileIterator.next().toUri();
+            try {
+                JCoReURI fileType = new JCoReURI(jCas);
+                fileType.setUri(uri.toString());
+                fileType.addToIndexes();
+            } catch (Exception e) {
+                log.error("Exception with URI: " + uri, e);
+                throw new CollectionException(e);
+            }
+            completed++;
+        }
+    }
+
+
+    @Override
+    public Progress[] getProgress() {
+            return new Progress[]{new ProgressImpl(completed, -1, "documents")};
+    }
+
+    @Override
+    public boolean hasNext() {
+        return fileIterator.hasNext();
+    }
+
+}
diff --git a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/MissingInfonException.java b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/MissingInfonException.java
new file mode 100644
index 000000000..59277495c
--- /dev/null
+++ b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/MissingInfonException.java
@@ -0,0 +1,22 @@
+package de.julielab.jcore.reader;
+
+public class MissingInfonException extends Exception {
+    public MissingInfonException() {
+    }
+
+    public MissingInfonException(String message) {
+        super(message);
+    }
+
+    public MissingInfonException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+    public MissingInfonException(Throwable cause) {
+        super(cause);
+    }
+
+    public MissingInfonException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) {
+        super(message, cause, enableSuppression, writableStackTrace);
+    }
+}
diff --git a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/PLACEHOLDER b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/PLACEHOLDER
new file mode 100644
index 000000000..e4b0b196a
--- /dev/null
+++ b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/PLACEHOLDER
@@ -0,0 +1,4 @@
+The actual descriptor must be created by UIMA fit.
+For this purpose, use UIMAfit annotations to annotate the reader component class.
+Then employ the jcore-descriptor-creator's main method to build the descriptor from the reader class.
+The jcore-descriptor-creator is already on the classpath as a Maven dependency.
diff --git a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-reader.xml b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-reader.xml
new file mode 100644
index 000000000..9ce0d444f
--- /dev/null
+++ b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-reader.xml
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<collectionReaderDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <implementationName>de.julielab.jcore.reader.GNormPlusFormatMultiplierReader</implementationName>
+  <processingResourceMetaData>
+    <name>JCoRe GNormPlus BioC Reader</name>
+    <description>This is only a placeholder descriptor. Please use UIMAfit to annotate the component parameters. Then employ the jcore-descriptor-creator's main method to build the descriptor from the reader class GNormPlusFormatMultiplierReader. The jcore-descriptor-creator is already on the classpath as a Maven dependency.</description>
+    <version>2.3.0-SNAPSHOT</version>
+    <vendor>JULIE Lab Jena, Germany</vendor>
+    <configurationParameters/>
+    <configurationParameterSettings/>
+    <typeSystemDescription/>
+    <capabilities/>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>true</outputsNewCASes>
+    </operationalProperties>
+  </processingResourceMetaData>
+</collectionReaderDescription>
diff --git a/jcore-gnp-bioc-reader/src/test/java/de/julielab/jcore/reader/BioCCasPopulatorTest.java b/jcore-gnp-bioc-reader/src/test/java/de/julielab/jcore/reader/BioCCasPopulatorTest.java
new file mode 100644
index 000000000..dddbb8704
--- /dev/null
+++ b/jcore-gnp-bioc-reader/src/test/java/de/julielab/jcore/reader/BioCCasPopulatorTest.java
@@ -0,0 +1,62 @@
+package de.julielab.jcore.reader;
+
+import de.julielab.jcore.types.*;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.assertj.core.api.Condition;
+import org.junit.jupiter.api.Test;
+
+import java.nio.file.Path;
+import java.util.Collection;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+class BioCCasPopulatorTest {
+
+    private JCas getJCas() throws Exception {
+        return JCasFactory.createJCas("de.julielab.jcore.types.jcore-document-structure-pubmed-types", "de.julielab.jcore.types.jcore-semantics-biology-types");
+    }
+
+    @Test
+    public void populateWithNextDocument() throws Exception {
+        BioCCasPopulator bioCCasPopulator = new BioCCasPopulator(Path.of("src", "test", "resources", "test-input-path", "bioc_collection_3.xml"));
+        assertThat(bioCCasPopulator.documentsLeftInCollection()).isEqualTo(2);
+        JCas jCas = getJCas();
+        bioCCasPopulator.populateWithNextDocument(jCas);
+
+        assertThat(jCas.getDocumentText()).startsWith("Langerin").endsWith("antigen-processing pathway.");
+        Title title = JCasUtil.selectSingle(jCas, Title.class);
+        assertThat(title).extracting(Title::getTitleType).isEqualTo("document");
+        assertThat(title).extracting(Title::getCoveredText).isEqualTo("Langerin, a novel C-type lectin specific to Langerhans cells, is an endocytic receptor that induces the formation of Birbeck granules.");
+        AbstractText abstractText = JCasUtil.selectSingle(jCas, AbstractText.class);
+        assertThat(abstractText).extracting(AbstractText::getCoveredText).is(new Condition<>(s -> s.startsWith("We have identified"), "Abstract has an unexpected beginning"));
+        // this document does not have organisms, we check those for the second document in the collection below
+        Collection<Gene> genes = JCasUtil.select(jCas, Gene.class);
+        assertThat(genes).hasSize(7);
+        for (Gene o : genes) {
+            assertThat(o.getResourceEntryList()).isNotNull();
+            assertThat(o.getResourceEntryList()).hasSize(1);
+            assertThat(o.getResourceEntryList(0)).extracting(ResourceEntry::getComponentId).isEqualTo(GNormPlusFormatMultiplierReader.class.getCanonicalName());
+            assertThat(o.getResourceEntryList(0)).extracting(ResourceEntry::getSource).isEqualTo("NCBI Gene");
+            assertThat(o.getResourceEntryList(0)).extracting(ResourceEntry::getEntryId).isNotNull();
+        }
+        assertThat(genes).extracting(Gene::getCoveredText).contains("Langerin");
+
+        assertThat(bioCCasPopulator.documentsLeftInCollection()).isEqualTo(1);
+        jCas.reset();
+        bioCCasPopulator.populateWithNextDocument(jCas);
+        assertThat(jCas.getDocumentText()).startsWith("BCAR1, a human homologue");
+
+        Collection<Organism> organisms = JCasUtil.select(jCas, Organism.class);
+        assertThat(organisms).isNotEmpty();
+        for (Organism o : organisms) {
+            assertThat(o.getResourceEntryList()).isNotNull();
+            assertThat(o.getResourceEntryList()).hasSize(1);
+            assertThat(o.getResourceEntryList(0)).extracting(ResourceEntry::getComponentId).isEqualTo(GNormPlusFormatMultiplierReader.class.getCanonicalName());
+            assertThat(o.getResourceEntryList(0)).extracting(ResourceEntry::getSource).isEqualTo("NCBI Taxonomy");
+            assertThat(o.getResourceEntryList(0)).extracting(ResourceEntry::getEntryId).isNotNull();
+        }
+        assertThat(organisms).extracting(Organism::getCoveredText).contains("human", "patients", "rat", "retrovirus", "ZR-75-1");
+    }
+}
\ No newline at end of file
diff --git a/jcore-gnp-bioc-reader/src/test/java/de/julielab/jcore/reader/GNormPlusFormatMultiplierReaderTest.java b/jcore-gnp-bioc-reader/src/test/java/de/julielab/jcore/reader/GNormPlusFormatMultiplierReaderTest.java
new file mode 100644
index 000000000..b2ad2190e
--- /dev/null
+++ b/jcore-gnp-bioc-reader/src/test/java/de/julielab/jcore/reader/GNormPlusFormatMultiplierReaderTest.java
@@ -0,0 +1,69 @@
+
+package de.julielab.jcore.reader;
+
+
+import de.julielab.jcore.types.casmultiplier.JCoReURI;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.fit.factory.CollectionReaderFactory;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.junit.jupiter.api.Test;
+
+import java.nio.file.Path;
+import java.util.Collection;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/**
+ * Unit tests for jcore-bnp-bioc-reader.
+ * @author 
+ *
+ */
+public class GNormPlusFormatMultiplierReaderTest{
+
+    private JCas getCas() throws Exception {
+        return JCasFactory.createJCas("de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types");
+    }
+    @Test
+    public void testReader() throws Exception {
+        CollectionReader reader = CollectionReaderFactory.createReader(GNormPlusFormatMultiplierReader.class, GNormPlusFormatMultiplierReader.PARAM_INPUT_PATH, Path.of("src", "test", "resources", "test-input-path").toString());
+        assertThat(reader.hasNext()).isTrue();
+        JCas jCas = getCas();
+        reader.getNext(jCas.getCas());
+        Collection<JCoReURI> uris = JCasUtil.select(jCas, JCoReURI.class);
+        assertThat(uris).extracting(JCoReURI::getUri).map(Path::of).map(Path::getFileName).map(Path::toString).containsExactlyInAnyOrder("bioc_collection_2.xml", "bioc_collection_3.xml", "bioc_collection_0.xml", "bioc_collection_1.xml");
+        assertThat(reader.hasNext()).isFalse();
+    }
+
+    @Test
+    public void testReader2() throws Exception {
+        // check that the non-recursive mode also works: only the XML file directly in the input directory is expected
+        CollectionReader reader = CollectionReaderFactory.createReader(GNormPlusFormatMultiplierReader.class, GNormPlusFormatMultiplierReader.PARAM_INPUT_PATH, Path.of("src", "test", "resources", "test-input-path").toString(), GNormPlusFormatMultiplierReader.PARAM_RECURSIVE, false);
+        assertThat(reader.hasNext()).isTrue();
+        JCas jCas = getCas();
+        reader.getNext(jCas.getCas());
+        Collection<JCoReURI> uris = JCasUtil.select(jCas, JCoReURI.class);
+        assertThat(uris).extracting(JCoReURI::getUri).map(Path::of).map(Path::getFileName).map(Path::toString).containsExactlyInAnyOrder("bioc_collection_3.xml");
+        assertThat(reader.hasNext()).isFalse();
+    }
+
+    @Test
+    public void testReader3() throws Exception {
+        // check that the batch size parameter works as intended
+        CollectionReader reader = CollectionReaderFactory.createReader(GNormPlusFormatMultiplierReader.class, GNormPlusFormatMultiplierReader.PARAM_INPUT_PATH, Path.of("src", "test", "resources", "test-input-path").toString(), GNormPlusFormatMultiplierReader.PARAM_BATCH_SIZE, 2);
+        assertThat(reader.hasNext()).isTrue();
+        JCas jCas = getCas();
+        reader.getNext(jCas.getCas());
+        Collection<JCoReURI> uris = JCasUtil.select(jCas, JCoReURI.class);
+        assertThat(uris).hasSize(2);
+        assertThat(reader.hasNext()).isTrue();
+        jCas.reset();
+        // there should be another batch available
+        reader.getNext(jCas.getCas());
+        Collection<JCoReURI> uris2 = JCasUtil.select(jCas, JCoReURI.class);
+        assertThat(uris2).hasSize(2);
+        // now the reader should be exhausted
+        assertThat(reader.hasNext()).isFalse();
+    }
+}
diff --git a/jcore-gnp-bioc-reader/src/test/resources/test-input-path/bioc_collection_3.xml b/jcore-gnp-bioc-reader/src/test/resources/test-input-path/bioc_collection_3.xml
new file mode 100644
index 000000000..a874a1823
--- /dev/null
+++ b/jcore-gnp-bioc-reader/src/test/resources/test-input-path/bioc_collection_3.xml
@@ -0,0 +1 @@
+<?xml version='1.0' encoding='UTF-8'?><!DOCTYPE collection SYSTEM "BioC.dtd"><collection><source>JCoRe GNormPlus BioC Writer</source><date>Fri Feb 18 13:55:36 CET 2022</date><key>PubTator.key</key><document><id>10661407</id><passage><infon key="type">title</infon><offset>0</offset><text>Langerin, a novel C-type lectin specific to Langerhans cells, is an endocytic receptor that induces the formation of Birbeck granules.</text><annotation id="0"><infon key="NCBI Gene">50489</infon><infon key="type">Gene</infon><location offset="0" length="8"/><text>Langerin</text></annotation></passage><passage><infon key="type">abstract</infon><offset>135</offset><text>We have identified a type II Ca2+-dependent lectin displaying mannose-binding specificity, exclusively expressed by Langerhans cells (LC), and named Langerin. LC are uniquely characterized by Birbeck granules (BG), which are organelles consisting of superimposed and zippered membranes. Here, we have shown that Langerin is constitutively associated with BG and that antibody to Langerin is internalized into these structures. Remarkably, transfection of Langerin cDNA into fibroblasts created a compact network of membrane structures with typical features of BG. Langerin is thus a potent inducer of membrane superimposition and zippering leading to BG formation. 
Our data suggest that induction of BG is a consequence of the antigen-capture function of Langerin, allowing routing into these organelles and providing access to a nonclassical antigen-processing pathway.</text><annotation id="1"><infon key="NCBI Gene">50489</infon><infon key="type">Gene</infon><location offset="284" length="8"/><text>Langerin</text></annotation><annotation id="2"><infon key="NCBI Gene">50489</infon><infon key="type">Gene</infon><location offset="447" length="8"/><text>Langerin</text></annotation><annotation id="3"><infon key="NCBI Gene">50489</infon><infon key="type">Gene</infon><location offset="514" length="8"/><text>Langerin</text></annotation><annotation id="4"><infon key="NCBI Gene">50489</infon><infon key="type">Gene</infon><location offset="590" length="8"/><text>Langerin</text></annotation><annotation id="5"><infon key="NCBI Gene">50489</infon><infon key="type">Gene</infon><location offset="699" length="8"/><text>Langerin</text></annotation><annotation id="6"><infon key="NCBI Gene">50489</infon><infon key="type">Gene</infon><location offset="890" length="8"/><text>Langerin</text></annotation></passage></document><document><id>10639512</id><passage><infon key="type">title</infon><offset>0</offset><text>BCAR1, a human homologue of the adapter protein p130Cas, and antiestrogen resistance in breast cancer cells.</text><annotation id="0"><infon key="NCBI Gene">9564</infon><infon key="type">Gene</infon><location offset="0" length="5"/><text>BCAR1</text></annotation><annotation id="1"><infon key="NCBI Gene">9564</infon><infon key="type">Gene</infon><location offset="48" length="7"/><text>p130Cas</text></annotation><annotation id="2"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="9" length="5"/><text>human</text></annotation></passage><passage><infon key="type">abstract</infon><offset>109</offset><text>Treatment of breast cancer with the antiestrogen tamoxifen is effective in approximately one half of 
the patients with estrogen receptor-positive disease, but tumors recur frequently because of the development of metastases that are resistant to tamoxifen. We have previously shown that mutagenesis of human estrogen-dependent ZR-75-1 breast cancer cells by insertion of a defective retrovirus genome caused the cells to become antiestrogen resistant. In this study, we isolated and characterized the crucial gene at the breast cancer antiestrogen resistance 1 (BCAR1) locus. Transfer of the BCAR1 locus from retrovirus-mutated, antiestrogen-resistant cells to estrogen-dependent ZR-75-1 cells by cell fusion conferred an antiestrogen-resistant phenotype on the recipient cells. The complete coding sequence of BCAR1 was isolated by use of exon-trapping and complementary DNA (cDNA) library screening. Sequence analysis of human BCAR1 cDNA predicted a protein of 870 amino acids that was strongly homologous to rat p130Cas-adapter protein. Genomic analysis revealed that BCAR1 consists of seven exons and is located at chromosome 16q23.1. BCAR1 transcripts were detected in multiple human tissues and were similar in size to transcripts produced by retrovirus-mutated ZR-75-1 cells. Transfection of BCAR1 cDNA into ZR-75-1 cells again resulted in sustained cell proliferation in the presence of antiestrogens, confirming that BCAR1 was the responsible gene in the locus. Overexpression of the BCAR1 gene confers antiestrogen resistance on human ZR-75-1 breast cancer cells. Overexpression of BCAR1 in retrovirus-mutated cells appears to result from activation of the gene's promoter. 
The isolation and characterization of this gene open new avenues to elucidating mechanisms by which the growth of human breast cancer becomes independent of estrogen.</text><annotation id="3"><infon key="NCBI Gene">9564</infon><infon key="type">Gene</infon><location offset="630" length="39"/><text>breast cancer antiestrogen resistance 1</text></annotation><annotation id="4"><infon key="NCBI Gene">9564</infon><infon key="type">Gene</infon><location offset="671" length="5"/><text>BCAR1</text></annotation><annotation id="5"><infon key="NCBI Gene">9564</infon><infon key="type">Gene</infon><location offset="701" length="5"/><text>BCAR1</text></annotation><annotation id="6"><infon key="NCBI Gene">9564</infon><infon key="type">Gene</infon><location offset="920" length="5"/><text>BCAR1</text></annotation><annotation id="7"><infon key="NCBI Gene">9564</infon><infon key="type">Gene</infon><location offset="1038" length="5"/><text>BCAR1</text></annotation><annotation id="8"><infon key="NCBI Gene">25414</infon><infon key="type">Gene</infon><location offset="1124" length="23"/><text>p130Cas-adapter protein</text></annotation><annotation id="9"><infon key="NCBI Gene">9564</infon><infon key="type">Gene</infon><location offset="1180" length="5"/><text>BCAR1</text></annotation><annotation id="10"><infon key="NCBI Gene">9564</infon><infon key="type">Gene</infon><location offset="1248" length="5"/><text>BCAR1</text></annotation><annotation id="11"><infon key="NCBI Gene">9564</infon><infon key="type">Gene</infon><location offset="1408" length="5"/><text>BCAR1</text></annotation><annotation id="12"><infon key="NCBI Gene">9564</infon><infon key="type">Gene</infon><location offset="1535" length="5"/><text>BCAR1</text></annotation><annotation id="13"><infon key="NCBI Gene">9564</infon><infon key="type">Gene</infon><location offset="1602" length="5"/><text>BCAR1</text></annotation><annotation id="14"><infon key="NCBI Gene">9564</infon><infon key="type">Gene</infon><location offset="1701" 
length="5"/><text>BCAR1</text></annotation><annotation id="15"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="214" length="8"/><text>patients</text></annotation><annotation id="16"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="411" length="5"/><text>human</text></annotation><annotation id="17"><infon key="NCBI Taxonomy">31931</infon><infon key="type">Species</infon><location offset="492" length="10"/><text>retrovirus</text></annotation><annotation id="18"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1032" length="5"/><text>human</text></annotation><annotation id="19"><infon key="NCBI Taxonomy">10116</infon><infon key="type">Species</infon><location offset="1120" length="3"/><text>rat</text></annotation><annotation id="20"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1292" length="5"/><text>human</text></annotation><annotation id="21"><infon key="NCBI Taxonomy">31931</infon><infon key="type">Species</infon><location offset="1358" length="10"/><text>retrovirus</text></annotation><annotation id="22"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1648" length="5"/><text>human</text></annotation><annotation id="23"><infon key="NCBI Taxonomy">31931</infon><infon key="type">Species</infon><location offset="1710" length="10"/><text>retrovirus</text></annotation><annotation id="24"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1907" length="5"/><text>human</text></annotation><annotation id="25"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="436" length="7"/><text>ZR-75-1</text></annotation><annotation id="26"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="789" length="7"/><text>ZR-75-1</text></annotation><annotation id="27"><infon 
key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1377" length="7"/><text>ZR-75-1</text></annotation><annotation id="28"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1424" length="7"/><text>ZR-75-1</text></annotation><annotation id="29"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1654" length="7"/><text>ZR-75-1</text></annotation></passage></document></collection>
\ No newline at end of file
diff --git a/jcore-gnp-bioc-reader/src/test/resources/test-input-path/subdir1/bioc_collection_0.xml b/jcore-gnp-bioc-reader/src/test/resources/test-input-path/subdir1/bioc_collection_0.xml
new file mode 100644
index 000000000..a2f9b537c
--- /dev/null
+++ b/jcore-gnp-bioc-reader/src/test/resources/test-input-path/subdir1/bioc_collection_0.xml
@@ -0,0 +1,2 @@
+<?xml version='1.0' encoding='UTF-8'?><!DOCTYPE collection SYSTEM
+        "/Users/faessler/Coding/git/jcore-base/jcore-gnp-bioc-reader/BioC.dtd"><collection><source>JCoRe GNormPlus BioC Writer</source><date>Fri Feb 18 13:55:36 CET 2022</date><key>PubTator.key</key><document><id>1378843</id><passage><infon key="type">title</infon><offset>0</offset><text>Cloning and expression of a cell surface receptor for advanced glycosylation end products of proteins.</text></passage><passage><infon key="type">abstract</infon><offset>103</offset><text>Advanced glycosylation end products of proteins (AGEs) are nonenzymatically glycosylated proteins which accumulate in vascular tissue in aging and at an accelerated rate in diabetes. A approximately 35-kDa polypeptide with a unique NH2-terminal sequence has been isolated from bovine lung and found to be present on the surface of endothelial cells where it mediates the binding of AGEs (receptor for advanced glycosylation end product or RAGE). Using an oligonucleotide probe based on the amino-terminal sequence of RAGE, an apparently full-length cDNA of 1.5 kilobases was isolated from a bovine lung cDNA library. This cDNA encoded a 394 amino acid mature protein comprised of the following putative domains: an extracellular domain of 332 amino acids, a single hydrophobic membrane spanning domain of 19 amino acids, and a carboxyl-terminal domain of 43 amino acids. A partial clone encoding the human counterpart of RAGE, isolated from a human lung library, was found to be approximately 90% homologous to the bovine molecule. Based on computer analysis of the amino acid sequence of RAGE and comparison with databases, RAGE is a new member of the immunoglobulin superfamily of cell surface molecules and shares significant homology with MUC 18, NCAM, and the cytoplasmic domain of CD20. Expression of the RAGE cDNA in 293 cells allowed them to bind 125I-AGE-albumin in a saturable and dose-dependent manner (Kd approximately 100 nM), blocked by antibody to RAGE. 
Western blots of 293 cells transfected with RAGE cDNA probed with anti-RAGE IgG demonstrated expression of immunoreactive protein compared to its absence in mock-transfected cells. These results suggest that RAGE functions as a cell surface receptor for AGEs, which could potentially mediate cellular effects of this class of glycosylated proteins.</text><annotation id="0"><infon key="NCBI Gene">280986</infon><infon key="type">Gene</infon><location offset="542" length="4"/><text>RAGE</text></annotation><annotation id="1"><infon key="NCBI Gene">280986</infon><infon key="type">Gene</infon><location offset="620" length="4"/><text>RAGE</text></annotation><annotation id="2"><infon key="NCBI Gene">177</infon><infon key="type">Gene</infon><location offset="1024" length="4"/><text>RAGE</text></annotation><annotation id="3"><infon key="NCBI Gene">280986</infon><infon key="type">Gene</infon><location offset="1192" length="4"/><text>RAGE</text></annotation><annotation id="4"><infon key="NCBI Gene">280986</infon><infon key="type">Gene</infon><location offset="1228" length="4"/><text>RAGE</text></annotation><annotation id="7"><infon key="NCBI Gene">505653</infon><infon key="type">Gene</infon><location offset="1390" length="4"/><text>CD20</text></annotation><annotation id="8"><infon key="NCBI Gene">280986</infon><infon key="type">Gene</infon><location offset="1414" length="4"/><text>RAGE</text></annotation><annotation id="9"><infon key="NCBI Gene">280986</infon><infon key="type">Gene</infon><location offset="1566" length="4"/><text>RAGE</text></annotation><annotation id="10"><infon key="NCBI Gene">280986</infon><infon key="type">Gene</infon><location offset="1616" length="4"/><text>RAGE</text></annotation><annotation id="11"><infon key="NCBI Gene">280986</infon><infon key="type">Gene</infon><location offset="1643" length="4"/><text>RAGE</text></annotation><annotation id="12"><infon key="NCBI Gene">280986</infon><infon key="type">Gene</infon><location offset="1780" 
length="4"/><text>RAGE</text></annotation><annotation id="13"><infon key="NCBI Taxonomy">9913</infon><infon key="type">Species</infon><location offset="380" length="6"/><text>bovine</text></annotation><annotation id="14"><infon key="NCBI Taxonomy">9913</infon><infon key="type">Species</infon><location offset="694" length="6"/><text>bovine</text></annotation><annotation id="15"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1003" length="5"/><text>human</text></annotation><annotation id="16"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1046" length="5"/><text>human</text></annotation><annotation id="17"><infon key="NCBI Taxonomy">9913</infon><infon key="type">Species</infon><location offset="1118" length="6"/><text>bovine</text></annotation></passage></document><document><id>10896916</id><passage><infon key="type">title</infon><offset>0</offset><text>Alpha(2) adrenoceptors regulate proliferation of human intestinal epithelial cells.</text><annotation id="0"><infon key="NCBI Gene">150</infon><infon key="type">Gene</infon><location offset="0" length="22"/><text>Alpha(2) adrenoceptors</text></annotation><annotation id="1"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="49" length="5"/><text>human</text></annotation></passage><passage><infon key="type">abstract</infon><offset>84</offset><text>Previous studies on rodents have suggested that catecholamines stimulate proliferation of the intestinal epithelium through activation of alpha(2) adrenoceptors located on crypt cells. The occurrence of this effect awaits demonstration in humans and the molecular mechanisms involved have not yet been elucidated. Here, we examined the effect of alpha(2) agonists on a clone of Caco2 cells expressing the human alpha(2A) adrenoceptor. Cells were transfected with a bicistronic plasmid containing the alpha2C10 and neomycin phosphotransferase genes. 
G418 resistant clones were assayed for receptor expression using radioligand binding. Receptor functionality was assessed by testing its ability to couple Gi proteins and to inhibit cAMP production. Mitogen activated protein kinase (MAPK) phosphorylation was followed by western blot, and cell proliferation was estimated by measuring protein and DNA content. Permanent transfection of Caco2 cells allowed us to obtain a clone (Caco2-3B) expressing alpha(2A) adrenoceptors at a density similar to that found in normal human intestinal epithelium. Caco2-3B retained morphological features and brush border enzyme expression characteristic of enterocytic differentiation. The receptor was coupled to Gi2/Gi3 proteins and its stimulation caused marked diminution of forskolin induced cAMP production. Treatment of Caco2-3B with UK14304 (alpha(2) agonist) induced a rapid increase in the phosphorylation state of MAPK, extracellular regulated protein kinase 1 (Erk1), and 2 (Erk2). This event was totally abolished in pertussis toxin treated cells and in the presence of kinase inhibitors (genistein or PD98059). It was unaffected by protein kinase C downregulation but correlated with a transient increase in Shc tyrosine phosphorylation. Finally, sustained exposure of Caco2-3B to UK14304 resulted in modest but significant acceleration of cell proliferation. None of these effects was observed in the parental cell line Caco2. 
The results obtained in the present study support a regulatory role for alpha(2) adrenoceptors in intestinal cell proliferation.</text><annotation id="2"><infon key="NCBI Gene">150</infon><infon key="type">Gene</infon><location offset="222" length="22"/><text>alpha(2) adrenoceptors</text></annotation><annotation id="3"><infon key="NCBI Gene">150</infon><infon key="type">Gene</infon><location offset="495" length="22"/><text>alpha(2A) adrenoceptor</text></annotation><annotation id="4"><infon key="NCBI Gene">150</infon><infon key="type">Gene</infon><location offset="584" length="9"/><text>alpha2C10</text></annotation><annotation id="5"><infon key="NCBI Gene">5595;5594;5595</infon><infon key="type">Gene</infon><location offset="866" length="4"/><text>MAPK</text></annotation><annotation id="6"><infon key="NCBI Gene">5595;5594;5595</infon><infon key="type">Gene</infon><location offset="1542" length="4"/><text>MAPK</text></annotation><annotation id="7"><infon key="NCBI Gene">5595</infon><infon key="type">Gene</infon><location offset="1548" length="40"/><text>extracellular regulated protein kinase 1</text></annotation><annotation id="8"><infon key="NCBI Gene">5595</infon><infon key="type">Gene</infon><location offset="1590" length="4"/><text>Erk1</text></annotation><annotation id="9"><infon key="NCBI Gene">5594</infon><infon key="type">Gene</infon><location offset="1604" length="4"/><text>Erk2</text></annotation><annotation id="10"><infon key="NCBI Gene">6464</infon><infon key="type">Gene</infon><location offset="1839" length="3"/><text>Shc</text></annotation><annotation id="11"><infon key="NCBI Gene">150</infon><infon key="type">Gene</infon><location offset="2131" length="22"/><text>alpha(2) adrenoceptors</text></annotation><annotation id="12"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="323" length="6"/><text>humans</text></annotation><annotation id="13"><infon key="NCBI Taxonomy">9606</infon><infon 
key="type">Species</infon><location offset="489" length="5"/><text>human</text></annotation><annotation id="14"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1151" length="5"/><text>human</text></annotation><annotation id="15"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="462" length="5"/><text>Caco2</text></annotation><annotation id="16"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1019" length="5"/><text>Caco2</text></annotation></passage></document></collection>
\ No newline at end of file
diff --git a/jcore-gnp-bioc-reader/src/test/resources/test-input-path/subdir1/bioc_collection_1.xml b/jcore-gnp-bioc-reader/src/test/resources/test-input-path/subdir1/bioc_collection_1.xml
new file mode 100644
index 000000000..b2144e781
--- /dev/null
+++ b/jcore-gnp-bioc-reader/src/test/resources/test-input-path/subdir1/bioc_collection_1.xml
@@ -0,0 +1,2 @@
+<?xml version='1.0' encoding='UTF-8'?><!DOCTYPE collection SYSTEM
+        "/Users/faessler/Coding/git/jcore-base/jcore-gnp-bioc-reader/BioC.dtd"><collection><source>JCoRe GNormPlus BioC Writer</source><date>Fri Feb 18 13:55:36 CET 2022</date><key>PubTator.key</key><document><id>10880510</id><passage><infon key="type">title</infon><offset>0</offset><text>Human TREK2, a 2P domain mechano-sensitive K+ channel with multiple regulations by polyunsaturated fatty acids, lysophospholipids, and Gs, Gi, and Gq protein-coupled receptors.</text><annotation id="0"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="6" length="5"/><text>TREK2</text></annotation><annotation id="1"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="0" length="5"/><text>Human</text></annotation></passage><passage><infon key="type">abstract</infon><offset>177</offset><text>Mechano-sensitive and fatty acid-activated K(+) belong to the structural class of K(+) channel with two pore domains. Here, we report the isolation and the characterization of a novel member of this family. This channel, called TREK2, is closely related to TREK1 (78% of homology). Its gene is located on chromosome 14q31. TREK2 is abundantly expressed in pancreas and kidney and to a lower level in brain, testis, colon, and small intestine. In the central nervous system, TREK2 has a widespread distribution with the highest levels of expression in cerebellum, occipital lobe, putamen, and thalamus. In transfected cells, TREK2 produces rapidly activating and non-inactivating outward rectifier K(+) currents. The single-channel conductance is 100 picosiemens at +40 mV in 150 mm K(+). The currents can be strongly stimulated by polyunsaturated fatty acid such as arachidonic, docosahexaenoic, and linoleic acids and by lysophosphatidylcholine. The channel is also activated by acidification of the intracellular medium. TREK2 is blocked by application of intracellular cAMP. 
As with TREK1, TREK2 is activated by the volatile general anesthetics chloroform, halothane, and isoflurane and by the neuroprotective agent riluzole. TREK2 can be positively or negatively regulated by a variety of neurotransmitter receptors. Stimulation of the G(s)-coupled receptor 5HT4sR or the G(q)-coupled receptor mGluR1 inhibits channel activity, whereas activation of the G(i)-coupled receptor mGluR2 increases TREK2 currents. These multiple types of regulations suggest that TREK2 plays an important role as a target of neurotransmitter action.</text><annotation id="2"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="405" length="5"/><text>TREK2</text></annotation><annotation id="3"><infon key="NCBI Gene">3776</infon><infon key="type">Gene</infon><location offset="434" length="5"/><text>TREK1</text></annotation><annotation id="4"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="500" length="5"/><text>TREK2</text></annotation><annotation id="5"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="651" length="5"/><text>TREK2</text></annotation><annotation id="6"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="801" length="5"/><text>TREK2</text></annotation><annotation id="7"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="1200" length="5"/><text>TREK2</text></annotation><annotation id="8"><infon key="NCBI Gene">3776</infon><infon key="type">Gene</infon><location offset="1263" length="5"/><text>TREK1</text></annotation><annotation id="9"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="1270" length="5"/><text>TREK2</text></annotation><annotation id="10"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="1406" length="5"/><text>TREK2</text></annotation><annotation id="11"><infon key="NCBI Gene">3360</infon><infon 
key="type">Gene</infon><location offset="1539" length="6"/><text>5HT4sR</text></annotation><annotation id="12"><infon key="NCBI Gene">2911</infon><infon key="type">Gene</infon><location offset="1575" length="6"/><text>mGluR1</text></annotation><annotation id="13"><infon key="NCBI Gene">14800</infon><infon key="type">Gene</infon><location offset="1657" length="6"/><text>mGluR2</text></annotation><annotation id="14"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="1674" length="5"/><text>TREK2</text></annotation><annotation id="15"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="1739" length="5"/><text>TREK2</text></annotation></passage></document><document><id>10803599</id><passage><infon key="type">title</infon><offset>0</offset><text>Enhanced growth of MCF-7 breast cancer cells overexpressing parathyroid hormone-related peptide.</text><annotation id="0"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="60" length="35"/><text>parathyroid hormone-related peptide</text></annotation><annotation id="1"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="19" length="5"/><text>MCF-7</text></annotation></passage><passage><infon key="type">abstract</infon><offset>97</offset><text>PTH-related peptide (PTHrP) is a secreted protein produced by breast cancer cells both in vivo and in vitro. Because of its structural similarity to PTH at the amino terminus, the two proteins interact with a common cell surface receptor, the PTH/PTHrP receptor. When overproduced by tumor cells, PTHrP enters the circulation, giving rise to the common paraneoplastic syndrome of humoral hypercalcemia of malignancy. Although initially discovered in malignancies, PTHrP is now known to be produced by most cells and tissues in the body. 
It acts as an autocrine and paracrine mediator of cell proliferation and differentiation, effects which are mediated via the PTH/PTHrP receptor. Recent evidence also has shown that, directly after translation, PTHrP is able to enter the nucleus and/or nucleolus and influence cell cycle progression and apoptosis. In this study, we have either overproduced PTHrP or inhibited endogenous PTHrP production in the breast cancer cell line, MCF-7. Overexpression of PTHrP was associated with an increase in mitogenesis, whereas inhibiting endogenous PTHrP production resulted in decreased cell proliferation. The overexpressed peptide targeted to the perinuclear space. In contrast, PTHrP interaction with the cell surface PTH/PTHrP receptor resulted in decreased cell proliferation in the same cell line. This latter effect is dependent on interaction with the receptor, in that exogenously added PTHrP moieties known not to interact with the receptor had no effect on cell growth. Furthermore, neutralization of added peptide with an anti-PTHrP antiserum completely abolished the growth inhibitory effects. In contrast, this antibody has no effect on the increased proliferation rate of the MCF-7 transfectants that overexpress PTHrP, compared with control cells. The net effect of autocrine/paracrine and intracrine effects of PTHrP in MCF-7 cells overproducing the peptide is accelerated cell growth. 
These findings have critical implications regarding the role of PTHrP in breast cancer, and they suggest that controlling PTHrP production in breast cancer may be useful therapeutically.</text><annotation id="2"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="97" length="19"/><text>PTH-related peptide</text></annotation><annotation id="3"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="118" length="5"/><text>PTHrP</text></annotation><annotation id="4"><infon key="NCBI Gene">5741</infon><infon key="type">Gene</infon><location offset="246" length="3"/><text>PTH</text></annotation><annotation id="5"><infon key="NCBI Gene">5745</infon><infon key="type">Gene</infon><location offset="340" length="18"/><text>PTH/PTHrP receptor</text></annotation><annotation id="6"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="394" length="5"/><text>PTHrP</text></annotation><annotation id="7"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="561" length="5"/><text>PTHrP</text></annotation><annotation id="8"><infon key="NCBI Gene">5745</infon><infon key="type">Gene</infon><location offset="759" length="18"/><text>PTH/PTHrP receptor</text></annotation><annotation id="9"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="844" length="5"/><text>PTHrP</text></annotation><annotation id="10"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="991" length="5"/><text>PTHrP</text></annotation><annotation id="11"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="1021" length="5"/><text>PTHrP</text></annotation><annotation id="12"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="1095" length="5"/><text>PTHrP</text></annotation><annotation id="13"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="1179" 
length="5"/><text>PTHrP</text></annotation><annotation id="14"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="1312" length="5"/><text>PTHrP</text></annotation><annotation id="15"><infon key="NCBI Gene">5745</infon><infon key="type">Gene</infon><location offset="1352" length="18"/><text>PTH/PTHrP receptor</text></annotation><annotation id="16"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="1527" length="5"/><text>PTHrP</text></annotation><annotation id="17"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="1670" length="5"/><text>PTHrP</text></annotation><annotation id="18"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="1859" length="5"/><text>PTHrP</text></annotation><annotation id="19"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="1959" length="5"/><text>PTHrP</text></annotation><annotation id="20"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="2098" length="5"/><text>PTHrP</text></annotation><annotation id="21"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="2156" length="5"/><text>PTHrP</text></annotation><annotation id="22"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1968" length="5"/><text>MCF-7</text></annotation></passage></document></collection>
\ No newline at end of file
diff --git a/jcore-gnp-bioc-reader/src/test/resources/test-input-path/subdir2/bioc_collection_2.xml b/jcore-gnp-bioc-reader/src/test/resources/test-input-path/subdir2/bioc_collection_2.xml
new file mode 100644
index 000000000..2ed9fa4f7
--- /dev/null
+++ b/jcore-gnp-bioc-reader/src/test/resources/test-input-path/subdir2/bioc_collection_2.xml
@@ -0,0 +1,2 @@
+<?xml version='1.0' encoding='UTF-8'?><!DOCTYPE collection SYSTEM
+        "/Users/faessler/Coding/git/jcore-base/jcore-gnp-bioc-reader/BioC.dtd"><collection><source>JCoRe GNormPlus BioC Writer</source><date>Fri Feb 18 13:55:36 CET 2022</date><key>PubTator.key</key><document><id>10722742</id><passage><infon key="type">title</infon><offset>0</offset><text>Mdm2 is a RING finger-dependent ubiquitin protein ligase for itself and p53.</text><annotation id="0"><infon key="NCBI Gene">4193</infon><infon key="type">Gene</infon><location offset="0" length="4"/><text>Mdm2</text></annotation><annotation id="1"><infon key="NCBI Gene">7157</infon><infon key="type">Gene</infon><location offset="72" length="3"/><text>p53</text></annotation></passage><passage><infon key="type">abstract</infon><offset>77</offset><text>Mdm2 has been shown to regulate p53 stability by targeting the p53 protein for proteasomal degradation. We now report that Mdm2 is a ubiquitin protein ligase (E3) for p53 and that its activity is dependent on its RING finger. Furthermore, we show that Mdm2 mediates its own ubiquitination in a RING finger-dependent manner, which requires no eukaryotic proteins other than ubiquitin-activating enzyme (E1) and an ubiquitin-conjugating enzyme (E2). It is apparent, therefore, that Mdm2 manifests an intrinsic capacity to mediate ubiquitination. Mutation of putative zinc coordination residues abrogated this activity, as did chelation of divalent cations. After cation chelation, the full activity could be restored by addition of zinc. We further demonstrate that the degradation of p53 and Mdm2 in cells requires additional potential zinc-coordinating residues beyond those required for the intrinsic activity of Mdm2 in vitro. Replacement of the Mdm2 RING with that of another protein (Praja1) reconstituted ubiquitination and proteasomal degradation of Mdm2. 
However, this RING was ineffective in ubiquitination and proteasomal targeting of p53, suggesting that there may be specificity at the level of the RING in the recognition of heterologous substrates.</text><annotation id="2"><infon key="NCBI Gene">4193</infon><infon key="type">Gene</infon><location offset="77" length="4"/><text>Mdm2</text></annotation><annotation id="3"><infon key="NCBI Gene">7157</infon><infon key="type">Gene</infon><location offset="109" length="3"/><text>p53</text></annotation><annotation id="4"><infon key="NCBI Gene">7157</infon><infon key="type">Gene</infon><location offset="140" length="3"/><text>p53</text></annotation><annotation id="5"><infon key="NCBI Gene">4193</infon><infon key="type">Gene</infon><location offset="200" length="4"/><text>Mdm2</text></annotation><annotation id="6"><infon key="NCBI Gene">7157</infon><infon key="type">Gene</infon><location offset="244" length="3"/><text>p53</text></annotation><annotation id="7"><infon key="NCBI Gene">4193</infon><infon key="type">Gene</infon><location offset="329" length="4"/><text>Mdm2</text></annotation><annotation id="8"><infon key="NCBI Gene">7318</infon><infon key="type">Gene</infon><location offset="450" length="32"/><text>ubiquitin-activating enzyme (E1)</text></annotation><annotation id="9"><infon key="NCBI Gene">4193</infon><infon key="type">Gene</infon><location offset="557" length="4"/><text>Mdm2</text></annotation><annotation id="10"><infon key="NCBI Gene">7157</infon><infon key="type">Gene</infon><location offset="860" length="3"/><text>p53</text></annotation><annotation id="11"><infon key="NCBI Gene">4193</infon><infon key="type">Gene</infon><location offset="868" length="4"/><text>Mdm2</text></annotation><annotation id="12"><infon key="NCBI Gene">4193</infon><infon key="type">Gene</infon><location offset="991" length="4"/><text>Mdm2</text></annotation><annotation id="13"><infon key="NCBI Gene">4193</infon><infon key="type">Gene</infon><location offset="1025" 
length="4"/><text>Mdm2</text></annotation><annotation id="14"><infon key="NCBI Gene">64219</infon><infon key="type">Gene</infon><location offset="1065" length="6"/><text>Praja1</text></annotation><annotation id="15"><infon key="NCBI Gene">4193</infon><infon key="type">Gene</infon><location offset="1133" length="4"/><text>Mdm2</text></annotation><annotation id="16"><infon key="NCBI Gene">7157</infon><infon key="type">Gene</infon><location offset="1221" length="3"/><text>p53</text></annotation></passage></document><document><id>1770008</id><passage><infon key="type">title</infon><offset>0</offset><text>Structural analysis and expression of human desmoglein: a cadherin-like component of the desmosome.</text><annotation id="0"><infon key="NCBI Gene">1828;281131</infon><infon key="type">Gene</infon><location offset="44" length="10"/><text>desmoglein</text></annotation><annotation id="1"><infon key="NCBI Gene">1000</infon><infon key="type">Gene</infon><location offset="58" length="8"/><text>cadherin</text></annotation><annotation id="2"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="38" length="5"/><text>human</text></annotation></passage><passage><infon key="type">abstract</infon><offset>100</offset><text>Desmosomes are adhesive cell junctions found in great abundance in tissues that experience mechanical stress. The transmembrane desmosomal glycoproteins have been proposed to play a role in cell adhesion; desmoglein I (DGI) is a major member of this class of desmosomal molecules. However, evidence supporting a role for DGI in cell adhesion or in the plaque is lacking. In order to begin to understand DGI function we have identified human cDNA clones encoding the entire mature polypeptide of 1000 amino acids. Our data suggest that like the bovine DGI molecule human DGI is highly related to the calcium-dependent class of cell adhesion molecules known as cadherins. 
Four related extracellular domains located in the amino-terminal domain of the molecule contain putative calcium binding sites originally identified in the cadherins. The highest degree of similarity between human N-cadherin and human DGI, and likewise between bovine DGI and human DGI, is greatest in the most amino-terminal extracellular domain. This suggests a conserved functional role for the extracellular domains, perhaps in calcium-mediated cell adhesion. The cytoplasmic portion of the molecule contains a cadherin-like region and, like bovine DGI, a carboxy-terminal tail that is not present in the cadherins, comprising three additional domains. One of these contains a novel repeating motif of 29 +/- 1 residues, first identified in bovine DGI. Each of the highly homologous repeating units is likely to consist of two beta-strands and two turns with special characteristics. Five amino acids that are identical in bovine and human DGI lie in the second of the two predicted beta-strands, and intriguingly contain putative target sites for protein kinase C. On the basis of structural analysis, a model predicting the disposition of human DGI domains in the desmosome is proposed. Northern analysis suggests that unlike bovine epidermis, which expresses a single mRNA of reported size approximately 7.6 kb, human foreskin and cultured keratinocytes display a complex pattern with bands of approximately 7.2, 4.0 and 3.0 kb. 
Each of these cross-hybridizing mRNAs is coordinately expressed in normal human keratinocytes in response to long-term culture and increased calcium.</text><annotation id="3"><infon key="NCBI Gene">1828</infon><infon key="type">Gene</infon><location offset="305" length="12"/><text>desmoglein I</text></annotation><annotation id="4"><infon key="NCBI Gene">1828</infon><infon key="type">Gene</infon><location offset="319" length="3"/><text>DGI</text></annotation><annotation id="5"><infon key="NCBI Gene">1828</infon><infon key="type">Gene</infon><location offset="421" length="3"/><text>DGI</text></annotation><annotation id="6"><infon key="NCBI Gene">1828</infon><infon key="type">Gene</infon><location offset="503" length="3"/><text>DGI</text></annotation><annotation id="7"><infon key="NCBI Gene">281131</infon><infon key="type">Gene</infon><location offset="651" length="3"/><text>DGI</text></annotation><annotation id="8"><infon key="NCBI Gene">1828</infon><infon key="type">Gene</infon><location offset="670" length="3"/><text>DGI</text></annotation><annotation id="9"><infon key="NCBI Gene">1000</infon><infon key="type">Gene</infon><location offset="984" length="10"/><text>N-cadherin</text></annotation><annotation id="10"><infon key="NCBI Gene">1828</infon><infon key="type">Gene</infon><location offset="1005" length="3"/><text>DGI</text></annotation><annotation id="11"><infon key="NCBI Gene">281131</infon><infon key="type">Gene</infon><location offset="1038" length="3"/><text>DGI</text></annotation><annotation id="12"><infon key="NCBI Gene">1828</infon><infon key="type">Gene</infon><location offset="1052" length="3"/><text>DGI</text></annotation><annotation id="13"><infon key="NCBI Gene">281131</infon><infon key="type">Gene</infon><location offset="1323" length="3"/><text>DGI</text></annotation><annotation id="14"><infon key="NCBI Gene">281131</infon><infon key="type">Gene</infon><location offset="1522" length="3"/><text>DGI</text></annotation><annotation id="15"><infon 
key="NCBI Gene">1828</infon><infon key="type">Gene</infon><location offset="1714" length="3"/><text>DGI</text></annotation><annotation id="16"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="535" length="5"/><text>human</text></annotation><annotation id="17"><infon key="NCBI Taxonomy">9913</infon><infon key="type">Species</infon><location offset="644" length="6"/><text>bovine</text></annotation><annotation id="18"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="664" length="5"/><text>human</text></annotation><annotation id="19"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="978" length="5"/><text>human</text></annotation><annotation id="20"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="999" length="5"/><text>human</text></annotation><annotation id="21"><infon key="NCBI Taxonomy">9913</infon><infon key="type">Species</infon><location offset="1031" length="6"/><text>bovine</text></annotation><annotation id="22"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1046" length="5"/><text>human</text></annotation><annotation id="23"><infon key="NCBI Taxonomy">9913</infon><infon key="type">Species</infon><location offset="1316" length="6"/><text>bovine</text></annotation><annotation id="24"><infon key="NCBI Taxonomy">9913</infon><infon key="type">Species</infon><location offset="1515" length="6"/><text>bovine</text></annotation><annotation id="25"><infon key="NCBI Taxonomy">9913</infon><infon key="type">Species</infon><location offset="1697" length="6"/><text>bovine</text></annotation><annotation id="26"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1708" length="5"/><text>human</text></annotation><annotation id="27"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1915" 
length="5"/><text>human</text></annotation><annotation id="28"><infon key="NCBI Taxonomy">9913</infon><infon key="type">Species</infon><location offset="2002" length="6"/><text>bovine</text></annotation><annotation id="29"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="2089" length="5"/><text>human</text></annotation><annotation id="30"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="2280" length="5"/><text>human</text></annotation><annotation id="31"><infon key="NCBI Gene">1828</infon><infon key="type">Gene</infon><location offset="1921" length="3"/><text>DGI</text></annotation></passage></document></collection>
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index 662cf49ef..8f0ad13d9 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,435 +1,551 @@
 <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
-          
+            
+  
   
   
   <modelVersion>4.0.0</modelVersion>
-          
+            
+  
   
   
   <parent>
-                    
+                        
+    
     
     
     <groupId>de.julielab</groupId>
-                    
+                        
+    
     
     
     <artifactId>jcore-parent</artifactId>
-                    
+                        
+    
     
     
     <version>2.5.2-SNAPSHOT</version>
-                
+                    
+  
   
   
   </parent>
-          
+            
+  
   
   
   <artifactId>jcore-base</artifactId>
-          
+            
+  
   
   
   <packaging>pom</packaging>
-          
+            
+  
   
   
   <name>JCoRe Base</name>
-          
+            
+  
   
   
   <description>The POM for the JCoRe Base projects.</description>
-          
+            
+  
   
   
   <version>2.6.0-SNAPSHOT</version>
-          
+            
+  
   
   
   <organization>
-                    
+                        
+    
     
     
     <name>JULIE Lab, Germany</name>
-                    
+                        
+    
     
     
     <url>http://www.julielab.de</url>
-                
+                    
+  
   
   
   </organization>
-          
+            
+  
   
   
   <licenses>
-                    
+                        
+    
     
     
     <license>
-                              
+                                    
+      
       
       
       <name>BSD-2-Clause</name>
-                              
+                                    
+      
       
       
       <url>https://opensource.org/licenses/BSD-2-Clause</url>
-                          
+                                
+    
     
     
     </license>
-                
+                    
+  
   
   
   </licenses>
-          
+            
+  
   
   
   <url>https://github.com/JULIELab/jcore-base</url>
-          
+            
+  
   
   
   <dependencies>
-                    
+                        
+    
     
     
     <dependency>
-                              
+                                    
+      
       
       
       <groupId>org.apache.uima</groupId>
-                              
+                                    
+      
       
       
       <artifactId>uimaj-core</artifactId>
-                              
+                                    
+      
       
       
       <version>${uima-version}</version>
-                          
+                                
+    
     
     
     </dependency>
-                    
+                        
+    
     
     
     <dependency>
-                              
+                                    
+      
       
       
       <groupId>org.apache.uima</groupId>
-                              
+                                    
+      
       
       
       <artifactId>uimafit-core</artifactId>
-                              
+                                    
+      
       
       
       <version>${uimafit-version}</version>
-                          
+                                
+    
     
     
     </dependency>
-                
+                    
+  
   
   
   </dependencies>
-          
+            
+  
   
   
   <modules>
-            
+                
+    
     
     <module>jcore-annotation-adder-ae</module>
-            
+                
+    
     
     <module>jcore-ace-reader</module>
-                    
+                        
+    
     
     
     <module>jcore-acronym-ae</module>
-            
+                
+    
     
     <module>jcore-acronym-writer</module>
-                    
+                        
+    
     
     
     <module>jcore-banner-ae</module>
-            
+                
+    
     
     <module>jcore-bc2gm-reader</module>
-            
+                
+    
     
     <module>jcore-bc2gmformat-writer</module>
-            
+                
+    
     
     <module>jcore-biolemmatizer-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-bionlpformat-consumer</module>
-                    
+                        
+    
     
     
     <module>jcore-bionlpformat-reader</module>
-                    
+                        
+    
     
     
     <module>jcore-biosem-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-conll-consumer</module>
-                    
+                        
+    
     
     
     <module>jcore-coordination-baseline-ae</module>
-            
+                
+    
     
     <module>jcore-cord19-reader</module>
-            
+                
+    
     
     <module>jcore-coreference-writer</module>
-            
+                
+    
     
     <module>jcore-ct-reader</module>
-            
+                
+    
     
     <module>jcore-db-checkpoint-ae</module>
-            
+                
+    
     
     <module>jcore-descriptor-creator</module>
-            
+                
+    
     
     <module>jcore-dta-reader</module>
-                    
+                        
+    
     
     
     <module>jcore-ec-code-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-elasticsearch-consumer</module>
-                    
+                        
+    
     
     
     <module>jcore-embedding-writer</module>
-                    
+                        
+    
     
     
     <module>jcore-event-flattener-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-feature-value-replacement-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-file-reader</module>
-                    
+                        
+    
     
     
     <module>jcore-flair-ner-ae</module>
-            
+                
+    
     
     <module>jcore-flair-token-embedding-ae</module>
-            
+                
+    
     
     <module>jcore-flow-controllers</module>
-                    
+                        
+    
     <module>jcore-gnp-bioc-writer</module>
+        
     
     <module>jcore-iexml-consumer</module>
-                    
+                        
+    
     
     
     <module>jcore-iexml-reader</module>
-                    
+                        
+    
     
     
     <module>jcore-ign-reader</module>
-                    
+                        
+    
     
     
     <module>jcore-iob-consumer</module>
-                    
+                        
+    
     
     
     <module>jcore-jnet-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-jpos-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-jsbd-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-jtbd-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-julielab-entity-evaluator-consumer</module>
-                    
+                        
+    
     
     
     <module>jcore-likelihood-assignment-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-likelihood-detection-ae</module>
-            
+                
+    
     
     <module>jcore-line-multiplier</module>
-            
+                
+    
     
     <module>jcore-lingpipegazetteer-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-lingpipe-porterstemmer-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-lingscope-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-linnaeus-species-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-mantra-xml-types</module>
-                    
+                        
+    
     
     
     <module>jcore-medxn-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-msdoc-reader</module>
-                    
+                        
+    
     
     
     <module>jcore-mstparser-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-muc7-reader</module>
-                    
+                        
+    
     
     
     <module>jcore-mutationfinder-ae</module>
-            
+                
+    
     
     <module>jcore-neo4j-relations-consumer</module>
-                    
+                        
+    
     
     
     <module>jcore-opennlp-chunk-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-opennlp-parser-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-opennlp-postag-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-opennlp-sentence-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-opennlp-token-ae</module>
-            
+                
+    
     
     <module>jcore-ppd-writer</module>
-            
+                
+    
     
     <module>jcore-pmc-reader</module>
-                    
+                        
+    
     
     
     <module>jcore-pubtator-reader</module>
-                    
+                        
+    
     
     
     <module>jcore-stanford-lemmatizer-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-topic-indexing-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-topics-writer</module>
-                    
+                        
+    
     
     
     <module>jcore-txt-consumer</module>
-                    
+                        
+    
     
     
     <module>jcore-types</module>
-                    
+                        
+    
     
     
     <module>jcore-utilities</module>
-                    
+                        
+    
     
     
     <module>jcore-xml-mapper</module>
-                    
+                        
+    
     
     
     <module>jcore-xml-reader</module>
-                    
+                        
+    
     
     
     <module>jcore-xmi-reader</module>
-                    
+                        
+    
     
     
     <module>jcore-xmi-writer</module>
-                    
+                        
+    
     
     
     <module>jedis-parent</module>
-              
+                  
+    
     
     <module>jcore-jedis-integration-tests</module>
-            
+                
     
 
+    
+    <module>jcore-gnp-bioc-reader</module>
+      
   </modules>
-          
+            
+  
   
   
   <scm>
-                    
+                        
+    
     
     
     <connection>scm:git:https://github.com/JULIELab/jcore-base
         </connection>
-                    
+                        
+    
     
     
     <developerConnection>scm:git:https://github.com/JULIELab/jcore-base</developerConnection>
-                    
+                        
+    
     
     
     <url>scm:git:https://github.com/JULIELab/jcore-base</url>
-                
+                    
+  
   
   
   </scm>
-      
+        
+
 
 
 </project>

From 618c10305fe239b173592d2984157a31a7ece5f2 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 18 Feb 2022 16:49:05 +0100
Subject: [PATCH 148/269] Write the multiplier and add a test.

Tests work, everything looks good.
---
 jcore-gnp-bioc-reader/pom.xml                 |  6 ++
 .../jcore/reader/BioCCasPopulator.java        |  7 ++
 .../reader/GNormPlusFormatMultiplier.java     | 65 +++++++++++++++++++
 .../de/julielab/jcore/reader/desc/PLACEHOLDER |  4 --
 .../reader/desc/jcore-bnp-bioc-reader.xml     | 20 ------
 .../jcore/reader/BioCCasPopulatorTest.java    |  2 +-
 .../reader/GNormPlusFormatMultiplierTest.java | 43 ++++++++++++
 7 files changed, 122 insertions(+), 25 deletions(-)
 create mode 100644 jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/GNormPlusFormatMultiplier.java
 delete mode 100644 jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/PLACEHOLDER
 delete mode 100644 jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-reader.xml
 create mode 100644 jcore-gnp-bioc-reader/src/test/java/de/julielab/jcore/reader/GNormPlusFormatMultiplierTest.java

diff --git a/jcore-gnp-bioc-reader/pom.xml b/jcore-gnp-bioc-reader/pom.xml
index 86008eabd..4ca0c48b5 100644
--- a/jcore-gnp-bioc-reader/pom.xml
+++ b/jcore-gnp-bioc-reader/pom.xml
@@ -45,6 +45,12 @@
             <groupId>org.assertj</groupId>
             <artifactId>assertj-core</artifactId>
         </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-utilities</artifactId>
+            <version>${jcore-utilities-version}</version>
+            <scope>test</scope>
+        </dependency>
     </dependencies>
     <name>JCoRe GNormPlus BioC Reader</name>
     <organization>
diff --git a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
index 4af6d0342..bfd4474e0 100644
--- a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
+++ b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
@@ -34,6 +34,7 @@ public BioCCasPopulator(Path biocCollectionPath) throws XMLStreamException, IOEx
 
     public void populateWithNextDocument(JCas jCas) throws XMLStreamException, IOException {
         BioCDocument document = bioCCollection.getDocument(pos++);
+        setDocumentId(jCas, document);
         setDocumentText(jCas, document);
         Iterator<BioCAnnotation> allAnnotations = Stream.concat(document.getAnnotations().stream(), document.getPassages().stream().map(BioCPassage::getAnnotations).flatMap(Collection::stream)).iterator();
         for (BioCAnnotation annotation : (Iterable<BioCAnnotation>)() ->allAnnotations) {
@@ -55,6 +56,12 @@ public void populateWithNextDocument(JCas jCas) throws XMLStreamException, IOExc
         }
     }
 
+    private void setDocumentId(JCas jCas, BioCDocument document) {
+        Header header = new Header(jCas); // carries the BioC document ID into the CAS
+        header.setDocId(document.getID());
+        header.addToIndexes();
+    }
+
     private void setDocumentText(JCas jCas, BioCDocument document) {
         StringBuilder sb = new StringBuilder();
         // iterate over the passages and create the complete document text from their individual text elements
diff --git a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/GNormPlusFormatMultiplier.java b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/GNormPlusFormatMultiplier.java
new file mode 100644
index 000000000..5e7d71580
--- /dev/null
+++ b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/GNormPlusFormatMultiplier.java
@@ -0,0 +1,65 @@
+package de.julielab.jcore.reader;
+
+import de.julielab.jcore.types.casmultiplier.JCoReURI;
+import org.apache.uima.analysis_component.JCasMultiplier_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.AbstractCas;
+import org.apache.uima.fit.descriptor.ResourceMetaData;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.net.URI;
+import java.nio.file.Path;
+import java.util.Collection;
+import java.util.Iterator;
+
+@ResourceMetaData(name="GNormPlusFormatMultiplier", description = "Multiplier for GNormPlusFormatMultiplierReader. Takes URIs pointing to BioC collection files that contain annotations created by GNormPlus. For each such file, reads all documents and returns CASes for them until all documents in all collections have been read into a CAS.")
+public class GNormPlusFormatMultiplier extends JCasMultiplier_ImplBase {
+    private static final Logger log = LoggerFactory.getLogger(GNormPlusFormatMultiplier.class);
+    private Iterator<URI> currentUriBatch; // URIs of the current batch that were not opened yet; null before the first process() call
+    private BioCCasPopulator casPopulator; // reads documents from the collection file opened last; null until one was opened
+
+    /** Takes the {@link JCoReURI} annotations of the given CAS as the new batch of BioC collection URIs to read. */
+    @Override
+    public void process(JCas jCas) throws AnalysisEngineProcessException {
+        try {
+            Collection<JCoReURI> jcoreUris = JCasUtil.select(jCas, JCoReURI.class);
+            log.debug("Received batch of {} BioC XML URIs", jcoreUris.size());
+            currentUriBatch = jcoreUris.stream().map(JCoReURI::getUri).map(URI::create).iterator();
+        } catch (Throwable e) {
+            log.error("Unexpected error", e);
+            throw new AnalysisEngineProcessException(e);
+        }
+    }
+
+    @Override
+    public boolean hasNext() throws AnalysisEngineProcessException {
+        while ((casPopulator == null || casPopulator.documentsLeftInCollection() == 0) && currentUriBatch != null && currentUriBatch.hasNext()) {
+            URI nextUri = currentUriBatch.next(); // keep advancing: a collection without documents must not end the batch early
+            try {
+                casPopulator = new BioCCasPopulator(Path.of(nextUri));
+            } catch (Exception e) {
+                log.error("Could not read from {}", nextUri, e);
+                throw new AnalysisEngineProcessException(e);
+            }
+        }
+        return casPopulator != null && casPopulator.documentsLeftInCollection() > 0;
+    }
+
+    /** Returns a new CAS populated with the next BioC document, or {@code null} when no document is left. */
+    @Override
+    public AbstractCas next() throws AnalysisEngineProcessException {
+        if (!hasNext()) return null;
+        JCas cas = getEmptyJCas();
+        try {
+            casPopulator.populateWithNextDocument(cas);
+            return cas;
+        } catch (Exception e) {
+            cas.release(); // the CAS is never handed to the framework on failure, so give it back to the pool
+            log.error("Could not populate CAS with the next BioC document.", e);
+            throw new AnalysisEngineProcessException(e);
+        }
+    }
+}
diff --git a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/PLACEHOLDER b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/PLACEHOLDER
deleted file mode 100644
index e4b0b196a..000000000
--- a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/PLACEHOLDER
+++ /dev/null
@@ -1,4 +0,0 @@
-The actual descriptor must be created by UIMA fit.
-For this purpose, use UIMAfit annotations to annotate the reader component class.
-Then employ the jcore-descriptor-creator's main method to build the descriptor from the reader class.
-The jcore-descriptor-creator is already on the classpath as a Maven dependency.
diff --git a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-reader.xml b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-reader.xml
deleted file mode 100644
index 9ce0d444f..000000000
--- a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-reader.xml
+++ /dev/null
@@ -1,20 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<collectionReaderDescription xmlns="http://uima.apache.org/resourceSpecifier">
-  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
-  <implementationName>GNormPlusFormatMultiplierReader</implementationName>
-  <processingResourceMetaData>
-    <name>JCoRe GNormPlus BioC Reader</name>
-    <description>This is only a placeholder descriptor. Please use UIMAfit to annotate the component parameters. Then employ the jcore-descriptor-creator's main method to build the descriptor from the reader class GNormPlusFormatMultiplierReader. The jcore-descriptor-creator is already on the classpath as a Maven dependency.</description>
-    <version>2.3.0-SNAPSHOT</version>
-    <vendor>JULIE Lab Jena, Germany</vendor>
-    <configurationParameters/>
-    <configurationParameterSettings/>
-    <typeSystemDescription/>
-    <capabilities/>
-    <operationalProperties>
-      <modifiesCas>true</modifiesCas>
-      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
-      <outputsNewCASes>true</outputsNewCASes>
-    </operationalProperties>
-  </processingResourceMetaData>
-</collectionReaderDescription>
diff --git a/jcore-gnp-bioc-reader/src/test/java/de/julielab/jcore/reader/BioCCasPopulatorTest.java b/jcore-gnp-bioc-reader/src/test/java/de/julielab/jcore/reader/BioCCasPopulatorTest.java
index dddbb8704..acea59b54 100644
--- a/jcore-gnp-bioc-reader/src/test/java/de/julielab/jcore/reader/BioCCasPopulatorTest.java
+++ b/jcore-gnp-bioc-reader/src/test/java/de/julielab/jcore/reader/BioCCasPopulatorTest.java
@@ -15,7 +15,7 @@
 class BioCCasPopulatorTest {
 
     private JCas getJCas() throws Exception {
-        return JCasFactory.createJCas("de.julielab.jcore.types.jcore-document-structure-pubmed-types", "de.julielab.jcore.types.jcore-semantics-biology-types");
+        return JCasFactory.createJCas("de.julielab.jcore.types.jcore-document-structure-pubmed-types", "de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-meta-types");
     }
 
     @Test
diff --git a/jcore-gnp-bioc-reader/src/test/java/de/julielab/jcore/reader/GNormPlusFormatMultiplierTest.java b/jcore-gnp-bioc-reader/src/test/java/de/julielab/jcore/reader/GNormPlusFormatMultiplierTest.java
new file mode 100644
index 000000000..a38744b34
--- /dev/null
+++ b/jcore-gnp-bioc-reader/src/test/java/de/julielab/jcore/reader/GNormPlusFormatMultiplierTest.java
@@ -0,0 +1,43 @@
+package de.julielab.jcore.reader;
+
+import de.julielab.jcore.types.casmultiplier.JCoReURI;
+import de.julielab.jcore.utility.JCoReTools;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.JCasIterator;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.jcas.JCas;
+import org.junit.jupiter.api.Test;
+
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.assertj.core.api.Assertions.assertThat;
+class GNormPlusFormatMultiplierTest {
+    private JCas getCas() throws Exception {
+        return JCasFactory.createJCas("de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types");
+    }
+
+    @Test
+    void process() throws Exception {
+        JCas cas = getCas();
+        JCoReURI jCoReURI = new JCoReURI(cas);
+        jCoReURI.setUri(Path.of("src", "test", "resources", "test-input-path", "subdir1", "bioc_collection_0.xml").toUri().toString());
+        jCoReURI.addToIndexes();
+
+        JCoReURI jCoReURI2 = new JCoReURI(cas);
+        jCoReURI2.setUri(Path.of("src", "test", "resources", "test-input-path", "subdir2", "bioc_collection_2.xml").toUri().toString());
+        jCoReURI2.addToIndexes();
+
+        AnalysisEngine multiplier = AnalysisEngineFactory.createEngine(GNormPlusFormatMultiplier.class);
+        JCasIterator jCasIterator = multiplier.processAndOutputNewCASes(cas);
+        List<String> docIds = new ArrayList<>();
+        while (jCasIterator.hasNext()) {
+            JCas multiplierCas = jCasIterator.next();
+            docIds.add(JCoReTools.getDocId(multiplierCas));
+            multiplierCas.release();
+        }
+        assertThat(docIds).containsExactlyInAnyOrder("1378843", "10896916", "10722742", "1770008");
+    }
+}
\ No newline at end of file

From 489bd9bd0608ba4d28f10b14267722ac292d64e4 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 18 Feb 2022 16:50:11 +0100
Subject: [PATCH 149/269] Add the GNP reader descriptors. Resolves #131.

---
 jcore-gnp-bioc-reader/component.meta          | 25 ++++++++
 .../desc/jcore-bnp-bioc-multiplier-reader.xml | 58 +++++++++++++++++++
 .../reader/desc/jcore-bnp-bioc-multiplier.xml | 26 +++++++++
 3 files changed, 109 insertions(+)
 create mode 100644 jcore-gnp-bioc-reader/component.meta
 create mode 100644 jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier-reader.xml
 create mode 100644 jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml

diff --git a/jcore-gnp-bioc-reader/component.meta b/jcore-gnp-bioc-reader/component.meta
new file mode 100644
index 000000000..c3b3f6e0a
--- /dev/null
+++ b/jcore-gnp-bioc-reader/component.meta
@@ -0,0 +1,25 @@
+{
+    "categories": [
+        "ae",
+        "reader"
+    ],
+    "description": "A reader for the BioC format used by GNormPlus. Reads the text and the annotations, both species and genes.",
+    "descriptors": [
+        {
+            "category": "ae",
+            "location": "de.julielab.jcore.reader.desc.jcore-bnp-bioc-multiplier"
+        },
+        {
+            "category": "reader",
+            "location": "de.julielab.jcore.reader.desc.jcore-bnp-bioc-multiplier-reader"
+        }
+    ],
+    "exposable": true,
+    "group": "general",
+    "maven-artifact": {
+        "artifactId": "jcore-bnp-bioc-reader",
+        "groupId": "de.julielab",
+        "version": "2.6.0-SNAPSHOT"
+    },
+    "name": "JCoRe GNormPlus BioC Reader"
+}
diff --git a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier-reader.xml b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier-reader.xml
new file mode 100644
index 000000000..7081ae596
--- /dev/null
+++ b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier-reader.xml
@@ -0,0 +1,58 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<collectionReaderDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+    <implementationName>de.julielab.jcore.reader.GNormPlusFormatMultiplierReader</implementationName>
+    <processingResourceMetaData>
+        <name>JCoRe GNormPlus Format Multiplier Reader</name>
+        <description>A reader for the BioC XML format used by GNormPlus. Requires the matching multiplier.</description>
+        <configurationParameters>
+            <configurationParameter>
+                <name>InputPath</name>
+                <description>Path to a directory or file to be read. In case of a directory, all files ending in .xml will be read.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>true</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>Recursive</name>
+                <description>Whether to read also the subdirectories of the input directory, if the input path points to a directory.</description>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>BatchSize</name>
+                <description>The number of XML file URI references to send to the CAS multipliers in each work assignment. Defaults to 20.</description>
+                <type>Integer</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+        </configurationParameters>
+        <configurationParameterSettings>
+            <nameValuePair>
+                <name>Recursive</name>
+                <value>
+                    <boolean>true</boolean>
+                </value>
+            </nameValuePair>
+            <nameValuePair>
+                <name>BatchSize</name>
+                <value>
+                    <integer>20</integer>
+                </value>
+            </nameValuePair>
+        </configurationParameterSettings>
+        <typeSystemDescription>
+            <imports>
+                <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types"/>
+            </imports>
+        </typeSystemDescription>
+        <fsIndexCollection/>
+        <capabilities/>
+        <operationalProperties>
+            <modifiesCas>true</modifiesCas>
+            <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
+            <outputsNewCASes>true</outputsNewCASes>
+        </operationalProperties>
+    </processingResourceMetaData>
+</collectionReaderDescription>
\ No newline at end of file
diff --git a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml
new file mode 100644
index 000000000..2b64be30b
--- /dev/null
+++ b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+    <primitive>true</primitive>
+    <annotatorImplementationName>de.julielab.jcore.reader.GNormPlusFormatMultiplier</annotatorImplementationName>
+    <analysisEngineMetaData>
+        <name>GNormPlusFormatMultiplier</name>
+        <description>Multiplier for GNormPlusFormatMultiplierReader. Takes URIs pointing to BioC collection files that contain annotations created by GNormPlus. For each such file, reads all documents and returns CASes for them until all documents in all collections have been read into a CAS.</description>
+        <configurationParameters/>
+        <configurationParameterSettings/>
+        <typeSystemDescription>
+            <imports>
+                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
+                <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
+            </imports>
+        </typeSystemDescription>
+        <fsIndexCollection/>
+        <capabilities/>
+        <operationalProperties>
+            <modifiesCas>true</modifiesCas>
+            <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+            <outputsNewCASes>false</outputsNewCASes>
+        </operationalProperties>
+    </analysisEngineMetaData>
+</analysisEngineDescription>
\ No newline at end of file

From 833a275b876757ca2f476eaa8c2e06c5a8e211a6 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 18 Feb 2022 16:58:48 +0100
Subject: [PATCH 150/269] Correct the module structure of the parent pom.

---
 pom.xml | 859 ++++++++++++++++++++++++++++----------------------------
 1 file changed, 429 insertions(+), 430 deletions(-)

diff --git a/pom.xml b/pom.xml
index 8f0ad13d9..84fad31a2 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,550 +1,549 @@
 <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
-            
-  
-  
-  
+
+
+
+
   <modelVersion>4.0.0</modelVersion>
-            
-  
-  
-  
+
+
+
+
   <parent>
-                        
-    
-    
-    
+
+
+
+
     <groupId>de.julielab</groupId>
-                        
-    
-    
-    
+
+
+
+
     <artifactId>jcore-parent</artifactId>
-                        
-    
-    
-    
+
+
+
+
     <version>2.5.2-SNAPSHOT</version>
-                    
-  
-  
-  
+
+
+
+
   </parent>
-            
-  
-  
-  
+
+
+
+
   <artifactId>jcore-base</artifactId>
-            
-  
-  
-  
+
+
+
+
   <packaging>pom</packaging>
-            
-  
-  
-  
+
+
+
+
   <name>JCoRe Base</name>
-            
-  
-  
-  
+
+
+
+
   <description>The POM for the JCoRe Base projects.</description>
-            
-  
-  
-  
+
+
+
+
   <version>2.6.0-SNAPSHOT</version>
-            
-  
-  
-  
+
+
+
+
   <organization>
-                        
-    
-    
-    
+
+
+
+
     <name>JULIE Lab, Germany</name>
-                        
-    
-    
-    
+
+
+
+
     <url>http://www.julielab.de</url>
-                    
-  
-  
-  
+
+
+
+
   </organization>
-            
-  
-  
-  
+
+
+
+
   <licenses>
-                        
-    
-    
-    
+
+
+
+
     <license>
-                                    
-      
-      
-      
+
+
+
+
       <name>BSD-2-Clause</name>
-                                    
-      
-      
-      
+
+
+
+
       <url>https://opensource.org/licenses/BSD-2-Clause</url>
-                                
-    
-    
-    
+
+
+
+
     </license>
-                    
-  
-  
-  
+
+
+
+
   </licenses>
-            
-  
-  
-  
+
+
+
+
   <url>https://github.com/JULIELab/jcore-base</url>
-            
-  
-  
-  
+
+
+
+
   <dependencies>
-                        
-    
-    
-    
+
+
+
+
     <dependency>
-                                    
-      
-      
-      
+
+
+
+
       <groupId>org.apache.uima</groupId>
-                                    
-      
-      
-      
+
+
+
+
       <artifactId>uimaj-core</artifactId>
-                                    
-      
-      
-      
+
+
+
+
       <version>${uima-version}</version>
-                                
-    
-    
-    
+
+
+
+
     </dependency>
-                        
-    
-    
-    
+
+
+
+
     <dependency>
-                                    
-      
-      
-      
+
+
+
+
       <groupId>org.apache.uima</groupId>
-                                    
-      
-      
-      
+
+
+
+
       <artifactId>uimafit-core</artifactId>
-                                    
-      
-      
-      
+
+
+
+
       <version>${uimafit-version}</version>
-                                
-    
-    
-    
+
+
+
+
     </dependency>
-                    
-  
-  
-  
+
+
+
+
   </dependencies>
-            
-  
-  
-  
+
+
+
+
   <modules>
-                
-    
-    
+
+
+
     <module>jcore-annotation-adder-ae</module>
-                
-    
-    
+
+
+
     <module>jcore-ace-reader</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-acronym-ae</module>
-                
-    
-    
+
+
+
     <module>jcore-acronym-writer</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-banner-ae</module>
-                
-    
-    
+
+
+
     <module>jcore-bc2gm-reader</module>
-                
-    
-    
+
+
+
     <module>jcore-bc2gmformat-writer</module>
-                
-    
-    
+
+
+
     <module>jcore-biolemmatizer-ae</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-bionlpformat-consumer</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-bionlpformat-reader</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-biosem-ae</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-conll-consumer</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-coordination-baseline-ae</module>
-                
-    
-    
+
+
+
     <module>jcore-cord19-reader</module>
-                
-    
-    
+
+
+
     <module>jcore-coreference-writer</module>
-                
-    
-    
+
+
+
     <module>jcore-ct-reader</module>
-                
-    
-    
+
+
+
     <module>jcore-db-checkpoint-ae</module>
-                
-    
-    
+
+
+
     <module>jcore-descriptor-creator</module>
-                
-    
-    
+
+
+
     <module>jcore-dta-reader</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-ec-code-ae</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-elasticsearch-consumer</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-embedding-writer</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-event-flattener-ae</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-feature-value-replacement-ae</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-file-reader</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-flair-ner-ae</module>
-                
-    
-    
+
+
+
     <module>jcore-flair-token-embedding-ae</module>
-                
-    
-    
+
+
+
     <module>jcore-flow-controllers</module>
-                        
-    
+
+    <module>jcore-gnp-bioc-reader</module>
+
     <module>jcore-gnp-bioc-writer</module>
-        
-    
+
+
     <module>jcore-iexml-consumer</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-iexml-reader</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-ign-reader</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-iob-consumer</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-jnet-ae</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-jpos-ae</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-jsbd-ae</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-jtbd-ae</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-julielab-entity-evaluator-consumer</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-likelihood-assignment-ae</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-likelihood-detection-ae</module>
-                
-    
-    
+
+
+
     <module>jcore-line-multiplier</module>
-                
-    
-    
+
+
+
     <module>jcore-lingpipegazetteer-ae</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-lingpipe-porterstemmer-ae</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-lingscope-ae</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-linnaeus-species-ae</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-mantra-xml-types</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-medxn-ae</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-msdoc-reader</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-mstparser-ae</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-muc7-reader</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-mutationfinder-ae</module>
-                
-    
-    
+
+
+
     <module>jcore-neo4j-relations-consumer</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-opennlp-chunk-ae</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-opennlp-parser-ae</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-opennlp-postag-ae</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-opennlp-sentence-ae</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-opennlp-token-ae</module>
-                
-    
-    
+
+
+
     <module>jcore-ppd-writer</module>
-                
-    
-    
+
+
+
     <module>jcore-pmc-reader</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-pubtator-reader</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-stanford-lemmatizer-ae</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-topic-indexing-ae</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-topics-writer</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-txt-consumer</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-types</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-utilities</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-xml-mapper</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-xml-reader</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-xmi-reader</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jcore-xmi-writer</module>
-                        
-    
-    
-    
+
+
+
+
     <module>jedis-parent</module>
-                  
-    
-    
+
+
+
     <module>jcore-jedis-integration-tests</module>
-                
-    
 
-    
-    <module>jcore-bnp-bioc-reader</module>
-      
+
+
+
   </modules>
-            
-  
-  
-  
+
+
+
+
   <scm>
-                        
-    
-    
-    
+
+
+
+
     <connection>scm:git:https://github.com/JULIELab/jcore-base
         </connection>
-                        
-    
-    
-    
+
+
+
+
     <developerConnection>scm:git:https://github.com/JULIELab/jcore-base</developerConnection>
-                        
-    
-    
-    
+
+
+
+
     <url>scm:git:https://github.com/JULIELab/jcore-base</url>
-                    
-  
-  
-  
+
+
+
+
   </scm>
-        
+
 
 
From 62269ae0b3b91bca4f051a5174ae81a6def047da Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 18 Feb 2022 17:08:36 +0100
Subject: [PATCH 151/269] Remove the absolute path to BioC.dtd from the test
 document.

IntelliJ must have inserted it automatically to resolve the DTD.
---
 .../resources/test-input-path/subdir1/bioc_collection_0.xml     | 2 +-
 .../resources/test-input-path/subdir1/bioc_collection_1.xml     | 2 +-
 .../resources/test-input-path/subdir2/bioc_collection_2.xml     | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/jcore-gnp-bioc-reader/src/test/resources/test-input-path/subdir1/bioc_collection_0.xml b/jcore-gnp-bioc-reader/src/test/resources/test-input-path/subdir1/bioc_collection_0.xml
index a2f9b537c..9c1283a15 100644
--- a/jcore-gnp-bioc-reader/src/test/resources/test-input-path/subdir1/bioc_collection_0.xml
+++ b/jcore-gnp-bioc-reader/src/test/resources/test-input-path/subdir1/bioc_collection_0.xml
@@ -1,2 +1,2 @@
 <?xml version='1.0' encoding='UTF-8'?><!DOCTYPE collection SYSTEM
-        "/Users/faessler/Coding/git/jcore-base/jcore-gnp-bioc-reader/BioC.dtd"><collection><source>JCoRe GNormPlus BioC Writer</source><date>Fri Feb 18 13:55:36 CET 2022</date><key>PubTator.key</key><document><id>1378843</id><passage><infon key="type">title</infon><offset>0</offset><text>Cloning and expression of a cell surface receptor for advanced glycosylation end products of proteins.</text></passage><passage><infon key="type">abstract</infon><offset>103</offset><text>Advanced glycosylation end products of proteins (AGEs) are nonenzymatically glycosylated proteins which accumulate in vascular tissue in aging and at an accelerated rate in diabetes. A approximately 35-kDa polypeptide with a unique NH2-terminal sequence has been isolated from bovine lung and found to be present on the surface of endothelial cells where it mediates the binding of AGEs (receptor for advanced glycosylation end product or RAGE). Using an oligonucleotide probe based on the amino-terminal sequence of RAGE, an apparently full-length cDNA of 1.5 kilobases was isolated from a bovine lung cDNA library. This cDNA encoded a 394 amino acid mature protein comprised of the following putative domains: an extracellular domain of 332 amino acids, a single hydrophobic membrane spanning domain of 19 amino acids, and a carboxyl-terminal domain of 43 amino acids. A partial clone encoding the human counterpart of RAGE, isolated from a human lung library, was found to be approximately 90% homologous to the bovine molecule. Based on computer analysis of the amino acid sequence of RAGE and comparison with databases, RAGE is a new member of the immunoglobulin superfamily of cell surface molecules and shares significant homology with MUC 18, NCAM, and the cytoplasmic domain of CD20. Expression of the RAGE cDNA in 293 cells allowed them to bind 125I-AGE-albumin in a saturable and dose-dependent manner (Kd approximately 100 nM), blocked by antibody to RAGE. 
Western blots of 293 cells transfected with RAGE cDNA probed with anti-RAGE IgG demonstrated expression of immunoreactive protein compared to its absence in mock-transfected cells. These results suggest that RAGE functions as a cell surface receptor for AGEs, which could potentially mediate cellular effects of this class of glycosylated proteins.</text><annotation id="0"><infon key="NCBI Gene">280986</infon><infon key="type">Gene</infon><location offset="542" length="4"/><text>RAGE</text></annotation><annotation id="1"><infon key="NCBI Gene">280986</infon><infon key="type">Gene</infon><location offset="620" length="4"/><text>RAGE</text></annotation><annotation id="2"><infon key="NCBI Gene">177</infon><infon key="type">Gene</infon><location offset="1024" length="4"/><text>RAGE</text></annotation><annotation id="3"><infon key="NCBI Gene">280986</infon><infon key="type">Gene</infon><location offset="1192" length="4"/><text>RAGE</text></annotation><annotation id="4"><infon key="NCBI Gene">280986</infon><infon key="type">Gene</infon><location offset="1228" length="4"/><text>RAGE</text></annotation><annotation id="7"><infon key="NCBI Gene">505653</infon><infon key="type">Gene</infon><location offset="1390" length="4"/><text>CD20</text></annotation><annotation id="8"><infon key="NCBI Gene">280986</infon><infon key="type">Gene</infon><location offset="1414" length="4"/><text>RAGE</text></annotation><annotation id="9"><infon key="NCBI Gene">280986</infon><infon key="type">Gene</infon><location offset="1566" length="4"/><text>RAGE</text></annotation><annotation id="10"><infon key="NCBI Gene">280986</infon><infon key="type">Gene</infon><location offset="1616" length="4"/><text>RAGE</text></annotation><annotation id="11"><infon key="NCBI Gene">280986</infon><infon key="type">Gene</infon><location offset="1643" length="4"/><text>RAGE</text></annotation><annotation id="12"><infon key="NCBI Gene">280986</infon><infon key="type">Gene</infon><location offset="1780" 
length="4"/><text>RAGE</text></annotation><annotation id="13"><infon key="NCBI Taxonomy">9913</infon><infon key="type">Species</infon><location offset="380" length="6"/><text>bovine</text></annotation><annotation id="14"><infon key="NCBI Taxonomy">9913</infon><infon key="type">Species</infon><location offset="694" length="6"/><text>bovine</text></annotation><annotation id="15"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1003" length="5"/><text>human</text></annotation><annotation id="16"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1046" length="5"/><text>human</text></annotation><annotation id="17"><infon key="NCBI Taxonomy">9913</infon><infon key="type">Species</infon><location offset="1118" length="6"/><text>bovine</text></annotation></passage></document><document><id>10896916</id><passage><infon key="type">title</infon><offset>0</offset><text>Alpha(2) adrenoceptors regulate proliferation of human intestinal epithelial cells.</text><annotation id="0"><infon key="NCBI Gene">150</infon><infon key="type">Gene</infon><location offset="0" length="22"/><text>Alpha(2) adrenoceptors</text></annotation><annotation id="1"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="49" length="5"/><text>human</text></annotation></passage><passage><infon key="type">abstract</infon><offset>84</offset><text>Previous studies on rodents have suggested that catecholamines stimulate proliferation of the intestinal epithelium through activation of alpha(2) adrenoceptors located on crypt cells. The occurrence of this effect awaits demonstration in humans and the molecular mechanisms involved have not yet been elucidated. Here, we examined the effect of alpha(2) agonists on a clone of Caco2 cells expressing the human alpha(2A) adrenoceptor. Cells were transfected with a bicistronic plasmid containing the alpha2C10 and neomycin phosphotransferase genes. 
G418 resistant clones were assayed for receptor expression using radioligand binding. Receptor functionality was assessed by testing its ability to couple Gi proteins and to inhibit cAMP production. Mitogen activated protein kinase (MAPK) phosphorylation was followed by western blot, and cell proliferation was estimated by measuring protein and DNA content. Permanent transfection of Caco2 cells allowed us to obtain a clone (Caco2-3B) expressing alpha(2A) adrenoceptors at a density similar to that found in normal human intestinal epithelium. Caco2-3B retained morphological features and brush border enzyme expression characteristic of enterocytic differentiation. The receptor was coupled to Gi2/Gi3 proteins and its stimulation caused marked diminution of forskolin induced cAMP production. Treatment of Caco2-3B with UK14304 (alpha(2) agonist) induced a rapid increase in the phosphorylation state of MAPK, extracellular regulated protein kinase 1 (Erk1), and 2 (Erk2). This event was totally abolished in pertussis toxin treated cells and in the presence of kinase inhibitors (genistein or PD98059). It was unaffected by protein kinase C downregulation but correlated with a transient increase in Shc tyrosine phosphorylation. Finally, sustained exposure of Caco2-3B to UK14304 resulted in modest but significant acceleration of cell proliferation. None of these effects was observed in the parental cell line Caco2. 
The results obtained in the present study support a regulatory role for alpha(2) adrenoceptors in intestinal cell proliferation.</text><annotation id="2"><infon key="NCBI Gene">150</infon><infon key="type">Gene</infon><location offset="222" length="22"/><text>alpha(2) adrenoceptors</text></annotation><annotation id="3"><infon key="NCBI Gene">150</infon><infon key="type">Gene</infon><location offset="495" length="22"/><text>alpha(2A) adrenoceptor</text></annotation><annotation id="4"><infon key="NCBI Gene">150</infon><infon key="type">Gene</infon><location offset="584" length="9"/><text>alpha2C10</text></annotation><annotation id="5"><infon key="NCBI Gene">5595;5594;5595</infon><infon key="type">Gene</infon><location offset="866" length="4"/><text>MAPK</text></annotation><annotation id="6"><infon key="NCBI Gene">5595;5594;5595</infon><infon key="type">Gene</infon><location offset="1542" length="4"/><text>MAPK</text></annotation><annotation id="7"><infon key="NCBI Gene">5595</infon><infon key="type">Gene</infon><location offset="1548" length="40"/><text>extracellular regulated protein kinase 1</text></annotation><annotation id="8"><infon key="NCBI Gene">5595</infon><infon key="type">Gene</infon><location offset="1590" length="4"/><text>Erk1</text></annotation><annotation id="9"><infon key="NCBI Gene">5594</infon><infon key="type">Gene</infon><location offset="1604" length="4"/><text>Erk2</text></annotation><annotation id="10"><infon key="NCBI Gene">6464</infon><infon key="type">Gene</infon><location offset="1839" length="3"/><text>Shc</text></annotation><annotation id="11"><infon key="NCBI Gene">150</infon><infon key="type">Gene</infon><location offset="2131" length="22"/><text>alpha(2) adrenoceptors</text></annotation><annotation id="12"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="323" length="6"/><text>humans</text></annotation><annotation id="13"><infon key="NCBI Taxonomy">9606</infon><infon 
key="type">Species</infon><location offset="489" length="5"/><text>human</text></annotation><annotation id="14"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1151" length="5"/><text>human</text></annotation><annotation id="15"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="462" length="5"/><text>Caco2</text></annotation><annotation id="16"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1019" length="5"/><text>Caco2</text></annotation></passage></document></collection>
\ No newline at end of file
+        "BioC.dtd"><collection><source>JCoRe GNormPlus BioC Writer</source><date>Fri Feb 18 13:55:36 CET 2022</date><key>PubTator.key</key><document><id>1378843</id><passage><infon key="type">title</infon><offset>0</offset><text>Cloning and expression of a cell surface receptor for advanced glycosylation end products of proteins.</text></passage><passage><infon key="type">abstract</infon><offset>103</offset><text>Advanced glycosylation end products of proteins (AGEs) are nonenzymatically glycosylated proteins which accumulate in vascular tissue in aging and at an accelerated rate in diabetes. A approximately 35-kDa polypeptide with a unique NH2-terminal sequence has been isolated from bovine lung and found to be present on the surface of endothelial cells where it mediates the binding of AGEs (receptor for advanced glycosylation end product or RAGE). Using an oligonucleotide probe based on the amino-terminal sequence of RAGE, an apparently full-length cDNA of 1.5 kilobases was isolated from a bovine lung cDNA library. This cDNA encoded a 394 amino acid mature protein comprised of the following putative domains: an extracellular domain of 332 amino acids, a single hydrophobic membrane spanning domain of 19 amino acids, and a carboxyl-terminal domain of 43 amino acids. A partial clone encoding the human counterpart of RAGE, isolated from a human lung library, was found to be approximately 90% homologous to the bovine molecule. Based on computer analysis of the amino acid sequence of RAGE and comparison with databases, RAGE is a new member of the immunoglobulin superfamily of cell surface molecules and shares significant homology with MUC 18, NCAM, and the cytoplasmic domain of CD20. Expression of the RAGE cDNA in 293 cells allowed them to bind 125I-AGE-albumin in a saturable and dose-dependent manner (Kd approximately 100 nM), blocked by antibody to RAGE. 
Western blots of 293 cells transfected with RAGE cDNA probed with anti-RAGE IgG demonstrated expression of immunoreactive protein compared to its absence in mock-transfected cells. These results suggest that RAGE functions as a cell surface receptor for AGEs, which could potentially mediate cellular effects of this class of glycosylated proteins.</text><annotation id="0"><infon key="NCBI Gene">280986</infon><infon key="type">Gene</infon><location offset="542" length="4"/><text>RAGE</text></annotation><annotation id="1"><infon key="NCBI Gene">280986</infon><infon key="type">Gene</infon><location offset="620" length="4"/><text>RAGE</text></annotation><annotation id="2"><infon key="NCBI Gene">177</infon><infon key="type">Gene</infon><location offset="1024" length="4"/><text>RAGE</text></annotation><annotation id="3"><infon key="NCBI Gene">280986</infon><infon key="type">Gene</infon><location offset="1192" length="4"/><text>RAGE</text></annotation><annotation id="4"><infon key="NCBI Gene">280986</infon><infon key="type">Gene</infon><location offset="1228" length="4"/><text>RAGE</text></annotation><annotation id="7"><infon key="NCBI Gene">505653</infon><infon key="type">Gene</infon><location offset="1390" length="4"/><text>CD20</text></annotation><annotation id="8"><infon key="NCBI Gene">280986</infon><infon key="type">Gene</infon><location offset="1414" length="4"/><text>RAGE</text></annotation><annotation id="9"><infon key="NCBI Gene">280986</infon><infon key="type">Gene</infon><location offset="1566" length="4"/><text>RAGE</text></annotation><annotation id="10"><infon key="NCBI Gene">280986</infon><infon key="type">Gene</infon><location offset="1616" length="4"/><text>RAGE</text></annotation><annotation id="11"><infon key="NCBI Gene">280986</infon><infon key="type">Gene</infon><location offset="1643" length="4"/><text>RAGE</text></annotation><annotation id="12"><infon key="NCBI Gene">280986</infon><infon key="type">Gene</infon><location offset="1780" 
length="4"/><text>RAGE</text></annotation><annotation id="13"><infon key="NCBI Taxonomy">9913</infon><infon key="type">Species</infon><location offset="380" length="6"/><text>bovine</text></annotation><annotation id="14"><infon key="NCBI Taxonomy">9913</infon><infon key="type">Species</infon><location offset="694" length="6"/><text>bovine</text></annotation><annotation id="15"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1003" length="5"/><text>human</text></annotation><annotation id="16"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1046" length="5"/><text>human</text></annotation><annotation id="17"><infon key="NCBI Taxonomy">9913</infon><infon key="type">Species</infon><location offset="1118" length="6"/><text>bovine</text></annotation></passage></document><document><id>10896916</id><passage><infon key="type">title</infon><offset>0</offset><text>Alpha(2) adrenoceptors regulate proliferation of human intestinal epithelial cells.</text><annotation id="0"><infon key="NCBI Gene">150</infon><infon key="type">Gene</infon><location offset="0" length="22"/><text>Alpha(2) adrenoceptors</text></annotation><annotation id="1"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="49" length="5"/><text>human</text></annotation></passage><passage><infon key="type">abstract</infon><offset>84</offset><text>Previous studies on rodents have suggested that catecholamines stimulate proliferation of the intestinal epithelium through activation of alpha(2) adrenoceptors located on crypt cells. The occurrence of this effect awaits demonstration in humans and the molecular mechanisms involved have not yet been elucidated. Here, we examined the effect of alpha(2) agonists on a clone of Caco2 cells expressing the human alpha(2A) adrenoceptor. Cells were transfected with a bicistronic plasmid containing the alpha2C10 and neomycin phosphotransferase genes. 
G418 resistant clones were assayed for receptor expression using radioligand binding. Receptor functionality was assessed by testing its ability to couple Gi proteins and to inhibit cAMP production. Mitogen activated protein kinase (MAPK) phosphorylation was followed by western blot, and cell proliferation was estimated by measuring protein and DNA content. Permanent transfection of Caco2 cells allowed us to obtain a clone (Caco2-3B) expressing alpha(2A) adrenoceptors at a density similar to that found in normal human intestinal epithelium. Caco2-3B retained morphological features and brush border enzyme expression characteristic of enterocytic differentiation. The receptor was coupled to Gi2/Gi3 proteins and its stimulation caused marked diminution of forskolin induced cAMP production. Treatment of Caco2-3B with UK14304 (alpha(2) agonist) induced a rapid increase in the phosphorylation state of MAPK, extracellular regulated protein kinase 1 (Erk1), and 2 (Erk2). This event was totally abolished in pertussis toxin treated cells and in the presence of kinase inhibitors (genistein or PD98059). It was unaffected by protein kinase C downregulation but correlated with a transient increase in Shc tyrosine phosphorylation. Finally, sustained exposure of Caco2-3B to UK14304 resulted in modest but significant acceleration of cell proliferation. None of these effects was observed in the parental cell line Caco2. 
The results obtained in the present study support a regulatory role for alpha(2) adrenoceptors in intestinal cell proliferation.</text><annotation id="2"><infon key="NCBI Gene">150</infon><infon key="type">Gene</infon><location offset="222" length="22"/><text>alpha(2) adrenoceptors</text></annotation><annotation id="3"><infon key="NCBI Gene">150</infon><infon key="type">Gene</infon><location offset="495" length="22"/><text>alpha(2A) adrenoceptor</text></annotation><annotation id="4"><infon key="NCBI Gene">150</infon><infon key="type">Gene</infon><location offset="584" length="9"/><text>alpha2C10</text></annotation><annotation id="5"><infon key="NCBI Gene">5595;5594;5595</infon><infon key="type">Gene</infon><location offset="866" length="4"/><text>MAPK</text></annotation><annotation id="6"><infon key="NCBI Gene">5595;5594;5595</infon><infon key="type">Gene</infon><location offset="1542" length="4"/><text>MAPK</text></annotation><annotation id="7"><infon key="NCBI Gene">5595</infon><infon key="type">Gene</infon><location offset="1548" length="40"/><text>extracellular regulated protein kinase 1</text></annotation><annotation id="8"><infon key="NCBI Gene">5595</infon><infon key="type">Gene</infon><location offset="1590" length="4"/><text>Erk1</text></annotation><annotation id="9"><infon key="NCBI Gene">5594</infon><infon key="type">Gene</infon><location offset="1604" length="4"/><text>Erk2</text></annotation><annotation id="10"><infon key="NCBI Gene">6464</infon><infon key="type">Gene</infon><location offset="1839" length="3"/><text>Shc</text></annotation><annotation id="11"><infon key="NCBI Gene">150</infon><infon key="type">Gene</infon><location offset="2131" length="22"/><text>alpha(2) adrenoceptors</text></annotation><annotation id="12"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="323" length="6"/><text>humans</text></annotation><annotation id="13"><infon key="NCBI Taxonomy">9606</infon><infon 
key="type">Species</infon><location offset="489" length="5"/><text>human</text></annotation><annotation id="14"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1151" length="5"/><text>human</text></annotation><annotation id="15"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="462" length="5"/><text>Caco2</text></annotation><annotation id="16"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1019" length="5"/><text>Caco2</text></annotation></passage></document></collection>
\ No newline at end of file
diff --git a/jcore-gnp-bioc-reader/src/test/resources/test-input-path/subdir1/bioc_collection_1.xml b/jcore-gnp-bioc-reader/src/test/resources/test-input-path/subdir1/bioc_collection_1.xml
index b2144e781..6676e8d34 100644
--- a/jcore-gnp-bioc-reader/src/test/resources/test-input-path/subdir1/bioc_collection_1.xml
+++ b/jcore-gnp-bioc-reader/src/test/resources/test-input-path/subdir1/bioc_collection_1.xml
@@ -1,2 +1,2 @@
 <?xml version='1.0' encoding='UTF-8'?><!DOCTYPE collection SYSTEM
-        "/Users/faessler/Coding/git/jcore-base/jcore-gnp-bioc-reader/BioC.dtd"><collection><source>JCoRe GNormPlus BioC Writer</source><date>Fri Feb 18 13:55:36 CET 2022</date><key>PubTator.key</key><document><id>10880510</id><passage><infon key="type">title</infon><offset>0</offset><text>Human TREK2, a 2P domain mechano-sensitive K+ channel with multiple regulations by polyunsaturated fatty acids, lysophospholipids, and Gs, Gi, and Gq protein-coupled receptors.</text><annotation id="0"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="6" length="5"/><text>TREK2</text></annotation><annotation id="1"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="0" length="5"/><text>Human</text></annotation></passage><passage><infon key="type">abstract</infon><offset>177</offset><text>Mechano-sensitive and fatty acid-activated K(+) belong to the structural class of K(+) channel with two pore domains. Here, we report the isolation and the characterization of a novel member of this family. This channel, called TREK2, is closely related to TREK1 (78% of homology). Its gene is located on chromosome 14q31. TREK2 is abundantly expressed in pancreas and kidney and to a lower level in brain, testis, colon, and small intestine. In the central nervous system, TREK2 has a widespread distribution with the highest levels of expression in cerebellum, occipital lobe, putamen, and thalamus. In transfected cells, TREK2 produces rapidly activating and non-inactivating outward rectifier K(+) currents. The single-channel conductance is 100 picosiemens at +40 mV in 150 mm K(+). The currents can be strongly stimulated by polyunsaturated fatty acid such as arachidonic, docosahexaenoic, and linoleic acids and by lysophosphatidylcholine. The channel is also activated by acidification of the intracellular medium. TREK2 is blocked by application of intracellular cAMP. 
As with TREK1, TREK2 is activated by the volatile general anesthetics chloroform, halothane, and isoflurane and by the neuroprotective agent riluzole. TREK2 can be positively or negatively regulated by a variety of neurotransmitter receptors. Stimulation of the G(s)-coupled receptor 5HT4sR or the G(q)-coupled receptor mGluR1 inhibits channel activity, whereas activation of the G(i)-coupled receptor mGluR2 increases TREK2 currents. These multiple types of regulations suggest that TREK2 plays an important role as a target of neurotransmitter action.</text><annotation id="2"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="405" length="5"/><text>TREK2</text></annotation><annotation id="3"><infon key="NCBI Gene">3776</infon><infon key="type">Gene</infon><location offset="434" length="5"/><text>TREK1</text></annotation><annotation id="4"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="500" length="5"/><text>TREK2</text></annotation><annotation id="5"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="651" length="5"/><text>TREK2</text></annotation><annotation id="6"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="801" length="5"/><text>TREK2</text></annotation><annotation id="7"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="1200" length="5"/><text>TREK2</text></annotation><annotation id="8"><infon key="NCBI Gene">3776</infon><infon key="type">Gene</infon><location offset="1263" length="5"/><text>TREK1</text></annotation><annotation id="9"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="1270" length="5"/><text>TREK2</text></annotation><annotation id="10"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="1406" length="5"/><text>TREK2</text></annotation><annotation id="11"><infon key="NCBI Gene">3360</infon><infon 
key="type">Gene</infon><location offset="1539" length="6"/><text>5HT4sR</text></annotation><annotation id="12"><infon key="NCBI Gene">2911</infon><infon key="type">Gene</infon><location offset="1575" length="6"/><text>mGluR1</text></annotation><annotation id="13"><infon key="NCBI Gene">14800</infon><infon key="type">Gene</infon><location offset="1657" length="6"/><text>mGluR2</text></annotation><annotation id="14"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="1674" length="5"/><text>TREK2</text></annotation><annotation id="15"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="1739" length="5"/><text>TREK2</text></annotation></passage></document><document><id>10803599</id><passage><infon key="type">title</infon><offset>0</offset><text>Enhanced growth of MCF-7 breast cancer cells overexpressing parathyroid hormone-related peptide.</text><annotation id="0"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="60" length="35"/><text>parathyroid hormone-related peptide</text></annotation><annotation id="1"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="19" length="5"/><text>MCF-7</text></annotation></passage><passage><infon key="type">abstract</infon><offset>97</offset><text>PTH-related peptide (PTHrP) is a secreted protein produced by breast cancer cells both in vivo and in vitro. Because of its structural similarity to PTH at the amino terminus, the two proteins interact with a common cell surface receptor, the PTH/PTHrP receptor. When overproduced by tumor cells, PTHrP enters the circulation, giving rise to the common paraneoplastic syndrome of humoral hypercalcemia of malignancy. Although initially discovered in malignancies, PTHrP is now known to be produced by most cells and tissues in the body. 
It acts as an autocrine and paracrine mediator of cell proliferation and differentiation, effects which are mediated via the PTH/PTHrP receptor. Recent evidence also has shown that, directly after translation, PTHrP is able to enter the nucleus and/or nucleolus and influence cell cycle progression and apoptosis. In this study, we have either overproduced PTHrP or inhibited endogenous PTHrP production in the breast cancer cell line, MCF-7. Overexpression of PTHrP was associated with an increase in mitogenesis, whereas inhibiting endogenous PTHrP production resulted in decreased cell proliferation. The overexpressed peptide targeted to the perinuclear space. In contrast, PTHrP interaction with the cell surface PTH/PTHrP receptor resulted in decreased cell proliferation in the same cell line. This latter effect is dependent on interaction with the receptor, in that exogenously added PTHrP moieties known not to interact with the receptor had no effect on cell growth. Furthermore, neutralization of added peptide with an anti-PTHrP antiserum completely abolished the growth inhibitory effects. In contrast, this antibody has no effect on the increased proliferation rate of the MCF-7 transfectants that overexpress PTHrP, compared with control cells. The net effect of autocrine/paracrine and intracrine effects of PTHrP in MCF-7 cells overproducing the peptide is accelerated cell growth. 
These findings have critical implications regarding the role of PTHrP in breast cancer, and they suggest that controlling PTHrP production in breast cancer may be useful therapeutically.</text><annotation id="2"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="97" length="19"/><text>PTH-related peptide</text></annotation><annotation id="3"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="118" length="5"/><text>PTHrP</text></annotation><annotation id="4"><infon key="NCBI Gene">5741</infon><infon key="type">Gene</infon><location offset="246" length="3"/><text>PTH</text></annotation><annotation id="5"><infon key="NCBI Gene">5745</infon><infon key="type">Gene</infon><location offset="340" length="18"/><text>PTH/PTHrP receptor</text></annotation><annotation id="6"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="394" length="5"/><text>PTHrP</text></annotation><annotation id="7"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="561" length="5"/><text>PTHrP</text></annotation><annotation id="8"><infon key="NCBI Gene">5745</infon><infon key="type">Gene</infon><location offset="759" length="18"/><text>PTH/PTHrP receptor</text></annotation><annotation id="9"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="844" length="5"/><text>PTHrP</text></annotation><annotation id="10"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="991" length="5"/><text>PTHrP</text></annotation><annotation id="11"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="1021" length="5"/><text>PTHrP</text></annotation><annotation id="12"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="1095" length="5"/><text>PTHrP</text></annotation><annotation id="13"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="1179" 
length="5"/><text>PTHrP</text></annotation><annotation id="14"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="1312" length="5"/><text>PTHrP</text></annotation><annotation id="15"><infon key="NCBI Gene">5745</infon><infon key="type">Gene</infon><location offset="1352" length="18"/><text>PTH/PTHrP receptor</text></annotation><annotation id="16"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="1527" length="5"/><text>PTHrP</text></annotation><annotation id="17"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="1670" length="5"/><text>PTHrP</text></annotation><annotation id="18"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="1859" length="5"/><text>PTHrP</text></annotation><annotation id="19"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="1959" length="5"/><text>PTHrP</text></annotation><annotation id="20"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="2098" length="5"/><text>PTHrP</text></annotation><annotation id="21"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="2156" length="5"/><text>PTHrP</text></annotation><annotation id="22"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1968" length="5"/><text>MCF-7</text></annotation></passage></document></collection>
\ No newline at end of file
+        "BioC.dtd"><collection><source>JCoRe GNormPlus BioC Writer</source><date>Fri Feb 18 13:55:36 CET 2022</date><key>PubTator.key</key><document><id>10880510</id><passage><infon key="type">title</infon><offset>0</offset><text>Human TREK2, a 2P domain mechano-sensitive K+ channel with multiple regulations by polyunsaturated fatty acids, lysophospholipids, and Gs, Gi, and Gq protein-coupled receptors.</text><annotation id="0"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="6" length="5"/><text>TREK2</text></annotation><annotation id="1"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="0" length="5"/><text>Human</text></annotation></passage><passage><infon key="type">abstract</infon><offset>177</offset><text>Mechano-sensitive and fatty acid-activated K(+) belong to the structural class of K(+) channel with two pore domains. Here, we report the isolation and the characterization of a novel member of this family. This channel, called TREK2, is closely related to TREK1 (78% of homology). Its gene is located on chromosome 14q31. TREK2 is abundantly expressed in pancreas and kidney and to a lower level in brain, testis, colon, and small intestine. In the central nervous system, TREK2 has a widespread distribution with the highest levels of expression in cerebellum, occipital lobe, putamen, and thalamus. In transfected cells, TREK2 produces rapidly activating and non-inactivating outward rectifier K(+) currents. The single-channel conductance is 100 picosiemens at +40 mV in 150 mm K(+). The currents can be strongly stimulated by polyunsaturated fatty acid such as arachidonic, docosahexaenoic, and linoleic acids and by lysophosphatidylcholine. The channel is also activated by acidification of the intracellular medium. TREK2 is blocked by application of intracellular cAMP. 
As with TREK1, TREK2 is activated by the volatile general anesthetics chloroform, halothane, and isoflurane and by the neuroprotective agent riluzole. TREK2 can be positively or negatively regulated by a variety of neurotransmitter receptors. Stimulation of the G(s)-coupled receptor 5HT4sR or the G(q)-coupled receptor mGluR1 inhibits channel activity, whereas activation of the G(i)-coupled receptor mGluR2 increases TREK2 currents. These multiple types of regulations suggest that TREK2 plays an important role as a target of neurotransmitter action.</text><annotation id="2"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="405" length="5"/><text>TREK2</text></annotation><annotation id="3"><infon key="NCBI Gene">3776</infon><infon key="type">Gene</infon><location offset="434" length="5"/><text>TREK1</text></annotation><annotation id="4"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="500" length="5"/><text>TREK2</text></annotation><annotation id="5"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="651" length="5"/><text>TREK2</text></annotation><annotation id="6"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="801" length="5"/><text>TREK2</text></annotation><annotation id="7"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="1200" length="5"/><text>TREK2</text></annotation><annotation id="8"><infon key="NCBI Gene">3776</infon><infon key="type">Gene</infon><location offset="1263" length="5"/><text>TREK1</text></annotation><annotation id="9"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="1270" length="5"/><text>TREK2</text></annotation><annotation id="10"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="1406" length="5"/><text>TREK2</text></annotation><annotation id="11"><infon key="NCBI Gene">3360</infon><infon 
key="type">Gene</infon><location offset="1539" length="6"/><text>5HT4sR</text></annotation><annotation id="12"><infon key="NCBI Gene">2911</infon><infon key="type">Gene</infon><location offset="1575" length="6"/><text>mGluR1</text></annotation><annotation id="13"><infon key="NCBI Gene">14800</infon><infon key="type">Gene</infon><location offset="1657" length="6"/><text>mGluR2</text></annotation><annotation id="14"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="1674" length="5"/><text>TREK2</text></annotation><annotation id="15"><infon key="NCBI Gene">54207</infon><infon key="type">Gene</infon><location offset="1739" length="5"/><text>TREK2</text></annotation></passage></document><document><id>10803599</id><passage><infon key="type">title</infon><offset>0</offset><text>Enhanced growth of MCF-7 breast cancer cells overexpressing parathyroid hormone-related peptide.</text><annotation id="0"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="60" length="35"/><text>parathyroid hormone-related peptide</text></annotation><annotation id="1"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="19" length="5"/><text>MCF-7</text></annotation></passage><passage><infon key="type">abstract</infon><offset>97</offset><text>PTH-related peptide (PTHrP) is a secreted protein produced by breast cancer cells both in vivo and in vitro. Because of its structural similarity to PTH at the amino terminus, the two proteins interact with a common cell surface receptor, the PTH/PTHrP receptor. When overproduced by tumor cells, PTHrP enters the circulation, giving rise to the common paraneoplastic syndrome of humoral hypercalcemia of malignancy. Although initially discovered in malignancies, PTHrP is now known to be produced by most cells and tissues in the body. 
It acts as an autocrine and paracrine mediator of cell proliferation and differentiation, effects which are mediated via the PTH/PTHrP receptor. Recent evidence also has shown that, directly after translation, PTHrP is able to enter the nucleus and/or nucleolus and influence cell cycle progression and apoptosis. In this study, we have either overproduced PTHrP or inhibited endogenous PTHrP production in the breast cancer cell line, MCF-7. Overexpression of PTHrP was associated with an increase in mitogenesis, whereas inhibiting endogenous PTHrP production resulted in decreased cell proliferation. The overexpressed peptide targeted to the perinuclear space. In contrast, PTHrP interaction with the cell surface PTH/PTHrP receptor resulted in decreased cell proliferation in the same cell line. This latter effect is dependent on interaction with the receptor, in that exogenously added PTHrP moieties known not to interact with the receptor had no effect on cell growth. Furthermore, neutralization of added peptide with an anti-PTHrP antiserum completely abolished the growth inhibitory effects. In contrast, this antibody has no effect on the increased proliferation rate of the MCF-7 transfectants that overexpress PTHrP, compared with control cells. The net effect of autocrine/paracrine and intracrine effects of PTHrP in MCF-7 cells overproducing the peptide is accelerated cell growth. 
These findings have critical implications regarding the role of PTHrP in breast cancer, and they suggest that controlling PTHrP production in breast cancer may be useful therapeutically.</text><annotation id="2"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="97" length="19"/><text>PTH-related peptide</text></annotation><annotation id="3"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="118" length="5"/><text>PTHrP</text></annotation><annotation id="4"><infon key="NCBI Gene">5741</infon><infon key="type">Gene</infon><location offset="246" length="3"/><text>PTH</text></annotation><annotation id="5"><infon key="NCBI Gene">5745</infon><infon key="type">Gene</infon><location offset="340" length="18"/><text>PTH/PTHrP receptor</text></annotation><annotation id="6"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="394" length="5"/><text>PTHrP</text></annotation><annotation id="7"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="561" length="5"/><text>PTHrP</text></annotation><annotation id="8"><infon key="NCBI Gene">5745</infon><infon key="type">Gene</infon><location offset="759" length="18"/><text>PTH/PTHrP receptor</text></annotation><annotation id="9"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="844" length="5"/><text>PTHrP</text></annotation><annotation id="10"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="991" length="5"/><text>PTHrP</text></annotation><annotation id="11"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="1021" length="5"/><text>PTHrP</text></annotation><annotation id="12"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="1095" length="5"/><text>PTHrP</text></annotation><annotation id="13"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="1179" 
length="5"/><text>PTHrP</text></annotation><annotation id="14"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="1312" length="5"/><text>PTHrP</text></annotation><annotation id="15"><infon key="NCBI Gene">5745</infon><infon key="type">Gene</infon><location offset="1352" length="18"/><text>PTH/PTHrP receptor</text></annotation><annotation id="16"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="1527" length="5"/><text>PTHrP</text></annotation><annotation id="17"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="1670" length="5"/><text>PTHrP</text></annotation><annotation id="18"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="1859" length="5"/><text>PTHrP</text></annotation><annotation id="19"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="1959" length="5"/><text>PTHrP</text></annotation><annotation id="20"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="2098" length="5"/><text>PTHrP</text></annotation><annotation id="21"><infon key="NCBI Gene">5744</infon><infon key="type">Gene</infon><location offset="2156" length="5"/><text>PTHrP</text></annotation><annotation id="22"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1968" length="5"/><text>MCF-7</text></annotation></passage></document></collection>
\ No newline at end of file
diff --git a/jcore-gnp-bioc-reader/src/test/resources/test-input-path/subdir2/bioc_collection_2.xml b/jcore-gnp-bioc-reader/src/test/resources/test-input-path/subdir2/bioc_collection_2.xml
index 2ed9fa4f7..dc8927c84 100644
--- a/jcore-gnp-bioc-reader/src/test/resources/test-input-path/subdir2/bioc_collection_2.xml
+++ b/jcore-gnp-bioc-reader/src/test/resources/test-input-path/subdir2/bioc_collection_2.xml
@@ -1,2 +1,2 @@
 <?xml version='1.0' encoding='UTF-8'?><!DOCTYPE collection SYSTEM
-        "/Users/faessler/Coding/git/jcore-base/jcore-gnp-bioc-reader/BioC.dtd"><collection><source>JCoRe GNormPlus BioC Writer</source><date>Fri Feb 18 13:55:36 CET 2022</date><key>PubTator.key</key><document><id>10722742</id><passage><infon key="type">title</infon><offset>0</offset><text>Mdm2 is a RING finger-dependent ubiquitin protein ligase for itself and p53.</text><annotation id="0"><infon key="NCBI Gene">4193</infon><infon key="type">Gene</infon><location offset="0" length="4"/><text>Mdm2</text></annotation><annotation id="1"><infon key="NCBI Gene">7157</infon><infon key="type">Gene</infon><location offset="72" length="3"/><text>p53</text></annotation></passage><passage><infon key="type">abstract</infon><offset>77</offset><text>Mdm2 has been shown to regulate p53 stability by targeting the p53 protein for proteasomal degradation. We now report that Mdm2 is a ubiquitin protein ligase (E3) for p53 and that its activity is dependent on its RING finger. Furthermore, we show that Mdm2 mediates its own ubiquitination in a RING finger-dependent manner, which requires no eukaryotic proteins other than ubiquitin-activating enzyme (E1) and an ubiquitin-conjugating enzyme (E2). It is apparent, therefore, that Mdm2 manifests an intrinsic capacity to mediate ubiquitination. Mutation of putative zinc coordination residues abrogated this activity, as did chelation of divalent cations. After cation chelation, the full activity could be restored by addition of zinc. We further demonstrate that the degradation of p53 and Mdm2 in cells requires additional potential zinc-coordinating residues beyond those required for the intrinsic activity of Mdm2 in vitro. Replacement of the Mdm2 RING with that of another protein (Praja1) reconstituted ubiquitination and proteasomal degradation of Mdm2. 
However, this RING was ineffective in ubiquitination and proteasomal targeting of p53, suggesting that there may be specificity at the level of the RING in the recognition of heterologous substrates.</text><annotation id="2"><infon key="NCBI Gene">4193</infon><infon key="type">Gene</infon><location offset="77" length="4"/><text>Mdm2</text></annotation><annotation id="3"><infon key="NCBI Gene">7157</infon><infon key="type">Gene</infon><location offset="109" length="3"/><text>p53</text></annotation><annotation id="4"><infon key="NCBI Gene">7157</infon><infon key="type">Gene</infon><location offset="140" length="3"/><text>p53</text></annotation><annotation id="5"><infon key="NCBI Gene">4193</infon><infon key="type">Gene</infon><location offset="200" length="4"/><text>Mdm2</text></annotation><annotation id="6"><infon key="NCBI Gene">7157</infon><infon key="type">Gene</infon><location offset="244" length="3"/><text>p53</text></annotation><annotation id="7"><infon key="NCBI Gene">4193</infon><infon key="type">Gene</infon><location offset="329" length="4"/><text>Mdm2</text></annotation><annotation id="8"><infon key="NCBI Gene">7318</infon><infon key="type">Gene</infon><location offset="450" length="32"/><text>ubiquitin-activating enzyme (E1)</text></annotation><annotation id="9"><infon key="NCBI Gene">4193</infon><infon key="type">Gene</infon><location offset="557" length="4"/><text>Mdm2</text></annotation><annotation id="10"><infon key="NCBI Gene">7157</infon><infon key="type">Gene</infon><location offset="860" length="3"/><text>p53</text></annotation><annotation id="11"><infon key="NCBI Gene">4193</infon><infon key="type">Gene</infon><location offset="868" length="4"/><text>Mdm2</text></annotation><annotation id="12"><infon key="NCBI Gene">4193</infon><infon key="type">Gene</infon><location offset="991" length="4"/><text>Mdm2</text></annotation><annotation id="13"><infon key="NCBI Gene">4193</infon><infon key="type">Gene</infon><location offset="1025" 
length="4"/><text>Mdm2</text></annotation><annotation id="14"><infon key="NCBI Gene">64219</infon><infon key="type">Gene</infon><location offset="1065" length="6"/><text>Praja1</text></annotation><annotation id="15"><infon key="NCBI Gene">4193</infon><infon key="type">Gene</infon><location offset="1133" length="4"/><text>Mdm2</text></annotation><annotation id="16"><infon key="NCBI Gene">7157</infon><infon key="type">Gene</infon><location offset="1221" length="3"/><text>p53</text></annotation></passage></document><document><id>1770008</id><passage><infon key="type">title</infon><offset>0</offset><text>Structural analysis and expression of human desmoglein: a cadherin-like component of the desmosome.</text><annotation id="0"><infon key="NCBI Gene">1828;281131</infon><infon key="type">Gene</infon><location offset="44" length="10"/><text>desmoglein</text></annotation><annotation id="1"><infon key="NCBI Gene">1000</infon><infon key="type">Gene</infon><location offset="58" length="8"/><text>cadherin</text></annotation><annotation id="2"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="38" length="5"/><text>human</text></annotation></passage><passage><infon key="type">abstract</infon><offset>100</offset><text>Desmosomes are adhesive cell junctions found in great abundance in tissues that experience mechanical stress. The transmembrane desmosomal glycoproteins have been proposed to play a role in cell adhesion; desmoglein I (DGI) is a major member of this class of desmosomal molecules. However, evidence supporting a role for DGI in cell adhesion or in the plaque is lacking. In order to begin to understand DGI function we have identified human cDNA clones encoding the entire mature polypeptide of 1000 amino acids. Our data suggest that like the bovine DGI molecule human DGI is highly related to the calcium-dependent class of cell adhesion molecules known as cadherins. 
Four related extracellular domains located in the amino-terminal domain of the molecule contain putative calcium binding sites originally identified in the cadherins. The highest degree of similarity between human N-cadherin and human DGI, and likewise between bovine DGI and human DGI, is greatest in the most amino-terminal extracellular domain. This suggests a conserved functional role for the extracellular domains, perhaps in calcium-mediated cell adhesion. The cytoplasmic portion of the molecule contains a cadherin-like region and, like bovine DGI, a carboxy-terminal tail that is not present in the cadherins, comprising three additional domains. One of these contains a novel repeating motif of 29 +/- 1 residues, first identified in bovine DGI. Each of the highly homologous repeating units is likely to consist of two beta-strands and two turns with special characteristics. Five amino acids that are identical in bovine and human DGI lie in the second of the two predicted beta-strands, and intriguingly contain putative target sites for protein kinase C. On the basis of structural analysis, a model predicting the disposition of human DGI domains in the desmosome is proposed. Northern analysis suggests that unlike bovine epidermis, which expresses a single mRNA of reported size approximately 7.6 kb, human foreskin and cultured keratinocytes display a complex pattern with bands of approximately 7.2, 4.0 and 3.0 kb. 
Each of these cross-hybridizing mRNAs is coordinately expressed in normal human keratinocytes in response to long-term culture and increased calcium.</text><annotation id="3"><infon key="NCBI Gene">1828</infon><infon key="type">Gene</infon><location offset="305" length="12"/><text>desmoglein I</text></annotation><annotation id="4"><infon key="NCBI Gene">1828</infon><infon key="type">Gene</infon><location offset="319" length="3"/><text>DGI</text></annotation><annotation id="5"><infon key="NCBI Gene">1828</infon><infon key="type">Gene</infon><location offset="421" length="3"/><text>DGI</text></annotation><annotation id="6"><infon key="NCBI Gene">1828</infon><infon key="type">Gene</infon><location offset="503" length="3"/><text>DGI</text></annotation><annotation id="7"><infon key="NCBI Gene">281131</infon><infon key="type">Gene</infon><location offset="651" length="3"/><text>DGI</text></annotation><annotation id="8"><infon key="NCBI Gene">1828</infon><infon key="type">Gene</infon><location offset="670" length="3"/><text>DGI</text></annotation><annotation id="9"><infon key="NCBI Gene">1000</infon><infon key="type">Gene</infon><location offset="984" length="10"/><text>N-cadherin</text></annotation><annotation id="10"><infon key="NCBI Gene">1828</infon><infon key="type">Gene</infon><location offset="1005" length="3"/><text>DGI</text></annotation><annotation id="11"><infon key="NCBI Gene">281131</infon><infon key="type">Gene</infon><location offset="1038" length="3"/><text>DGI</text></annotation><annotation id="12"><infon key="NCBI Gene">1828</infon><infon key="type">Gene</infon><location offset="1052" length="3"/><text>DGI</text></annotation><annotation id="13"><infon key="NCBI Gene">281131</infon><infon key="type">Gene</infon><location offset="1323" length="3"/><text>DGI</text></annotation><annotation id="14"><infon key="NCBI Gene">281131</infon><infon key="type">Gene</infon><location offset="1522" length="3"/><text>DGI</text></annotation><annotation id="15"><infon 
key="NCBI Gene">1828</infon><infon key="type">Gene</infon><location offset="1714" length="3"/><text>DGI</text></annotation><annotation id="16"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="535" length="5"/><text>human</text></annotation><annotation id="17"><infon key="NCBI Taxonomy">9913</infon><infon key="type">Species</infon><location offset="644" length="6"/><text>bovine</text></annotation><annotation id="18"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="664" length="5"/><text>human</text></annotation><annotation id="19"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="978" length="5"/><text>human</text></annotation><annotation id="20"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="999" length="5"/><text>human</text></annotation><annotation id="21"><infon key="NCBI Taxonomy">9913</infon><infon key="type">Species</infon><location offset="1031" length="6"/><text>bovine</text></annotation><annotation id="22"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1046" length="5"/><text>human</text></annotation><annotation id="23"><infon key="NCBI Taxonomy">9913</infon><infon key="type">Species</infon><location offset="1316" length="6"/><text>bovine</text></annotation><annotation id="24"><infon key="NCBI Taxonomy">9913</infon><infon key="type">Species</infon><location offset="1515" length="6"/><text>bovine</text></annotation><annotation id="25"><infon key="NCBI Taxonomy">9913</infon><infon key="type">Species</infon><location offset="1697" length="6"/><text>bovine</text></annotation><annotation id="26"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1708" length="5"/><text>human</text></annotation><annotation id="27"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1915" 
length="5"/><text>human</text></annotation><annotation id="28"><infon key="NCBI Taxonomy">9913</infon><infon key="type">Species</infon><location offset="2002" length="6"/><text>bovine</text></annotation><annotation id="29"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="2089" length="5"/><text>human</text></annotation><annotation id="30"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="2280" length="5"/><text>human</text></annotation><annotation id="31"><infon key="NCBI Gene">1828</infon><infon key="type">Gene</infon><location offset="1921" length="3"/><text>DGI</text></annotation></passage></document></collection>
\ No newline at end of file
+        "BioC.dtd"><collection><source>JCoRe GNormPlus BioC Writer</source><date>Fri Feb 18 13:55:36 CET 2022</date><key>PubTator.key</key><document><id>10722742</id><passage><infon key="type">title</infon><offset>0</offset><text>Mdm2 is a RING finger-dependent ubiquitin protein ligase for itself and p53.</text><annotation id="0"><infon key="NCBI Gene">4193</infon><infon key="type">Gene</infon><location offset="0" length="4"/><text>Mdm2</text></annotation><annotation id="1"><infon key="NCBI Gene">7157</infon><infon key="type">Gene</infon><location offset="72" length="3"/><text>p53</text></annotation></passage><passage><infon key="type">abstract</infon><offset>77</offset><text>Mdm2 has been shown to regulate p53 stability by targeting the p53 protein for proteasomal degradation. We now report that Mdm2 is a ubiquitin protein ligase (E3) for p53 and that its activity is dependent on its RING finger. Furthermore, we show that Mdm2 mediates its own ubiquitination in a RING finger-dependent manner, which requires no eukaryotic proteins other than ubiquitin-activating enzyme (E1) and an ubiquitin-conjugating enzyme (E2). It is apparent, therefore, that Mdm2 manifests an intrinsic capacity to mediate ubiquitination. Mutation of putative zinc coordination residues abrogated this activity, as did chelation of divalent cations. After cation chelation, the full activity could be restored by addition of zinc. We further demonstrate that the degradation of p53 and Mdm2 in cells requires additional potential zinc-coordinating residues beyond those required for the intrinsic activity of Mdm2 in vitro. Replacement of the Mdm2 RING with that of another protein (Praja1) reconstituted ubiquitination and proteasomal degradation of Mdm2. 
However, this RING was ineffective in ubiquitination and proteasomal targeting of p53, suggesting that there may be specificity at the level of the RING in the recognition of heterologous substrates.</text><annotation id="2"><infon key="NCBI Gene">4193</infon><infon key="type">Gene</infon><location offset="77" length="4"/><text>Mdm2</text></annotation><annotation id="3"><infon key="NCBI Gene">7157</infon><infon key="type">Gene</infon><location offset="109" length="3"/><text>p53</text></annotation><annotation id="4"><infon key="NCBI Gene">7157</infon><infon key="type">Gene</infon><location offset="140" length="3"/><text>p53</text></annotation><annotation id="5"><infon key="NCBI Gene">4193</infon><infon key="type">Gene</infon><location offset="200" length="4"/><text>Mdm2</text></annotation><annotation id="6"><infon key="NCBI Gene">7157</infon><infon key="type">Gene</infon><location offset="244" length="3"/><text>p53</text></annotation><annotation id="7"><infon key="NCBI Gene">4193</infon><infon key="type">Gene</infon><location offset="329" length="4"/><text>Mdm2</text></annotation><annotation id="8"><infon key="NCBI Gene">7318</infon><infon key="type">Gene</infon><location offset="450" length="32"/><text>ubiquitin-activating enzyme (E1)</text></annotation><annotation id="9"><infon key="NCBI Gene">4193</infon><infon key="type">Gene</infon><location offset="557" length="4"/><text>Mdm2</text></annotation><annotation id="10"><infon key="NCBI Gene">7157</infon><infon key="type">Gene</infon><location offset="860" length="3"/><text>p53</text></annotation><annotation id="11"><infon key="NCBI Gene">4193</infon><infon key="type">Gene</infon><location offset="868" length="4"/><text>Mdm2</text></annotation><annotation id="12"><infon key="NCBI Gene">4193</infon><infon key="type">Gene</infon><location offset="991" length="4"/><text>Mdm2</text></annotation><annotation id="13"><infon key="NCBI Gene">4193</infon><infon key="type">Gene</infon><location offset="1025" 
length="4"/><text>Mdm2</text></annotation><annotation id="14"><infon key="NCBI Gene">64219</infon><infon key="type">Gene</infon><location offset="1065" length="6"/><text>Praja1</text></annotation><annotation id="15"><infon key="NCBI Gene">4193</infon><infon key="type">Gene</infon><location offset="1133" length="4"/><text>Mdm2</text></annotation><annotation id="16"><infon key="NCBI Gene">7157</infon><infon key="type">Gene</infon><location offset="1221" length="3"/><text>p53</text></annotation></passage></document><document><id>1770008</id><passage><infon key="type">title</infon><offset>0</offset><text>Structural analysis and expression of human desmoglein: a cadherin-like component of the desmosome.</text><annotation id="0"><infon key="NCBI Gene">1828;281131</infon><infon key="type">Gene</infon><location offset="44" length="10"/><text>desmoglein</text></annotation><annotation id="1"><infon key="NCBI Gene">1000</infon><infon key="type">Gene</infon><location offset="58" length="8"/><text>cadherin</text></annotation><annotation id="2"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="38" length="5"/><text>human</text></annotation></passage><passage><infon key="type">abstract</infon><offset>100</offset><text>Desmosomes are adhesive cell junctions found in great abundance in tissues that experience mechanical stress. The transmembrane desmosomal glycoproteins have been proposed to play a role in cell adhesion; desmoglein I (DGI) is a major member of this class of desmosomal molecules. However, evidence supporting a role for DGI in cell adhesion or in the plaque is lacking. In order to begin to understand DGI function we have identified human cDNA clones encoding the entire mature polypeptide of 1000 amino acids. Our data suggest that like the bovine DGI molecule human DGI is highly related to the calcium-dependent class of cell adhesion molecules known as cadherins. 
Four related extracellular domains located in the amino-terminal domain of the molecule contain putative calcium binding sites originally identified in the cadherins. The highest degree of similarity between human N-cadherin and human DGI, and likewise between bovine DGI and human DGI, is greatest in the most amino-terminal extracellular domain. This suggests a conserved functional role for the extracellular domains, perhaps in calcium-mediated cell adhesion. The cytoplasmic portion of the molecule contains a cadherin-like region and, like bovine DGI, a carboxy-terminal tail that is not present in the cadherins, comprising three additional domains. One of these contains a novel repeating motif of 29 +/- 1 residues, first identified in bovine DGI. Each of the highly homologous repeating units is likely to consist of two beta-strands and two turns with special characteristics. Five amino acids that are identical in bovine and human DGI lie in the second of the two predicted beta-strands, and intriguingly contain putative target sites for protein kinase C. On the basis of structural analysis, a model predicting the disposition of human DGI domains in the desmosome is proposed. Northern analysis suggests that unlike bovine epidermis, which expresses a single mRNA of reported size approximately 7.6 kb, human foreskin and cultured keratinocytes display a complex pattern with bands of approximately 7.2, 4.0 and 3.0 kb. 
Each of these cross-hybridizing mRNAs is coordinately expressed in normal human keratinocytes in response to long-term culture and increased calcium.</text><annotation id="3"><infon key="NCBI Gene">1828</infon><infon key="type">Gene</infon><location offset="305" length="12"/><text>desmoglein I</text></annotation><annotation id="4"><infon key="NCBI Gene">1828</infon><infon key="type">Gene</infon><location offset="319" length="3"/><text>DGI</text></annotation><annotation id="5"><infon key="NCBI Gene">1828</infon><infon key="type">Gene</infon><location offset="421" length="3"/><text>DGI</text></annotation><annotation id="6"><infon key="NCBI Gene">1828</infon><infon key="type">Gene</infon><location offset="503" length="3"/><text>DGI</text></annotation><annotation id="7"><infon key="NCBI Gene">281131</infon><infon key="type">Gene</infon><location offset="651" length="3"/><text>DGI</text></annotation><annotation id="8"><infon key="NCBI Gene">1828</infon><infon key="type">Gene</infon><location offset="670" length="3"/><text>DGI</text></annotation><annotation id="9"><infon key="NCBI Gene">1000</infon><infon key="type">Gene</infon><location offset="984" length="10"/><text>N-cadherin</text></annotation><annotation id="10"><infon key="NCBI Gene">1828</infon><infon key="type">Gene</infon><location offset="1005" length="3"/><text>DGI</text></annotation><annotation id="11"><infon key="NCBI Gene">281131</infon><infon key="type">Gene</infon><location offset="1038" length="3"/><text>DGI</text></annotation><annotation id="12"><infon key="NCBI Gene">1828</infon><infon key="type">Gene</infon><location offset="1052" length="3"/><text>DGI</text></annotation><annotation id="13"><infon key="NCBI Gene">281131</infon><infon key="type">Gene</infon><location offset="1323" length="3"/><text>DGI</text></annotation><annotation id="14"><infon key="NCBI Gene">281131</infon><infon key="type">Gene</infon><location offset="1522" length="3"/><text>DGI</text></annotation><annotation id="15"><infon 
key="NCBI Gene">1828</infon><infon key="type">Gene</infon><location offset="1714" length="3"/><text>DGI</text></annotation><annotation id="16"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="535" length="5"/><text>human</text></annotation><annotation id="17"><infon key="NCBI Taxonomy">9913</infon><infon key="type">Species</infon><location offset="644" length="6"/><text>bovine</text></annotation><annotation id="18"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="664" length="5"/><text>human</text></annotation><annotation id="19"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="978" length="5"/><text>human</text></annotation><annotation id="20"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="999" length="5"/><text>human</text></annotation><annotation id="21"><infon key="NCBI Taxonomy">9913</infon><infon key="type">Species</infon><location offset="1031" length="6"/><text>bovine</text></annotation><annotation id="22"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1046" length="5"/><text>human</text></annotation><annotation id="23"><infon key="NCBI Taxonomy">9913</infon><infon key="type">Species</infon><location offset="1316" length="6"/><text>bovine</text></annotation><annotation id="24"><infon key="NCBI Taxonomy">9913</infon><infon key="type">Species</infon><location offset="1515" length="6"/><text>bovine</text></annotation><annotation id="25"><infon key="NCBI Taxonomy">9913</infon><infon key="type">Species</infon><location offset="1697" length="6"/><text>bovine</text></annotation><annotation id="26"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1708" length="5"/><text>human</text></annotation><annotation id="27"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="1915" 
length="5"/><text>human</text></annotation><annotation id="28"><infon key="NCBI Taxonomy">9913</infon><infon key="type">Species</infon><location offset="2002" length="6"/><text>bovine</text></annotation><annotation id="29"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="2089" length="5"/><text>human</text></annotation><annotation id="30"><infon key="NCBI Taxonomy">9606</infon><infon key="type">Species</infon><location offset="2280" length="5"/><text>human</text></annotation><annotation id="31"><infon key="NCBI Gene">1828</infon><infon key="type">Gene</infon><location offset="1921" length="3"/><text>DGI</text></annotation></passage></document></collection>
\ No newline at end of file

From 2afeed5a57d247e6022e97c9be30eb6ba6252508 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 18 Feb 2022 17:17:20 +0100
Subject: [PATCH 152/269] Adapt the BioC Writer test to abstract section
 headings being omitted from the text again.

---
 .../julielab/jcore/consumer/gnp/BioCDocumentPopulatorTest.java  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulatorTest.java b/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulatorTest.java
index 9f085bc0b..55601393a 100644
--- a/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulatorTest.java
+++ b/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulatorTest.java
@@ -28,7 +28,7 @@ public void populate() throws Exception {
         assertThat(resultXml).containsOnlyOnce("<text>This is the title of document 1.</text>");
         assertThat(resultXml).containsOnlyOnce("<infon key=\"type\">title</infon>");
         // The abstract should be one single string
-        assertThat(resultXml).containsOnlyOnce("<text>BACKGROUND: This abstract section belongs to document 1. RESULTS: There are certainly some results reported by document 1.</text>");
+        assertThat(resultXml).containsOnlyOnce("<text>This abstract section belongs to document 1. There are certainly some results reported by document 1.</text>");
         assertThat(resultXml).containsOnlyOnce("INTRODUCTION");
         assertThat(resultXml).containsOnlyOnce("<infon key=\"type\">section_title</infon>");
         assertThat(resultXml).contains("<infon key=\"type\">paragraph</infon>");

From 84c42c5bc94646033688a9283829c5b3daf5db9c Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 2 Mar 2022 14:41:06 +0100
Subject: [PATCH 153/269] Avoid writing empty BioC documents.

GNormPlus doesn't handle this but expects a) that there are documents in every collection and b) that each document has at least one passage.
---
 .../consumer/gnp/BioCCollectionWriter.java    |  6 ++-
 .../consumer/gnp/BioCDocumentPopulator.java   | 26 ++++++++--
 .../consumer/gnp/GNormPlusFormatWriter.java   |  5 +-
 .../gnp/GNormPlusFormatWriterTest.java        | 49 +++++++++++++++++--
 4 files changed, 75 insertions(+), 11 deletions(-)

diff --git a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCCollectionWriter.java b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCCollectionWriter.java
index df5b12587..9d16ba23f 100644
--- a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCCollectionWriter.java
+++ b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCCollectionWriter.java
@@ -19,6 +19,7 @@ public class BioCCollectionWriter {
     private Path baseDir;
     private Path currentDir;
     private int numWrittenIntoCurrentDir;
+    private int currentDirNum;
 
     public BioCCollectionWriter(int numFilesPerDir, Path baseDir) {
         this.numFilesPerDir = numFilesPerDir;
@@ -32,12 +33,13 @@ public void writeBioCCollection(BioCCollection collection) throws XMLStreamExcep
             if (currentDir == null) {
                 int i = 0;
                 do {
-                    currentDir = Path.of(baseDir.toString(), "bioc_collections_" + i++);
+                    currentDirNum = i++;
+                    currentDir = Path.of(baseDir.toString(), "bioc_collections_" + currentDirNum);
                 } while (Files.exists(currentDir));
             }
             int i = 0;
             do {
-                collectionFile = Path.of(currentDir.toString(), "bioc_collection_" + i++ + ".xml");
+                collectionFile = Path.of(currentDir.toString(), "bioc_collection_" + currentDirNum + "_" + i++ + ".xml");
             } while (Files.exists(collectionFile));
             if (!Files.exists(collectionFile.getParent())) {
                 log.debug("Creating base BioC collection directory {}", baseDir);
diff --git a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
index 488f42613..1a2182bed 100644
--- a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
+++ b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
@@ -39,14 +39,20 @@ public BioCDocument populate(JCas jCas) {
                     case "table":
                         titleType = "table_title";
                         break;
+                    case "abstractSection":
+                        // abstract sections are part of the AbstractText which is handled below
+                        titleType = "null";
+                        break;
                     default:
                         log.debug("Unhandled title type {}", t.getTitleType());
                         titleType = "other_title";
                         break;
                 }
-                BioCPassage p = getPassageForAnnotation(t);
-                p.putInfon("type", titleType);
-                doc.addPassage(p);
+                if (titleType != null) {
+                    BioCPassage p = getPassageForAnnotation(t);
+                    p.putInfon("type", titleType);
+                    doc.addPassage(p);
+                }
             } else if (z instanceof AbstractText) {
                 AbstractText at = (AbstractText) z;
                 BioCPassage p = getPassageForAnnotation(at);
@@ -69,6 +75,18 @@ public BioCDocument populate(JCas jCas) {
         return doc;
     }
 
+//    private BioCPassage getPassageForAbstract(AbstractText at) {
+//        FSArray structuredAbstractParts = at.getStructuredAbstractParts();
+//        boolean foundAbstractParts = false;
+//        if (structuredAbstractParts != null) {
+//            for (int i = 0; i < structuredAbstractParts.size(); ++i) {
+//                AbstractSection as = (AbstractSection) structuredAbstractParts.get(i);
+//
+//            }
+//        }
+//        return null;
+//    }
+
     /**
      * Creates a BioCPassage with offset and text corresponding to the passed annotation <tt>a</tt>.
      *
@@ -78,6 +96,8 @@ public BioCDocument populate(JCas jCas) {
     private BioCPassage getPassageForAnnotation(Annotation a) {
         BioCPassage p = new BioCPassage();
         p.setOffset(a.getBegin());
+        // GNormPlus doesn't seem to handle newlines well. It resulted in missing annotations when testing if the
+        // output format is handled well by GNormPlus.
         p.setText(a.getCoveredText().replaceAll("\n", " "));
         return p;
     }
diff --git a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriter.java b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriter.java
index 24f016a69..002407a0e 100644
--- a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriter.java
+++ b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriter.java
@@ -59,7 +59,8 @@ public void initialize(final UimaContext aContext) {
     public void process(final JCas jCas) throws AnalysisEngineProcessException {
         try {
             BioCDocument doc = bioCDocumentPopulator.populate(jCas);
-            currentCollection.addDocument(doc);
+            if (doc.getPassageCount() > 0)
+                currentCollection.addDocument(doc);
             if (currentCollection.getDocmentCount() >= numDocsPerFile) {
                 bioCCollectionWriter.writeBioCCollection(currentCollection);
                 currentCollection.clearDocuments();
@@ -75,7 +76,7 @@ public void process(final JCas jCas) throws AnalysisEngineProcessException {
     public void collectionProcessComplete() throws AnalysisEngineProcessException {
         super.collectionProcessComplete();
         try {
-            if (currentCollection.getDocmentCount() != 0)
+//            if (currentCollection.getDocmentCount() != 0)
                 bioCCollectionWriter.writeBioCCollection(currentCollection);
         } catch (Exception e) {
             log.error("Could not write final batch of BioCDocuments.", e);
diff --git a/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriterTest.java b/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriterTest.java
index 16a3ec233..d2d9d0f40 100644
--- a/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriterTest.java
+++ b/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriterTest.java
@@ -3,6 +3,8 @@
 
 import com.pengyifan.bioc.BioCCollection;
 import com.pengyifan.bioc.io.BioCCollectionReader;
+import de.julielab.jcore.types.Title;
+import de.julielab.jcore.types.pubmed.Header;
 import org.apache.commons.io.FileUtils;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
@@ -15,6 +17,7 @@
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.List;
+import java.util.stream.Collectors;
 
 import static org.assertj.core.api.Assertions.assertThat;
 
@@ -47,7 +50,7 @@ public void process1() throws Exception {
         writer.process(jCas);
         writer.collectionProcessComplete();
 
-        assertThat(Path.of(BASEDIR.toString(), "bioc_collections_0", "bioc_collection_0.xml")).exists().isNotEmptyFile();
+        assertThat(Path.of(BASEDIR.toString(), "bioc_collections_0", "bioc_collection_0_0.xml")).exists().isNotEmptyFile();
     }
 
     @Test
@@ -66,17 +69,55 @@ public void process2() throws Exception {
         for (int i : List.of(0, 1, 2)) {
             List<Integer> fileIndices = i < 2 ? List.of(0, 1, 2) : List.of(0,1);
             for (int j : fileIndices) {
-                assertThat(Path.of(BASEDIR.toString(), "bioc_collections_"+i, "bioc_collection_"+j+".xml")).exists().isNotEmptyFile();
+                assertThat(Path.of(BASEDIR.toString(), "bioc_collections_"+i, "bioc_collection_"+i+"_"+j+".xml")).exists().isNotEmptyFile();
             }
         }
         // there should only be two files in the last directory
-        assertThat(Path.of(BASEDIR.toString(), "bioc_collections_2", "bioc_collection_2.xml")).doesNotExist();
+        assertThat(Path.of(BASEDIR.toString(), "bioc_collections_2", "bioc_collection_2_2.xml")).doesNotExist();
 
         // the last file should only contain a single document
-        BioCCollectionReader reader = new BioCCollectionReader(Path.of(BASEDIR.toString(), "bioc_collections_2", "bioc_collection_1.xml"));
+        BioCCollectionReader reader = new BioCCollectionReader(Path.of(BASEDIR.toString(), "bioc_collections_2", "bioc_collection_2_1.xml"));
         BioCCollection lastCollection = reader.readCollection();
         assertThat(lastCollection.getDocmentCount()).isEqualTo(1);
 
     }
 
+    @Test
+    public void omitEmptyDocuments() throws Exception {
+        // GNormPlus does not cope well with documents that lack passages: array index out of bounds exceptions then appear at some later document of the same collection.
+        // Make sure we just don't write empty documents. They wouldn't have any annotations anyway.
+        JCas jCas = TestDocumentGenerator.createTestJCas();
+        Header h = new Header(jCas);
+        h.setDocId("1");
+        h.addToIndexes();
+        AnalysisEngine writer = getWriterInstance(1, 1);
+        writer.process(jCas);
+        jCas.reset();
+        jCas.setDocumentText("Hello.");
+        Header h2 = new Header(jCas);
+        h2.setDocId("2");
+        h2.addToIndexes();
+        Title title = new Title(jCas, 0, 6);
+        title.setTitleType("document");
+        title.addToIndexes();
+        writer.process(jCas);
+        writer.collectionProcessComplete();
+        // Read the collection file once and whitespace-normalized; Files.readString() closes the file again, which a dangling Files.lines() stream would not.
+        final String collectionXml = Files.readString(Path.of(BASEDIR.toString(), "bioc_collections_0", "bioc_collection_0_0.xml")).lines().map(String::trim).collect(Collectors.joining());
+        assertThat(collectionXml).doesNotContain("</id></document>").contains("<document><id>2</id><passage>");
+    }
+
+    @Test
+    public void omitEmptyDocuments2() throws Exception {
+        // In addition to not writing empty documents, we also don't want to write empty collections. This, too, causes out of bounds errors in GNormPlus.
+        JCas jCas = TestDocumentGenerator.createTestJCas();
+        Header h = new Header(jCas);
+        h.setDocId("1");
+        h.addToIndexes();
+        AnalysisEngine writer = getWriterInstance(1, 1);
+        writer.process(jCas);
+        // Assert that no collection file was written for the single, presumably passage-less document. NOTE(review): collectionProcessComplete() is not called here, so the final write at the end of processing is not covered - confirm this is intended.
+        assertThat(Path.of(BASEDIR.toString(), "bioc_collections_0", "bioc_collection_0_0.xml")).doesNotExist();
+    }
+
 }

From 15c5d5111b6e373389a13c72d9cb4f58a7b1842c Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 2 Mar 2022 14:42:14 +0100
Subject: [PATCH 154/269] Add logging message for BANNER observation.

BANNER keeps showing sporadic concurrency issues. Re-enable the debug output so it is available when they occur.
---
 .../src/main/java/banner/tagging/pipe/LemmaPOS.java           | 4 ++--
 .../java/de/julielab/jcore/ae/banner/BANNERAnnotator.java     | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/jcore-banner-ae/src/main/java/banner/tagging/pipe/LemmaPOS.java b/jcore-banner-ae/src/main/java/banner/tagging/pipe/LemmaPOS.java
index 41a0a8e5c..e5cb62761 100644
--- a/jcore-banner-ae/src/main/java/banner/tagging/pipe/LemmaPOS.java
+++ b/jcore-banner-ae/src/main/java/banner/tagging/pipe/LemmaPOS.java
@@ -43,13 +43,13 @@ public LemmaPOS(Lemmatiser lemmatiser, Tagger posTagger) {
     public void setLemmatiser(Lemmatiser lemmatiser) {
         initResourcesMap();
         getResources().lemmatiser = lemmatiser;
-//        System.out.println("Setting lemmatiser to " + Thread.currentThread());
+        System.out.println("Setting lemmatiser to " + Thread.currentThread() + " in object " + this);
     }
 
     public void setPosTagger(Tagger posTagger) {
         initResourcesMap();
         getResources().posTagger = posTagger;
-//        System.out.println("Setting PoS Tagger to " + Thread.currentThread());
+        System.out.println("Setting PoS Tagger to " + Thread.currentThread() + " in object " + this);
     }
 
     synchronized private void initResourcesMap() {
diff --git a/jcore-banner-ae/src/main/java/de/julielab/jcore/ae/banner/BANNERAnnotator.java b/jcore-banner-ae/src/main/java/de/julielab/jcore/ae/banner/BANNERAnnotator.java
index 9241d430f..a29132d5c 100644
--- a/jcore-banner-ae/src/main/java/de/julielab/jcore/ae/banner/BANNERAnnotator.java
+++ b/jcore-banner-ae/src/main/java/de/julielab/jcore/ae/banner/BANNERAnnotator.java
@@ -139,7 +139,7 @@ public void process(JCas jcas) throws AnalysisEngineProcessException {
                 // model is deserialized multiple times, the FeatureSet#pipe field seems to be always the
                 // exact same instance, containing a single instance of LemmaPOS (again, despite reading the model
                 // file and deserializing it multiple times). This is why the Thread -> resources map was added.
-//                System.out.println("Initializing BANNER: " + Thread.currentThread() + " with lemmatiser " + lemmatiser + " and POS tagger " + posTagger);
+                System.out.println("Initializing BANNER: " + Thread.currentThread() + " with lemmatiser " + lemmatiser + " and POS tagger " + posTagger);
                 synchronized (BANNERAnnotator.class) {
                     tagger = CRFTagger.load(modelIs, lemmatiser, posTagger, dictionary);
                 }

From ace255e8497450c366f7222562a4449669aeabbc Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 2 Mar 2022 14:43:14 +0100
Subject: [PATCH 155/269] Fix issues in PMC reader.

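Avoid an NPE when a section in XML is omitted.
Add a default titleType to label elements.
Fall back to the source file name for the document ID when the XML has no PMC ID.
Log the document source when parsing fails.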
---
 .../jcore/reader/pmc/parser/FrontParser.java  | 331 +++++++++---------
 .../reader/pmc/parser/NxmlDocumentParser.java |  19 +-
 .../reader/pmc/parser/SectionParser.java      |  20 +-
 .../elementproperties-no-bib-refs.yml         |   6 +
 .../pmc/resources/elementproperties.yml       |   6 +
 5 files changed, 206 insertions(+), 176 deletions(-)

diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FrontParser.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FrontParser.java
index 124e47bef..560f9877d 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FrontParser.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FrontParser.java
@@ -1,11 +1,10 @@
-/** 
- * 
+/**
  * Copyright (c) 2017, JULIE Lab.
- * All rights reserved. This program and the accompanying materials 
+ * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the BSD-2-Clause License
- *
- * Author: 
- * 
+ * <p>
+ * Author:
+ * <p>
  * Description:
  **/
 package de.julielab.jcore.reader.pmc.parser;
@@ -21,6 +20,7 @@
 import de.julielab.jcore.types.pubmed.OtherID;
 import org.apache.uima.jcas.cas.FSArray;
 
+import java.io.File;
 import java.util.List;
 import java.util.Optional;
 import java.util.stream.Collectors;
@@ -28,163 +28,166 @@
 
 public class FrontParser extends NxmlElementParser {
 
-	public FrontParser(NxmlDocumentParser nxmlDocumentParser) {
-		super(nxmlDocumentParser);
-		elementName = "front";
-	}
-
-	@Override
-	protected void parseElement(ElementParsingResult frontResult) throws ElementParsingException {
-		try {
-			// Only handle the front matter of the actual article, not sub-articles
-			final String elementPath = getElementPath();
-			if (!elementPath.endsWith("/article/front")) {
-				int firstIndexAfterElement = skipElement();
-				frontResult.setLastTokenIndex(firstIndexAfterElement);
-				frontResult.setResultType(ParsingResult.ResultType.NONE);
-				return;
-			}
-
-			// title and abstract
-			parseXPath("/article/front/article-meta/title-group/article-title").ifPresent(r -> {
-				ElementParsingResult er = (ElementParsingResult) r;
-				Title articleTitle = (Title) er.getAnnotation();
-				articleTitle.setTitleType("document");
-				frontResult.addSubResult(r);
-			});
-			parseXPath("/article/front/article-meta/abstract").ifPresent(r -> {
-				ElementParsingResult er = (ElementParsingResult) r;
-				AbstractText abstractText = (AbstractText) er.getAnnotation();
-				List<AbstractSection> abstractSections = er.getSubResultAnnotations(AbstractSection.class);
-				FSArray fsArray = new FSArray(nxmlDocumentParser.cas, abstractSections.size());
-				IntStream.range(0, abstractSections.size()).forEach(i -> fsArray.set(i, abstractSections.get(i)));
-				abstractText.setStructuredAbstractParts(fsArray);
-				frontResult.addSubResult(r);
-			});
-
-			// article IDs
-			Optional<String> pmid = getXPathValue("/article/front/article-meta/article-id[@pub-id-type='pmid']");
-			Optional<String> pmcid = getXPathValue("/article/front/article-meta/article-id[@pub-id-type='pmc']");
-			Optional<String> doi = getXPathValue("/article/front/article-meta/article-id[@pub-id-type='doi']");
-
-			// publication details
-			String pubType = "";
-			String pubDateFmt = "/article/front/article-meta/pub-date[@pub-type='%s']";
-			if (xPathExists(String.format(pubDateFmt, "epub")))
-				pubType = "epub";
-			else if (xPathExists(String.format(pubDateFmt, "ppub")))
-				pubType = "ppub";
-			else if (xPathExists(String.format(pubDateFmt, "pmc-release")))
-				pubType = "pmc-release";
-			Optional<String> year = getXPathValue(String.format("/article/front/article-meta/pub-date[@pub-type='%s']/year", pubType));
-			Optional<String> month = getXPathValue(String.format("/article/front/article-meta/pub-date[@pub-type='%s']/month", pubType));
-			Optional<String> day = getXPathValue(String.format("/article/front/article-meta/pub-date[@pub-type='%s']/day", pubType));
-			Optional<String> journalTitle = nxmlDocumentParser.getTagset() == Tagset.NLM_2_3 || nxmlDocumentParser.getTagset() == Tagset.NLM_3_0
-					? getXPathValue("/article/front/journal-meta/journal-title")
-					: getXPathValue("/article/front/journal-meta/journal-title-group/journal-title");
-			// there actually might be several abbreviated titles but here, we
-			// only use the first; our type system currently cannot represent
-			// more anyway. One could try to decide for a preferred one since the
-			// abbrev-type attribute disposes the source of the abbreviated
-			// title (e.g. publisher or nlm-ta).
-			Optional<String> abbrevJournalTitle = nxmlDocumentParser.getTagset() == Tagset.NLM_2_3 || nxmlDocumentParser.getTagset() == Tagset.NLM_3_0
-					? getXPathValue("/article/front/journal-meta/abbrev-journal-title")
-					: getXPathValue("/article/front/journal-meta/journal-title-group/abbrev-journal-title");
-			Optional<String> volume = getXPathValue("/article/front/article-meta/volume");
-			Optional<String> issue = getXPathValue("/article/front/article-meta/issue");
-			Optional<String> firstPage = getXPathValue("/article/front/article-meta/fpage");
-			Optional<String> lastPage = getXPathValue("/article/front/article-meta/lpage");
-			Optional<String> elocation = getXPathValue("/article/front/article-meta/elocation-id");
-			Optional<String> issn = getXPathValue("/article/front/journal-meta/issn[@pub-type='ppub']");
-
-			// copyright statement
-			Optional<String> copyrightStatement = getXPathValue(
-					"/article/front/article-meta/permissions/copyright-statement");
-
-			// keywords
-			Optional<List<String>> keywords = getXPathValues("/article/front/article-meta/kwd-group/kwd");
-
-			assert volume.isPresent();
-
-			Header header = new Header(nxmlDocumentParser.cas);
-			header.setSource("PubMed Central");
-			header.setComponentId(PMCReader.class.getName());
-
-			pmcid.ifPresent(id -> header.setDocId(id.startsWith("PMC") ? id : "PMC" + id));
-			pmid.ifPresent(p -> {
-				OtherID otherID = new OtherID(nxmlDocumentParser.cas);
-				otherID.setComponentId(PMCReader.class.getName());
-				otherID.setId(p);
-				otherID.setSource("PubMed");
-				FSArray otherIDs = new FSArray(nxmlDocumentParser.cas, 1);
-				otherIDs.set(0, otherID);
-				header.setOtherIDs(otherIDs);
-			});
-			doi.ifPresent(header::setDoi);
-
-			copyrightStatement.ifPresent(header::setCopyright);
-
-			Journal journal = new Journal(nxmlDocumentParser.cas);
-			journal.setComponentId(PMCReader.class.getName());
-			journalTitle.ifPresent(journal::setTitle);
-			abbrevJournalTitle.ifPresent(journal::setShortTitle);
-			volume.ifPresent(journal::setVolume);
-			issue.ifPresent(journal::setIssue);
-			issn.ifPresent(journal::setISSN);
-			String pages = null;
-			if (firstPage.isPresent() && lastPage.isPresent())
-				pages = firstPage.get() + "--" + lastPage.get();
-			else if (firstPage.isPresent())
-				pages = firstPage.get();
-			else if (elocation.isPresent())
-				pages = elocation.get();
-			journal.setPages(pages);
-			FSArray pubTypes = new FSArray(nxmlDocumentParser.cas, 1);
-			pubTypes.set(0, journal);
-			Date pubDate = new Date(nxmlDocumentParser.cas);
-			pubDate.setComponentId(PMCReader.class.getName());
-			day.map(Integer::parseInt).ifPresent(pubDate::setDay);
-			month.map(Integer::parseInt).ifPresent(pubDate::setMonth);
-			year.map(Integer::parseInt).ifPresent(pubDate::setYear);
-			journal.setPubDate(pubDate);
-			header.setPubTypeList(pubTypes);
-
-			// authors (more general: contributors; but for the moment we
-			// restrict ourselves to authors)
-			parseXPath("/article/front/article-meta/contrib-group").map(ElementParsingResult.class::cast)
-					.ifPresent(r -> {
-						// currently only authors
-						List<AuthorInfo> authors = r.getSubResults().stream().map(ElementParsingResult.class::cast)
-								.map(e -> e.getAnnotation()).filter(AuthorInfo.class::isInstance)
-								.map(AuthorInfo.class::cast).collect(Collectors.toList());
-						FSArray aiArray = new FSArray(nxmlDocumentParser.cas, authors.size());
-						IntStream.range(0, authors.size()).forEach(i -> {
-							aiArray.set(i, authors.get(i));
-						});
-						if (aiArray.size() > 0)
-							header.setAuthors(aiArray);
-					});
-
-			frontResult.setAnnotation(header);
-
-			if (keywords.isPresent()) {
-				List<String> keywordList = keywords.get();
-				FSArray fsArray = new FSArray(nxmlDocumentParser.cas, keywordList.size());
-				IntStream.range(0, keywordList.size()).forEach(i -> {
-					Keyword keyword = new Keyword(nxmlDocumentParser.cas);
-					keyword.setComponentId(PMCReader.class.getName());
-					keyword.setName(keywordList.get(i));
-					fsArray.set(i, keyword);
-				});
-				ManualDescriptor manualDescriptor = new ManualDescriptor(nxmlDocumentParser.cas);
-				manualDescriptor.setComponentId(PMCReader.class.getName());
-				manualDescriptor.setKeywordList(fsArray);
-				manualDescriptor.addToIndexes();
-			}
-
-		} catch (XPathParseException | XPathEvalException | NavException e) {
-			throw new ElementParsingException(e);
-		}
-	}
+    public FrontParser(NxmlDocumentParser nxmlDocumentParser) {
+        super(nxmlDocumentParser);
+        elementName = "front";
+    }
+
+    /**
+     * Parses the <code>front</code> element of the main article, i.e. title, abstract, IDs, journal and publication
+     * data, authors and keywords. Elements whose path does not end with <code>/article/front</code> are skipped.
+     * @param frontResult receives the title and abstract sub results and the created {@link Header} as annotation
+     * @throws ElementParsingException if an XPath expression cannot be parsed or evaluated or navigation fails
+     */
+    @Override
+    protected void parseElement(ElementParsingResult frontResult) throws ElementParsingException {
+        try {
+            // Only handle the front matter of the actual article, not sub-articles
+            final String elementPath = getElementPath();
+            if (!elementPath.endsWith("/article/front")) {
+                int firstIndexAfterElement = skipElement();
+                frontResult.setLastTokenIndex(firstIndexAfterElement);
+                frontResult.setResultType(ParsingResult.ResultType.NONE);
+                return;
+            }
+
+            // title and abstract
+            parseXPath("/article/front/article-meta/title-group/article-title").ifPresent(r -> {
+                ElementParsingResult er = (ElementParsingResult) r;
+                Title articleTitle = (Title) er.getAnnotation();
+                articleTitle.setTitleType("document");
+                frontResult.addSubResult(r);
+            });
+            parseXPath("/article/front/article-meta/abstract").ifPresent(r -> {
+                ElementParsingResult er = (ElementParsingResult) r;
+                AbstractText abstractText = (AbstractText) er.getAnnotation();
+                List<AbstractSection> abstractSections = er.getSubResultAnnotations(AbstractSection.class);
+                FSArray fsArray = new FSArray(nxmlDocumentParser.cas, abstractSections.size());
+                IntStream.range(0, abstractSections.size()).forEach(i -> fsArray.set(i, abstractSections.get(i)));
+                abstractText.setStructuredAbstractParts(fsArray);
+                frontResult.addSubResult(r);
+            });
+
+            // article IDs
+            Optional<String> pmid = getXPathValue("/article/front/article-meta/article-id[@pub-id-type='pmid']");
+            Optional<String> pmcid = getXPathValue("/article/front/article-meta/article-id[@pub-id-type='pmc']");
+            Optional<String> doi = getXPathValue("/article/front/article-meta/article-id[@pub-id-type='doi']");
+
+            // publication details
+            String pubType = "";
+            String pubDateFmt = "/article/front/article-meta/pub-date[@pub-type='%s']";
+            if (xPathExists(String.format(pubDateFmt, "epub")))
+                pubType = "epub";
+            else if (xPathExists(String.format(pubDateFmt, "ppub")))
+                pubType = "ppub";
+            else if (xPathExists(String.format(pubDateFmt, "pmc-release")))
+                pubType = "pmc-release";
+            Optional<String> year = getXPathValue(String.format("/article/front/article-meta/pub-date[@pub-type='%s']/year", pubType));
+            Optional<String> month = getXPathValue(String.format("/article/front/article-meta/pub-date[@pub-type='%s']/month", pubType));
+            Optional<String> day = getXPathValue(String.format("/article/front/article-meta/pub-date[@pub-type='%s']/day", pubType));
+            Optional<String> journalTitle = nxmlDocumentParser.getTagset() == Tagset.NLM_2_3 || nxmlDocumentParser.getTagset() == Tagset.NLM_3_0
+                    ? getXPathValue("/article/front/journal-meta/journal-title")
+                    : getXPathValue("/article/front/journal-meta/journal-title-group/journal-title");
+            // There may be several abbreviated titles but only the first is used since our type system currently
+            // cannot represent more. One could try to choose a preferred one because the abbrev-type attribute
+            // discloses the source of the abbreviated title (e.g. publisher or nlm-ta).
+            Optional<String> abbrevJournalTitle = nxmlDocumentParser.getTagset() == Tagset.NLM_2_3 || nxmlDocumentParser.getTagset() == Tagset.NLM_3_0
+                    ? getXPathValue("/article/front/journal-meta/abbrev-journal-title")
+                    : getXPathValue("/article/front/journal-meta/journal-title-group/abbrev-journal-title");
+            Optional<String> volume = getXPathValue("/article/front/article-meta/volume");
+            Optional<String> issue = getXPathValue("/article/front/article-meta/issue");
+            Optional<String> firstPage = getXPathValue("/article/front/article-meta/fpage");
+            Optional<String> lastPage = getXPathValue("/article/front/article-meta/lpage");
+            Optional<String> elocation = getXPathValue("/article/front/article-meta/elocation-id");
+            Optional<String> issn = getXPathValue("/article/front/journal-meta/issn[@pub-type='ppub']");
+
+            // copyright statement
+            Optional<String> copyrightStatement = getXPathValue("/article/front/article-meta/permissions/copyright-statement");
+
+            // keywords
+            Optional<List<String>> keywords = getXPathValues("/article/front/article-meta/kwd-group/kwd");
+
+            Header header = new Header(nxmlDocumentParser.cas);
+            header.setSource("PubMed Central");
+            header.setComponentId(PMCReader.class.getName());
+
+            // Prefer the PMC ID from the XML. Without one, fall back to the file name of the source without its last
+            // extension. Sources that are neither File nor URI carry no file name and leave the document ID unset.
+            final Object source = nxmlDocumentParser.getCurrentSource();
+            final String sourceName = source instanceof File || source instanceof java.net.URI ? new File(source.toString()).getName() : null;
+            final int extensionStart = sourceName != null ? sourceName.lastIndexOf('.') : -1;
+            final String docId = pmcid.orElse(extensionStart >= 0 ? sourceName.substring(0, extensionStart) : sourceName);
+            if (docId != null)
+                header.setDocId(docId.startsWith("PMC") ? docId : "PMC" + docId);
+            pmid.ifPresent(p -> {
+                OtherID otherID = new OtherID(nxmlDocumentParser.cas);
+                otherID.setComponentId(PMCReader.class.getName());
+                otherID.setId(p);
+                otherID.setSource("PubMed");
+                FSArray otherIDs = new FSArray(nxmlDocumentParser.cas, 1);
+                otherIDs.set(0, otherID);
+                header.setOtherIDs(otherIDs);
+            });
+            doi.ifPresent(header::setDoi);
+
+            copyrightStatement.ifPresent(header::setCopyright);
+
+            Journal journal = new Journal(nxmlDocumentParser.cas);
+            journal.setComponentId(PMCReader.class.getName());
+            journalTitle.ifPresent(journal::setTitle);
+            abbrevJournalTitle.ifPresent(journal::setShortTitle);
+            volume.ifPresent(journal::setVolume);
+            issue.ifPresent(journal::setIssue);
+            issn.ifPresent(journal::setISSN);
+            String pages = null;
+            if (firstPage.isPresent() && lastPage.isPresent())
+                pages = firstPage.get() + "--" + lastPage.get();
+            else if (firstPage.isPresent())
+                pages = firstPage.get();
+            else if (elocation.isPresent())
+                pages = elocation.get();
+            journal.setPages(pages);
+            FSArray pubTypes = new FSArray(nxmlDocumentParser.cas, 1);
+            pubTypes.set(0, journal);
+            Date pubDate = new Date(nxmlDocumentParser.cas);
+            pubDate.setComponentId(PMCReader.class.getName());
+            day.map(Integer::parseInt).ifPresent(pubDate::setDay);
+            month.map(Integer::parseInt).ifPresent(pubDate::setMonth);
+            year.map(Integer::parseInt).ifPresent(pubDate::setYear);
+            journal.setPubDate(pubDate);
+            header.setPubTypeList(pubTypes);
+
+            // authors (more general: contributors; but for the moment we restrict ourselves to authors)
+            parseXPath("/article/front/article-meta/contrib-group").map(ElementParsingResult.class::cast).ifPresent(r -> {
+                // currently only authors
+                List<AuthorInfo> authors = r.getSubResults().stream().map(ElementParsingResult.class::cast)
+                        .map(e -> e.getAnnotation()).filter(AuthorInfo.class::isInstance)
+                        .map(AuthorInfo.class::cast).collect(Collectors.toList());
+                FSArray aiArray = new FSArray(nxmlDocumentParser.cas, authors.size());
+                IntStream.range(0, authors.size()).forEach(i -> aiArray.set(i, authors.get(i)));
+                if (aiArray.size() > 0)
+                    header.setAuthors(aiArray);
+            });
+
+            frontResult.setAnnotation(header);
+
+            if (keywords.isPresent()) {
+                List<String> keywordList = keywords.get();
+                FSArray fsArray = new FSArray(nxmlDocumentParser.cas, keywordList.size());
+                IntStream.range(0, keywordList.size()).forEach(i -> {
+                    Keyword keyword = new Keyword(nxmlDocumentParser.cas);
+                    keyword.setComponentId(PMCReader.class.getName());
+                    keyword.setName(keywordList.get(i));
+                    fsArray.set(i, keyword);
+                });
+                ManualDescriptor manualDescriptor = new ManualDescriptor(nxmlDocumentParser.cas);
+                manualDescriptor.setComponentId(PMCReader.class.getName());
+                manualDescriptor.setKeywordList(fsArray);
+                manualDescriptor.addToIndexes();
+            }
+        } catch (XPathParseException | XPathEvalException | NavException e) {
+            throw new ElementParsingException(e);
+        }
+    }
 
 }
diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/NxmlDocumentParser.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/NxmlDocumentParser.java
index 5285ee138..7bafb1a39 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/NxmlDocumentParser.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/NxmlDocumentParser.java
@@ -40,9 +40,15 @@ public class NxmlDocumentParser extends NxmlParser {
     private DefaultElementParser defaultElementParser;
     private Map<String, Map<String, Object>> tagProperties;
     private Tagset tagset;
+    private Object currentSource;
 
     public void reset(File nxmlFile, JCas cas) throws DocumentParsingException {
         reset(nxmlFile.toURI(), cas);
+        currentSource = nxmlFile;
+    }
+
+    public Object getCurrentSource() {
+        return currentSource;
     }
 
     public void reset(URI uri, JCas cas) throws DocumentParsingException {
@@ -53,6 +59,7 @@ public void reset(URI uri, JCas cas) throws DocumentParsingException {
             if (gzipped)
                 is = new GZIPInputStream(is);
             reset(is, cas);
+            currentSource = uri;
         } catch (IOException e) {
             throw new DocumentParsingException(e);
         }
@@ -73,6 +80,7 @@ public void reset(InputStream is, JCas cas) throws DocumentParsingException {
             vn = vg.getNav();
             setTagset();
             setupParserRegistry();
+            currentSource = "<input stream>";
         } catch (IOException | VTDException e) {
             throw new DocumentParsingException(e);
         }
@@ -152,9 +160,14 @@ public Map<String, NxmlElementParser> getParserRegistry() {
     }
 
     public ElementParsingResult parse() throws ElementParsingException, DocumentParsingException {
-        String startingElement = moveToNextStartingTag();
-        assert startingElement.equals("article") : "Did not encounter an article element as first start element";
-        return getParser(startingElement).parse();
+        try {
+            String startingElement = moveToNextStartingTag();
+            assert startingElement.equals("article") : "Did not encounter an article element as first start element";
+            return getParser(startingElement).parse();
+        } catch (Exception e) {
+            log.error("Exception while parsing document from source {}", currentSource);
+            throw e;
+        }
     }
 
     public NxmlElementParser getParser(String tagName) {
diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/SectionParser.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/SectionParser.java
index 6283db703..787e0e8aa 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/SectionParser.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/SectionParser.java
@@ -66,15 +66,17 @@ protected void parseElement(ElementParsingResult parsingResult) throws ElementPa
 				if (!secTitleAnnotations.isEmpty())
 					sectionHeading = secTitleAnnotations.get(0);
 				Section section = (Section) parsingResult.getAnnotation();
-				section.setComponentId(PMCReader.class.getName());
-				section.setSectionHeading(sectionHeading);
-				section.setDepth(depth);
-				section.setSectionId(sectionId);
-				List<ParsingResult> label = parsingResult.getSubResults("label");
-				if (!label.isEmpty()) {
-					// there is only one label element
-					ElementParsingResult labelParsingResult = (ElementParsingResult) label.get(0);
-					section.setLabel(labelParsingResult.getResultText());
+				if (section != null) {
+					section.setComponentId(PMCReader.class.getName());
+					section.setSectionHeading(sectionHeading);
+					section.setDepth(depth);
+					section.setSectionId(sectionId);
+					List<ParsingResult> label = parsingResult.getSubResults("label");
+					if (!label.isEmpty()) {
+						// there is only one label element
+						ElementParsingResult labelParsingResult = (ElementParsingResult) label.get(0);
+						section.setLabel(labelParsingResult.getResultText());
+					}
 				}
 			}
 		} catch (NavException e) {
diff --git a/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/resources/elementproperties-no-bib-refs.yml b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/resources/elementproperties-no-bib-refs.yml
index 16d5355bb..09bc0123c 100644
--- a/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/resources/elementproperties-no-bib-refs.yml
+++ b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/resources/elementproperties-no-bib-refs.yml
@@ -21,6 +21,8 @@ abstract:
 label:
     block-element: true
     type: de.julielab.jcore.types.Title
+    default-feature-values:
+      titleType: other
     paths:
         - path: list-item/label
           omit-element: true
@@ -30,6 +32,10 @@ sec:
     paths:
         - path: abstract/sec
           type: de.julielab.jcore.types.AbstractSection
+    attributes:
+      - name: sec-type
+        value: supplementary-material
+        omit-element: true
 p:
     block-element: true
     type: de.julielab.jcore.types.Paragraph
diff --git a/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/resources/elementproperties.yml b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/resources/elementproperties.yml
index 321ddf287..8037e3cf4 100644
--- a/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/resources/elementproperties.yml
+++ b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/resources/elementproperties.yml
@@ -24,6 +24,8 @@ abstract:
 label:
     block-element: true
     type: de.julielab.jcore.types.Title
+    default-feature-values:
+        titleType: other
     paths:
         - path: list-item/label
           omit-element: true
@@ -33,6 +35,10 @@ sec:
     paths:
         - path: abstract/sec
           type: de.julielab.jcore.types.AbstractSection
+    attributes:
+      - name: sec-type
+        value: supplementary-material
+        omit-element: true
 p:
     block-element: true
     type: de.julielab.jcore.types.Paragraph

From df18dfdc214adeb3fcae07b6bf19173667511de8 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 2 Mar 2022 14:43:35 +0100
Subject: [PATCH 156/269] Correct the XMI DB Multiplier name in its descriptor.

---
 .../julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml b/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml
index bd4929ad1..fa909d57c 100644
--- a/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml
+++ b/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml
@@ -4,7 +4,7 @@
     <primitive>true</primitive>
     <annotatorImplementationName>de.julielab.jcore.reader.xmi.XmiDBMultiplier</annotatorImplementationName>
     <analysisEngineMetaData>
-        <name>JCoRe Abstract Database Multiplier</name>
+        <name>JCoRe XMI Database Multiplier</name>
         <description>A multiplier that receives document IDs to read from a database table from the DBMultiplierReader. The reader also delivers the path to the corpus storage system (CoStoSys) configuration and additional tables for joining with the main data table. This multiplier class is abstract and cannot be used directly.Extending classes must implement the next() method to actually read documents from the database and populate CASes with them. This component is a part of the Jena Document Information System, JeDIS.</description>
         <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>

From 4314bebd6138e85f02056fb033236749529f026d Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 7 Mar 2022 14:19:40 +0100
Subject: [PATCH 157/269] Descriptor enhancements.

---
 jcore-gnp-bioc-reader/component.meta                        | 6 +++---
 .../jcore/reader/desc/jcore-bnp-bioc-multiplier.xml         | 2 +-
 .../jcore/consumer/xmi/desc/jcore-xmi-db-writer.xml         | 1 +
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/jcore-gnp-bioc-reader/component.meta b/jcore-gnp-bioc-reader/component.meta
index c3b3f6e0a..4bd445551 100644
--- a/jcore-gnp-bioc-reader/component.meta
+++ b/jcore-gnp-bioc-reader/component.meta
@@ -1,12 +1,12 @@
 {
     "categories": [
-        "ae",
-        "reader"
+        "reader",
+        "multiplier"
     ],
     "description": "A reader for the BioC format used by GNormPlus. Reads the text and the annotations, both species and genes.",
     "descriptors": [
         {
-            "category": "ae",
+            "category": "multiplier",
             "location": "de.julielab.jcore.reader.desc.jcore-bnp-bioc-multiplier"
         },
         {
diff --git a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml
index 2b64be30b..018d3db3b 100644
--- a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml
+++ b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml
@@ -20,7 +20,7 @@
         <operationalProperties>
             <modifiesCas>true</modifiesCas>
             <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
-            <outputsNewCASes>false</outputsNewCASes>
+            <outputsNewCASes>true</outputsNewCASes>
         </operationalProperties>
     </analysisEngineMetaData>
 </analysisEngineDescription>
\ No newline at end of file
diff --git a/jcore-xmi-db-writer/src/main/resources/de/julielab/jcore/consumer/xmi/desc/jcore-xmi-db-writer.xml b/jcore-xmi-db-writer/src/main/resources/de/julielab/jcore/consumer/xmi/desc/jcore-xmi-db-writer.xml
index b3b5afac1..01f4ca1e3 100644
--- a/jcore-xmi-db-writer/src/main/resources/de/julielab/jcore/consumer/xmi/desc/jcore-xmi-db-writer.xml
+++ b/jcore-xmi-db-writer/src/main/resources/de/julielab/jcore/consumer/xmi/desc/jcore-xmi-db-writer.xml
@@ -190,6 +190,7 @@
         </configurationParameterSettings>
         <typeSystemDescription>
             <imports>
+                <import name="de.julielab.jcore.types.jcore-document-meta-types"/>
                 <import name="de.julielab.jcore.types.jcore-xmi-splitter-types" />
                 <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types" />
                 <import name="org.apache.uima.ducc.FlowControllerTS" />

From 1caec4e8444e6e1373a937cb8db4fc964d36eb77 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 7 Mar 2022 16:13:04 +0100
Subject: [PATCH 158/269] Let the BioC GNormPlus reader add max XMI IDs from
 the database on request. Fixes #133.

---
 jcore-gnp-bioc-reader/pom.xml                 |  7 ++-
 .../jcore/reader/BioCCasPopulator.java        | 56 +++++++++++++++++--
 .../reader/GNormPlusFormatMultiplier.java     | 21 ++++++-
 .../desc/jcore-bnp-bioc-multiplier-reader.xml | 54 +++++++-----------
 .../reader/desc/jcore-bnp-bioc-multiplier.xml |  1 +
 .../jcore/reader/BioCCasPopulatorTest.java    |  2 +-
 .../jcore/consumer/xmi/XmiDBWriterTest.java   |  1 +
 7 files changed, 99 insertions(+), 43 deletions(-)

diff --git a/jcore-gnp-bioc-reader/pom.xml b/jcore-gnp-bioc-reader/pom.xml
index 4ca0c48b5..ccbfad282 100644
--- a/jcore-gnp-bioc-reader/pom.xml
+++ b/jcore-gnp-bioc-reader/pom.xml
@@ -3,7 +3,7 @@
          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
 
     <modelVersion>4.0.0</modelVersion>
-    <artifactId>jcore-bnp-bioc-reader</artifactId>
+    <artifactId>jcore-gnp-bioc-reader</artifactId>
     <packaging>jar</packaging>
     <groupId>de.julielab</groupId>
 
@@ -51,6 +51,11 @@
             <version>${jcore-utilities-version}</version>
             <scope>test</scope>
         </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>costosys</artifactId>
+            <version>[1.6.0, )</version>
+        </dependency>
     </dependencies>
     <name>JCoRe GNormPlus BioC Reader</name>
     <organization>
diff --git a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
index bfd4474e0..fa76ad27c 100644
--- a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
+++ b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
@@ -2,6 +2,8 @@
 
 import com.pengyifan.bioc.*;
 import com.pengyifan.bioc.io.BioCCollectionReader;
+import de.julielab.costosys.dbconnection.CoStoSysConnection;
+import de.julielab.costosys.dbconnection.DataBaseConnector;
 import de.julielab.jcore.types.*;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.FSArray;
@@ -11,9 +13,10 @@
 import javax.xml.stream.XMLStreamException;
 import java.io.IOException;
 import java.nio.file.Path;
-import java.util.Collection;
-import java.util.Iterator;
-import java.util.Optional;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.*;
 import java.util.stream.Stream;
 
 /**
@@ -23,19 +26,51 @@ public class BioCCasPopulator {
 
     private final static Logger log = LoggerFactory.getLogger(BioCCasPopulator.class);
     private final BioCCollection bioCCollection;
+    private Map<String, Integer> maxXmiIdMap;
     private int pos;
 
-    public BioCCasPopulator(Path biocCollectionPath) throws XMLStreamException, IOException {
+    public BioCCasPopulator(Path biocCollectionPath, Path costosysConfiguration, String documentsTable) throws XMLStreamException, IOException, SQLException {
         try (BioCCollectionReader bioCCollectionReader = new BioCCollectionReader(biocCollectionPath)) {
             bioCCollection = bioCCollectionReader.readCollection();
         }
+        if (costosysConfiguration != null) {
+            maxXmiIdMap = new HashMap<>();
+            DataBaseConnector dbc = new DataBaseConnector(costosysConfiguration.toString());
+            try (CoStoSysConnection conn = dbc.obtainOrReserveConnection()) {
+                retrieveMaxXmiIds(documentsTable, dbc, conn);
+            }
+        }
         pos = 0;
     }
 
-    public void populateWithNextDocument(JCas jCas) throws XMLStreamException, IOException {
+    /**
+     * Fetches the max_xmi_id of all documents of the current BioC collection with one query and stores the values in maxXmiIdMap, keyed by the primary key value.
+     * The statement and result set are closed after use; an empty collection is skipped because it would yield the invalid SQL "... in )".
+     * @throws IllegalArgumentException if the active table schema has a compound primary key
+     */
+    private void retrieveMaxXmiIds(String documentsTable, DataBaseConnector dbc, CoStoSysConnection conn) throws SQLException {
+        log.debug("Retrieving the max XMI IDs for the current BioC collection of size {} from the database.", bioCCollection.getDocmentCount());
+        if (dbc.getActiveTableFieldConfiguration().getPrimaryKey().length > 1)
+            throw new IllegalArgumentException("The primary key of the active field schema '" + dbc.getActiveTableFieldConfiguration().getName() + "' is a compound key. Compound primary keys are currently not supported in this component.");
+        if (bioCCollection.getDocmentCount() == 0) return; // nothing to look up, avoid building an empty IN list
+        String pkString = dbc.getActiveTableFieldConfiguration().getPrimaryKeyString();
+        StringBuilder maxIdQueryBuilder = new StringBuilder("SELECT ").append(pkString).append(",max_xmi_id FROM ").append(documentsTable).append(" WHERE ").append(pkString).append(" in ").append("(");
+        for (BioCDocument document : bioCCollection.getDocuments())
+            // double embedded single quotes so that a document ID cannot terminate the SQL string literal
+            maxIdQueryBuilder.append("'").append(String.valueOf(document.getID()).replace("'", "''")).append("'").append(",");
+        maxIdQueryBuilder.setCharAt(maxIdQueryBuilder.length() - 1, ')'); // the trailing comma becomes the closing parenthesis
+        try (Statement stmt = conn.createStatement(); ResultSet rs = stmt.executeQuery(maxIdQueryBuilder.toString())) {
+            while (rs.next())
+                maxXmiIdMap.put(rs.getString(1), rs.getInt(2));
+        }
+        log.debug("Obtained {} max XMI IDs.", maxXmiIdMap.size());
+    }
+
+    public void populateWithNextDocument(JCas jCas) {
         BioCDocument document = bioCCollection.getDocument(pos++);
         setDocumentId(jCas, document);
         setDocumentText(jCas, document);
+        setMaxXmiId(jCas, document);
         Iterator<BioCAnnotation> allAnnotations = Stream.concat(document.getAnnotations().stream(), document.getPassages().stream().map(BioCPassage::getAnnotations).flatMap(Collection::stream)).iterator();
         for (BioCAnnotation annotation : (Iterable<BioCAnnotation>)() ->allAnnotations) {
             Optional<String> type = annotation.getInfon("type");
@@ -56,6 +91,17 @@ public void populateWithNextDocument(JCas jCas) throws XMLStreamException, IOExc
         }
     }
 
+    /** Adds an XmiMetaData annotation carrying the max XMI ID looked up for {@code document}; fails if the lookup map has no entry for its ID. */
+    private void setMaxXmiId(JCas jCas, BioCDocument document) {
+        if (maxXmiIdMap == null) return; // the map only exists when a CoStoSys configuration was passed to the constructor
+        final Integer knownMaxId = maxXmiIdMap.get(document.getID());
+        if (knownMaxId == null)
+            throw new IllegalStateException("No max XMI ID was obtained for the document with ID " + document.getID() + ". This means that this document is not already part of the database documents table. When adding annotations to existing database documents, make sure that all documents exist in the database already.");
+        final XmiMetaData metaData = new XmiMetaData(jCas);
+        metaData.setMaxXmiId(knownMaxId);
+        metaData.addToIndexes();
+    }
+
     private void setDocumentId(JCas jCas, BioCDocument document) {
         Header h = new Header(jCas);
         h.setDocId(document.getID());
diff --git a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/GNormPlusFormatMultiplier.java b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/GNormPlusFormatMultiplier.java
index 5e7d71580..654c595dc 100644
--- a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/GNormPlusFormatMultiplier.java
+++ b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/GNormPlusFormatMultiplier.java
@@ -1,12 +1,15 @@
 package de.julielab.jcore.reader;
 
 import de.julielab.jcore.types.casmultiplier.JCoReURI;
+import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_component.JCasMultiplier_ImplBase;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.AbstractCas;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
 import org.apache.uima.fit.descriptor.ResourceMetaData;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -18,9 +21,25 @@
 @ResourceMetaData(name="GNormPlusFormatMultiplier", description = "Multiplier for GNormPlusFormatMultiplierReader. Takes URIs pointing to BioC collection files that contain annotations created by GNormPlus. For each such file, reads all documents and returns CASes for them until all documents in all collections have been read into a CAS.")
 public class GNormPlusFormatMultiplier extends JCasMultiplier_ImplBase {
     private final static Logger log = LoggerFactory.getLogger(GNormPlusFormatMultiplier.class);
+    public static final String PARAM_COSTOSYS_CONFIG = "CostosysConfigFile";
+    public static final String PARAM_XMI_DOCUMENTS_TABLE = "DocumentsTable";
     private Iterator<URI> currentUriBatch;
     private BioCCasPopulator casPopulator;
 
+@ConfigurationParameter(name=PARAM_COSTOSYS_CONFIG, mandatory = false, description = "Path to the CoStoSys configuration file that is used by the XMI DB writer in the same pipeline, if any. The XMI DB writer requires information about the XMI documents that are already in the database and should be updated with new annotations. The current highest XMI ID must be known to avoid ID collisions. To obtain the ID, it must be received from the database beforehand. This allows to retrieve the information batch wise instead of one-by-one which would be much slower.")
+    private String costosysConfiguration;
+@ConfigurationParameter(name=PARAM_XMI_DOCUMENTS_TABLE, mandatory = false, description = "Required to retrieve the max XMI ID for use by the XMI DB writer. The schema-qualified name of the XMI document table that the XMI DB writer will write annotations into.")
+    private String documentsTable;
+
+    /** Reads the two optional database parameters and rejects configurations in which exactly one of them is set. */
+    @Override
+    public void initialize(UimaContext aContext) throws ResourceInitializationException {
+        super.initialize(aContext);
+        documentsTable = (String) aContext.getConfigParameterValue(PARAM_XMI_DOCUMENTS_TABLE);
+        costosysConfiguration = (String) aContext.getConfigParameterValue(PARAM_COSTOSYS_CONFIG);
+        if ((costosysConfiguration == null) != (documentsTable == null)) throw new ResourceInitializationException(new IllegalArgumentException("Either both or none parameters must be defined: " + PARAM_COSTOSYS_CONFIG + ", " + PARAM_XMI_DOCUMENTS_TABLE));
+    }
+
     @Override
     public void process(JCas jCas) throws AnalysisEngineProcessException {
         try {
@@ -39,7 +58,7 @@ public boolean hasNext() throws AnalysisEngineProcessException {
         if ((casPopulator == null || casPopulator.documentsLeftInCollection() == 0) && currentUriBatch.hasNext()) {
             URI nextUri = currentUriBatch.next();
             try {
-                casPopulator = new BioCCasPopulator(Path.of(nextUri));
+                casPopulator = new BioCCasPopulator(Path.of(nextUri), costosysConfiguration != null ? Path.of(costosysConfiguration) : null, documentsTable);
             } catch (Exception e) {
                 log.error("Could not read from {}", nextUri, e);
                 throw new AnalysisEngineProcessException(e);
diff --git a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier-reader.xml b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier-reader.xml
index 7081ae596..6095bb281 100644
--- a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier-reader.xml
+++ b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier-reader.xml
@@ -1,58 +1,42 @@
 <?xml version="1.0" encoding="UTF-8"?>
-<collectionReaderDescription xmlns="http://uima.apache.org/resourceSpecifier">
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
-    <implementationName>de.julielab.jcore.reader.GNormPlusFormatMultiplierReader</implementationName>
-    <processingResourceMetaData>
-        <name>JCoRe GNormPlus Format Multiplier Reader</name>
-        <description>A reader for the BioC XML format used by GNormPlus. Requires the matching multiplier.</description>
+    <primitive>true</primitive>
+    <annotatorImplementationName>de.julielab.jcore.reader.GNormPlusFormatMultiplier</annotatorImplementationName>
+    <analysisEngineMetaData>
+        <name>GNormPlusFormatMultiplier</name>
+        <description>Multiplier for GNormPlusFormatMultiplierReader. Takes URIs pointing to BioC collection files that contain annotations created by GNormPlus. For each such file, reads all documents and returns CASes for them until all documents in all collections have been read into a CAS.</description>
         <configurationParameters>
             <configurationParameter>
-                <name>InputPath</name>
-                <description>Path to a directory or file to be read. In case of a directory, all files ending in .xml will be read.</description>
+                <name>CostosysConfigFile</name>
+                <description>Path to the CoStoSys configuration file that is used by the XMI DB writer in the same pipeline, if any. The XMI DB writer requires information about the XMI documents that are already in the database and should be updated with new annotations. The current highest XMI ID must be known to avoid ID collisions. To obtain the ID, it must be received from the database beforehand. This allows to retrieve the information batch wise instead of one-by-one which would be much slower.</description>
                 <type>String</type>
                 <multiValued>false</multiValued>
-                <mandatory>true</mandatory>
-            </configurationParameter>
-            <configurationParameter>
-                <name>Recursive</name>
-                <description>Whether to read also the subdirectories of the input directory, if the input path points to a directory.</description>
-                <type>Boolean</type>
-                <multiValued>false</multiValued>
                 <mandatory>false</mandatory>
             </configurationParameter>
             <configurationParameter>
-                <name>BatchSize</name>
-                <description>The number of XML file URI references to send to the CAS multipliers in each work assignment. Defaults to 20.</description>
-                <type>Integer</type>
+                <name>DocumentsTable</name>
+                <description>Required to retrieve the max XMI ID for use by the XMI DB writer. The schema-qualified name of the XMI document table that the XMI DB writer will write annotations into.</description>
+                <type>String</type>
                 <multiValued>false</multiValued>
                 <mandatory>false</mandatory>
             </configurationParameter>
         </configurationParameters>
-        <configurationParameterSettings>
-            <nameValuePair>
-                <name>Recursive</name>
-                <value>
-                    <boolean>true</boolean>
-                </value>
-            </nameValuePair>
-            <nameValuePair>
-                <name>BatchSize</name>
-                <value>
-                    <integer>20</integer>
-                </value>
-            </nameValuePair>
-        </configurationParameterSettings>
+        <configurationParameterSettings/>
         <typeSystemDescription>
             <imports>
+                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
                 <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
+                <import name="de.julielab.jcore.types.jcore-xmi-splitter-types"/>
             </imports>
         </typeSystemDescription>
         <fsIndexCollection/>
         <capabilities/>
         <operationalProperties>
             <modifiesCas>true</modifiesCas>
-            <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
-            <outputsNewCASes>true</outputsNewCASes>
+            <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+            <outputsNewCASes>false</outputsNewCASes>
         </operationalProperties>
-    </processingResourceMetaData>
-</collectionReaderDescription>
\ No newline at end of file
+    </analysisEngineMetaData>
+</analysisEngineDescription>
\ No newline at end of file
diff --git a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml
index 018d3db3b..4981c441b 100644
--- a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml
+++ b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml
@@ -13,6 +13,7 @@
                 <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
                 <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types"/>
                 <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
+                <import name="de.julielab.jcore.types.jcore-xmi-splitter-types"/>
             </imports>
         </typeSystemDescription>
         <fsIndexCollection/>
diff --git a/jcore-gnp-bioc-reader/src/test/java/de/julielab/jcore/reader/BioCCasPopulatorTest.java b/jcore-gnp-bioc-reader/src/test/java/de/julielab/jcore/reader/BioCCasPopulatorTest.java
index acea59b54..3b7e0dba5 100644
--- a/jcore-gnp-bioc-reader/src/test/java/de/julielab/jcore/reader/BioCCasPopulatorTest.java
+++ b/jcore-gnp-bioc-reader/src/test/java/de/julielab/jcore/reader/BioCCasPopulatorTest.java
@@ -20,7 +20,7 @@ private JCas getJCas() throws Exception {
 
     @Test
     public void populateWithNextDocument() throws Exception {
-        BioCCasPopulator bioCCasPopulator = new BioCCasPopulator(Path.of("src", "test", "resources", "test-input-path", "bioc_collection_3.xml"));
+        BioCCasPopulator bioCCasPopulator = new BioCCasPopulator(Path.of("src", "test", "resources", "test-input-path", "bioc_collection_3.xml"), null, null);
         assertThat(bioCCasPopulator.documentsLeftInCollection()).isEqualTo(2);
         JCas jCas = getJCas();
         bioCCasPopulator.populateWithNextDocument(jCas);
diff --git a/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterTest.java b/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterTest.java
index fc93a2138..306ab2820 100644
--- a/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterTest.java
+++ b/jcore-xmi-db-writer/src/test/java/de/julielab/jcore/consumer/xmi/XmiDBWriterTest.java
@@ -354,4 +354,5 @@ public void testMirrorSubsetNotReset() throws Exception {
             }
         }
     }
+
 }

From a3402fab1b9b9cab77e19f7a7d2310340e294792 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 7 Mar 2022 17:02:27 +0100
Subject: [PATCH 159/269] Correct meta typo.

---
 jcore-gnp-bioc-reader/component.meta          |  6 +--
 .../reader/GNormPlusFormatMultiplier.java     |  2 +-
 .../desc/jcore-bnp-bioc-multiplier-reader.xml | 54 ++++++++++++-------
 .../reader/desc/jcore-bnp-bioc-multiplier.xml | 21 ++++++--
 4 files changed, 57 insertions(+), 26 deletions(-)

diff --git a/jcore-gnp-bioc-reader/component.meta b/jcore-gnp-bioc-reader/component.meta
index 4bd445551..630d71b06 100644
--- a/jcore-gnp-bioc-reader/component.meta
+++ b/jcore-gnp-bioc-reader/component.meta
@@ -1,12 +1,12 @@
 {
     "categories": [
         "reader",
-        "multiplier"
+        "ae"
     ],
     "description": "A reader for the BioC format used by GNormPlus. Reads the text and the annotations, both species and genes.",
     "descriptors": [
         {
-            "category": "multiplier",
+            "category": "ae",
             "location": "de.julielab.jcore.reader.desc.jcore-bnp-bioc-multiplier"
         },
         {
@@ -17,7 +17,7 @@
     "exposable": true,
     "group": "general",
     "maven-artifact": {
-        "artifactId": "jcore-bnp-bioc-reader",
+        "artifactId": "jcore-gnp-bioc-reader",
         "groupId": "de.julielab",
         "version": "2.6.0-SNAPSHOT"
     },
diff --git a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/GNormPlusFormatMultiplier.java b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/GNormPlusFormatMultiplier.java
index 654c595dc..8cf8616cf 100644
--- a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/GNormPlusFormatMultiplier.java
+++ b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/GNormPlusFormatMultiplier.java
@@ -18,7 +18,7 @@
 import java.util.Collection;
 import java.util.Iterator;
 
-@ResourceMetaData(name="GNormPlusFormatMultiplier", description = "Multiplier for GNormPlusFormatMultiplierReader. Takes URIs pointing to BioC collection files that contain annotations created by GNormPlus. For each such file, reads all documents and returns CASes for them until all documents in all collections have been read into a CAS.")
+@ResourceMetaData(name="JCoRe GNormPlus BioC Format Multiplier", description = "Multiplier for GNormPlusFormatMultiplierReader. Takes URIs pointing to BioC collection files that contain annotations created by GNormPlus. For each such file, reads all documents and returns CASes for them until all documents in all collections have been read into a CAS.")
 public class GNormPlusFormatMultiplier extends JCasMultiplier_ImplBase {
     private final static Logger log = LoggerFactory.getLogger(GNormPlusFormatMultiplier.class);
     public static final String PARAM_COSTOSYS_CONFIG = "CostosysConfigFile";
diff --git a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier-reader.xml b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier-reader.xml
index 6095bb281..7081ae596 100644
--- a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier-reader.xml
+++ b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier-reader.xml
@@ -1,42 +1,58 @@
 <?xml version="1.0" encoding="UTF-8"?>
-<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+<collectionReaderDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
-    <primitive>true</primitive>
-    <annotatorImplementationName>de.julielab.jcore.reader.GNormPlusFormatMultiplier</annotatorImplementationName>
-    <analysisEngineMetaData>
-        <name>GNormPlusFormatMultiplier</name>
-        <description>Multiplier for GNormPlusFormatMultiplierReader. Takes URIs pointing to BioC collection files that contain annotations created by GNormPlus. For each such file, reads all documents and returns CASes for them until all documents in all collections have been read into a CAS.</description>
+    <implementationName>de.julielab.jcore.reader.GNormPlusFormatMultiplierReader</implementationName>
+    <processingResourceMetaData>
+        <name>JCoRe GNormPlus Format Multiplier Reader</name>
+        <description>A reader for the BioC XML format used by GNormPlus. Requires the matching multiplier.</description>
         <configurationParameters>
             <configurationParameter>
-                <name>CostosysConfigFile</name>
-                <description>Path to the CoStoSys configuration file that is used by the XMI DB writer in the same pipeline, if any. The XMI DB writer requires information about the XMI documents that are already in the database and should be updated with new annotations. The current highest XMI ID must be known to avoid ID collisions. To obtain the ID, it must be received from the database beforehand. This allows to retrieve the information batch wise instead of one-by-one which would be much slower.</description>
+                <name>InputPath</name>
+                <description>Path to a directory or file to be read. In case of a directory, all files ending in .xml will be read.</description>
                 <type>String</type>
                 <multiValued>false</multiValued>
+                <mandatory>true</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>Recursive</name>
+                <description>Whether to read also the subdirectories of the input directory, if the input path points to a directory.</description>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
                 <mandatory>false</mandatory>
             </configurationParameter>
             <configurationParameter>
-                <name>DocumentsTable</name>
-                <description>Required to retrieve the max XMI ID for use by the XMI DB writer. The schema-qualified name of the XMI document table that the XMI DB writer will write annotations into.</description>
-                <type>String</type>
+                <name>BatchSize</name>
+                <description>The number of XML file URI references to send to the CAS multipliers in each work assignment. Defaults to 20.</description>
+                <type>Integer</type>
                 <multiValued>false</multiValued>
                 <mandatory>false</mandatory>
             </configurationParameter>
         </configurationParameters>
-        <configurationParameterSettings/>
+        <configurationParameterSettings>
+            <nameValuePair>
+                <name>Recursive</name>
+                <value>
+                    <boolean>true</boolean>
+                </value>
+            </nameValuePair>
+            <nameValuePair>
+                <name>BatchSize</name>
+                <value>
+                    <integer>20</integer>
+                </value>
+            </nameValuePair>
+        </configurationParameterSettings>
         <typeSystemDescription>
             <imports>
-                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
                 <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types"/>
-                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
-                <import name="de.julielab.jcore.types.jcore-xmi-splitter-types"/>
             </imports>
         </typeSystemDescription>
         <fsIndexCollection/>
         <capabilities/>
         <operationalProperties>
             <modifiesCas>true</modifiesCas>
-            <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
-            <outputsNewCASes>false</outputsNewCASes>
+            <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
+            <outputsNewCASes>true</outputsNewCASes>
         </operationalProperties>
-    </analysisEngineMetaData>
-</analysisEngineDescription>
\ No newline at end of file
+    </processingResourceMetaData>
+</collectionReaderDescription>
\ No newline at end of file
diff --git a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml
index 4981c441b..6f016f017 100644
--- a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml
+++ b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml
@@ -4,9 +4,24 @@
     <primitive>true</primitive>
     <annotatorImplementationName>de.julielab.jcore.reader.GNormPlusFormatMultiplier</annotatorImplementationName>
     <analysisEngineMetaData>
-        <name>GNormPlusFormatMultiplier</name>
+        <name>JCoRe GNormPlus BioC Format Multiplier</name>
         <description>Multiplier for GNormPlusFormatMultiplierReader. Takes URIs pointing to BioC collection files that contain annotations created by GNormPlus. For each such file, reads all documents and returns CASes for them until all documents in all collections have been read into a CAS.</description>
-        <configurationParameters/>
+        <configurationParameters>
+            <configurationParameter>
+                <name>CostosysConfigFile</name>
+                <description>Path to the CoStoSys configuration file that is used by the XMI DB writer in the same pipeline, if any. The XMI DB writer requires information about the XMI documents that are already in the database and should be updated with new annotations. The current highest XMI ID must be known to avoid ID collisions. To obtain the ID, it must be received from the database beforehand. This allows to retrieve the information batch wise instead of one-by-one which would be much slower.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>DocumentsTable</name>
+                <description>Required to retrieve the max XMI ID for use by the XMI DB writer. The schema-qualified name of the XMI document table that the XMI DB writer will write annotations into.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+        </configurationParameters>
         <configurationParameterSettings/>
         <typeSystemDescription>
             <imports>
@@ -21,7 +36,7 @@
         <operationalProperties>
             <modifiesCas>true</modifiesCas>
             <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
-            <outputsNewCASes>true</outputsNewCASes>
+            <outputsNewCASes>false</outputsNewCASes>
         </operationalProperties>
     </analysisEngineMetaData>
 </analysisEngineDescription>
\ No newline at end of file

From 32d92a879b70e029b6c7b16046adbb7ef3e7631a Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 7 Mar 2022 17:10:37 +0100
Subject: [PATCH 160/269] Set the BioC GNP multiplier again as outputting new
 CASes.

---
 jcore-gnp-bioc-reader/component.meta                          | 4 ++--
 .../julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/jcore-gnp-bioc-reader/component.meta b/jcore-gnp-bioc-reader/component.meta
index 630d71b06..91006a646 100644
--- a/jcore-gnp-bioc-reader/component.meta
+++ b/jcore-gnp-bioc-reader/component.meta
@@ -1,12 +1,12 @@
 {
     "categories": [
         "reader",
-        "ae"
+        "multiplier"
     ],
     "description": "A reader for the BioC format used by GNormPlus. Reads the text and the annotations, both species and genes.",
     "descriptors": [
         {
-            "category": "ae",
+            "category": "multiplier",
             "location": "de.julielab.jcore.reader.desc.jcore-bnp-bioc-multiplier"
         },
         {
diff --git a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml
index 6f016f017..4b329d06a 100644
--- a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml
+++ b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml
@@ -36,7 +36,7 @@
         <operationalProperties>
             <modifiesCas>true</modifiesCas>
             <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
-            <outputsNewCASes>false</outputsNewCASes>
+            <outputsNewCASes>true</outputsNewCASes>
         </operationalProperties>
     </analysisEngineMetaData>
 </analysisEngineDescription>
\ No newline at end of file

From 6f4b8b9f18baf6cec065896916ed6184ed8c6e4c Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 7 Mar 2022 17:38:55 +0100
Subject: [PATCH 161/269] Add the sofa map besides the xmi IDs with the GNP
 reader.

---
 .../jcore/reader/BioCCasPopulator.java        | 27 ++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
index fa76ad27c..81320a24c 100644
--- a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
+++ b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
@@ -7,6 +7,7 @@
 import de.julielab.jcore.types.*;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.FSArray;
+import org.apache.uima.jcas.cas.StringArray;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -17,6 +18,7 @@
 import java.sql.SQLException;
 import java.sql.Statement;
 import java.util.*;
+import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
 /**
@@ -27,6 +29,7 @@ public class BioCCasPopulator {
     private final static Logger log = LoggerFactory.getLogger(BioCCasPopulator.class);
     private final BioCCollection bioCCollection;
     private Map<String, Integer> maxXmiIdMap;
+    private Map<String, String> sofaMaps;
     private int pos;
 
     public BioCCasPopulator(Path biocCollectionPath, Path costosysConfiguration, String documentsTable) throws XMLStreamException, IOException, SQLException {
@@ -35,22 +38,23 @@ public BioCCasPopulator(Path biocCollectionPath, Path costosysConfiguration, Str
         }
         if (costosysConfiguration != null) {
             maxXmiIdMap = new HashMap<>();
+            sofaMaps = new HashMap<>();
             DataBaseConnector dbc = new DataBaseConnector(costosysConfiguration.toString());
             try (CoStoSysConnection conn = dbc.obtainOrReserveConnection()) {
-                retrieveMaxXmiIds(documentsTable, dbc, conn);
+                retrieveXmiMetaData(documentsTable, dbc, conn);
             }
         }
         pos = 0;
     }
 
-    private void retrieveMaxXmiIds(String documentsTable, DataBaseConnector dbc, CoStoSysConnection conn) throws SQLException {
+    private void retrieveXmiMetaData(String documentsTable, DataBaseConnector dbc, CoStoSysConnection conn) throws SQLException {
         log.debug("Retrieving the max XMI IDs for the current BioC collection of size {} from the database.", bioCCollection.getDocmentCount());
         Statement stmt = conn.createStatement();
         StringBuilder maxIdQueryBuilder = new StringBuilder();
         if (dbc.getActiveTableFieldConfiguration().getPrimaryKey().length > 1)
             throw new IllegalArgumentException("The primary key of the active field schema '" + dbc.getActiveTableFieldConfiguration().getName() + "' is a compound key. Compound primary keys are currently not supported in this component.");
         String pkString = dbc.getActiveTableFieldConfiguration().getPrimaryKeyString();
-        maxIdQueryBuilder.append("SELECT ").append(pkString).append(",max_xmi_id FROM ").append(documentsTable).append(" WHERE ").append(pkString).append(" in ").append("(");
+        maxIdQueryBuilder.append("SELECT ").append(pkString).append(",max_xmi_id,sofa_mapping FROM ").append(documentsTable).append(" WHERE ").append(pkString).append(" in ").append("(");
         for (BioCDocument document : bioCCollection.getDocuments()) {
             String docId = document.getID();
             maxIdQueryBuilder.append("'").append(docId).append("'").append(",");
@@ -62,6 +66,10 @@ private void retrieveMaxXmiIds(String documentsTable, DataBaseConnector dbc, CoS
         ResultSet rs = stmt.executeQuery(maxIdQuery);
         while (rs.next()) {
             maxXmiIdMap.put(rs.getString(1), rs.getInt(2));
+            sofaMaps.put(rs.getString(1), rs.getString(3));
+        }
+        if (log.isTraceEnabled()) {
+            log.trace("XMI ID map sample: {}", maxXmiIdMap.entrySet().stream().limit(10).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)));
         }
         log.debug("Obtained {} max XMI IDs.", maxXmiIdMap.size());
     }
@@ -94,10 +102,23 @@ public void populateWithNextDocument(JCas jCas) {
     private void setMaxXmiId(JCas jCas, BioCDocument document) {
         if (maxXmiIdMap != null) {
             Integer maxXmiId = maxXmiIdMap.get(document.getID());
+            String mappingString = sofaMaps.get(document.getID());
             if (maxXmiId == null)
                 throw new IllegalStateException("No max XMI ID was obtained for the document with ID " + document.getID() + ". This means that this document is not already part of the database documents table. When adding annotations to existing database documents, make sure that all documents exist in the database already.");
             XmiMetaData xmiMetaData = new XmiMetaData(jCas);
             xmiMetaData.setMaxXmiId(maxXmiId);
+            String[] mappings = mappingString != null ? mappingString.split("\\|") : null;
+            StringArray mappingsArray = null;
+            if (mappings != null) {
+                mappingsArray = new StringArray(jCas, mappings.length);
+                for (int i = 0; i < mappings.length; i++) {
+                    String mapping = mappings[i];
+                    mappingsArray.set(i, mapping);
+                    log.trace("Retrieved sofa_id_mapping {} for document {}.", mappingsArray.get(i), document.getID());
+                }
+            }
+            if (mappingsArray != null)
+                xmiMetaData.setSofaIdMappings(mappingsArray);
             xmiMetaData.addToIndexes();
         }
     }

From 8d300903d7bb51f08a336eb2faf11b7126b7282e Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 7 Mar 2022 17:45:07 +0100
Subject: [PATCH 162/269] Add biology types to GNP BioC multiplier.

---
 .../main/java/de/julielab/jcore/reader/BioCCasPopulator.java  | 4 ++--
 .../julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml  | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
index 81320a24c..49003430c 100644
--- a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
+++ b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
@@ -69,7 +69,8 @@ private void retrieveXmiMetaData(String documentsTable, DataBaseConnector dbc, C
             sofaMaps.put(rs.getString(1), rs.getString(3));
         }
         if (log.isTraceEnabled()) {
-            log.trace("XMI ID map sample: {}", maxXmiIdMap.entrySet().stream().limit(10).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)));
+            log.trace("XMI ID sample: {}", maxXmiIdMap.entrySet().stream().limit(10).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)));
+            log.trace("Sofa map sample: {}", sofaMaps.entrySet().stream().limit(10).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)));
         }
         log.debug("Obtained {} max XMI IDs.", maxXmiIdMap.size());
     }
@@ -114,7 +115,6 @@ private void setMaxXmiId(JCas jCas, BioCDocument document) {
                 for (int i = 0; i < mappings.length; i++) {
                     String mapping = mappings[i];
                     mappingsArray.set(i, mapping);
-                    log.trace("Retrieved sofa_id_mapping {} for document {}.", mappingsArray.get(i), document.getID());
                 }
             }
             if (mappingsArray != null)
diff --git a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml
index 4b329d06a..8ee9bab7a 100644
--- a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml
+++ b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml
@@ -29,6 +29,7 @@
                 <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types"/>
                 <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
                 <import name="de.julielab.jcore.types.jcore-xmi-splitter-types"/>
+                <import name="de.julielab.jcore.types.jcore-semantics-biology-types"/>
             </imports>
         </typeSystemDescription>
         <fsIndexCollection/>

From aeb6ac63d21292f6ab99d60be2710b178acb5f9d Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 14 Mar 2022 17:43:22 +0100
Subject: [PATCH 163/269] Fix a bug where the processed doc IDs in the
 XmiDataInserter were not cleared.

The clearing was missing when the pipeline did not start with the DB reader reading from a subset table.
---
 .../jcore/ae/flairner/FlairNerAnnotator.java  |  4 +++-
 .../jcore/reader/BioCCasPopulator.java        | 12 +++++++++++
 .../reader/GNormPlusFormatMultiplier.java     | 21 +++++++++++++++----
 .../GNormPlusFormatMultiplierReader.java      |  3 +++
 .../reader/desc/jcore-bnp-bioc-multiplier.xml | 13 ++++++++++--
 .../jcore/consumer/xmi/XmiDataInserter.java   | 14 ++++++++++---
 6 files changed, 57 insertions(+), 10 deletions(-)

diff --git a/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java b/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java
index 04d65d3cf..cf36e6c22 100644
--- a/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java
+++ b/jcore-flair-ner-ae/src/main/java/de/julielab/jcore/ae/flairner/FlairNerAnnotator.java
@@ -24,6 +24,7 @@
 import org.apache.uima.fit.descriptor.ConfigurationParameter;
 import org.apache.uima.fit.descriptor.ResourceMetaData;
 import org.apache.uima.fit.descriptor.TypeCapability;
+import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.DoubleArray;
 import org.apache.uima.jcas.tcas.Annotation;
@@ -174,7 +175,8 @@ public void process(final JCas aJCas) throws AnalysisEngineProcessException {
             }
             JCoReOverlapAnnotationIndex<InternalReference> intRefIndex = new JCoReOverlapAnnotationIndex<>(aJCas, InternalReference.type);
             final AnnotationAdderHelper helper = new AnnotationAdderHelper();
-            log.trace("Sending document sentences to flair for entity tagging.");
+            if (log.isTraceEnabled())
+            log.trace("Sending document sentences to flair for entity tagging: {}", JCasUtil.select(aJCas, Sentence.class).stream().map(Sentence::getCoveredText).collect(Collectors.toList()));
             final NerTaggingResponse taggingResponse = connector.tagSentences(StreamSupport.stream(sentIndex.spliterator(), false));
             final List<TaggedEntity> taggedEntities = taggingResponse.getTaggedEntities();
             for (TaggedEntity entity : taggedEntities) {
diff --git a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
index 49003430c..75f58fa02 100644
--- a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
+++ b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
@@ -162,6 +162,10 @@ private void setDocumentText(JCas jCas, BioCDocument document) {
                             passageAnnotation = new Title(jCas, offset, passageEnd);
                             ((Title) passageAnnotation).setTitleType("table");
                             break;
+                        case "other_title":
+                            passageAnnotation = new Title(jCas, offset, passageEnd);
+                            ((Title) passageAnnotation).setTitleType("other");
+                            break;
                         case "abstract":
                             passageAnnotation = new AbstractText(jCas, offset, passageEnd);
                             break;
@@ -224,4 +228,12 @@ private void addGeneAnnotation(BioCAnnotation annotation, JCas jCas) throws Miss
     public int documentsLeftInCollection() {
         return bioCCollection.getDocmentCount() - pos;
     }
+
+    public long getCollectionTextLength() {
+        return bioCCollection.getDocuments().stream().map(BioCDocument::getPassages).flatMap(Collection::stream).mapToInt(passage -> passage.getText().orElse("").length()).sum();
+    }
+
+    public int getNumDocumentsInCollection() {
+        return bioCCollection.getDocmentCount();
+    }
 }
diff --git a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/GNormPlusFormatMultiplier.java b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/GNormPlusFormatMultiplier.java
index 8cf8616cf..1739e461e 100644
--- a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/GNormPlusFormatMultiplier.java
+++ b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/GNormPlusFormatMultiplier.java
@@ -7,6 +7,7 @@
 import org.apache.uima.cas.AbstractCas;
 import org.apache.uima.fit.descriptor.ConfigurationParameter;
 import org.apache.uima.fit.descriptor.ResourceMetaData;
+import org.apache.uima.fit.descriptor.TypeCapability;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.resource.ResourceInitializationException;
@@ -15,22 +16,27 @@
 
 import java.net.URI;
 import java.nio.file.Path;
+import java.text.DecimalFormat;
 import java.util.Collection;
 import java.util.Iterator;
 
-@ResourceMetaData(name="JCoRe GNormPlus BioC Format Multiplier", description = "Multiplier for GNormPlusFormatMultiplierReader. Takes URIs pointing to BioC collection files that contain annotations created by GNormPlus. For each such file, reads all documents and returns CASes for them until all documents in all collections have been read into a CAS.")
+@ResourceMetaData(name = "JCoRe GNormPlus BioC Format Multiplier", description = "Multiplier for GNormPlusFormatMultiplierReader. Takes URIs pointing to BioC collection files that contain annotations created by GNormPlus. For each such file, reads all documents and returns CASes for them until all documents in all collections have been read into a CAS.")
+@TypeCapability(outputs = {"de.julielab.jcore.types.Gene", "de.julielab.jcore.types.Organism"})
 public class GNormPlusFormatMultiplier extends JCasMultiplier_ImplBase {
-    private final static Logger log = LoggerFactory.getLogger(GNormPlusFormatMultiplier.class);
     public static final String PARAM_COSTOSYS_CONFIG = "CostosysConfigFile";
     public static final String PARAM_XMI_DOCUMENTS_TABLE = "DocumentsTable";
+    private final static Logger log = LoggerFactory.getLogger(GNormPlusFormatMultiplier.class);
     private Iterator<URI> currentUriBatch;
     private BioCCasPopulator casPopulator;
+    private DecimalFormat df = new DecimalFormat();
 
-@ConfigurationParameter(name=PARAM_COSTOSYS_CONFIG, mandatory = false, description = "Path to the CoStoSys configuration file that is used by the XMI DB writer in the same pipeline, if any. The XMI DB writer requires information about the XMI documents that are already in the database and should be updated with new annotations. The current highest XMI ID must be known to avoid ID collisions. To obtain the ID, it must be received from the database beforehand. This allows to retrieve the information batch wise instead of one-by-one which would be much slower.")
+    @ConfigurationParameter(name = PARAM_COSTOSYS_CONFIG, mandatory = false, description = "Path to the CoStoSys configuration file that is used by the XMI DB writer in the same pipeline, if any. The XMI DB writer requires information about the XMI documents that are already in the database and should be updated with new annotations. The current highest XMI ID must be known to avoid ID collisions. To obtain the ID, it must be received from the database beforehand. This allows to retrieve the information batch wise instead of one-by-one which would be much slower.")
     private String costosysConfiguration;
-@ConfigurationParameter(name=PARAM_XMI_DOCUMENTS_TABLE, mandatory = false, description = "Required to retrieve the max XMI ID for use by the XMI DB writer. The schema-qualified name of the XMI document table that the XMI DB writer will write annotations into.")
+    @ConfigurationParameter(name = PARAM_XMI_DOCUMENTS_TABLE, mandatory = false, description = "Required to retrieve the max XMI ID for use by the XMI DB writer. The schema-qualified name of the XMI document table that the XMI DB writer will write annotations into.")
     private String documentsTable;
 
+    private long lastTimeStamp;
+
     @Override
     public void initialize(UimaContext aContext) throws ResourceInitializationException {
         super.initialize(aContext);
@@ -38,6 +44,7 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
         documentsTable = (String) aContext.getConfigParameterValue(PARAM_XMI_DOCUMENTS_TABLE);
         if (costosysConfiguration == null ^ documentsTable == null)
             throw new ResourceInitializationException(new IllegalArgumentException("Either both or none parameters must be defined: " + PARAM_COSTOSYS_CONFIG + ", " + PARAM_XMI_DOCUMENTS_TABLE));
+        lastTimeStamp = 0;
     }
 
     @Override
@@ -58,6 +65,12 @@ public boolean hasNext() throws AnalysisEngineProcessException {
         if ((casPopulator == null || casPopulator.documentsLeftInCollection() == 0) && currentUriBatch.hasNext()) {
             URI nextUri = currentUriBatch.next();
             try {
+                if (log.isDebugEnabled() && lastTimeStamp != 0) {
+                    long collectionTextLength = casPopulator.getCollectionTextLength();
+                    long passedMillis = System.currentTimeMillis() - lastTimeStamp;
+                    log.debug("Last document batch of size {} processing time: {}s for text length of {} characters; that is {}ms per character.", casPopulator.getNumDocumentsInCollection(), passedMillis / 1000, collectionTextLength, df.format((double)passedMillis/collectionTextLength));
+                }
+                lastTimeStamp = System.currentTimeMillis();
                 casPopulator = new BioCCasPopulator(Path.of(nextUri), costosysConfiguration != null ? Path.of(costosysConfiguration) : null, documentsTable);
             } catch (Exception e) {
                 log.error("Could not read from {}", nextUri, e);
diff --git a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/GNormPlusFormatMultiplierReader.java b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/GNormPlusFormatMultiplierReader.java
index dc04596e4..019437c25 100644
--- a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/GNormPlusFormatMultiplierReader.java
+++ b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/GNormPlusFormatMultiplierReader.java
@@ -77,6 +77,9 @@ public void getNext(JCas jCas) throws CollectionException {
                 throw new CollectionException(e);
             }
             completed++;
+            if (completed % 10 == 0) {
+                log.debug("{} input files read", completed);
+            }
         }
     }
 
diff --git a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml
index 8ee9bab7a..15f62b47b 100644
--- a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml
+++ b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml
@@ -33,11 +33,20 @@
             </imports>
         </typeSystemDescription>
         <fsIndexCollection/>
-        <capabilities/>
+        <capabilities>
+            <capability>
+                <inputs/>
+                <outputs>
+                    <type>de.julielab.jcore.types.Gene</type>
+                    <type>de.julielab.jcore.types.Organism</type>
+                </outputs>
+                <languagesSupported/>
+            </capability>
+        </capabilities>
         <operationalProperties>
             <modifiesCas>true</modifiesCas>
             <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
-            <outputsNewCASes>true</outputsNewCASes>
+            <outputsNewCASes>false</outputsNewCASes>
         </operationalProperties>
     </analysisEngineMetaData>
 </analysisEngineDescription>
\ No newline at end of file
diff --git a/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XmiDataInserter.java b/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XmiDataInserter.java
index 390e27e67..1cad80ec4 100644
--- a/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XmiDataInserter.java
+++ b/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XmiDataInserter.java
@@ -16,6 +16,7 @@
 import java.sql.BatchUpdateException;
 import java.sql.PreparedStatement;
 import java.sql.SQLException;
+import java.text.DecimalFormat;
 import java.util.*;
 import java.util.function.Function;
 import java.util.function.Predicate;
@@ -34,6 +35,7 @@ public class XmiDataInserter {
     private Map<DocumentId, Integer> maxXmiIdMap;
     private String componentDbName;
     private String hashColumnName;
+    private DecimalFormat df = new DecimalFormat();
 
     private List<DocumentId> processedDocumentIds;
 
@@ -97,7 +99,7 @@ public boolean hasNext() {
 
             @Override
             public Map<String, Object> next() {
-                Map<String, Object> row = new HashMap<String, Object>();
+                Map<String, Object> row = new HashMap<>();
                 final DocumentId docId = docIdIterator.next();
                 // There might actually be no data when we only write the SHA hashes
                 final List<XmiData> dataList = dataByDoc.getOrDefault(docId, Collections.emptyList());
@@ -166,6 +168,7 @@ public Map<String, Object> next() {
                     row.put(hashColumnName, hash);
                     log.trace("{}={}", hashColumnName, hash);
                 }
+                System.out.println("XmiInserter: " + row);
                 return row;
             }
 
@@ -175,7 +178,9 @@ public void remove() {
             }
         }
 
+        long time = System.currentTimeMillis();
         try (CoStoSysConnection conn = dbc.obtainOrReserveConnection()) {
+            log.debug("Obtained connection after {}ms", System.currentTimeMillis()-time);
             conn.setAutoCommit(false);
 
             // This is the private in-line defined class from above. All values are already contained in the class
@@ -199,6 +204,7 @@ public void remove() {
                 throw new XmiDataInsertionException(e);
             }
             setLastComponent(conn, subsetTableName);
+            processedDocumentIds.clear();
             log.debug("Committing XMI data to database.");
             conn.commit();
             maxXmiIdMap.clear();
@@ -209,6 +215,10 @@ public void remove() {
             if (null != ne)
                 ne.printStackTrace();
         }
+        if (log.isDebugEnabled()) {
+            time = System.currentTimeMillis() - time;
+            log.debug("Database import of {} XMI documents took {}ms ({}ms per document)", documentIdsWithData.size(), time, df.format((double) time / documentIdsWithData.size()));
+        }
     }
 
     /**
@@ -261,8 +271,6 @@ private void setLastComponent(CoStoSysConnection conn, String subsetTableName) t
             else
                 nextException.printStackTrace();
             throw new XmiDataInsertionException(nextException);
-        } finally {
-            processedDocumentIds.clear();
         }
     }
 

From 086f095f6e2514016f3cb815dd955cc58891f305 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 14 Mar 2022 17:47:40 +0100
Subject: [PATCH 164/269] Remove debug log message.

---
 .../java/de/julielab/jcore/consumer/xmi/XmiDataInserter.java     | 1 -
 1 file changed, 1 deletion(-)

diff --git a/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XmiDataInserter.java b/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XmiDataInserter.java
index 1cad80ec4..bcbe2f439 100644
--- a/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XmiDataInserter.java
+++ b/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XmiDataInserter.java
@@ -168,7 +168,6 @@ public Map<String, Object> next() {
                     row.put(hashColumnName, hash);
                     log.trace("{}={}", hashColumnName, hash);
                 }
-                System.out.println("XmiInserter: " + row);
                 return row;
             }
 

From f973d8d7c8f7040e56bb123f31d1fea48526a2ce Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 16 Mar 2022 16:32:47 +0100
Subject: [PATCH 165/269] Omit annotation deletion for unchanged documents.

---
 .../jcore/ae/biosem/BioSemEventAnnotator.java | 13 ++++++++-
 .../jcore/consumer/xmi/XMIDBWriter.java       | 16 ++++++++---
 .../jcore/consumer/xmi/XmiDataInserter.java   | 28 ++++++++++---------
 3 files changed, 39 insertions(+), 18 deletions(-)

diff --git a/jcore-biosem-ae/src/main/java/de/julielab/jcore/ae/biosem/BioSemEventAnnotator.java b/jcore-biosem-ae/src/main/java/de/julielab/jcore/ae/biosem/BioSemEventAnnotator.java
index 12720ec9d..e263b203f 100644
--- a/jcore-biosem-ae/src/main/java/de/julielab/jcore/ae/biosem/BioSemEventAnnotator.java
+++ b/jcore-biosem-ae/src/main/java/de/julielab/jcore/ae/biosem/BioSemEventAnnotator.java
@@ -17,7 +17,9 @@
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.analysis_engine.annotator.AnnotatorProcessException;
 import org.apache.uima.cas.FSIterator;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
 import org.apache.uima.fit.descriptor.ExternalResource;
+import org.apache.uima.fit.descriptor.ResourceMetaData;
 import org.apache.uima.fit.descriptor.TypeCapability;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.FSArray;
@@ -36,11 +38,13 @@
 import java.util.*;
 import java.util.Map.Entry;
 
+@ResourceMetaData(name="JCoRe BioSem Event Annotator", description = "Adds annotations for event triggers and events according to the BioNLP Shared Task event definition.")
 @TypeCapability(inputs = {"de.julielab.jcore.types.Gene"}, outputs = {"de.julielab.jcore.types.EventTrigger", "de.julielab.jcore.types.EventMention"})
 public class BioSemEventAnnotator extends JCasAnnotator_ImplBase {
 
 	private final static Logger log = LoggerFactory.getLogger(BioSemEventAnnotator.class);
 
+	public static final String PARAM_COMPONENT_ID = "ComponentId";
 	public final static String RESOURCE_TRAINED_DB = "TrainedDB";
 
 	private DataLoader loader;
@@ -49,6 +53,8 @@ public class BioSemEventAnnotator extends JCasAnnotator_ImplBase {
 
 	@ExternalResource(key = RESOURCE_TRAINED_DB)
 	private DBUtilsProvider dbUtilsProvider;
+	@ConfigurationParameter(name=PARAM_COMPONENT_ID, mandatory = false, defaultValue = "BioSemEventAnnotator", description = "Optional. If set, the 'componentId' feature of the created annotations will be set to the value of this parameter.")
+	private String componentId;
 
 	private EventExtraction xtr;
 
@@ -66,6 +72,7 @@ public class BioSemEventAnnotator extends JCasAnnotator_ImplBase {
 	public void initialize(UimaContext aContext) throws ResourceInitializationException {
 		super.initialize(aContext);
 		try {
+			componentId = (String) aContext.getConfigParameterValue(PARAM_COMPONENT_ID);
 			dbUtilsProvider = (DBUtilsProvider) aContext.getResourceObject(RESOURCE_TRAINED_DB);
 			trainedDb = dbUtilsProvider.getTrainedDatabase();
 		} catch (ResourceAccessException e) {
@@ -200,6 +207,7 @@ private EventMention addEventToIndexes(PData event, Map<String, Gene> proteinMap
 			PData eventArg1 = event.getPdata1();
 			PData eventArg2 = event.getPdata2();
 			uimaEvent = new EventMention(aJCas, begin, end);
+			uimaEvent.setComponentId(componentId);
 			uimaEvent.setId(event.PID);
 			uimaEvent.setSpecificType(uimaTrigger.getSpecificType());
 			uimaEvent.setTrigger(uimaTrigger);
@@ -281,6 +289,7 @@ private void addUimaEventArgument(EventMention uimaEvent, Object bioSemArg, int
 			// if we don't want to use the writer).
 			protein.setSpecificType("protein");
 			uimaArg = new ArgumentMention(aJCas, protein.getBegin(), protein.getEnd());
+			uimaArg.setComponentId(componentId);
 			uimaArg.setRef(protein);
 			uimaArg.setRole(determineArgumentRole(uimaEvent, uimaArg, argPos));
 		} else if (bioSemArg instanceof PData) {
@@ -295,9 +304,10 @@ private void addUimaEventArgument(EventMention uimaEvent, Object bioSemArg, int
 			}
 			if (null == uimaEventArg) {
 				throw new IllegalStateException("Creating UIMA EventMention annotation for BioSem event \""
-						+ eventArg.toString() + "\" failed, the UIMA EventMention is null.");
+						+ eventArg + "\" failed, the UIMA EventMention is null.");
 			}
 			uimaArg = new ArgumentMention(aJCas, uimaEventArg.getBegin(), uimaEventArg.getEnd());
+			uimaArg.setComponentId(componentId);
 			uimaArg.setRef(uimaEventArg);
 			uimaArg.setRole(determineArgumentRole(uimaEvent, uimaArg, argPos));
 		} else {
@@ -361,6 +371,7 @@ private EventTrigger addTriggerToIndexes(Word trg, JCas aJCas) {
 		int end = trg.locs[1];
 		String type = trg.type;
 		EventTrigger uimaTrigger = new EventTrigger(aJCas, begin, end);
+		uimaTrigger.setComponentId(componentId);
 		uimaTrigger.setId(id);
 		uimaTrigger.setSpecificType(type);
 		return uimaTrigger;
diff --git a/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XMIDBWriter.java b/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XMIDBWriter.java
index 8a085cf8b..ef6d7735c 100644
--- a/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XMIDBWriter.java
+++ b/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XMIDBWriter.java
@@ -252,6 +252,7 @@ public class XMIDBWriter extends JCasAnnotator_ImplBase {
     private String documentItemToHash;
     private Map<DocumentId, String> shaMap;
     private Set<DocumentId> mirrorResetIds;
+    private Set<DocumentId> unchangedDocuments;
     private String mappingCacheKey;
     private DocumentReleaseCheckpoint docReleaseCheckpoint;
     private List<DocumentId> currentDocumentIdBatch;
@@ -426,6 +427,7 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
             this.binaryEncoder = new BinaryJeDISNodeEncoder();
         }
         mirrorResetIds = new HashSet<>();
+        unchangedDocuments = new HashSet<>();
 
         log.info(XMIDBWriter.class.getName() + " initialized.");
         log.info("Effective document table name: {}", effectiveDocTableName);
@@ -519,6 +521,8 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException {
             Optional<DBProcessingMetaData> metaData = metaDatas.stream().findAny();
             DocumentId docId = getDocumentId(aJCas, metaData);
             setMirrorResetStateForDocId(docId, metaData);
+            if (metaData.isPresent() && metaData.get().getIsDocumentHashUnchanged())
+                unchangedDocuments.add(docId);
             if (docId == null) {
                 log.warn("The current document does not have a document ID. It is omitted from database import.");
                 return;
@@ -571,8 +575,10 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException {
     private void setMirrorResetStateForDocId(DocumentId docId, Optional<DBProcessingMetaData> metaData) {
         if (metaData.isPresent()) {
             // mirror subset reset is only necessary if we store the base document in any way;
-            // additionally, we check if the document text hash key is reported to by different to its already
-            // existing database entry. Only then the mirror subsets should be reset for this document.
+            // additionally, we check if the document text hash key is reported to be different to its already
+            // existing database entry. Only then the mirror subsets should be reset for this document because only
+            // then a re-processing of the document makes sense.
+            // The isDocumentHashUnchanged feature is set by the XMLDBMultiplier.
             if (storeBaseDocument && !metaData.get().getIsDocumentHashUnchanged())
                 mirrorResetIds.add(docId);
         } else {
@@ -1022,7 +1028,7 @@ public void batchProcessComplete() throws AnalysisEngineProcessException {
             final boolean readyToSendData = processXmiBuffer();
             if (readyToSendData) {
                 if (!(featuresToMapDryRun && useBinaryFormat))
-                    annotationInserter.sendXmiDataToDatabase(effectiveDocTableName, annotationModules, subsetTable, mirrorResetIds, deleteObsolete, shaMap);
+                    annotationInserter.sendXmiDataToDatabase(effectiveDocTableName, annotationModules, subsetTable, mirrorResetIds, unchangedDocuments, deleteObsolete, shaMap);
                 else
                     log.info("The dry run to see details about features to be mapped in the binary format is activated. No contents are written into the database.");
                 log.trace("Clearing {} annotation modules", annotationModules.size());
@@ -1033,6 +1039,7 @@ public void batchProcessComplete() throws AnalysisEngineProcessException {
                     docReleaseCheckpoint.release(jedisSyncKey, currentDocumentIdBatch.stream());
                 currentDocumentIdBatch.clear();
                 mirrorResetIds.clear();
+                unchangedDocuments.clear();
             }
         } catch (XmiDataInsertionException e) {
             throw new AnalysisEngineProcessException(e);
@@ -1052,7 +1059,7 @@ public void collectionProcessComplete() throws AnalysisEngineProcessException {
         try {
             processXmiBuffer();
             if (!(featuresToMapDryRun && useBinaryFormat))
-                annotationInserter.sendXmiDataToDatabase(effectiveDocTableName, annotationModules, subsetTable, mirrorResetIds, deleteObsolete, shaMap);
+                annotationInserter.sendXmiDataToDatabase(effectiveDocTableName, annotationModules, subsetTable, mirrorResetIds, unchangedDocuments, deleteObsolete, shaMap);
             else
                 log.info("The dry run to see details about features to be mapped in the binary format is activated. No contents are written into the database.");
             annotationModules.clear();
@@ -1062,6 +1069,7 @@ public void collectionProcessComplete() throws AnalysisEngineProcessException {
                 docReleaseCheckpoint.release(jedisSyncKey, currentDocumentIdBatch.stream());
             currentDocumentIdBatch.clear();
             mirrorResetIds.clear();
+            unchangedDocuments.clear();
         } catch (XmiDataInsertionException e) {
             throw new AnalysisEngineProcessException(e);
         }
diff --git a/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XmiDataInserter.java b/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XmiDataInserter.java
index bcbe2f439..d561432fe 100644
--- a/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XmiDataInserter.java
+++ b/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XmiDataInserter.java
@@ -63,35 +63,35 @@ public XmiDataInserter(Set<String> annotationModuleColumnNames,
      *
      * @param annotationModules
      * @param mirrorResetIds
+     * @param unchangedDocuments
      * @param deleteObsolete
      * @param shaMap
      * @throws XmiDataInsertionException
      * @throws AnalysisEngineProcessException
      */
-    public void sendXmiDataToDatabase(String xmiTableName, List<XmiData> annotationModules, String subsetTableName, Set<DocumentId> mirrorResetIds, Boolean deleteObsolete, Map<DocumentId, String> shaMap) throws XmiDataInsertionException {
+    public void sendXmiDataToDatabase(String xmiTableName, List<XmiData> annotationModules, String subsetTableName, Set<DocumentId> mirrorResetIds, Set<DocumentId> unchangedDocuments, Boolean deleteObsolete, Map<DocumentId, String> shaMap) throws XmiDataInsertionException {
         log.trace("Sending {} XMI data items", annotationModules.size());
         final Map<DocumentId, List<XmiData>> dataByDoc = annotationModules.stream().collect(Collectors.groupingBy(XmiData::getDocId));
         // Collect all document IDs we want to add something for into the database. This can be annotations or the hash.
-         final Set<DocumentId> documentIdsWithData = shaMap != null ? Sets.union(dataByDoc.keySet(), shaMap.keySet()) : dataByDoc.keySet();
+        final Set<DocumentId> documentIdsWithData = shaMap != null ? Sets.union(dataByDoc.keySet(), shaMap.keySet()) : dataByDoc.keySet();
         log.trace("There are {} documents with values to be updated in the database.", documentIdsWithData.size());
         class RowIterator implements Iterator<Map<String, Object>> {
+            // Add documents that have been processed but no data. We need to do this to override potentially existing
+            // annotation values with null to remove them.
+            private Iterator<DocumentId> docIdIterator;
+            private FieldConfig fieldConfig = dbc.getFieldConfiguration(schemaDocument);
+            private List<Map<String, String>> fields = fieldConfig.getFields();
             /**
              * An iterator that always returns only rows for a subset of document IDs. Either the ones that need mirror subsets to be reset or those for which mirror subsets should not be reset.
              * @param returnDocumentsWithMirrorReset
              */
             public RowIterator(boolean returnDocumentsWithMirrorReset) {
-                Predicate<DocumentId> mirrorResetFilterPredicate = docId -> mirrorResetIds.contains(docId);
+                Predicate<DocumentId> mirrorResetFilterPredicate = docId -> !unchangedDocuments.contains(docId);
                 if (!returnDocumentsWithMirrorReset)
                     mirrorResetFilterPredicate = Predicate.not(mirrorResetFilterPredicate);
                 docIdIterator = Stream.concat(documentIdsWithData.stream(), processedDocumentIds.stream()).filter(mirrorResetFilterPredicate).distinct().iterator();
             }
 
-            // Add documents that have been processed but no data. We need to do this to override potentially existing
-            // annotation values with null to remove them.
-            private Iterator<DocumentId> docIdIterator;
-            private FieldConfig fieldConfig = dbc.getFieldConfiguration(schemaDocument);
-            private List<Map<String, String>> fields = fieldConfig.getFields();
-
             @Override
             public boolean hasNext() {
                 return docIdIterator.hasNext();
@@ -153,7 +153,9 @@ public Map<String, Object> next() {
                     missingColumns.forEach(c -> row.put(c, null));
                 }
                 // Set columns without a value to null to delete a potentially existing value.
-                if (updateMode) {
+                // But only if the document text had changed. Otherwise we would just delete all the annotations we
+                // actually want to keep.
+                if (updateMode && !unchangedDocuments.contains(docId)) {
                     Set<String> annotationColumnsWithValues = dataList.stream().map(XmiData::getColumnName).collect(Collectors.toSet());
                     log.trace("Annotation columns with values: {}", annotationColumnsWithValues);
                     final Sets.SetView<String> columnsWithoutValues = Sets.difference(annotationModuleColumnNames, annotationColumnsWithValues);
@@ -179,7 +181,7 @@ public void remove() {
 
         long time = System.currentTimeMillis();
         try (CoStoSysConnection conn = dbc.obtainOrReserveConnection()) {
-            log.debug("Obtained connection after {}ms", System.currentTimeMillis()-time);
+            log.debug("Obtained connection after {}ms", System.currentTimeMillis() - time);
             conn.setAutoCommit(false);
 
             // This is the private in-line defined class from above. All values are already contained in the class
@@ -188,10 +190,10 @@ public void remove() {
             try {
                 if (updateMode) {
                     log.debug("Updating {} XMI CAS data in database table '{}' for documents with mirror subset resets.",
-                            mirrorResetIds.size(), xmiTableName);
+                            processedDocumentIds.size() - unchangedDocuments.size(), xmiTableName);
                     dbc.updateFromRowIterator(iterator, xmiTableName, false, true, schemaDocument);
                     log.debug("Updating {} XMI CAS data in database table '{}' for documents without mirror subset resets.",
-                            annotationModules.size()-mirrorResetIds.size(), xmiTableName);
+                            unchangedDocuments.size(), xmiTableName);
                     dbc.updateFromRowIterator(new RowIterator(false), xmiTableName, false, false, schemaDocument);
                 } else {
                     log.debug("Inserting {} XMI CAS data into database table '{}'.",

From cb93c15a6c6f9576fd2d21021f3606fe5fb7cb95 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 17 Mar 2022 13:10:36 +0100
Subject: [PATCH 166/269] Add the possibility to write gene annotations to
 BioC.

Then, we can provide GNormPlus with our own gene annotations, e.g. from flair.
---
 .../consumer/gnp/BioCDocumentPopulator.java   | 50 +++++++++-----
 .../consumer/gnp/GNormPlusFormatWriter.java   |  8 ++-
 .../gnp/BioCDocumentPopulatorTest.java        | 67 ++++++++++++++++++-
 .../consumer/gnp/TestDocumentGenerator.java   |  2 +-
 jcore-xml-db-reader/pom.xml                   |  9 +++
 .../jcore/reader/xml/XMLDBMultiplier.java     | 43 ++++++++----
 .../jcore/reader/xml/XMLDBMultiplierTest.java |  6 +-
 7 files changed, 150 insertions(+), 35 deletions(-)

diff --git a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
index 1a2182bed..bca360265 100644
--- a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
+++ b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
@@ -1,10 +1,13 @@
 package de.julielab.jcore.consumer.gnp;
 
+import com.pengyifan.bioc.BioCAnnotation;
 import com.pengyifan.bioc.BioCDocument;
+import com.pengyifan.bioc.BioCLocation;
 import com.pengyifan.bioc.BioCPassage;
 import de.julielab.jcore.types.*;
 import de.julielab.jcore.utility.JCoReTools;
 import org.apache.uima.cas.text.AnnotationIndex;
+import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -14,13 +17,20 @@
  */
 public class BioCDocumentPopulator {
     private final static Logger log = LoggerFactory.getLogger(BioCDocumentPopulator.class);
+    private final boolean addGenes; // when true, populate() also exports Gene annotations into the created passages
+
+    public BioCDocumentPopulator(boolean addGenes) {
+        this.addGenes = addGenes;
+    }
 
     public BioCDocument populate(JCas jCas) {
         BioCDocument doc = new BioCDocument(JCoReTools.getDocId(jCas));
         AnnotationIndex<Zone> zoneIndex = jCas.getAnnotationIndex(Zone.type);
+        int annotationId = 0;
         for (Zone z : zoneIndex) {
             if (z.getEnd() - z.getBegin() <= 0)
                 continue;
+            BioCPassage p = null;
             if (z instanceof Title) {
                 Title t = (Title) z;
                 String titleType;
@@ -49,43 +59,53 @@ public BioCDocument populate(JCas jCas) {
                         break;
                 }
                 if (titleType != null) {
-                    BioCPassage p = getPassageForAnnotation(t);
+                    p = getPassageForAnnotation(t);
                     p.putInfon("type", titleType);
                     doc.addPassage(p);
                 }
             } else if (z instanceof AbstractText) {
                 AbstractText at = (AbstractText) z;
-                BioCPassage p = getPassageForAnnotation(at);
+                p = getPassageForAnnotation(at);
                 p.putInfon("type", "abstract");
                 doc.addPassage(p);
             } else if (z instanceof Paragraph) {
                 Paragraph pa = (Paragraph) z;
-                BioCPassage p = getPassageForAnnotation(pa);
+                p = getPassageForAnnotation(pa);
                 p.putInfon("type", "paragraph");
                 doc.addPassage(p);
             } else if (z instanceof Caption) {
                 Caption c = (Caption) z;
-                BioCPassage p = getPassageForAnnotation(c);
+                p = getPassageForAnnotation(c);
                 if (c.getCaptionType() == null)
                     throw new IllegalArgumentException("The captionType feature is null for " + c);
                 p.putInfon("type", c.getCaptionType());
                 doc.addPassage(p);
             }
+            if (addGenes) {
+                annotationId = addGenesToPassage(jCas, z, p, annotationId);
+            }
         }
         return doc;
     }
 
-//    private BioCPassage getPassageForAbstract(AbstractText at) {
-//        FSArray structuredAbstractParts = at.getStructuredAbstractParts();
-//        boolean foundAbstractParts = false;
-//        if (structuredAbstractParts != null) {
-//            for (int i = 0; i < structuredAbstractParts.size(); ++i) {
-//                AbstractSection as = (AbstractSection) structuredAbstractParts.get(i);
-//
-//            }
-//        }
-//        return null;
-//    }
+    /** Adds the Gene annotations sub-iterated within zone z as BioCAnnotations to p (no-op if p is null); returns the next unused annotation ID. */
+    private int addGenesToPassage(JCas jCas, Zone z, BioCPassage p, int annotationId) {
+        if (p != null) {
+            for (Gene g : JCasUtil.subiterate(jCas, Gene.class, z, false, true)) {
+                BioCAnnotation annotation = new BioCAnnotation(String.valueOf(annotationId++));
+                annotation.setText(g.getCoveredText());
+                // The specificType feature may be unset: treat a missing value as a plain gene instead of failing with an NPE.
+                String rawType = g.getSpecificType();
+                String specificType = rawType == null ? "" : rawType.toLowerCase(java.util.Locale.ROOT);
+                // 'familiy' is an entity name typo in the ProGene corpus
+                boolean isFamily = specificType.contains("familiy") || specificType.contains("family") || specificType.contains("complex");
+                annotation.putInfon("type", isFamily ? "FamilyName" : "Gene");
+                annotation.addLocation(new BioCLocation(g.getBegin(), g.getEnd() - g.getBegin()));
+                p.addAnnotation(annotation);
+            }
+        }
+        return annotationId;
+    }
 
     /**
      * Creates a BioCPassage with offset and text corresponding to the passed annotation <tt>a</tt>.
diff --git a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriter.java b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriter.java
index 002407a0e..08f10fab4 100644
--- a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriter.java
+++ b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriter.java
@@ -15,6 +15,7 @@
 
 import java.nio.file.Path;
 import java.util.Date;
+import java.util.Optional;
 
 @ResourceMetaData(name = "JCoRe GNormPlus BioC Writer", description = "Writes CAS documents into the BioC XML format used by the gene tagger and normalizer GNormPlus.", vendor = "JULIE Lab Jena, Germany")
 @TypeCapability(inputs = {}, outputs = {})
@@ -23,6 +24,7 @@ public class GNormPlusFormatWriter extends JCasAnnotator_ImplBase {
     public static final String PARAM_NUM_DOCS_PER_FILE = "NumDocsPerFile";
     public static final String PARAM_NUM_FILES_PER_DIR = "NumFilesPerDir";
     public static final String PARAM_BASE_DIR = "BaseDirectory";
+    public static final String PARAM_ADD_GENES = "AddGenes";
     private final static Logger log = LoggerFactory.getLogger(GNormPlusFormatWriter.class);
     @ConfigurationParameter(name = PARAM_NUM_DOCS_PER_FILE, description = "The number of documents (i.e. CASes) that should be written into a single BioC XML file.")
     private int numDocsPerFile;
@@ -30,6 +32,8 @@ public class GNormPlusFormatWriter extends JCasAnnotator_ImplBase {
     private int numDocsPerDir;
     @ConfigurationParameter(name = PARAM_BASE_DIR, description = "The base directory into which to create new directories that contain the actual BioC collection files.")
     private String baseDirectory;
+    @ConfigurationParameter(name = PARAM_ADD_GENES, mandatory = false, defaultValue = "false", description = "If set to true, all Gene annotations in the CAS will be added to the BioC documents.")
+    private boolean addGenes;
 
     private BioCDocumentPopulator bioCDocumentPopulator;
     private BioCCollectionWriter bioCCollectionWriter;
@@ -44,8 +48,9 @@ public void initialize(final UimaContext aContext) {
         numDocsPerFile = (int) aContext.getConfigParameterValue(PARAM_NUM_DOCS_PER_FILE);
         numDocsPerDir = (int) aContext.getConfigParameterValue(PARAM_NUM_FILES_PER_DIR);
         baseDirectory = (String) aContext.getConfigParameterValue(PARAM_BASE_DIR);
+        addGenes = (boolean) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_ADD_GENES)).orElse(false);
 
-        bioCDocumentPopulator = new BioCDocumentPopulator();
+        bioCDocumentPopulator = new BioCDocumentPopulator(addGenes);
         bioCCollectionWriter = new BioCCollectionWriter(numDocsPerDir, Path.of(baseDirectory));
 
         currentCollection = new BioCCollection("UTF-8", "1.0", new Date().toString(), true, "JCoRe GNormPlus BioC Writer", "PubTator.key");
@@ -76,7 +81,6 @@ public void process(final JCas jCas) throws AnalysisEngineProcessException {
     public void collectionProcessComplete() throws AnalysisEngineProcessException {
         super.collectionProcessComplete();
         try {
-//            if (currentCollection.getDocmentCount() != 0)
                 bioCCollectionWriter.writeBioCCollection(currentCollection);
         } catch (Exception e) {
             log.error("Could not write final batch of BioCDocuments.", e);
diff --git a/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulatorTest.java b/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulatorTest.java
index 55601393a..25dc4e0ff 100644
--- a/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulatorTest.java
+++ b/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulatorTest.java
@@ -3,6 +3,7 @@
 import com.pengyifan.bioc.BioCCollection;
 import com.pengyifan.bioc.BioCDocument;
 import com.pengyifan.bioc.io.BioCCollectionWriter;
+import de.julielab.jcore.types.Gene;
 import org.apache.uima.jcas.JCas;
 import org.junit.jupiter.api.Test;
 
@@ -14,7 +15,7 @@
 class BioCDocumentPopulatorTest {
     @Test
     public void populate() throws Exception {
-        BioCDocumentPopulator populator = new BioCDocumentPopulator();
+        BioCDocumentPopulator populator = new BioCDocumentPopulator(false);
         JCas jCas = TestDocumentGenerator.prepareCas(1);
         BioCDocument biocDoc = populator.populate(jCas);
         ByteArrayOutputStream baos = new ByteArrayOutputStream();
@@ -38,4 +39,68 @@ public void populate() throws Exception {
         assertThat(resultXml).containsOnlyOnce("Tab1.");
         assertThat(resultXml).containsOnlyOnce("This is the table1 caption.");
     }
+
+    @Test
+    public void populateWithGenes() throws Exception {
+        BioCDocumentPopulator populator = new BioCDocumentPopulator(true);
+        JCas jCas = TestDocumentGenerator.prepareCas(1);
+        new Gene(jCas, 0, 4).addToIndexes();
+        new Gene(jCas, 87, 96).addToIndexes();
+        BioCDocument biocDoc = populator.populate(jCas);
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        BioCCollection collection = new BioCCollection("UTF-8", "1.0", (new Date()).toString(), true, "jUnit Test", "PubTator.key");
+        collection.addDocument(biocDoc);
+        BioCCollectionWriter collectionWriter = new BioCCollectionWriter(baos);
+        collectionWriter.writeCollection(collection);
+        String resultXml = baos.toString(StandardCharsets.UTF_8);
+        assertThat(resultXml).containsOnlyOnce("<annotation id=\"0\">");
+        assertThat(resultXml).contains("<infon key=\"type\">Gene</infon>");
+        assertThat(resultXml).containsOnlyOnce("<location offset=\"0\" length=\"4\"/>");
+        assertThat(resultXml).containsOnlyOnce("<text>This</text>");
+
+        assertThat(resultXml).contains("<annotation id=\"1\">");
+        assertThat(resultXml).contains("<infon key=\"type\">Gene</infon>");
+        assertThat(resultXml).containsOnlyOnce("<location offset=\"87\" length=\"9\"/>");
+        assertThat(resultXml).containsOnlyOnce("<text>certainly</text>");
+    }
+
+    @Test
+    public void populateWithGeneFamilies() throws Exception {
+        BioCDocumentPopulator populator = new BioCDocumentPopulator(true);
+        JCas jCas = TestDocumentGenerator.prepareCas(1);
+        Gene gene = new Gene(jCas, 0, 4);
+        gene.setSpecificType("protein_familiy_or_group");
+        gene.addToIndexes();
+        BioCDocument biocDoc = populator.populate(jCas);
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        BioCCollection collection = new BioCCollection("UTF-8", "1.0", (new Date()).toString(), true, "jUnit Test", "PubTator.key");
+        collection.addDocument(biocDoc);
+        BioCCollectionWriter collectionWriter = new BioCCollectionWriter(baos);
+        collectionWriter.writeCollection(collection);
+        String resultXml = baos.toString(StandardCharsets.UTF_8);
+        assertThat(resultXml).containsOnlyOnce("<annotation id=\"0\">");
+        assertThat(resultXml).contains("<infon key=\"type\">FamilyName</infon>");
+        assertThat(resultXml).containsOnlyOnce("<location offset=\"0\" length=\"4\"/>");
+        assertThat(resultXml).containsOnlyOnce("<text>This</text>");
+    }
+
+    @Test
+    public void populateWithGeneFamilies2() throws Exception {
+        BioCDocumentPopulator populator = new BioCDocumentPopulator(true);
+        JCas jCas = TestDocumentGenerator.prepareCas(1);
+        Gene gene = new Gene(jCas, 0, 4);
+        gene.setSpecificType("FamilyName");
+        gene.addToIndexes();
+        BioCDocument biocDoc = populator.populate(jCas);
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        BioCCollection collection = new BioCCollection("UTF-8", "1.0", (new Date()).toString(), true, "jUnit Test", "PubTator.key");
+        collection.addDocument(biocDoc);
+        BioCCollectionWriter collectionWriter = new BioCCollectionWriter(baos);
+        collectionWriter.writeCollection(collection);
+        String resultXml = baos.toString(StandardCharsets.UTF_8);
+        assertThat(resultXml).containsOnlyOnce("<annotation id=\"0\">");
+        assertThat(resultXml).contains("<infon key=\"type\">FamilyName</infon>");
+        assertThat(resultXml).containsOnlyOnce("<location offset=\"0\" length=\"4\"/>");
+        assertThat(resultXml).containsOnlyOnce("<text>This</text>");
+    }
 }
\ No newline at end of file
diff --git a/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/TestDocumentGenerator.java b/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/TestDocumentGenerator.java
index 55ca81a02..17e13f984 100644
--- a/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/TestDocumentGenerator.java
+++ b/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/TestDocumentGenerator.java
@@ -9,7 +9,7 @@
 public class TestDocumentGenerator {
 
     public static JCas createTestJCas() throws UIMAException {
-        return JCasFactory.createJCas("de.julielab.jcore.types.jcore-document-meta-pubmed-types", "de.julielab.jcore.types.jcore-document-structure-pubmed-types");
+        return JCasFactory.createJCas("de.julielab.jcore.types.jcore-document-meta-pubmed-types", "de.julielab.jcore.types.jcore-document-structure-pubmed-types", "de.julielab.jcore.types.jcore-semantics-biology-types");
     }
 
     public static JCas prepareCas(int docId) throws UIMAException {
diff --git a/jcore-xml-db-reader/pom.xml b/jcore-xml-db-reader/pom.xml
index 3342d08b7..24dd2febd 100644
--- a/jcore-xml-db-reader/pom.xml
+++ b/jcore-xml-db-reader/pom.xml
@@ -75,6 +75,15 @@
             <groupId>org.assertj</groupId>
             <artifactId>assertj-core</artifactId>
         </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-descriptor-creator</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+            <scope>provided</scope>
+        </dependency>
     </dependencies>
     <url>https://github.com/JULIELab/jcore-base/jcore-xml-db-reader</url>
     <licenses>
diff --git a/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java b/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java
index f3c3d7790..6f0eda6aa 100644
--- a/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java
+++ b/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java
@@ -46,6 +46,8 @@ public class XMLDBMultiplier extends DBMultiplier {
     public static final String PARAM_TABLE_DOCUMENT = "DocumentTable";
     public static final String PARAM_TABLE_DOCUMENT_SCHEMA = "DocumentTableSchema";
     public static final String PARAM_TO_VISIT_KEYS = "ToVisitKeys";
+    public static final String PARAM_ADD_TO_VISIT_KEYS = "AddToVisitKeys";
+    public static final String PARAM_ADD_UNCHANGED_DOCUMENT_TEXT_FLAG = "AddUnchangedDocumentTextFlag";
 
     private final static Logger log = LoggerFactory.getLogger(XMLDBMultiplier.class);
     /**
@@ -64,8 +66,12 @@ public class XMLDBMultiplier extends DBMultiplier {
     private String xmiStorageDataTable;
     @ConfigurationParameter(name = PARAM_TABLE_DOCUMENT_SCHEMA, mandatory = false, description = "For use with AnnotationDefinedFlowController. The name of the schema that the document table - given with the " + PARAM_TABLE_DOCUMENT + " parameter - adheres to. Only the primary key part is required for hash value retrieval.")
     private String xmiStorageDataTableSchema;
-    @ConfigurationParameter(name = PARAM_TO_VISIT_KEYS, mandatory = false, description = "For use with AnnotationDefinedFlowController. The delegate AE keys of the AEs this CAS should still applied on although the hash has not changed. Can be null or empty indicating that no component should be applied to the CAS. This is, however, the task of the AnnotationDefinedFlowController.")
+    @ConfigurationParameter(name = PARAM_TO_VISIT_KEYS, mandatory = false, description = "For use with AnnotationDefinedFlowController. Specifies the delegate AE keys of the AEs this CAS should still applied on although the hash has not changed. Can be null or empty indicating that no component should be applied to the CAS. The task of the AnnotationDefinedFlowController is then to read those annotations and route the CAS accordingly.")
     private String[] toVisitKeys;
+    @ConfigurationParameter(name = PARAM_ADD_TO_VISIT_KEYS, mandatory = false, description = "Toggles the creation of annotations for the AnnotationDefinedFlowController. Only needed when such a flow controller is used in the pipeline. For details, see the description of " + PARAM_TO_VISIT_KEYS + ".")
+    private boolean addToVisitKeys;
+    @ConfigurationParameter(name = PARAM_ADD_UNCHANGED_DOCUMENT_TEXT_FLAG, mandatory = false, description = "Toggles the addition of the 'document text is unchanged' flag. The value of this flag is determined via a SHA256 hash of the CAS document text. When " + PARAM_TABLE_DOCUMENT + " and " + PARAM_TABLE_DOCUMENT_SCHEMA + " are specified, the hash value of the document in storage is retrieved and compared to the current value. The flag is then set with respect to the comparison result.")
+    private boolean addUnchangedDocumentTextFlag;
 
 
     private Row2CasMapper row2CasMapper;
@@ -83,16 +89,20 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
         xmiStorageDataTableSchema = (String) aContext.getConfigParameterValue(PARAM_TABLE_DOCUMENT_SCHEMA);
         documentItemToHash = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_ADD_SHA_HASH)).orElse("document_text");
         toVisitKeys = (String[]) aContext.getConfigParameterValue(PARAM_TO_VISIT_KEYS);
+        addToVisitKeys = (boolean) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_ADD_TO_VISIT_KEYS)).orElse(false);
+        addUnchangedDocumentTextFlag = (boolean) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_ADD_UNCHANGED_DOCUMENT_TEXT_FLAG)).orElse(false);
         // We don't know yet which tables to read. Thus, we leave the row mapping out.
         // We will now once the DBMultiplier#process(JCas) will have been run.
         Initializer initializer = new Initializer(mappingFileStr, null, null);
         xmlMapper = initializer.getXmlMapper();
         initialized = false;
 
-        if (!(xmiStorageDataTable == null && xmiStorageDataTableSchema == null) && !(xmiStorageDataTable != null && xmiStorageDataTableSchema != null && documentItemToHash != null)) {
-            String errorMsg = String.format("From the parameters '%s' and '%s' some are specified and some aren't. To activate hash value comparison in order to add aggregate component keys for CAS visit, specify all those parameters. Otherwise, specify none.", PARAM_TABLE_DOCUMENT, PARAM_TABLE_DOCUMENT_SCHEMA);
-            log.error(errorMsg);
-            throw new ResourceInitializationException(new IllegalArgumentException(errorMsg));
+        if ((addToVisitKeys || addUnchangedDocumentTextFlag)) {
+            if (!(xmiStorageDataTable == null && xmiStorageDataTableSchema == null) && !(xmiStorageDataTable != null && xmiStorageDataTableSchema != null && documentItemToHash != null)) {
+                String errorMsg = String.format("From the parameters '%s' and '%s' some are specified and some aren't. To activate hash value comparison in order to add aggregate component keys for CAS visit, specify all those parameters. Otherwise, specify none.", PARAM_TABLE_DOCUMENT, PARAM_TABLE_DOCUMENT_SCHEMA);
+                log.error(errorMsg);
+                throw new ResourceInitializationException(new IllegalArgumentException(errorMsg));
+            }
         }
     }
 
@@ -115,6 +125,8 @@ public AbstractCas next() throws AnalysisEngineProcessException {
                     }
                     // The DBC is initialized in the super class in the process() method. Thus, at this point
                     // the DBC should be set.
+                    if (xmiStorageDataTable != null && !dbc.withConnectionQueryBoolean(d -> d.tableExists(xmiStorageDataTable)))
+                        throw new AnalysisEngineProcessException(new IllegalArgumentException("The data table" + xmiStorageDataTable + " to retrieve hash values from for document text change detection does not exist in the database: " + dbc.getDbURL()));
                     casPopulator = new CasPopulator(dbc, xmlMapper, row2CasMapper, rowMappingArray);
                     initialized = true;
                 }
@@ -138,7 +150,7 @@ public AbstractCas next() throws AnalysisEngineProcessException {
      * @param jCas The newly read JCas.
      */
     private void setToVisitAnnotation(JCas jCas) {
-        if (xmiStorageDataTable != null && dbc.tableExists(xmiStorageDataTable)) {
+        if (addToVisitKeys || addUnchangedDocumentTextFlag) {
             DBProcessingMetaData dbProcessingMetaData = JCasUtil.selectSingle(jCas, DBProcessingMetaData.class);
             StringArray pkArray = dbProcessingMetaData.getPrimaryKey();
             String pkString = String.join(",", pkArray.toArray());
@@ -148,14 +160,17 @@ private void setToVisitAnnotation(JCas jCas) {
                 if (existingHash.equals(newHash)) {
                     if (log.isTraceEnabled())
                         log.trace("Document {} has a document text hash that equals the one present in the database. Creating a ToVisit annotation routing it only to the components with delegate keys {}.", pkString, toVisitKeys);
-                    dbProcessingMetaData.setIsDocumentHashUnchanged(true);
-                    ToVisit toVisit = new ToVisit(jCas);
-                    if (toVisitKeys != null && toVisitKeys.length != 0) {
-                        StringArray keysArray = new StringArray(jCas, toVisitKeys.length);
-                        keysArray.copyFromArray(toVisitKeys, 0, 0, toVisitKeys.length);
-                        toVisit.setDelegateKeys(keysArray);
+                    if (addUnchangedDocumentTextFlag)
+                        dbProcessingMetaData.setIsDocumentHashUnchanged(true);
+                    if (addToVisitKeys) {
+                        ToVisit toVisit = new ToVisit(jCas);
+                        if (toVisitKeys != null && toVisitKeys.length != 0) {
+                            StringArray keysArray = new StringArray(jCas, toVisitKeys.length);
+                            keysArray.copyFromArray(toVisitKeys, 0, 0, toVisitKeys.length);
+                            toVisit.setDelegateKeys(keysArray);
+                        }
+                        toVisit.addToIndexes();
                     }
-                    toVisit.addToIndexes();
                 }
             } else {
                 log.trace("No existing hash was found for document {}", pkString);
@@ -191,7 +206,7 @@ protected List<Map<String, Object>> getAllRetrievedColumns() {
      * @throws AnalysisEngineProcessException If the SQL request fails.
      */
     private Map<String, String> fetchCurrentHashesFromDatabase(RowBatch rowBatch) throws AnalysisEngineProcessException {
-        if (xmiStorageDataTable != null && dbc.tableExists(xmiStorageDataTable) && rowBatch.getIdentifiers() != null && rowBatch.getIdentifiers().size() > 0) {
+        if ((addToVisitKeys || addUnchangedDocumentTextFlag) && rowBatch.getIdentifiers() != null && rowBatch.getIdentifiers().size() > 0) {
             String hashColumn = documentItemToHash + "_sha256";
             // Extract the document IDs in this RowBatch. The IDs could be composite keys.
             List<String[]> documentIds = new ArrayList<>(rowBatch.getIdentifiers().size());
diff --git a/jcore-xml-db-reader/src/test/java/de/julielab/jcore/reader/xml/XMLDBMultiplierTest.java b/jcore-xml-db-reader/src/test/java/de/julielab/jcore/reader/xml/XMLDBMultiplierTest.java
index 86009735d..ae154a30f 100644
--- a/jcore-xml-db-reader/src/test/java/de/julielab/jcore/reader/xml/XMLDBMultiplierTest.java
+++ b/jcore-xml-db-reader/src/test/java/de/julielab/jcore/reader/xml/XMLDBMultiplierTest.java
@@ -180,7 +180,8 @@ public void testHashComparison() throws Exception {
                 XMLDBMultiplier.PARAM_ADD_SHA_HASH, "documentText",
                 XMLDBMultiplier.PARAM_TABLE_DOCUMENT, TARGET_XMI_TABLE,
                 XMLDBMultiplier.PARAM_TABLE_DOCUMENT_SCHEMA, "xmi_text",
-                XMLDBMultiplier.PARAM_TO_VISIT_KEYS, "ThisIsTheVisitKey"
+                XMLDBMultiplier.PARAM_TO_VISIT_KEYS, "ThisIsTheVisitKey",
+                XMLDBMultiplier.PARAM_ADD_TO_VISIT_KEYS, true
         );
         JCasIterator jCasIterator = engine.processAndOutputNewCASes(jCas);
         List<String> toVisitKeys = new ArrayList<>();
@@ -204,7 +205,8 @@ public void testHashComparison2() throws Exception {
                 XMLDBMultiplier.PARAM_MAPPING_FILE, Path.of("src", "test", "resources", "test-mappingfile.xml").toString(),
                 XMLDBMultiplier.PARAM_ADD_SHA_HASH, "documentText",
                 XMLDBMultiplier.PARAM_TABLE_DOCUMENT, TARGET_XMI_TABLE,
-                XMLDBMultiplier.PARAM_TABLE_DOCUMENT_SCHEMA, "xmi_text"
+                XMLDBMultiplier.PARAM_TABLE_DOCUMENT_SCHEMA, "xmi_text",
+                XMLDBMultiplier.PARAM_ADD_TO_VISIT_KEYS, true
         );
         JCasIterator jCasIterator = engine.processAndOutputNewCASes(jCas);
         List<ToVisit> emptyToVisitAnnotation = new ArrayList<>();

From be429c4fe02a0fd456e669b8b58c72dbf3036c19 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 17 Mar 2022 13:12:46 +0100
Subject: [PATCH 167/269] Add gene addition parameter to descriptor for GNP
 BioC writer.

---
 .../consumer/gnp/desc/jcore-gnp-bioc-writer.xml | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/jcore-gnp-bioc-writer/src/main/resources/de/julielab/jcore/consumer/gnp/desc/jcore-gnp-bioc-writer.xml b/jcore-gnp-bioc-writer/src/main/resources/de/julielab/jcore/consumer/gnp/desc/jcore-gnp-bioc-writer.xml
index 524f590ea..82e53378c 100644
--- a/jcore-gnp-bioc-writer/src/main/resources/de/julielab/jcore/consumer/gnp/desc/jcore-gnp-bioc-writer.xml
+++ b/jcore-gnp-bioc-writer/src/main/resources/de/julielab/jcore/consumer/gnp/desc/jcore-gnp-bioc-writer.xml
@@ -29,12 +29,27 @@
                 <multiValued>false</multiValued>
                 <mandatory>true</mandatory>
             </configurationParameter>
+            <configurationParameter>
+                <name>AddGenes</name>
+                <description>false</description>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
         </configurationParameters>
-        <configurationParameterSettings/>
+        <configurationParameterSettings>
+            <nameValuePair>
+                <name>AddGenes</name>
+                <value>
+                    <boolean>false</boolean>
+                </value>
+            </nameValuePair>
+        </configurationParameterSettings>
         <typeSystemDescription>
             <imports>
                 <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
                 <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
+                <import name="de.julielab.jcore.types.jcore-semantics-biology-types"/>
             </imports>
         </typeSystemDescription>
         <fsIndexCollection/>

From 8524fbacb770663cf19a7262749157bf430899ed Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 17 Mar 2022 13:16:31 +0100
Subject: [PATCH 168/269] Resolves #134.

---
 .../de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
index bca360265..78ec078cc 100644
--- a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
+++ b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
@@ -95,10 +95,12 @@ private int addGenesToPassage(JCas jCas, Zone z, BioCPassage p, int annotationId
                 BioCAnnotation annotation = new BioCAnnotation(String.valueOf(annotationId++));
                 annotation.setText(g.getCoveredText());
                 String type = "Gene";
-                String specificType = g.getSpecificType().toLowerCase();
+                String specificType = g.getSpecificType() != null ? g.getSpecificType().toLowerCase() : null;
                 // 'familiy' is an entity name typo in the ProGene corpus
                 if (specificType != null && (specificType.contains("familiy") || specificType.contains("family") || specificType.contains("complex")))
                     type = "FamilyName";
+                else if (specificType != null && specificType.contains("domain"))
+                    type = "DomainMotif";
                 annotation.putInfon("type", type);
                 annotation.addLocation(new BioCLocation(g.getBegin(), g.getEnd() - g.getBegin()));
                 p.addAnnotation(annotation);

From 9b5f84cec38280e8a2b7ab2e7d0905f11ee0b052 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 23 Mar 2022 18:18:55 +0100
Subject: [PATCH 169/269] Reduce database connection usage.

---
 .../java/de/julielab/jcore/reader/db/DBMultiplier.java    | 2 +-
 .../de/julielab/jcore/multiplier/pmc/PMCDBMultiplier.java | 8 +++++---
 .../java/de/julielab/jcore/consumer/xmi/XMIDBWriter.java  | 2 +-
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplier.java b/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplier.java
index c83fcaebb..b52c111c5 100644
--- a/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplier.java
+++ b/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplier.java
@@ -58,7 +58,7 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
         initialized = false;
     }
 
-    private DataBaseConnector getDataBaseConnector(String costosysConfig) throws AnalysisEngineProcessException {
+    protected DataBaseConnector getDataBaseConnector(String costosysConfig) throws AnalysisEngineProcessException {
         DataBaseConnector dbc;
         try {
             dbc = new DataBaseConnector(costosysConfig);
diff --git a/jcore-pmc-db-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplier.java b/jcore-pmc-db-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplier.java
index 447e95929..eb0975888 100644
--- a/jcore-pmc-db-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplier.java
+++ b/jcore-pmc-db-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplier.java
@@ -84,8 +84,8 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
 
     @Override
     public void process(JCas aJCas) throws AnalysisEngineProcessException {
-        super.process(aJCas);
         docId2HashMap = fetchCurrentHashesFromDatabase(JCasUtil.selectSingle(aJCas, RowBatch.class));
+        super.process(aJCas);
     }
 
     @Override
@@ -143,6 +143,8 @@ private void populateCas(JCas jCas, byte[][] documentData, String pkString) thro
      * @throws AnalysisEngineProcessException If the SQL request fails.
      */
     private Map<String, String> fetchCurrentHashesFromDatabase(RowBatch rowBatch) throws AnalysisEngineProcessException {
+        if (dbc == null)
+            dbc = getDataBaseConnector(rowBatch.getCostosysConfiguration());
         if (xmiStorageDataTable != null && dbc.tableExists(xmiStorageDataTable) && rowBatch.getIdentifiers() != null && rowBatch.getIdentifiers().size() > 0) {
             String hashColumn = documentItemToHash + "_sha256";
             // Extract the document IDs in this RowBatch. The IDs could be composite keys.
@@ -188,11 +190,11 @@ private Map<String, String> fetchCurrentHashesFromDatabase(RowBatch rowBatch) th
      * database, if present. If there was a hash in the database and the hash values are equal, creates the <tt>ToVisit</tt>
      * annotation and adds the toVisitKeys passed in the configuration of this component.</p>
      *
-     * @param jCas The newly read JCas.
+     * @param jCas     The newly read JCas.
      * @param pkString
      */
     private void setToVisitAnnotation(JCas jCas, String pkString) {
-        if (xmiStorageDataTable != null && dbc.tableExists(xmiStorageDataTable)) {
+        if (xmiStorageDataTable != null && xmiStorageDataTable != null) {
             String existingHash = docId2HashMap.get(pkString);
             if (existingHash != null) {
                 String newHash = getHash(jCas);
diff --git a/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XMIDBWriter.java b/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XMIDBWriter.java
index ef6d7735c..1d13802dd 100644
--- a/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XMIDBWriter.java
+++ b/jcore-xmi-db-writer/src/main/java/de/julielab/jcore/consumer/xmi/XMIDBWriter.java
@@ -541,7 +541,7 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException {
                         try (CoStoSysConnection costoConn = dbc.obtainOrReserveConnection()) {
                             Map<String, Boolean> mirrorSubsetNames = dbc.getMirrorSubsetNames(costoConn, effectiveDocTableName);
                             if (mirrorSubsetNames.keySet().contains(subsetTable.replace("^[^.]\\.", "")))
-                                throw new AnalysisEngineProcessException(new IllegalArgumentException("The read subset table " + subsetTable + " is a mirror subset its document table " + effectiveDocTableName + " and the base document should be stored. This base document storage would cause all its subset to reset the updated documents. Thus, the subset " + subsetTable + " would be partially reset while processing, reading the same documents over and over again. This is therefore illegal."));
+                                throw new AnalysisEngineProcessException(new IllegalArgumentException("The read subset table " + subsetTable + " is a mirror subset of the target document table " + effectiveDocTableName + " and the base document should be stored. This base document storage would cause all its subset to reset the updated documents. Thus, the subset " + subsetTable + " would be partially reset while processing, reading the same documents over and over again. This is therefore illegal."));
                         }
                     }
                 }

From 2a8b27773b114077465eeb08efa4042cc2790acb Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 23 Mar 2022 18:19:41 +0100
Subject: [PATCH 170/269] Lower "whitespace sub-sentence has invalid offsets"
 message to debug level.

---
 .../de/julielab/jcore/ae/jsbd/main/SentenceAnnotator.java     | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/jcore-jsbd-ae/src/main/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotator.java b/jcore-jsbd-ae/src/main/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotator.java
index d89ca98b7..0cd7354c5 100644
--- a/jcore-jsbd-ae/src/main/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotator.java
+++ b/jcore-jsbd-ae/src/main/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotator.java
@@ -366,7 +366,7 @@ private void splitAtWhitespaces(JCoReCondensedDocumentText documentText, Sentenc
                     lastEnd = s.getEnd();
                     currentSentenceLength = 0;
                 } else {
-                    LOGGER.warn("Not creating whitespace-segmented sub-sentence because its offsets would be invalid: {}-{}", subBegin, subEnd);
+                    LOGGER.debug("Not creating whitespace-segmented sub-sentence because its offsets would be invalid: {}-{}", subBegin, subEnd);
                 }
             }
             currentSentenceLength += wsMatcher.end();
@@ -379,7 +379,7 @@ private void splitAtWhitespaces(JCoReCondensedDocumentText documentText, Sentenc
             s.setComponentId(this.getClass().getName());
             subSentences.add(s);
         } else {
-            LOGGER.warn("Not creating whitespace-segmented sub-sentence because its offsets would be invalid: {}-{}", subBegin, subEnd);
+            LOGGER.debug("Not creating whitespace-segmented sub-sentence because its offsets would be invalid: {}-{}", subBegin, subEnd);
         }
     }
 

From ad533d9f11aa6ba3e680bb075aa531107f4a75e2 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 25 Mar 2022 11:55:59 +0100
Subject: [PATCH 171/269] Fix a bug where a file name was expected but the XML
 source was a stream.

Some PMC documents do not contain their own ID. One measure to solve this was to derive the ID from the file name or URI of the source file. However, when reading from the database, the XML source is a stream and there is no such file. In that case, the ID is taken from the database primary key instead.
---
 .../jcore/multiplier/pmc/PMCDBMultiplier.java |  3 +-
 .../jcore/multiplier/pmc/ErrorTest.java       | 39 +++++++++++++++++++
 .../src/test/resources/costosys-errortest.xml | 24 ++++++++++++
 .../jcore/reader/pmc/parser/FrontParser.java  | 10 ++++-
 4 files changed, 73 insertions(+), 3 deletions(-)
 create mode 100644 jcore-pmc-db-reader/src/test/java/de/julielab/jcore/multiplier/pmc/ErrorTest.java
 create mode 100644 jcore-pmc-db-reader/src/test/resources/costosys-errortest.xml

diff --git a/jcore-pmc-db-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplier.java b/jcore-pmc-db-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplier.java
index eb0975888..c669e4f92 100644
--- a/jcore-pmc-db-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplier.java
+++ b/jcore-pmc-db-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplier.java
@@ -13,6 +13,7 @@
 import de.julielab.jcore.types.pubmed.Header;
 import org.apache.commons.codec.binary.Base64;
 import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.AbstractCas;
@@ -129,7 +130,7 @@ private void populateCas(JCas jCas, byte[][] documentData, String pkString) thro
         // It actually happens that some PMC XML documents do not contain their own ID. We can use the ID obtained
         // via the database primary key, which in turn might be derived from the original file name or some meta file.
         Header header = JCasUtil.selectSingle(jCas, Header.class);
-        if (header.getDocId().isBlank()) {
+        if (StringUtils.isBlank(header.getDocId())) {
             log.debug("Document has no docId set. Derived the ID {} from the primary key and setting it as the Header#docId feature.", pkString);
             header.setDocId(pkString);
         }
diff --git a/jcore-pmc-db-reader/src/test/java/de/julielab/jcore/multiplier/pmc/ErrorTest.java b/jcore-pmc-db-reader/src/test/java/de/julielab/jcore/multiplier/pmc/ErrorTest.java
new file mode 100644
index 000000000..674d61685
--- /dev/null
+++ b/jcore-pmc-db-reader/src/test/java/de/julielab/jcore/multiplier/pmc/ErrorTest.java
@@ -0,0 +1,39 @@
+package de.julielab.jcore.multiplier.pmc;
+
+import de.julielab.jcore.reader.db.DBMultiplierReader;
+import de.julielab.jcore.utility.JCoReTools;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.JCasIterator;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.factory.CollectionReaderFactory;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.jcas.JCas;
+import org.junit.jupiter.api.Disabled;
+import org.junit.jupiter.api.Test;
+
+import java.nio.file.Path;
+
+/**
+ * This is not as much a test as it is a facility to check error cases in isolation. The existing code
+ * reads from an XML database table and parses the PMC document from there
+ */
+@Disabled
+public class ErrorTest {
+
+    @Test
+    public void errorTest() throws Exception {
+        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-document-meta-pubmed-types", "de.julielab.jcore.types.jcore-document-structure-pubmed-types", "de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types");
+        CollectionReader reader = CollectionReaderFactory.createReader(DBMultiplierReader.class, DBMultiplierReader.PARAM_COSTOSYS_CONFIG_NAME, Path.of("src", "test", "resources", "costosys-errortest.xml").toString(), DBMultiplierReader.PARAM_TABLE, "_data.errordoc", DBMultiplierReader.PARAM_RESET_TABLE, true);
+        AnalysisEngine engine = AnalysisEngineFactory.createEngine(PMCDBMultiplier.class, PMCDBMultiplier.PARAM_OMIT_BIB_REFERENCES, true);
+        while (reader.hasNext()) {
+            reader.getNext(jCas.getCas());
+            JCasIterator jCasIterator = engine.processAndOutputNewCASes(jCas);
+            while (jCasIterator.hasNext()) {
+                JCas next = jCasIterator.next();
+                System.out.println(JCoReTools.getDocId(next));
+                next.release();
+            }
+        }
+    }
+}
diff --git a/jcore-pmc-db-reader/src/test/resources/costosys-errortest.xml b/jcore-pmc-db-reader/src/test/resources/costosys-errortest.xml
new file mode 100644
index 000000000..e9788a2fa
--- /dev/null
+++ b/jcore-pmc-db-reader/src/test/resources/costosys-errortest.xml
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<databaseConnectorConfiguration>
+    <DBSchemaInformation>
+        <activePostgresSchema>public</activePostgresSchema>
+        <activeTableSchema>pmc_bulk_gzip</activeTableSchema>
+        <tableSchemas>
+            <tableSchema name="xmi_anno_view" forEach=".">
+                <field name="pmcid" type="text" xpath="PMID" primaryKey="true" retrieve="true"/>
+                <field name="xmi" type="bytea" xpath="." returnXMLFragment="true" retrieve="true" gzip="true"/>
+            </tableSchema>
+            <tableSchema name="xmi_pmc_annotation_gzip_retrieve" forEach=".">
+                <field name="pmcid" type="text" xpath="PMID" primaryKey="true" retrieve="true"/>
+                <field name="xmi" type="bytea" xpath="." returnXMLFragment="true" retrieve="true" gzip="true"/>
+            </tableSchema>
+        </tableSchemas>
+    </DBSchemaInformation>
+    <DBConnectionInformation>
+        <activeDBConnection>pmc_xml</activeDBConnection>
+        <maxActiveDBConnections>5</maxActiveDBConnections>
+        <DBConnections>
+            <DBConnection name="pmc_xml" url="jdbc:postgresql://localhost:5432/pmc_xml"/>
+        </DBConnections>
+    </DBConnectionInformation>
+</databaseConnectorConfiguration>
\ No newline at end of file
diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FrontParser.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FrontParser.java
index 560f9877d..19a848902 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FrontParser.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/parser/FrontParser.java
@@ -111,8 +111,14 @@ else if (xPathExists(String.format(pubDateFmt, "pmc-release")))
             header.setComponentId(PMCReader.class.getName());
 
             pmcid.ifPresentOrElse(id -> header.setDocId(id.startsWith("PMC") ? id : "PMC" + id), () -> {
-                String filenameId = nxmlDocumentParser.getCurrentSource().toString().substring(nxmlDocumentParser.getCurrentSource().toString().lastIndexOf(File.separatorChar)+1, nxmlDocumentParser.getCurrentSource().toString().lastIndexOf('.'));
-                header.setDocId(filenameId.startsWith("PMC") ? filenameId : "PMC" + filenameId);
+                // try to extract the PMCID from the file name
+                // For now, let the dot indicate that this is, indeed, a file name; the source also be an InputStream,
+                // then we don't have access to the file name
+                int dotIndex = nxmlDocumentParser.getCurrentSource().toString().lastIndexOf('.');
+                if (dotIndex > 0) {
+                    String filenameId = nxmlDocumentParser.getCurrentSource().toString().substring(nxmlDocumentParser.getCurrentSource().toString().lastIndexOf(File.separatorChar) + 1, dotIndex);
+                    header.setDocId(filenameId.startsWith("PMC") ? filenameId : "PMC" + filenameId);
+                }
             });
             pmid.ifPresent(p -> {
                 OtherID otherID = new OtherID(nxmlDocumentParser.cas);

From 8eaee4b749b23876de5ccedaaf9c09020eb373a8 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 25 Mar 2022 13:10:38 +0100
Subject: [PATCH 172/269] Fix an error due to an API change in XML tools.

---
 .../java/de/julielab/jcore/multiplier/xml/XMLMultiplier.java    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jcore-xml-reader/src/main/java/de/julielab/jcore/multiplier/xml/XMLMultiplier.java b/jcore-xml-reader/src/main/java/de/julielab/jcore/multiplier/xml/XMLMultiplier.java
index aafcb1e8a..4b6e4f8d1 100644
--- a/jcore-xml-reader/src/main/java/de/julielab/jcore/multiplier/xml/XMLMultiplier.java
+++ b/jcore-xml-reader/src/main/java/de/julielab/jcore/multiplier/xml/XMLMultiplier.java
@@ -189,7 +189,7 @@ public void process(JCas cas) throws AnalysisEngineProcessException {
         try {
             rowIterator = JulieXMLTools.constructRowIterator(
                     JulieXMLTools.readStream(UriUtilities.getInputStreamFromUri(new java.net.URI(currentUri)), 1024),
-                    1024, forEach, fields, currentUri);
+                    1024, forEach, fields, currentUri, true);
         } catch (IOException | URISyntaxException e) {
             throw new AnalysisEngineProcessException(e);
         }

From dcbbce60b9be72225eb500aa228f7b84f616db3f Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 30 Mar 2022 09:41:30 +0200
Subject: [PATCH 173/269] Add FamilyName gene annotations to the CAS.

---
 .../jcore/reader/BioCCasPopulator.java        |  22 ++
 .../jcore/reader/BioCCasPopulatorTest.java    |  16 ++
 .../test/resources/bioc_collection_0_0.xml    | 261 ++++++++++++++++++
 3 files changed, 299 insertions(+)
 create mode 100644 jcore-gnp-bioc-reader/src/test/resources/bioc_collection_0_0.xml

diff --git a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
index 75f58fa02..4618ef255 100644
--- a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
+++ b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
@@ -90,6 +90,9 @@ public void populateWithNextDocument(JCas jCas) {
                     case "Gene":
                         addGeneAnnotation(annotation, jCas);
                         break;
+                    case "FamilyName":
+                        addFamilyAnnotation(annotation, jCas);
+                        break;
                     case "Species":
                         addSpeciesAnnotation(annotation, jCas);
                         break;
@@ -100,6 +103,7 @@ public void populateWithNextDocument(JCas jCas) {
         }
     }
 
+
     private void setMaxXmiId(JCas jCas, BioCDocument document) {
         if (maxXmiIdMap != null) {
             Integer maxXmiId = maxXmiIdMap.get(document.getID());
@@ -215,6 +219,7 @@ private void addGeneAnnotation(BioCAnnotation annotation, JCas jCas) throws Miss
         // for GNormPlus, there are no discontinuing annotations anyway
         BioCLocation location = annotation.getTotalLocation();
         Gene gene = new Gene(jCas, location.getOffset(), location.getOffset() + location.getLength());
+        gene.setSpecificType("Gene");
         ResourceEntry resourceEntry = new ResourceEntry(jCas, gene.getBegin(), gene.getEnd());
         resourceEntry.setSource("NCBI Gene");
         resourceEntry.setComponentId(GNormPlusFormatMultiplierReader.class.getCanonicalName());
@@ -225,6 +230,23 @@ private void addGeneAnnotation(BioCAnnotation annotation, JCas jCas) throws Miss
         gene.addToIndexes();
     }
 
+    private void addFamilyAnnotation(BioCAnnotation annotation, JCas jCas) {
+        // the "total location" is the span from the minimum location value to the maximum location value;
+        // for GNormPlus, there are no discontinuing annotations anyway
+        BioCLocation location = annotation.getTotalLocation();
+        Gene gene = new Gene(jCas, location.getOffset(), location.getOffset() + location.getLength());
+        gene.setSpecificType("FamilyName");
+        // e.g.  <infon key="FocusSpecies">NCBITaxonomyID:9606</infon>
+        Optional<String> focusSpecies = annotation.getInfon("FocusSpecies");
+        if (!focusSpecies.isPresent())
+            throw new IllegalStateException("A FamilyName annotation does not specify its species: " + annotation);
+        String taxId = focusSpecies.get().substring(15);
+        StringArray speciesArray = new StringArray(jCas, 1);
+        speciesArray.set(0, taxId);
+        gene.setSpecies(speciesArray);
+        gene.addToIndexes();
+    }
+
     public int documentsLeftInCollection() {
         return bioCCollection.getDocmentCount() - pos;
     }
diff --git a/jcore-gnp-bioc-reader/src/test/java/de/julielab/jcore/reader/BioCCasPopulatorTest.java b/jcore-gnp-bioc-reader/src/test/java/de/julielab/jcore/reader/BioCCasPopulatorTest.java
index 3b7e0dba5..b93ad6c46 100644
--- a/jcore-gnp-bioc-reader/src/test/java/de/julielab/jcore/reader/BioCCasPopulatorTest.java
+++ b/jcore-gnp-bioc-reader/src/test/java/de/julielab/jcore/reader/BioCCasPopulatorTest.java
@@ -59,4 +59,20 @@ public void populateWithNextDocument() throws Exception {
         }
         assertThat(organisms).extracting(Organism::getCoveredText).contains("human", "patients", "rat", "retrovirus", "ZR-75-1");
     }
+
+    @Test
+    public void addFamilyNames() throws Exception {
+        BioCCasPopulator bioCCasPopulator = new BioCCasPopulator(Path.of("src", "test", "resources","bioc_collection_0_0.xml"), null, null);
+        JCas jCas = getJCas();
+        bioCCasPopulator.populateWithNextDocument(jCas);
+
+        Collection<Gene> genes = JCasUtil.select(jCas, Gene.class);
+        assertThat(genes).hasSize(23);
+        assertThat(genes).filteredOn(Gene::getSpecificType, "FamilyName").hasSize(5);
+        for (Gene o : genes) {
+            if (o.getSpecificType().equals("FamilyName")) {
+                assertThat(o.getSpecies(0)).isEqualTo("9606");
+            }
+        }
+    }
 }
\ No newline at end of file
diff --git a/jcore-gnp-bioc-reader/src/test/resources/bioc_collection_0_0.xml b/jcore-gnp-bioc-reader/src/test/resources/bioc_collection_0_0.xml
new file mode 100644
index 000000000..46dc0e704
--- /dev/null
+++ b/jcore-gnp-bioc-reader/src/test/resources/bioc_collection_0_0.xml
@@ -0,0 +1,261 @@
+<?xml version="1.0" ?>
+<!DOCTYPE collection
+  SYSTEM 'BioC.dtd'>
+<collection>
+    <source>JCoRe GNormPlus BioC Writer</source>
+    <date>Wed Mar 02 14:58:28 CET 2022</date>
+    <key>PubTator.key</key>
+    <document>
+        <id>10885490</id>
+        <passage>
+            <infon key="type">title</infon>
+            <offset>0</offset>
+            <text>Decreased plasma cholesterol esterification and cholesteryl ester transfer in hypopituitary patients on glucocorticoid replacement therapy.</text>
+            <annotation id="0">
+                <infon key="NCBI Taxonomy">9606</infon>
+                <infon key="type">Species</infon>
+                <location length="8" offset="92"/>
+                <text>patients</text>
+            </annotation>
+        </passage>
+        <passage>
+            <infon key="type">abstract</infon>
+            <offset>140</offset>
+            <text>Cardiovascular risk is increased in hypopituitary patients. No data are available with respect to the effect of glucocorticoid replacement therapy on high density lipoproteins (HDL) metabolism in such patients. Plasma lecithin:cholesterol acyl transferase (LCAT), cholesteryl ester transfer protein (CETP) and phospholipid transfer protein (PLTP) are important determinants of HDL remodelling. The possible influence of conventional glucocorticoid replacement on plasma lipids, plasma LCAT, CETP and PLTP activity levels, as well as on plasma cholesterol esterification (EST) and cholesteryl ester transfer (CET) was evaluated in 24 consecutive hypopituitary patients (12 men and 12 women) with untreated growth hormone deficiency of whom 17 had adrenal insufficiency and were treated with cortisone acetate, 25 to 37.5 mg daily. Twenty-three patients were on stable levothyroxin therapy and 22 patients used sex steroids. Urinary excretion of cortisol and cortisone metabolites was higher (p&lt;0.001) in glucocorticoid-treated patients. Body mass index (p&lt;0.08) and fat mass (p&lt;0.12) were not significantly different in patients receiving and not receiving glucocorticoids. Fasting blood glucose, plasma insulin and insulin resistance were similar in the groups. Plasma total (p&lt;0.05) and very low+low density lipoprotein cholesterol (p&lt;0.01) were lower in patients receiving glucocorticoids, whereas HDL cholesterol and plasma triglycerides were not different between patients treated and not treated with glucocorticoids. Plasma LCAT activity was 45% lower (p&lt;0.02) and CETP activity was 34% lower (p&lt;0.05) in patients on glucocorticoid treatment. Multiple regression analysis showed that these effects were independent of gender and fat mass. In glucocorticoid-receiving patients, plasma EST and CET were decreased by 80% (p&lt;0.01) and by 58% (p&lt;0.05), respectively. 
These changes were at least partly attributable to lower LCAT and CETP activity levels. In contrast, plasma PLTP activity was not different between patients with and without glucocorticoid treatment, suggesting that exogenous glucocorticoids exert a different regulatory effect on plasma CETP compared to PLTP. In conclusion, this preliminary study suggests that conventional glucocorticoid replacement in hypopituitary patients is associated with a decrease in plasma cholesterol esterification and cholesteryl ester transfer, indicating that these steps in HDL metabolism are impaired. Such abnormalities in HDL metabolism could be involved in increased cardiovascular risk in glucocorticoid-treated hypopituitary patients, despite a lack of deterioration in plasma lipids.</text>
+            <annotation id="1">
+                <infon key="NCBI Gene">3931</infon>
+                <infon key="type">Gene</infon>
+                <location length="37" offset="358"/>
+                <text>lecithin:cholesterol acyl transferase</text>
+            </annotation>
+            <annotation id="2">
+                <infon key="NCBI Gene">3931</infon>
+                <infon key="type">Gene</infon>
+                <location length="4" offset="397"/>
+                <text>LCAT</text>
+            </annotation>
+            <annotation id="3">
+                <infon key="NCBI Gene">1071</infon>
+                <infon key="type">Gene</infon>
+                <location length="34" offset="404"/>
+                <text>cholesteryl ester transfer protein</text>
+            </annotation>
+            <annotation id="4">
+                <infon key="NCBI Gene">1071</infon>
+                <infon key="type">Gene</infon>
+                <location length="4" offset="440"/>
+                <text>CETP</text>
+            </annotation>
+            <annotation id="5">
+                <infon key="NCBI Gene">5360</infon>
+                <infon key="type">Gene</infon>
+                <location length="29" offset="450"/>
+                <text>phospholipid transfer protein</text>
+            </annotation>
+            <annotation id="6">
+                <infon key="NCBI Gene">5360</infon>
+                <infon key="type">Gene</infon>
+                <location length="4" offset="481"/>
+                <text>PLTP</text>
+            </annotation>
+            <annotation id="7">
+                <infon key="NCBI Gene">3931</infon>
+                <infon key="type">Gene</infon>
+                <location length="4" offset="625"/>
+                <text>LCAT</text>
+            </annotation>
+            <annotation id="8">
+                <infon key="NCBI Gene">1071</infon>
+                <infon key="type">Gene</infon>
+                <location length="4" offset="631"/>
+                <text>CETP</text>
+            </annotation>
+            <annotation id="9">
+                <infon key="NCBI Gene">5360</infon>
+                <infon key="type">Gene</infon>
+                <location length="4" offset="640"/>
+                <text>PLTP</text>
+            </annotation>
+            <annotation id="10">
+                <infon key="NCBI Gene">3630</infon>
+                <infon key="type">Gene</infon>
+                <location length="7" offset="1343"/>
+                <text>insulin</text>
+            </annotation>
+            <annotation id="11">
+                <infon key="NCBI Gene">3630</infon>
+                <infon key="type">Gene</infon>
+                <location length="7" offset="1355"/>
+                <text>insulin</text>
+            </annotation>
+            <annotation id="12">
+                <infon key="FocusSpecies">NCBITaxonomyID:9606</infon>
+                <infon key="type">FamilyName</infon>
+                <location length="3" offset="1540"/>
+                <text>HDL</text>
+            </annotation>
+            <annotation id="13">
+                <infon key="NCBI Gene">3931</infon>
+                <infon key="type">Gene</infon>
+                <location length="4" offset="1670"/>
+                <text>LCAT</text>
+            </annotation>
+            <annotation id="14">
+                <infon key="NCBI Gene">1071</infon>
+                <infon key="type">Gene</infon>
+                <location length="4" offset="1711"/>
+                <text>CETP</text>
+            </annotation>
+            <annotation id="15">
+                <infon key="NCBI Gene">3931</infon>
+                <infon key="type">Gene</infon>
+                <location length="4" offset="2065"/>
+                <text>LCAT</text>
+            </annotation>
+            <annotation id="16">
+                <infon key="NCBI Gene">1071</infon>
+                <infon key="type">Gene</infon>
+                <location length="4" offset="2074"/>
+                <text>CETP</text>
+            </annotation>
+            <annotation id="17">
+                <infon key="NCBI Gene">5360</infon>
+                <infon key="type">Gene</infon>
+                <location length="4" offset="2116"/>
+                <text>PLTP</text>
+            </annotation>
+            <annotation id="18">
+                <infon key="NCBI Gene">1071</infon>
+                <infon key="type">Gene</infon>
+                <location length="4" offset="2296"/>
+                <text>CETP</text>
+            </annotation>
+            <annotation id="19">
+                <infon key="NCBI Gene">5360</infon>
+                <infon key="type">Gene</infon>
+                <location length="4" offset="2313"/>
+                <text>PLTP</text>
+            </annotation>
+            <annotation id="20">
+                <infon key="FocusSpecies">NCBITaxonomyID:9606</infon>
+                <infon key="type">FamilyName</infon>
+                <location length="3" offset="2618"/>
+                <text>HDL</text>
+            </annotation>
+            <annotation id="21">
+                <infon key="FocusSpecies">NCBITaxonomyID:9606</infon>
+                <infon key="type">FamilyName</infon>
+                <location length="3" offset="2567"/>
+                <text>HDL</text>
+            </annotation>
+            <annotation id="22">
+                <infon key="FocusSpecies">NCBITaxonomyID:9606</infon>
+                <infon key="type">FamilyName</infon>
+                <location length="3" offset="517"/>
+                <text>HDL</text>
+            </annotation>
+            <annotation id="23">
+                <infon key="FocusSpecies">NCBITaxonomyID:9606</infon>
+                <infon key="type">FamilyName</infon>
+                <location length="3" offset="317"/>
+                <text>HDL</text>
+            </annotation>
+            <annotation id="24">
+                <infon key="NCBI Taxonomy">9606</infon>
+                <infon key="type">Species</infon>
+                <location length="8" offset="190"/>
+                <text>patients</text>
+            </annotation>
+            <annotation id="25">
+                <infon key="NCBI Taxonomy">9606</infon>
+                <infon key="type">Species</infon>
+                <location length="8" offset="341"/>
+                <text>patients</text>
+            </annotation>
+            <annotation id="26">
+                <infon key="NCBI Taxonomy">9606</infon>
+                <infon key="type">Species</infon>
+                <location length="8" offset="799"/>
+                <text>patients</text>
+            </annotation>
+            <annotation id="27">
+                <infon key="NCBI Taxonomy">9606</infon>
+                <infon key="type">Species</infon>
+                <location length="3" offset="812"/>
+                <text>men</text>
+            </annotation>
+            <annotation id="28">
+                <infon key="NCBI Taxonomy">9606</infon>
+                <infon key="type">Species</infon>
+                <location length="5" offset="823"/>
+                <text>women</text>
+            </annotation>
+            <annotation id="29">
+                <infon key="NCBI Taxonomy">9606</infon>
+                <infon key="type">Species</infon>
+                <location length="8" offset="983"/>
+                <text>patients</text>
+            </annotation>
+            <annotation id="30">
+                <infon key="NCBI Taxonomy">9606</infon>
+                <infon key="type">Species</infon>
+                <location length="8" offset="1035"/>
+                <text>patients</text>
+            </annotation>
+            <annotation id="31">
+                <infon key="NCBI Taxonomy">9606</infon>
+                <infon key="type">Species</infon>
+                <location length="8" offset="1166"/>
+                <text>patients</text>
+            </annotation>
+            <annotation id="32">
+                <infon key="NCBI Taxonomy">9606</infon>
+                <infon key="type">Species</infon>
+                <location length="8" offset="1259"/>
+                <text>patients</text>
+            </annotation>
+            <annotation id="33">
+                <infon key="NCBI Taxonomy">9606</infon>
+                <infon key="type">Species</infon>
+                <location length="8" offset="1496"/>
+                <text>patients</text>
+            </annotation>
+            <annotation id="34">
+                <infon key="NCBI Taxonomy">9606</infon>
+                <infon key="type">Species</infon>
+                <location length="8" offset="1608"/>
+                <text>patients</text>
+            </annotation>
+            <annotation id="35">
+                <infon key="NCBI Taxonomy">9606</infon>
+                <infon key="type">Species</infon>
+                <location length="8" offset="1751"/>
+                <text>patients</text>
+            </annotation>
+            <annotation id="36">
+                <infon key="NCBI Taxonomy">9606</infon>
+                <infon key="type">Species</infon>
+                <location length="8" offset="1913"/>
+                <text>patients</text>
+            </annotation>
+            <annotation id="37">
+                <infon key="NCBI Taxonomy">9606</infon>
+                <infon key="type">Species</infon>
+                <location length="8" offset="2156"/>
+                <text>patients</text>
+            </annotation>
+            <annotation id="38">
+                <infon key="NCBI Taxonomy">9606</infon>
+                <infon key="type">Species</infon>
+                <location length="8" offset="2428"/>
+                <text>patients</text>
+            </annotation>
+            <annotation id="39">
+                <infon key="NCBI Taxonomy">9606</infon>
+                <infon key="type">Species</infon>
+                <location length="8" offset="2724"/>
+                <text>patients</text>
+            </annotation>
+        </passage>
+    </document>
+</collection>
\ No newline at end of file

From 91bd495440b27f7f5d08d325af4a7a1764620d56 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 4 Apr 2022 12:04:50 +0200
Subject: [PATCH 174/269] Set null title types to title type 'other'.

That removes an exception thrown for some title types we do not really care about (e.g. titles of publications in a literature list added to the text body of a PMC document, outside of references).
---
 .../de/julielab/jcore/reader/BioCCasPopulator.java   | 12 ++++++++++--
 .../jcore/consumer/gnp/BioCDocumentPopulator.java    |  9 +++++----
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
index 4618ef255..4c8b3f908 100644
--- a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
+++ b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
@@ -97,7 +97,7 @@ public void populateWithNextDocument(JCas jCas) {
                         addSpeciesAnnotation(annotation, jCas);
                         break;
                 }
-            } catch (MissingInfonException e) {
+            } catch (MissingInfonException | IllegalArgumentException e) {
                 throw new IllegalArgumentException("BioCDocument " + document.getID() + " has an annotation issue; see cause exception.", e);
             }
         }
@@ -233,7 +233,15 @@ private void addGeneAnnotation(BioCAnnotation annotation, JCas jCas) throws Miss
     private void addFamilyAnnotation(BioCAnnotation annotation, JCas jCas) {
         // the "total location" is the span from the minimum location value to the maximum location value;
         // for GNormPlus, there are no discontinuing annotations anyway
-        BioCLocation location = annotation.getTotalLocation();
+        BioCLocation location;
+        try {
+            location = annotation.getTotalLocation();
+        } catch (Exception e) {
+            // This handles a legacy issue: We modified GNormPlus to output FamilyName annotations. For some reason,
+            // FamilyNames can have zero length. This has been fixed but there is still old output that would
+            // cause an error at this point. Thus, when the offsets are invalid, skip the annotation.
+            return;
+        }
         Gene gene = new Gene(jCas, location.getOffset(), location.getOffset() + location.getLength());
         gene.setSpecificType("FamilyName");
         // e.g.  <infon key="FocusSpecies">NCBITaxonomyID:9606</infon>
diff --git a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
index 78ec078cc..c642193cf 100644
--- a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
+++ b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
@@ -34,9 +34,10 @@ public BioCDocument populate(JCas jCas) {
             if (z instanceof Title) {
                 Title t = (Title) z;
                 String titleType;
-                if (t.getTitleType() == null)
-                    throw new IllegalArgumentException("The titleType feature was not set for " + t);
-                switch (t.getTitleType()) {
+                String titleTypeString = t.getTitleType();
+                if (titleTypeString == null)
+                    titleTypeString = "other";
+                switch (titleTypeString) {
                     case "document":
                         titleType = "title";
                         break;
@@ -54,7 +55,7 @@ public BioCDocument populate(JCas jCas) {
                         titleType = "null";
                         break;
                     default:
-                        log.debug("Unhandled title type {}", t.getTitleType());
+                        log.debug("Unhandled title type {}", titleTypeString);
                         titleType = "other_title";
                         break;
                 }

From a87ba45c8de0dea8693c12c97da42a6e124e63f9 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 6 Apr 2022 08:09:59 +0200
Subject: [PATCH 175/269] Add '9999' confidence values to gene annotations in
 the GNP reader.

9999 means "exact" match. GNP checks a dictionary to find IDs and transfers those to other matches under some circumstances.
---
 .../main/java/de/julielab/jcore/reader/BioCCasPopulator.java  | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
index 4c8b3f908..972d0e7dd 100644
--- a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
+++ b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
@@ -220,6 +220,9 @@ private void addGeneAnnotation(BioCAnnotation annotation, JCas jCas) throws Miss
         BioCLocation location = annotation.getTotalLocation();
         Gene gene = new Gene(jCas, location.getOffset(), location.getOffset() + location.getLength());
         gene.setSpecificType("Gene");
+        // 9999 ist the GeNo score for exact matches; GNP only recognized exact dictionary matches and transfers
+        // their IDs to other forms under certain circumstances (abbreviations, for example)
+        gene.setConfidence("9999");
         ResourceEntry resourceEntry = new ResourceEntry(jCas, gene.getBegin(), gene.getEnd());
         resourceEntry.setSource("NCBI Gene");
         resourceEntry.setComponentId(GNormPlusFormatMultiplierReader.class.getCanonicalName());
@@ -244,6 +247,7 @@ private void addFamilyAnnotation(BioCAnnotation annotation, JCas jCas) {
         }
         Gene gene = new Gene(jCas, location.getOffset(), location.getOffset() + location.getLength());
         gene.setSpecificType("FamilyName");
+        gene.setConfidence("9999");
         // e.g.  <infon key="FocusSpecies">NCBITaxonomyID:9606</infon>
         Optional<String> focusSpecies = annotation.getInfon("FocusSpecies");
         if (!focusSpecies.isPresent())

From 2917a9220c69eb1e4577b50180a7b04c99bab4b6 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 6 Apr 2022 08:10:31 +0200
Subject: [PATCH 176/269] Add error messages when the passed data resource is
 null.

---
 .../main/java/de/julielab/jcore/utility/JCoReTools.java    | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/jcore-utilities/src/main/java/de/julielab/jcore/utility/JCoReTools.java b/jcore-utilities/src/main/java/de/julielab/jcore/utility/JCoReTools.java
index 038321c70..606502b76 100644
--- a/jcore-utilities/src/main/java/de/julielab/jcore/utility/JCoReTools.java
+++ b/jcore-utilities/src/main/java/de/julielab/jcore/utility/JCoReTools.java
@@ -259,7 +259,8 @@ public static FSArray copyFSArray(FSArray array) {
 
     /**
      * Creates a new <tt>StringArray</tt> from the given string elements.
-     * @param jCas The jCas to associate the new StringArray with.
+     *
+     * @param jCas     The jCas to associate the new StringArray with.
      * @param elements The strings to put into the StringArray.
      * @return The new, filled StringArray.
      */
@@ -456,7 +457,11 @@ else if (comparison < 0) {
      * @throws IOException If reading the resource file fails.
      */
     public static InputStream resolveExternalResourceGzipInputStream(DataResource resource) throws IOException {
+        if (resource == null)
+            throw new IllegalArgumentException("The passed DataResource is null.");
         InputStream is = resource.getInputStream();
+        if (is == null)
+            throw new IllegalArgumentException("The resource at " + resource.getUrl() + " could not be read. It does not exist or is not on the ClassPath.");
         String lcUriString = resource.getUri().toString().toLowerCase();
         if (lcUriString.endsWith(".gz") || lcUriString.endsWith(".gzip"))
             is = new GZIPInputStream(is);

From aea26c82f5c1803f9ead2abb45dff3be38810fc8 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 12 Apr 2022 15:37:42 +0200
Subject: [PATCH 177/269] Handle multiple gene IDs assigned by GNormPlus.

This happens for ranges or enumerations such as B7-1/2. Now, a new ResourceEntry is created for each such ID. Thus, one Gene can now have multiple ResourceEntries.
---
 .../jcore/reader/BioCCasPopulator.java        | 23 +++++++++++--------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
index 972d0e7dd..553224c3a 100644
--- a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
+++ b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
@@ -220,15 +220,19 @@ private void addGeneAnnotation(BioCAnnotation annotation, JCas jCas) throws Miss
         BioCLocation location = annotation.getTotalLocation();
         Gene gene = new Gene(jCas, location.getOffset(), location.getOffset() + location.getLength());
         gene.setSpecificType("Gene");
-        // 9999 ist the GeNo score for exact matches; GNP only recognized exact dictionary matches and transfers
-        // their IDs to other forms under certain circumstances (abbreviations, for example)
-        gene.setConfidence("9999");
-        ResourceEntry resourceEntry = new ResourceEntry(jCas, gene.getBegin(), gene.getEnd());
-        resourceEntry.setSource("NCBI Gene");
-        resourceEntry.setComponentId(GNormPlusFormatMultiplierReader.class.getCanonicalName());
-        resourceEntry.setEntryId(geneId.get());
-        FSArray resourceEntryList = new FSArray(jCas, 1);
-        resourceEntryList.set(0, resourceEntry);
+        // one gene mention might have multiple IDs when there are ranges or enumerations, e.g. "IL2-5", "B7-1 and B7-2" or "B7-1/2"
+        String[] geneIds = geneId.get().split(";");
+        FSArray resourceEntryList = new FSArray(jCas, geneIds.length);
+        for (int i = 0; i < geneIds.length; i++) {
+            ResourceEntry resourceEntry = new ResourceEntry(jCas, gene.getBegin(), gene.getEnd());
+            // 9999 ist the GeNo score for exact matches; GNP only recognized exact dictionary matches and transfers
+            // their IDs to other forms under certain circumstances (abbreviations, for example)
+            resourceEntry.setConfidence("9999");
+            resourceEntry.setSource("NCBI Gene");
+            resourceEntry.setComponentId(GNormPlusFormatMultiplierReader.class.getCanonicalName());
+            resourceEntry.setEntryId(geneIds[i]);
+            resourceEntryList.set(i, resourceEntry);
+        }
         gene.setResourceEntryList(resourceEntryList);
         gene.addToIndexes();
     }
@@ -247,7 +251,6 @@ private void addFamilyAnnotation(BioCAnnotation annotation, JCas jCas) {
         }
         Gene gene = new Gene(jCas, location.getOffset(), location.getOffset() + location.getLength());
         gene.setSpecificType("FamilyName");
-        gene.setConfidence("9999");
         // e.g.  <infon key="FocusSpecies">NCBITaxonomyID:9606</infon>
         Optional<String> focusSpecies = annotation.getInfon("FocusSpecies");
         if (!focusSpecies.isPresent())

From bfe6bc911374b97e001201695787e5544210339b Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 12 Apr 2022 15:37:52 +0200
Subject: [PATCH 178/269] Minor changes.

---
 .../jcore/ae/biosem/BioSemEventAnnotator.java | 22 +++++++++----------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/jcore-biosem-ae/src/main/java/de/julielab/jcore/ae/biosem/BioSemEventAnnotator.java b/jcore-biosem-ae/src/main/java/de/julielab/jcore/ae/biosem/BioSemEventAnnotator.java
index e263b203f..8a42dd9dc 100644
--- a/jcore-biosem-ae/src/main/java/de/julielab/jcore/ae/biosem/BioSemEventAnnotator.java
+++ b/jcore-biosem-ae/src/main/java/de/julielab/jcore/ae/biosem/BioSemEventAnnotator.java
@@ -1,11 +1,11 @@
-/** 
- * 
+/**
+ *
  * Copyright (c) 2017, JULIE Lab.
- * All rights reserved. This program and the accompanying materials 
+ * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the BSD-2-Clause License
  *
- * Author: 
- * 
+ * Author:
+ *
  * Description:
  **/
 package de.julielab.jcore.ae.biosem;
@@ -237,7 +237,7 @@ private EventMention addEventToIndexes(PData event, Map<String, Gene> proteinMap
 	}
 
 	/**
-	 * 
+	 *
 	 * @param uimaEvent
 	 *            The UIMA event annotation to add a new argument to
 	 * @param bioSemArg
@@ -342,7 +342,7 @@ private void addUimaEventArgument(EventMention uimaEvent, Object bioSemArg, int
 	}
 
 	/**
-	 * 
+	 *
 	 * @param uimaEvent
 	 * @param uimaArg
 	 * @param argPos
@@ -383,7 +383,7 @@ private EventTrigger addTriggerToIndexes(Word trg, JCas aJCas) {
 	 * ID&lt;tab&gt;Entity-Type[Protein]&lt;tab&gt;start&lt;tab&gt;end&lt;tab&gt;Mention name
 	 * </code> <br/>
 	 * Example: <samp> T3 Protein 166 174 TGF-beta </samp>
-	 * 
+	 *
 	 * @return
 	 */
 	private List<String> getProteinLines(Map<String, Gene> proteins, String docId) throws AnnotatorProcessException {
@@ -405,7 +405,7 @@ private List<String> getProteinLines(Map<String, Gene> proteins, String docId) t
 	/**
 	 * Assigns an ID of the form <tt>Ti</tt> to each gene in the CAS, <tt>i</tt>
 	 * being an enumeration number beginning at 0.
-	 * 
+	 *
 	 * @param aJCas
 	 * @return
 	 */
@@ -421,9 +421,7 @@ private Map<String, Gene> enumerateProteins(JCas aJCas) {
 			Gene gene = (Gene) geneIt.next();
 			if (gene.getBegin() < lastEnd)
 				continue;
-			String id = gene.getId();
-			// if (StringUtils.isBlank(id))
-			id = "T" + i++;
+			String id = "T" + i++;
 			gene.setId(id);
 			proteins.put(id, gene);
 			lastEnd = gene.getEnd();

From e514d6103bee2d85dfc54a0d48b131ddb8be2f12 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 12 Apr 2022 15:38:34 +0200
Subject: [PATCH 179/269] Resolve a JAR hell issue with commons-io.

The solution: Don't use commons-io.
---
 .../Neo4jRelationsConsumer.java               | 62 +++++++++++--------
 1 file changed, 36 insertions(+), 26 deletions(-)

diff --git a/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java b/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java
index 0a1aaafff..190cf30cd 100644
--- a/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java
+++ b/jcore-neo4j-relations-consumer/src/main/java/de/julielab/jcore/consumer/neo4jrelations/Neo4jRelationsConsumer.java
@@ -6,6 +6,7 @@
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.google.common.collect.HashMultiset;
 import com.google.common.collect.Multiset;
+import de.julielab.java.utilities.IOStreamUtilities;
 import de.julielab.jcore.ae.checkpoint.DocumentId;
 import de.julielab.jcore.ae.checkpoint.DocumentReleaseCheckpoint;
 import de.julielab.jcore.types.ArgumentMention;
@@ -20,7 +21,6 @@
 import de.julielab.neo4j.plugins.datarepresentation.ImportIETypedRelations;
 import de.julielab.neo4j.plugins.datarepresentation.constants.ImportIERelations;
 import org.apache.commons.codec.binary.Base64;
-import org.apache.commons.io.IOUtils;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -44,8 +44,6 @@
 import java.util.*;
 import java.util.stream.StreamSupport;
 
-import static java.nio.charset.StandardCharsets.UTF_8;
-
 @ResourceMetaData(name = "JCoRe Neo4j Relations Consumer", description = "This component assumes that a Neo4j server with an installed julieliab-neo4j-plugins-concepts plugin installed. It then sends FlattenedRelation instances with more then one arguments to Neo4j. Note that this requires the event arguments to have a ResourceEntry list to obtain database concept IDs from.", vendor = "JULIE Lab, Germany", copyright = "JULIE Lab", version = "2.6.0-SNAPSHOT")
 @TypeCapability(inputs = {"de.julielab.jcore.types.EventMention"})
 public class Neo4jRelationsConsumer extends JCasAnnotator_ImplBase {
@@ -84,19 +82,24 @@ public class Neo4jRelationsConsumer extends JCasAnnotator_ImplBase {
      */
     @Override
     public void initialize(final UimaContext aContext) throws ResourceInitializationException {
-        url = (String) aContext.getConfigParameterValue(PARAM_URL);
-        idProperty = (String) aContext.getConfigParameterValue(PARAM_ID_PROPERTY);
-        globalSource = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_SOURCE)).orElse(null);
-        neo4jUser = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_NEO4J_USER)).orElse(null);
-        neo4jPassword = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_NEO4J_PASSWORD)).orElse(null);
-        writeBatchSize = Optional.ofNullable((Integer) aContext.getConfigParameterValue(PARAM_WRITE_BATCH_SIZE)).orElse(50);
-        om = new ObjectMapper();
-        om.setSerializationInclusion(JsonInclude.Include.NON_NULL);
-        om.setSerializationInclusion(JsonInclude.Include.NON_EMPTY);
-        initImportRelations();
-        DocumentReleaseCheckpoint.get().register(Neo4jRelationsConsumer.class.getCanonicalName());
-        documentIds = new HashSet<>();
-        docNum = 0;
+        try {
+            url = (String) aContext.getConfigParameterValue(PARAM_URL);
+            idProperty = (String) aContext.getConfigParameterValue(PARAM_ID_PROPERTY);
+            globalSource = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_SOURCE)).orElse(null);
+            neo4jUser = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_NEO4J_USER)).orElse(null);
+            neo4jPassword = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_NEO4J_PASSWORD)).orElse(null);
+            writeBatchSize = Optional.ofNullable((Integer) aContext.getConfigParameterValue(PARAM_WRITE_BATCH_SIZE)).orElse(50);
+            om = new ObjectMapper();
+            om.setSerializationInclusion(JsonInclude.Include.NON_NULL);
+            om.setSerializationInclusion(JsonInclude.Include.NON_EMPTY);
+            initImportRelations();
+            DocumentReleaseCheckpoint.get().register(Neo4jRelationsConsumer.class.getCanonicalName());
+            documentIds = new HashSet<>();
+            docNum = 0;
+        } catch (Throwable e) {
+            log.error("Could not initialize", e);
+            throw new ResourceInitializationException(e);
+        }
     }
 
     private void initImportRelations() {
@@ -109,16 +112,23 @@ private void initImportRelations() {
      */
     @Override
     public void process(final JCas aJCas) throws AnalysisEngineProcessException {
-        ImportIERelationDocument document = convertRelations(aJCas);
-        if (!document.getRelations().isEmpty())
-            importIERelations.addRelationDocument(document);
+        try {
+            ImportIERelationDocument document = convertRelations(aJCas);
+            if (!document.getRelations().isEmpty())
+                importIERelations.addRelationDocument(document);
 
-        Optional<DBProcessingMetaData> metaOpt = JCasUtil.select(aJCas, DBProcessingMetaData.class).stream().findAny();
-        documentIds.add(metaOpt.isPresent() ? new DocumentId(metaOpt.get()) : new DocumentId(JCoReTools.getDocId(aJCas)));
+            Optional<DBProcessingMetaData> metaOpt = JCasUtil.select(aJCas, DBProcessingMetaData.class).stream().findAny();
+            documentIds.add(metaOpt.isPresent() ? new DocumentId(metaOpt.get()) : new DocumentId(JCoReTools.getDocId(aJCas)));
 
-        if (documentIds.size() % writeBatchSize == 0) {
-            log.trace("Document nr {} processed, sending batch nr {} of size {} to database.", docNum, docNum / writeBatchSize, writeBatchSize);
-            batchProcessComplete();
+            if (documentIds.size() % writeBatchSize == 0) {
+                log.trace("Document nr {} processed, sending batch nr {} of size {} to database.", docNum, docNum / writeBatchSize, writeBatchSize);
+                batchProcessComplete();
+            }
+        } catch (Throwable e) {
+            log.error("Exception occurred in document {}", JCoReTools.getDocId(aJCas), e);
+            if (!(e instanceof AnalysisEngineProcessException))
+                throw new AnalysisEngineProcessException(e);
+            throw e;
         }
     }
 
@@ -187,12 +197,12 @@ private void sendRelationsToNeo4j() throws AnalysisEngineProcessException {
                     g.close();
                 }
                 try (InputStream inputStream = urlConnection.getInputStream()) {
-                    log.debug("Response from Neo4j: {}", IOUtils.toString(inputStream, UTF_8));
+                    log.debug("Response from Neo4j: {}", IOStreamUtilities.getStringFromInputStream(inputStream));
                 } catch (IOException e) {
                     log.error("Exception occurred while sending relation data to Neo4j server.");
                     try (InputStream inputStream = urlConnection.getErrorStream()) {
                         if (inputStream != null)
-                            log.error("Error from Neo4j: {}", IOUtils.toString(inputStream, UTF_8));
+                            log.error("Error from Neo4j: {}", IOStreamUtilities.getStringFromInputStream(inputStream));
                     }
                     throw e;
                 }

From ba3fea8169e14f2db55a77a8b29affdaa39e45d3 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 20 Apr 2022 12:06:14 +0200
Subject: [PATCH 180/269] Add an MMAX2 reader. Fixes #136.

---
 jcore-mmax2-reader/LICENSE                    |  26 ++
 jcore-mmax2-reader/README.md                  |  34 ++
 jcore-mmax2-reader/pom.xml                    |  73 ++++
 .../julielab/jcore/cr/mmax2/MMAX2Reader.java  | 323 ++++++++++++++++++
 .../cr/mmax2/desc/jcore-mmax2-reader.xml      |  54 +++
 .../jcore/cr/mmax2/MMAX2ReaderTest.java       |  64 ++++
 .../resources/input/mmax_26000/Basedata.uri   |   1 +
 .../input/mmax_26000/Basedata/Basedata.xml    | 240 +++++++++++++
 .../input/mmax_26000/Basedata/words.dtd       |   3 +
 .../mmax_26000/Customizations/proteins.xml    |  72 ++++
 .../mmax_26000/Customizations/sentence.xml    |   3 +
 .../input/mmax_26000/Markables/markables.dtd  |   2 +
 .../input/mmax_26000/Markables/proteins.xml   |  20 ++
 .../input/mmax_26000/Markables/sentence.xml   |  14 +
 .../input/mmax_26000/Schemes/proteins.xml     |  16 +
 .../input/mmax_26000/Schemes/sentence.xml     |   3 +
 .../input/mmax_26000/Styles/default_style.xsl |  58 ++++
 .../input/mmax_26000/common_paths.xml         |  17 +
 .../resources/input/mmax_26000/project.mmax   |   7 +
 .../src/test/resources/originalText/10048764  |   2 +
 20 files changed, 1032 insertions(+)
 create mode 100644 jcore-mmax2-reader/LICENSE
 create mode 100644 jcore-mmax2-reader/README.md
 create mode 100644 jcore-mmax2-reader/pom.xml
 create mode 100644 jcore-mmax2-reader/src/main/java/de/julielab/jcore/cr/mmax2/MMAX2Reader.java
 create mode 100644 jcore-mmax2-reader/src/main/resources/de/julielab/jcore/cr/mmax2/desc/jcore-mmax2-reader.xml
 create mode 100644 jcore-mmax2-reader/src/test/java/de/julielab/jcore/cr/mmax2/MMAX2ReaderTest.java
 create mode 100644 jcore-mmax2-reader/src/test/resources/input/mmax_26000/Basedata.uri
 create mode 100644 jcore-mmax2-reader/src/test/resources/input/mmax_26000/Basedata/Basedata.xml
 create mode 100644 jcore-mmax2-reader/src/test/resources/input/mmax_26000/Basedata/words.dtd
 create mode 100644 jcore-mmax2-reader/src/test/resources/input/mmax_26000/Customizations/proteins.xml
 create mode 100644 jcore-mmax2-reader/src/test/resources/input/mmax_26000/Customizations/sentence.xml
 create mode 100644 jcore-mmax2-reader/src/test/resources/input/mmax_26000/Markables/markables.dtd
 create mode 100644 jcore-mmax2-reader/src/test/resources/input/mmax_26000/Markables/proteins.xml
 create mode 100644 jcore-mmax2-reader/src/test/resources/input/mmax_26000/Markables/sentence.xml
 create mode 100644 jcore-mmax2-reader/src/test/resources/input/mmax_26000/Schemes/proteins.xml
 create mode 100644 jcore-mmax2-reader/src/test/resources/input/mmax_26000/Schemes/sentence.xml
 create mode 100644 jcore-mmax2-reader/src/test/resources/input/mmax_26000/Styles/default_style.xsl
 create mode 100644 jcore-mmax2-reader/src/test/resources/input/mmax_26000/common_paths.xml
 create mode 100644 jcore-mmax2-reader/src/test/resources/input/mmax_26000/project.mmax
 create mode 100644 jcore-mmax2-reader/src/test/resources/originalText/10048764

diff --git a/jcore-mmax2-reader/LICENSE b/jcore-mmax2-reader/LICENSE
new file mode 100644
index 000000000..fbbd41e05
--- /dev/null
+++ b/jcore-mmax2-reader/LICENSE
@@ -0,0 +1,26 @@
+BSD 2-Clause License
+
+Copyright (c) 2017, JULIE Lab
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/jcore-mmax2-reader/README.md b/jcore-mmax2-reader/README.md
new file mode 100644
index 000000000..2cacbc00a
--- /dev/null
+++ b/jcore-mmax2-reader/README.md
@@ -0,0 +1,34 @@
+# JCoRe MMAX2 Reader
+Collection reader for MMAX2 annotation projects. Each sub directory of the input directory is read as one MMAX2 project.
+
+**Descriptor Path**:
+```
+de.julielab.jcore.cr.mmax2.desc.jcore-mmax2-reader
+```
+
+`More thorough description`
+`Are there any requirements or dependencies for this component?`
+
+**1. Parameters**
+
+| Parameter Name | Parameter Type | Mandatory | Multivalued | Description |
+|----------------|----------------|-----------|-------------|-------------|
+| param1 | UIMA-Type | Boolean | Boolean | Description |
+| param2 | UIMA-Type | Boolean | Boolean | Description |
+
+**2. Predefined Settings**
+
+| Parameter Name | Parameter Syntax | Example |
+|----------------|------------------|---------|
+| param1 | Syntax-Description | `Example` |
+| param2 | Syntax-Description | `Example` |
+
+**3. Capabilities**
+
+| Type | Input | Output |
+|------|:-----:|:------:|
+| de.julielab.jcore.types.TYPE |  | `+` |
+| de.julielab.jcore.types.ace.TYPE | `+` |  |
+
+
+[1] Some Literature?
diff --git a/jcore-mmax2-reader/pom.xml b/jcore-mmax2-reader/pom.xml
new file mode 100644
index 000000000..39f6d714e
--- /dev/null
+++ b/jcore-mmax2-reader/pom.xml
@@ -0,0 +1,73 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>jcore-mmax2-reader</artifactId>
+    <packaging>jar</packaging>
+    <groupId>de.julielab</groupId>
+
+    <parent>
+        <groupId>de.julielab</groupId>
+        <artifactId>jcore-base</artifactId>
+        <version>2.6.0-SNAPSHOT</version>
+    </parent>
+
+    <version>2.6.0-SNAPSHOT</version>
+
+    <dependencies>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-descriptor-creator</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-types</artifactId>
+            <version>${jcore-types-version}</version>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>julielab-mmax-to-iob-iexml-converter</artifactId>
+            <version>1.0.2-SNAPSHOT</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.commons</groupId>
+            <artifactId>commons-lang3</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-utilities</artifactId>
+            <version>${jcore-utilities-version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.assertj</groupId>
+            <artifactId>assertj-core</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
+        </dependency>
+    </dependencies>
+    <name>JCoRe MMAX2 reader.</name>
+    <organization>
+        <name>JULIE Lab Jena, Germany</name>
+        <url>http://www.julielab.de</url>
+    </organization>
+    <url>https://github.com/JULIELab/jcore-base/tree/master/jcore-mmax2-reader</url>
+    <description>Collection reader for MMAX2 annotation projects.</description>
+    <licenses>
+        <license>
+            <name>BSD 2-Clause</name>
+            <url>https://opensource.org/licenses/BSD-2-Clause</url>
+        </license>
+    </licenses>
+</project>
diff --git a/jcore-mmax2-reader/src/main/java/de/julielab/jcore/cr/mmax2/MMAX2Reader.java b/jcore-mmax2-reader/src/main/java/de/julielab/jcore/cr/mmax2/MMAX2Reader.java
new file mode 100644
index 000000000..fa09f4c69
--- /dev/null
+++ b/jcore-mmax2-reader/src/main/java/de/julielab/jcore/cr/mmax2/MMAX2Reader.java
@@ -0,0 +1,323 @@
+package de.julielab.jcore.cr.mmax2;
+
+import de.julielab.jcore.types.ConceptMention;
+import de.julielab.jcore.types.Gene;
+import de.julielab.jcore.types.Token;
+import de.julielab.jcore.utility.JCoReAnnotationTools;
+import de.julielab.jules.mmax.MarkableContainer;
+import de.julielab.jules.mmax.Statistics;
+import de.julielab.jules.mmax.WordInformation;
+import org.apache.uima.UimaContext;
+import org.apache.uima.collection.CollectionException;
+import org.apache.uima.fit.component.JCasCollectionReader_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.descriptor.ResourceMetaData;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.Progress;
+import org.apache.uima.util.ProgressImpl;
+import org.eml.MMAX2.annotation.markables.Markable;
+import org.eml.MMAX2.discourse.MMAX2Discourse;
+import org.eml.MMAX2.discourse.MMAX2DiscourseElement;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.*;
+import java.util.*;
+import java.util.function.Predicate;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+@ResourceMetaData(name = "JCoRe MMAX2 reader.", description = "Collection reader for MMAX2 annotation projects.", vendor = "JULIE Lab Jena, Germany")
+public class MMAX2Reader extends JCasCollectionReader_ImplBase {
+
+    public static final String PARAM_INPUT_DIR = "InputDir";
+    public static final String PARAM_ANNOTATION_LEVELS = "AnnotationLevels";
+    public static final String PARAM_ORIGINAL_TEXT_FILES = "OriginalTextFiles";
+    public static final String PARAM_UIMA_ANNOTATION_TYPES = "UimaAnnotationTypes";
+    private final static Logger log = LoggerFactory.getLogger(MMAX2Reader.class);
+    @ConfigurationParameter(name = PARAM_INPUT_DIR, description = "Should point to the directory of which the MMAX2 projects are sub directories of.")
+    private String inputDir;
+    @ConfigurationParameter(name = PARAM_ANNOTATION_LEVELS, description = "The names of the MMAX2 annotation levels to create annotations for.")
+    private String[] annotationLevels;
+    @ConfigurationParameter(name = PARAM_UIMA_ANNOTATION_TYPES, description = "The fully qualified names of the UIMA annotation types to be used for the representation of the input annotation level. Must match the indices of " + PARAM_ANNOTATION_LEVELS + ", i.e. the ith level will be added to the CAS as the ith type.")
+    private String[] uimaTypeNames;
+    @ConfigurationParameter(name = PARAM_ORIGINAL_TEXT_FILES, mandatory = false, description = "The MMAX2 base data consists of tokenized text and does not keep track of the original text. This parameter should point to a directory containing the original text files. The file names should match the MMAX2 project IDs.")
+    private String originalTextFilesDir;
+
+    private LinkedList<File> folderList; // MMAX2 project directories still to be read; getNext() polls from the head
+    private String actualPath; // absolute path, with trailing "/", of the project currently being read
+    private HashMap<String, String> levels2uimaNames; // annotation level name -> UIMA type name, filled in initialize()
+    private List<Class<?>> uimaAnnotationClasses; // classes loaded via Class.forName from uimaTypeNames in initialize()
+    private int numDocuments; // number of project directories found by setUpFolderList()
+
+    /**
+     * Called once by the framework at component creation: reads the descriptor parameters,
+     * loads the configured UIMA annotation classes and collects the MMAX2 project directories.
+     *
+     * @throws ResourceInitializationException if a UIMA type class cannot be loaded or the input directory does not exist
+     */
+    @Override
+    public void initialize(UimaContext context) throws ResourceInitializationException {
+        super.initialize(context);
+        inputDir = (String) context.getConfigParameterValue(PARAM_INPUT_DIR);
+        annotationLevels = (String[]) context.getConfigParameterValue(PARAM_ANNOTATION_LEVELS);
+        uimaTypeNames = (String[]) context.getConfigParameterValue(PARAM_UIMA_ANNOTATION_TYPES);
+        originalTextFilesDir = (String) context.getConfigParameterValue(PARAM_ORIGINAL_TEXT_FILES);
+        actualPath = null;
+        if (annotationLevels.length != uimaTypeNames.length)
+            throw new IllegalArgumentException("The number of annotation levels and the number of UIMA type names must match. But the given annotation levels are '" + Arrays.toString(annotationLevels) + "' and the UIMA types names are '" + Arrays.toString(uimaTypeNames) + "'.");
+        // Load every type class up front so that an unknown type name is reported at initialization.
+        List<Class<?>> loadedClasses = new ArrayList<>(uimaTypeNames.length);
+        try {
+            for (String typeName : uimaTypeNames)
+                loadedClasses.add(Class.forName(typeName));
+        } catch (ClassNotFoundException | RuntimeException e) {
+            log.error("Could not initialize UIMA annotation classes from parameter values {}", Arrays.toString(uimaTypeNames));
+            throw new ResourceInitializationException(e);
+        }
+        uimaAnnotationClasses = loadedClasses;
+        levels2uimaNames = IntStream.range(0, annotationLevels.length).collect(HashMap::new, (m, i) -> m.put(annotationLevels[i], uimaTypeNames[i]), (m1, m2) -> m1.putAll(m2));
+        setUpFolderList();
+    }
+
+    /**
+     * Collects all sub directories of the input directory - one per MMAX2 project - into folderList.
+     * @throws ResourceInitializationException if the input directory is missing or cannot be listed
+     */
+    private void setUpFolderList() throws ResourceInitializationException {
+        folderList = new LinkedList<>();
+        if (!inputDir.endsWith(File.separator))
+            this.inputDir += File.separator;
+        File rootX = new File(inputDir);
+        if (!rootX.exists()) {
+            // Fall back to resolving the input directory against the current working directory.
+            try {
+                rootX = new File(new File(".").getCanonicalFile(), inputDir);
+            } catch (IOException e) {
+                // Report the failure to the framework instead of terminating the whole JVM.
+                throw new ResourceInitializationException(e);
+            }
+        }
+        String[] children = rootX.list();
+        if (children == null) {
+            // File.list() returns null when the path does not denote a readable directory.
+            log.error("{} does not exist or is not a readable directory", inputDir);
+            throw new ResourceInitializationException(new IllegalArgumentException(inputDir + " does not exist or is not a readable directory"));
+        }
+        for (String child : children) {
+            File root = new File(rootX, child);
+            if (root.isDirectory())
+                this.folderList.add(root);
+        }
+        numDocuments = folderList.size();
+    }
+
+    /**
+     * Reads the document ID (PMID) from the Basedata.uri file of the project currently being read.
+     * @return the single line of the file, or the empty string if the file is empty
+     * @throws CollectionException if the file cannot be read or contains more than one line
+     */
+    private String getPMID() throws CollectionException {
+        // try-with-resources closes the file even when reading fails or the data is rejected
+        try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(this.actualPath + "Basedata.uri"), java.nio.charset.StandardCharsets.UTF_8))) {
+            String strLine;
+            int count = 0;
+            String pmid = "";
+            while ((strLine = br.readLine()) != null) {
+                count++;
+                pmid = strLine;
+            }
+            if (count > 1) {
+                log.error("unknown data in {}Basedata.uri", actualPath);
+                throw new CollectionException(new IllegalStateException("unknown data in " + actualPath + "Basedata.uri"));
+            }
+            return pmid;
+        } catch (IOException e) {
+            log.error("Error while parsing {}Basedata.uri", actualPath);
+            throw new CollectionException(e);
+        }
+    }
+
+    /**
+     * Reads the next MMAX2 project from the folder list into the given CAS. While the project is
+     * loaded, its style file is renamed for API use and renamed back afterwards, even on failure.
+     */
+    @Override
+    public void getNext(JCas jCas) throws CollectionException {
+        Statistics.projects++;
+        actualPath = this.folderList.poll().getAbsolutePath() + "/";
+        // rename style file from default_style.xsl to generic_nongui_style.xsl
+        // (necessary for api use)
+        File defaultStyle = new File(actualPath + "Styles/default_style.xsl");
+        File nonGuiStyle = new File(actualPath + "Styles/generic_nongui_style.xsl");
+        defaultStyle.renameTo(nonGuiStyle);
+        try {
+            File mmaxfile = new File(actualPath + "project.mmax");
+            MMAX2Discourse discourse = MMAX2Discourse.buildDiscourse(mmaxfile.getAbsolutePath());
+
+            // text from basedata with spaces between all words
+            String documentText = discourse.getNextDocumentChunk();
+
+            WordInformation[] words = new WordInformation[discourse.getDiscourseElementCount()];
+
+            // Words from basedata
+            for (MMAX2DiscourseElement elem : discourse.getDiscourseElements()) {
+                WordInformation word = new WordInformation();
+                word.setId(elem.getID());
+                int discoursePosition = elem.getDiscoursePosition();
+                word.setPosition(discoursePosition);
+                StringBuilder textBuilder = new StringBuilder();
+                int end = discourse.getDisplayEndPositionFromDiscoursePosition(discoursePosition);
+                for (int textPosition = discourse.getDisplayStartPositionFromDiscoursePosition(discoursePosition); textPosition <= end; textPosition++) {
+                    textBuilder.append(documentText.charAt(textPosition));
+                }
+                word.setText(textBuilder.toString());
+                words[discoursePosition] = word;
+            }
+
+            this.produceOutput(discourse, words, jCas);
+        } finally {
+            // set stylefile back to normal, also when reading the project failed
+            nonGuiStyle.renameTo(defaultStyle);
+        }
+        // NOTE(review): the counter was already incremented at the start of this method - confirm the double count is intended
+        Statistics.projects++;
+    }
+
+    /**
+     * Fills the CAS with the document text, one Token per MMAX2 word, one annotation per continuous
+     * markable of the configured annotation levels and one Gene per markable container of the words.
+     */
+    private void produceOutput(MMAX2Discourse discourse, WordInformation[] words, JCas jCas) throws CollectionException {
+        StringBuilder outPlain = new StringBuilder();
+        String pmid = this.getPMID();
+        if (originalTextFilesDir != null && this.originalTextFilesDir.length() > 0)
+            this.handleOriginalTextInformation(pmid, words);
+
+        // maps the discourse position of each word to the token created for it
+        Map<Integer, Token> pos2offsets = new HashMap<>();
+        for (WordInformation word : words) {
+            int tokenBegin = outPlain.length();
+            Token token = new Token(jCas, tokenBegin, tokenBegin + word.getText().length());
+            token.setComponentId(getClass().getCanonicalName());
+            token.addToIndexes();
+            pos2offsets.put(word.getPosition(), token);
+            outPlain.append(word.getText());
+            if (word.isFollowedBySpace())
+                outPlain.append(" ");
+        }
+        for (int i = 0; i < annotationLevels.length; ++i) {
+            // discontinuous markables are filtered out; every remaining markable yields one begin/end span
+            Iterator<Markable> iterator = discourse.getMarkableLevelByName(annotationLevels[i], false).getMarkables().stream().map(Markable.class::cast).filter(Predicate.not(Markable::isDiscontinuous)).iterator();
+            while (iterator.hasNext()) {
+                Markable markable = iterator.next();
+                int beginPosition = markable.getLeftmostDiscoursePosition();
+                int endPosition = markable.getRightmostDiscoursePosition();
+                int beginOffset = pos2offsets.get(beginPosition).getBegin();
+                int endOffset = pos2offsets.get(endPosition).getEnd();
+                Annotation a;
+                try {
+                    a = JCoReAnnotationTools.getAnnotationByClassName(jCas, uimaTypeNames[i]);
+                } catch (Exception e) {
+                    throw new CollectionException(e);
+                }
+                a.setBegin(beginOffset);
+                a.setEnd(endOffset);
+                // the value of the markable attribute named like the markable level becomes the specific type
+                if (a instanceof ConceptMention)
+                    ((ConceptMention) a).setSpecificType(markable.getAttributeValue(markable.getMarkableLevelName()));
+                a.addToIndexes();
+            }
+        }
+        for (WordInformation word : words) {
+            for (MarkableContainer mc : word.getMarkables()) {
+                // create the gene only at the word where the markable container begins
+                int beginPosition = mc.getBegin();
+                if (beginPosition != word.getPosition())
+                    continue;
+                int beginOffset = pos2offsets.get(beginPosition).getBegin();
+                int endOffset = pos2offsets.get(mc.getEnd()).getEnd();
+                Gene gene = new Gene(jCas, beginOffset, endOffset);
+                gene.addToIndexes();
+            }
+        }
+        jCas.setDocumentText(outPlain.toString());
+    }
+
+    /**
+     * Aligns the MMAX2 words with the original text file of the document to find out which words
+     * are followed by whitespace in the original text. Characters are compared ignoring case.
+     * Without an original text file the words are left unchanged.
+     *
+     * @param pmid  the document ID, used as the name of the original text file
+     * @param words the MMAX2 words in document order; their followedBySpace flags are updated
+     * @throws CollectionException if the original text file cannot be read or the alignment fails
+     */
+    private void handleOriginalTextInformation(String pmid, WordInformation[] words) throws CollectionException {
+        // resolve the file without modifying the configured directory field on every call
+        File file = originalTextFilesDir.isEmpty() ? new File(pmid) : new File(originalTextFilesDir, pmid);
+        if (!file.exists()) {
+            log.warn("no original File found for {} using only mmax text.", pmid);
+            return;
+        }
+        // without words there is nothing to align
+        if (words.length == 0)
+            return;
+        // NOTE(review): the file is decoded with the platform default charset - confirm the encoding of the text files
+        try (Reader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)))) {
+            int wordCounter = 0;
+            WordInformation actualWord = words[wordCounter];
+            String actualText = actualWord.getText();
+            actualWord.setFollowedBySpace(false);
+            int wordCharCounter = 0;
+            int i;
+            while ((i = reader.read()) >= 0) {
+                if (wordCharCounter >= actualText.length()) {
+                    // the current word is completely matched, continue with the next one
+                    wordCounter++;
+                    if (wordCounter >= words.length) {
+                        if (!Character.isWhitespace(i))
+                            log.warn("original Text contains more words than mmax information");
+                        return;
+                    }
+                    actualWord = words[wordCounter];
+                    actualText = actualWord.getText();
+                    actualWord.setFollowedBySpace(false);
+                    wordCharCounter = 0;
+                }
+
+                if (actualText.charAt(wordCharCounter) == i || Character.toLowerCase(actualText.charAt(wordCharCounter)) == Character.toLowerCase(i)) {
+                    wordCharCounter++;
+                } else if (!Character.isWhitespace(i)) {
+                    log.warn("there is a non whitespace character different in original text at document {} critical character is '{}' near word '{}' (MMAX2 word ID {})", pmid, (char) i, actualText, actualWord.getId());
+                } else if (wordCounter > 0) {
+                    // whitespace in front of the current word marks the previous word as followed by space;
+                    // whitespace before the very first word has no previous word and is ignored
+                    words[wordCounter - 1].setFollowedBySpace(true);
+                }
+            }
+        } catch (IOException | RuntimeException e) {
+            log.error("Error attempting to read original text file", e);
+            throw new CollectionException(e);
+        }
+    }
+
+    @Override
+    public void close() {
+        // nothing to do: this reader keeps no open streams or other closeable resources in its fields
+    }
+
+    @Override
+    public Progress[] getProgress() {
+        return new Progress[]{new ProgressImpl(numDocuments - folderList.size(), numDocuments, "document")}; // completed = total project folders minus those still queued
+    }
+
+    @Override
+    public boolean hasNext() {
+        return !this.folderList.isEmpty(); // true while project folders remain to be read by getNext()
+    }
+
+
+}
diff --git a/jcore-mmax2-reader/src/main/resources/de/julielab/jcore/cr/mmax2/desc/jcore-mmax2-reader.xml b/jcore-mmax2-reader/src/main/resources/de/julielab/jcore/cr/mmax2/desc/jcore-mmax2-reader.xml
new file mode 100644
index 000000000..8f3289029
--- /dev/null
+++ b/jcore-mmax2-reader/src/main/resources/de/julielab/jcore/cr/mmax2/desc/jcore-mmax2-reader.xml
@@ -0,0 +1,54 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<collectionReaderDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <implementationName>de.julielab.jcore.cr.mmax2.MMAX2Reader</implementationName>
+  <processingResourceMetaData>
+    <name>JCoRe MMAX2 reader.</name>
+    <description>Collection reader for MMAX2 annotation projects.</description>
+    <vendor>JULIE Lab Jena, Germany</vendor>
+    <configurationParameters>
+      <configurationParameter>
+        <name>InputDir</name>
+        <description>Should point to the directory whose subdirectories are the MMAX2 projects.</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>AnnotationLevels</name>
+        <description>The names of the MMAX2 annotation levels to create annotations for.</description>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>UimaAnnotationTypes</name>
+        <description>The fully qualified names of the UIMA annotation types to be used for the representation of the input annotation level. Must match the indices of AnnotationLevels, i.e. the ith level will be added to the CAS as the ith type.</description>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>OriginalTextFiles</name>
+        <description>The MMAX2 base data consists of tokenized text and does not keep track of the original text. This parameter should point to a directory containing the original text files. The file names should match the MMAX2 project IDs.</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings/>
+    <typeSystemDescription>
+      <imports>
+        <import name="de.julielab.jcore.types.jcore-morpho-syntax-types"/>
+        <import name="de.julielab.jcore.types.jcore-document-meta-types"/>
+      </imports>
+    </typeSystemDescription>
+    <fsIndexCollection/>
+    <capabilities/>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
+      <outputsNewCASes>true</outputsNewCASes>
+    </operationalProperties>
+  </processingResourceMetaData>
+</collectionReaderDescription>
\ No newline at end of file
diff --git a/jcore-mmax2-reader/src/test/java/de/julielab/jcore/cr/mmax2/MMAX2ReaderTest.java b/jcore-mmax2-reader/src/test/java/de/julielab/jcore/cr/mmax2/MMAX2ReaderTest.java
new file mode 100644
index 000000000..410b42ed1
--- /dev/null
+++ b/jcore-mmax2-reader/src/test/java/de/julielab/jcore/cr/mmax2/MMAX2ReaderTest.java
@@ -0,0 +1,64 @@
+package de.julielab.jcore.cr.mmax2;
+
+import de.julielab.jcore.types.Protein;
+import de.julielab.jcore.types.Sentence;
+import de.julielab.jcore.types.Token;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.fit.factory.CollectionReaderFactory;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.junit.jupiter.api.Test;
+
+import java.nio.file.Path;
+import java.util.Collection;
+
+import static org.assertj.core.api.Assertions.assertThat;
+/**
+ * Unit tests for the MMAX2Reader of jcore-mmax2-reader.
+ * Both tests read the MMAX2 project located under src/test/resources/input.
+ * testReader uses the base data only; testReaderOriginalText additionally passes the original text directory.
+ */
+public class MMAX2ReaderTest {
+
+    @Test
+    public void testReader() throws Exception {
+        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types", "de.julielab.jcore.types.jcore-semantics-biology-types");
+        CollectionReader reader = CollectionReaderFactory.createReader("de.julielab.jcore.cr.mmax2.desc.jcore-mmax2-reader",
+                MMAX2Reader.PARAM_INPUT_DIR, Path.of("src", "test", "resources", "input").toString(),
+                MMAX2Reader.PARAM_ANNOTATION_LEVELS, new String[]{"proteins", "sentence"},
+                MMAX2Reader.PARAM_UIMA_ANNOTATION_TYPES, new String[]{"de.julielab.jcore.types.Protein", "de.julielab.jcore.types.Sentence"});
+        assertThat(reader.hasNext()).isTrue();
+        reader.getNext(jCas.getCas());
+        // PARAM_ORIGINAL_TEXT_FILES is not set here, so the expected text is tokenized: punctuation is separated by spaces
+        assertThat(jCas.getDocumentText()).startsWith("Characterization of antihuman IFNAR-1 monoclonal antibodies : epitope localization and functional analysis .");
+        Collection<Protein> proteins = JCasUtil.select(jCas, Protein.class);
+        assertThat(proteins).hasSize(16); // Markables/proteins.xml of the test project lists 16 markables
+        assertThat(proteins).map(Protein::getCoveredText).contains("IFNAR-1", "type I interferon receptor", "HuIFNAR-1", "Stat");
+        Collection<Sentence> sentences = JCasUtil.select(jCas, Sentence.class);
+        assertThat(sentences).hasSize(10); // Markables/sentence.xml of the test project lists 10 markables
+        Collection<Token> tokens = JCasUtil.select(jCas, Token.class);
+        // spot-check: a small sample of the tokens that should have been created
+       assertThat(tokens).map(Token::getCoveredText).contains("Characterization", "IFNAR-1", ":", "(", "subunits", "recognition", ".", "HuIFNAR-1");
+    }
+
+    @Test
+    public void testReaderOriginalText() throws Exception {
+        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types", "de.julielab.jcore.types.jcore-semantics-biology-types");
+        CollectionReader reader = CollectionReaderFactory.createReader("de.julielab.jcore.cr.mmax2.desc.jcore-mmax2-reader",
+                MMAX2Reader.PARAM_INPUT_DIR, Path.of("src", "test", "resources", "input").toString(),
+                MMAX2Reader.PARAM_ORIGINAL_TEXT_FILES, Path.of("src", "test", "resources", "originalText").toString(),
+                MMAX2Reader.PARAM_ANNOTATION_LEVELS, new String[]{"proteins"},
+                MMAX2Reader.PARAM_UIMA_ANNOTATION_TYPES, new String[]{"de.julielab.jcore.types.Protein"});
+        assertThat(reader.hasNext()).isTrue();
+        reader.getNext(jCas.getCas());
+        // with the original text directory given, the expected text follows the original spacing (no space before the colon or the period)
+        assertThat(jCas.getDocumentText()).startsWith("Characterization of antihuman IFNAR-1 monoclonal antibodies: epitope localization and functional analysis.");
+        Collection<Protein> proteins = JCasUtil.select(jCas, Protein.class);
+        assertThat(proteins).hasSize(16); // same 16 protein markables as in testReader
+        assertThat(proteins).map(Protein::getCoveredText).contains("IFNAR-1", "type I interferon receptor", "HuIFNAR-1", "Stat");
+        Collection<Token> tokens = JCasUtil.select(jCas, Token.class);
+        // spot-check: the same token sample must be found although the document text is not tokenized
+        assertThat(tokens).map(Token::getCoveredText).contains("Characterization", "IFNAR-1", ":", "(", "subunits", "recognition", ".", "HuIFNAR-1");
+    }
+}
diff --git a/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Basedata.uri b/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Basedata.uri
new file mode 100644
index 000000000..134fd8e79
--- /dev/null
+++ b/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Basedata.uri
@@ -0,0 +1 @@
+10048764
diff --git a/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Basedata/Basedata.xml b/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Basedata/Basedata.xml
new file mode 100644
index 000000000..cd5e3c8a3
--- /dev/null
+++ b/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Basedata/Basedata.xml
@@ -0,0 +1,240 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<!DOCTYPE words SYSTEM "words.dtd">
+<words>
+<word id="word_1">Characterization</word>
+<word id="word_2">of</word>
+<word id="word_3">antihuman</word>
+<word id="word_4">IFNAR-1</word>
+<word id="word_5">monoclonal</word>
+<word id="word_6">antibodies</word>
+<word id="word_7">:</word>
+<word id="word_8">epitope</word>
+<word id="word_9">localization</word>
+<word id="word_10">and</word>
+<word id="word_11">functional</word>
+<word id="word_12">analysis</word>
+<word id="word_13">.</word>
+<word id="word_14">The</word>
+<word id="word_15">type</word>
+<word id="word_16">I</word>
+<word id="word_17">interferon</word>
+<word id="word_18">receptor</word>
+<word id="word_19">(</word>
+<word id="word_20">IFNAR</word>
+<word id="word_21">)</word>
+<word id="word_22">is</word>
+<word id="word_23">composed</word>
+<word id="word_24">of</word>
+<word id="word_25">two</word>
+<word id="word_26">subunits</word>
+<word id="word_27">,</word>
+<word id="word_28">IFNAR-1</word>
+<word id="word_29">and</word>
+<word id="word_30">IFNAR-2</word>
+<word id="word_31">,</word>
+<word id="word_32">encoding</word>
+<word id="word_33">transmembrane</word>
+<word id="word_34">polypeptides</word>
+<word id="word_35">.</word>
+<word id="word_36">IFNAR-2</word>
+<word id="word_37">has</word>
+<word id="word_38">a</word>
+<word id="word_39">dominant</word>
+<word id="word_40">role</word>
+<word id="word_41">in</word>
+<word id="word_42">ligand</word>
+<word id="word_43">binding</word>
+<word id="word_44">,</word>
+<word id="word_45">but</word>
+<word id="word_46">IFNAR-1</word>
+<word id="word_47">contributes</word>
+<word id="word_48">to</word>
+<word id="word_49">binding</word>
+<word id="word_50">affinity</word>
+<word id="word_51">and</word>
+<word id="word_52">to</word>
+<word id="word_53">differential</word>
+<word id="word_54">ligand</word>
+<word id="word_55">recognition</word>
+<word id="word_56">.</word>
+<word id="word_57">A</word>
+<word id="word_58">panel</word>
+<word id="word_59">of</word>
+<word id="word_60">five</word>
+<word id="word_61">monoclonal</word>
+<word id="word_62">antibodies</word>
+<word id="word_63">(</word>
+<word id="word_64">mAb</word>
+<word id="word_65">)</word>
+<word id="word_66">to</word>
+<word id="word_67">human</word>
+<word id="word_68">IFNAR-1</word>
+<word id="word_69">(</word>
+<word id="word_70">HuIFNAR-1</word>
+<word id="word_71">)</word>
+<word id="word_72">was</word>
+<word id="word_73">produced</word>
+<word id="word_74">and</word>
+<word id="word_75">characterized</word>
+<word id="word_76">.</word>
+<word id="word_77">The</word>
+<word id="word_78">reactivity</word>
+<word id="word_79">of</word>
+<word id="word_80">each</word>
+<word id="word_81">mAb</word>
+<word id="word_82">toward</word>
+<word id="word_83">HuIFNAR-1</word>
+<word id="word_84">on</word>
+<word id="word_85">native</word>
+<word id="word_86">and</word>
+<word id="word_87">transfected</word>
+<word id="word_88">cells</word>
+<word id="word_89">and</word>
+<word id="word_90">in</word>
+<word id="word_91">Western</word>
+<word id="word_92">blot</word>
+<word id="word_93">and</word>
+<word id="word_94">ELISA</word>
+<word id="word_95">formats</word>
+<word id="word_96">was</word>
+<word id="word_97">determined</word>
+<word id="word_98">.</word>
+<word id="word_99">In</word>
+<word id="word_100">functional</word>
+<word id="word_101">assays</word>
+<word id="word_102">,</word>
+<word id="word_103">one</word>
+<word id="word_104">mAb</word>
+<word id="word_105">,</word>
+<word id="word_106">EA12</word>
+<word id="word_107">,</word>
+<word id="word_108">blocked</word>
+<word id="word_109">IFN-a2</word>
+<word id="word_110">binding</word>
+<word id="word_111">to</word>
+<word id="word_112">human</word>
+<word id="word_113">cells</word>
+<word id="word_114">and</word>
+<word id="word_115">interfered</word>
+<word id="word_116">with</word>
+<word id="word_117">Stat</word>
+<word id="word_118">activation</word>
+<word id="word_119">and</word>
+<word id="word_120">antiviral</word>
+<word id="word_121">activity</word>
+<word id="word_122">.</word>
+<word id="word_123">Epitopes</word>
+<word id="word_124">for</word>
+<word id="word_125">the</word>
+<word id="word_126">mAb</word>
+<word id="word_127">were</word>
+<word id="word_128">localized</word>
+<word id="word_129">to</word>
+<word id="word_130">subdomains</word>
+<word id="word_131">of</word>
+<word id="word_132">the</word>
+<word id="word_133">HuIFNAR-1</word>
+<word id="word_134">extracellular</word>
+<word id="word_135">domain</word>
+<word id="word_136">by</word>
+<word id="word_137">differential</word>
+<word id="word_138">reactivity</word>
+<word id="word_139">of</word>
+<word id="word_140">the</word>
+<word id="word_141">mAb</word>
+<word id="word_142">to</word>
+<word id="word_143">a</word>
+<word id="word_144">series</word>
+<word id="word_145">of</word>
+<word id="word_146">human</word>
+<word id="word_147">/</word>
+<word id="word_148">bovine</word>
+<word id="word_149">IFNAR-1</word>
+<word id="word_150">chimeras</word>
+<word id="word_151">.</word>
+<word id="word_152">The</word>
+<word id="word_153">antibody</word>
+<word id="word_154">EA12</word>
+<word id="word_155">seems</word>
+<word id="word_156">to</word>
+<word id="word_157">require</word>
+<word id="word_158">native</word>
+<word id="word_159">HuIFNAR-1</word>
+<word id="word_160">for</word>
+<word id="word_161">reactivity</word>
+<word id="word_162">and</word>
+<word id="word_163">does</word>
+<word id="word_164">not</word>
+<word id="word_165">map</word>
+<word id="word_166">to</word>
+<word id="word_167">a</word>
+<word id="word_168">single</word>
+<word id="word_169">subdomain</word>
+<word id="word_170">,</word>
+<word id="word_171">perhaps</word>
+<word id="word_172">recognizing</word>
+<word id="word_173">an</word>
+<word id="word_174">epitope</word>
+<word id="word_175">containing</word>
+<word id="word_176">noncontiguous</word>
+<word id="word_177">sequences</word>
+<word id="word_178">in</word>
+<word id="word_179">at</word>
+<word id="word_180">least</word>
+<word id="word_181">two</word>
+<word id="word_182">subdomains</word>
+<word id="word_183">.</word>
+<word id="word_184">In</word>
+<word id="word_185">contrast</word>
+<word id="word_186">,</word>
+<word id="word_187">the</word>
+<word id="word_188">epitopes</word>
+<word id="word_189">of</word>
+<word id="word_190">the</word>
+<word id="word_191">non</word>
+<word id="word_192">-</word>
+<word id="word_193">neutralizing</word>
+<word id="word_194">mAb</word>
+<word id="word_195">FB2</word>
+<word id="word_196">,</word>
+<word id="word_197">AA3</word>
+<word id="word_198">,</word>
+<word id="word_199">and</word>
+<word id="word_200">GB8</word>
+<word id="word_201">mapped</word>
+<word id="word_202">,</word>
+<word id="word_203">respectively</word>
+<word id="word_204">,</word>
+<word id="word_205">to</word>
+<word id="word_206">the</word>
+<word id="word_207">first</word>
+<word id="word_208">,</word>
+<word id="word_209">second</word>
+<word id="word_210">,</word>
+<word id="word_211">and</word>
+<word id="word_212">third</word>
+<word id="word_213">subdomains</word>
+<word id="word_214">of</word>
+<word id="word_215">HuIFNAR-1</word>
+<word id="word_216">.</word>
+<word id="word_217">The</word>
+<word id="word_218">mAb</word>
+<word id="word_219">DB2</word>
+<word id="word_220">primarily</word>
+<word id="word_221">maps</word>
+<word id="word_222">to</word>
+<word id="word_223">the</word>
+<word id="word_224">fourth</word>
+<word id="word_225">subdomain</word>
+<word id="word_226">,</word>
+<word id="word_227">although</word>
+<word id="word_228">its</word>
+<word id="word_229">reactivity</word>
+<word id="word_230">may</word>
+<word id="word_231">be</word>
+<word id="word_232">affected</word>
+<word id="word_233">by</word>
+<word id="word_234">other</word>
+<word id="word_235">determinants</word>
+<word id="word_236">.</word>
+</words>
diff --git a/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Basedata/words.dtd b/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Basedata/words.dtd
new file mode 100644
index 000000000..a02b470f1
--- /dev/null
+++ b/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Basedata/words.dtd
@@ -0,0 +1,3 @@
+<!ELEMENT words (word*)>
+<!ELEMENT word (#PCDATA)>
+<!ATTLIST word id ID #REQUIRED>
\ No newline at end of file
diff --git a/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Customizations/proteins.xml b/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Customizations/proteins.xml
new file mode 100644
index 000000000..0f4bd71f8
--- /dev/null
+++ b/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Customizations/proteins.xml
@@ -0,0 +1,72 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<customization>
+
+<rule pattern="proteins={protein_familiy_or_group}" style="background=d:212212212"/>
+<rule pattern="proteins={protein_complex}" style="background=green"/>
+<rule pattern="proteins={protein_enum}" style="background=red"/>
+<rule pattern="proteins={protein}" style="background=orange"/>
+<rule pattern="proteins={protein_variant}" style="background=d:064224208"/>
+
+
+
+
+<!--
+
+<rule pattern="t_cells={memory_t_cell}" style="background=black"/>
+<rule pattern="t_cells={memory_t_cell}" style="foreground=white"/>
+<rule pattern="t_cells={regulatory_t_cell}" style="background=d:072061139"/>
+<rule pattern="t_cells={regulatory_t_cell}" style="foreground=white"/>
+<rule pattern="t_cells={cytotoxic_t_cell}" style="background=d:064224208"/>
+<rule pattern="t_cells={naive_t_cell}" style="background=d:255140000"/>
+<rule pattern="t_cells={naive_t_cell}" style="foreground=blue"/>
+<rule pattern="t_cells={nk_t_cell}" style="background=d:255140000"/>
+<rule pattern="t_cells={nk_t_cell}" style="foreground=green"/>
+
+
+<rule pattern="thymocytes={thymocyte}" style="background=d:255140000"/>
+<rule pattern="thymocytes={pro_t_cell}" style="background=green"/>
+<rule pattern="thymocytes={dn2_immature_t_cell}" style="background=green"/>
+<rule pattern="thymocytes={dn2_immature_t_cell}" style="foreground=blue"/>
+<rule pattern="thymocytes={dn3_immature_t_cell}" style="background=green"/>
+<rule pattern="thymocytes={dn3_immature_t_cell}" style="foreground=white"/>
+<rule pattern="thymocytes={dn4_alpha_beta_immature_t_cell}" style="background=green"/>
+<rule pattern="thymocytes={dn4_alpha_beta_immature_t_cell}" style="foreground=red"/>
+
+
+<rule pattern="t_helper_cells={t_helper_cell}" style="background=d:034139034"/>
+<rule pattern="t_helper_cells={t_helper_1_cell}" style="background=d:034139034"/>
+<rule pattern="t_helper_cells={t_helper_1_cell}" style="foreground=blue"/>
+<rule pattern="t_helper_cells={t_helper_2_cell}" style="background=d:034139034"/>
+<rule pattern="t_helper_cells={t_helper_2_cell}" style="foreground=red"/>
+
+
+<rule pattern="transplant_status={donor}" style="background=d:128000128"/>
+<rule pattern="transplant_status={donor}" style="foreground=blue"/>
+<rule pattern="transplant_status={recipient}" style="background=d:128000128"/>
+<rule pattern="transplant_status={recipient}" style="foreground=red"/>
+<rule pattern="age={embryonic}" style="background=green"/>
+<rule pattern="age={fetal}" style="background=green"/>
+<rule pattern="age={fetal}" style="foreground=blue"/>
+<rule pattern="age={infant}" style="background=green"/>
+<rule pattern="age={infant}" style="foreground=white"/>
+<rule pattern="age={adult}" style="background=green"/>
+<rule pattern="age={adult}" style="foreground=red"/>
+<rule pattern="organism={human}" style="background=d:255215000"/>
+<rule pattern="organism={mouse}" style="background=blue"/>
+<rule pattern="organism={mouse}" style="foreground=white"/>
+<rule pattern="organism={rat}" style="background=red"/>
+<rule pattern="organism={dog}" style="background=black"/>
+<rule pattern="organism={dog}" style="foreground=white"/>
+<rule pattern="organism={animal}" style="background=d:072061139"/>
+<rule pattern="organism={animal}" style="foreground=white"/>
+<rule pattern="organism={bacterium}" style="background=d:064224208"/>
+<rule pattern="organism={ebv_virus}" style="background=d:255140000"/>
+<rule pattern="organism={ebv_virus}" style="foreground=blue"/>
+<rule pattern="organism={cmv_virus}" style="background=d:255140000"/>
+<rule pattern="organism={cmv_virus}" style="foreground=green"/>
+<rule pattern="organism={virus}" style="background=d:255140000"/>
+-->
+
+</customization>
+
+
diff --git a/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Customizations/sentence.xml b/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Customizations/sentence.xml
new file mode 100644
index 000000000..6fbf9d136
--- /dev/null
+++ b/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Customizations/sentence.xml
@@ -0,0 +1,3 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<customization>
+</customization>
diff --git a/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Markables/markables.dtd b/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Markables/markables.dtd
new file mode 100644
index 000000000..220e8b3c8
--- /dev/null
+++ b/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Markables/markables.dtd
@@ -0,0 +1,2 @@
+<!ELEMENT markables (markable*)>
+<!ATTLIST markable id ID #REQUIRED>
\ No newline at end of file
diff --git a/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Markables/proteins.xml b/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Markables/proteins.xml
new file mode 100644
index 000000000..46c822f8d
--- /dev/null
+++ b/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Markables/proteins.xml
@@ -0,0 +1,20 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE markables SYSTEM "markables.dtd">
+<markables xmlns="www.eml.org/NameSpaces/proteins">
+<markable id="markable_19" span="word_70" mmax_level="proteins"  proteins="protein" />
+<markable id="markable_25" span="word_159" mmax_level="proteins"  proteins="protein" />
+<markable id="markable_16" span="word_36" mmax_level="proteins"  proteins="protein" />
+<markable id="markable_24" span="word_149" mmax_level="proteins"  proteins="protein" />
+<markable id="markable_15" span="word_30" mmax_level="proteins"  proteins="protein" />
+<markable id="markable_18" span="word_68" mmax_level="proteins"  proteins="protein" />
+<markable id="markable_23" span="word_133" mmax_level="proteins"  proteins="protein" />
+<markable id="markable_17" span="word_46" mmax_level="proteins"  proteins="protein" />
+<markable id="markable_22" span="word_117" mmax_level="proteins"  proteins="protein_familiy_or_group" />
+<markable id="markable_12" span="word_15..word_18" mmax_level="proteins"  proteins="protein_complex" />
+<markable id="markable_11" span="word_4" mmax_level="proteins"  proteins="protein" />
+<markable id="markable_14" span="word_28" mmax_level="proteins"  proteins="protein" />
+<markable id="markable_26" span="word_215" mmax_level="proteins"  proteins="protein" />
+<markable id="markable_13" span="word_20" mmax_level="proteins"  proteins="protein_complex" />
+<markable id="markable_21" span="word_109" mmax_level="proteins"  proteins="protein" />
+<markable id="markable_20" span="word_83" mmax_level="proteins"  proteins="protein" />
+</markables>
\ No newline at end of file
diff --git a/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Markables/sentence.xml b/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Markables/sentence.xml
new file mode 100644
index 000000000..9a91c925b
--- /dev/null
+++ b/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Markables/sentence.xml
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE markables SYSTEM "markables.dtd">
+<markables xmlns="www.eml.org/NameSpaces/sentence">
+<markable id="markable_7" span="word_123..word_151" mmax_level="sentence" />
+<markable id="markable_6" span="word_99..word_122" mmax_level="sentence" />
+<markable id="markable_5" span="word_77..word_98" mmax_level="sentence" />
+<markable id="markable_4" span="word_57..word_76" mmax_level="sentence" />
+<markable id="markable_3" span="word_36..word_56" mmax_level="sentence" />
+<markable id="markable_2" span="word_14..word_35" mmax_level="sentence" />
+<markable id="markable_1" span="word_1..word_13" mmax_level="sentence" />
+<markable id="markable_10" span="word_217..word_236" mmax_level="sentence" />
+<markable id="markable_8" span="word_152..word_183" mmax_level="sentence" />
+<markable id="markable_9" span="word_184..word_216" mmax_level="sentence" />
+</markables>
\ No newline at end of file
diff --git a/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Schemes/proteins.xml b/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Schemes/proteins.xml
new file mode 100644
index 000000000..1045dc27e
--- /dev/null
+++ b/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Schemes/proteins.xml
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<annotationscheme>
+
+  <attribute id="proteins" name="proteins" text="" type="nominal_button">
+
+<value id="protein_familiy_or_group" name="protein_familiy_or_group"/>
+    <value id="protein_complex" name="protein_complex"/>
+    <value id="protein_enum" name="protein_enum"/>
+    <value id="protein" name="protein"/>
+   <value id="protein_variant" name="protein_variant"/>
+
+   </attribute>
+
+
+
+</annotationscheme>
diff --git a/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Schemes/sentence.xml b/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Schemes/sentence.xml
new file mode 100644
index 000000000..f37fbc936
--- /dev/null
+++ b/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Schemes/sentence.xml
@@ -0,0 +1,3 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<annotationscheme>
+</annotationscheme>
diff --git a/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Styles/default_style.xsl b/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Styles/default_style.xsl
new file mode 100644
index 000000000..ab671aa34
--- /dev/null
+++ b/jcore-mmax2-reader/src/test/resources/input/mmax_26000/Styles/default_style.xsl
@@ -0,0 +1,58 @@
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
+xmlns:mmax="org.eml.MMAX2.discourse.MMAX2DiscourseLoader"
+xmlns:proteins="www.eml.org/NameSpaces/proteins"
+xmlns:sentence="www.eml.org/NameSpaces/sentence">
+<xsl:output method="text" indent="no" omit-xml-declaration="yes"/>
+<xsl:strip-space elements="*"/>
+
+
+<xsl:template match="words">
+
+  <xsl:apply-templates/>
+
+</xsl:template>
+
+<xsl:template match="word">
+
+  <xsl:value-of select="mmax:registerDiscourseElement(@id)"/>
+
+  <xsl:apply-templates select="mmax:getStartedMarkables(@id)" mode="opening"/>
+
+<xsl:value-of select="mmax:setDiscourseElementStart()"/>
+<xsl:value-of select="mmax:startBold()"/>
+   <xsl:apply-templates/>
+<xsl:value-of select="mmax:endBold()"/>
+  <xsl:value-of select="mmax:setDiscourseElementEnd()"/>
+
+  <xsl:apply-templates select="mmax:getEndedMarkables(@id)" mode="closing"/>
+
+<xsl:text> </xsl:text>
+
+</xsl:template>
+
+<xsl:template match="proteins:markable" mode="opening">
+<xsl:value-of select="mmax:startBold()"/>
+<xsl:value-of select="mmax:addLeftMarkableHandle(@mmax_level, @id, '[')"/>
+<xsl:value-of select="mmax:endBold()"/>
+</xsl:template>
+
+<xsl:template match="proteins:markable" mode="closing">
+<xsl:value-of select="mmax:startBold()"/>
+<xsl:value-of select="mmax:addRightMarkableHandle(@mmax_level, @id, ']')"/>
+<xsl:value-of select="mmax:endBold()"/>
+</xsl:template>
+
+
+<xsl:template match="sentence:markable" mode="closing">
+
+<xsl:value-of select="mmax:startSubscript()"/>
+ <xsl:text>
+</xsl:text>
+<xsl:value-of select="mmax:endSubscript()"/>
+
+</xsl:template>
+
+
+</xsl:stylesheet>
+
+
diff --git a/jcore-mmax2-reader/src/test/resources/input/mmax_26000/common_paths.xml b/jcore-mmax2-reader/src/test/resources/input/mmax_26000/common_paths.xml
new file mode 100644
index 000000000..8f55971b4
--- /dev/null
+++ b/jcore-mmax2-reader/src/test/resources/input/mmax_26000/common_paths.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0"?>
+<!DOCTYPE common_paths>
+<common_paths>
+<basedata_path>Basedata/</basedata_path>
+<markable_path>Markables/</markable_path>
+<scheme_path>Schemes/</scheme_path>
+<style_path>Styles/</style_path>
+<customization_path>Customizations/</customization_path>
+<views><stylesheet>default_style.xsl</stylesheet>
+</views>
+<annotations>
+<level name="proteins" schemefile="proteins.xml" customization_file="proteins.xml">proteins.xml</level>
+<level name="sentence" schemefile="sentence.xml" customization_file="sentence.xml" at_startup="visible">sentence.xml</level>
+</annotations>
+<user_switches>
+</user_switches>
+</common_paths>
\ No newline at end of file
diff --git a/jcore-mmax2-reader/src/test/resources/input/mmax_26000/project.mmax b/jcore-mmax2-reader/src/test/resources/input/mmax_26000/project.mmax
new file mode 100644
index 000000000..52fc0b1c1
--- /dev/null
+++ b/jcore-mmax2-reader/src/test/resources/input/mmax_26000/project.mmax
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<mmax_project>
+<turns></turns>
+<words>Basedata.xml</words>
+<gestures></gestures>
+<keyactions></keyactions>
+</mmax_project>
\ No newline at end of file
diff --git a/jcore-mmax2-reader/src/test/resources/originalText/10048764 b/jcore-mmax2-reader/src/test/resources/originalText/10048764
new file mode 100644
index 000000000..2db1f6185
--- /dev/null
+++ b/jcore-mmax2-reader/src/test/resources/originalText/10048764
@@ -0,0 +1,2 @@
+Characterization of antihuman IFNAR-1 monoclonal antibodies: epitope localization and functional analysis.
+The type I interferon receptor (IFNAR) is composed of two subunits, IFNAR-1 and IFNAR-2, encoding transmembrane polypeptides. IFNAR-2 has a dominant role in ligand binding, but IFNAR-1 contributes to binding affinity and to differential ligand recognition. A panel of five monoclonal antibodies (mAb) to human IFNAR-1 (HuIFNAR-1) was produced and characterized. The reactivity of each mAb toward HuIFNAR-1 on native and transfected cells and in Western blot and ELISA formats was determined. In functional assays, one mAb, EA12, blocked IFN-a2 binding to human cells and interfered with Stat activation and antiviral activity. Epitopes for the mAb were localized to subdomains of the HuIFNAR-1 extracellular domain by differential reactivity of the mAb to a series of human/bovine IFNAR-1 chimeras. The antibody EA12 seems to require native HuIFNAR-1 for reactivity and does not map to a single subdomain, perhaps recognizing an epitope containing noncontiguous sequences in at least two subdomains. In contrast, the epitopes of the non-neutralizing mAb FB2, AA3, and GB8 mapped, respectively, to the first, second, and third subdomains of HuIFNAR-1. The mAb DB2 primarily maps to the fourth subdomain, although its reactivity may be affected by other determinants.

From 8b1f50431b611c14d30b3d3ad8a19e072623cee6 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 20 Apr 2022 12:07:18 +0200
Subject: [PATCH 181/269] Add a convenience method to the ES consumer for
 parallel feature paths and filters.

The old methods would always apply all filters to all feature paths instead of assuming a 1:1 relationship.
---
 .../consumer/es/AbstractFieldGenerator.java   |  46 +-
 pom.xml                                       | 662 ++++++------------
 2 files changed, 275 insertions(+), 433 deletions(-)

diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/AbstractFieldGenerator.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/AbstractFieldGenerator.java
index 287cd68ea..44dd6d012 100644
--- a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/AbstractFieldGenerator.java
+++ b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/AbstractFieldGenerator.java
@@ -730,10 +730,12 @@ public ArrayFieldValue createRawFieldValueForAnnotations(FeatureStructure[] a, S
 
 	/**
 	 * Applies the
-	 * {@link #createRawFieldValueForAnnotation(FeatureStructure, String[], Filter[])
+	 * {@link #createRawFieldValueForAnnotation(FeatureStructure, String[], Filter[])}
 	 * method to all feature structures in <tt>fss</tt>. Thus, the feature paths and
 	 * filters are expected to be <em>parallel</em>: Each feature path has its own
-	 * filter. If the filters array is shorter than the feature paths array, the
+	 * filter. But: The feature paths and filters are applied to all feature structures.
+	 * See {@link #createRawFieldValueForParallelAnnotations(FeatureStructure[], String[], Filter[], Filter)} to apply the ith feature path to the ith feature structure.
+	 * If the filters array is shorter than the feature paths array, the
 	 * missing filters will be treated as if they were null. Finally, after all
 	 * values have been created in this way, if the <tt>overallFilter</tt> is not
 	 * null, it will be applied to all resulting values. It will be reset once
@@ -772,6 +774,46 @@ public ArrayFieldValue createRawFieldValueForAnnotations(FeatureStructure[] fss,
 		return arrayFieldValue;
 	}
 
+	/**
+	 * Calls {@link #createRawFieldValueForAnnotation(FeatureStructure, String, Filter)} for all tuples
+	 * <pre>
+	 * (fss[i], featurePaths[i], filters[i]), i in {0,..,fss.length-1}
+	 * </pre>, thus handling feature structures, feature paths and filters separately for each index. <tt>fss</tt> and
+	 * <tt>featurePaths</tt> must be non-null and of equal length. <tt>filters</tt> may be null or shorter. The
+	 * <tt>overallFilter</tt> will be applied to all values resulting from the previous process.
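+	 * @param fss the feature structures to create values for; <tt>fss[i]</tt> is paired with <tt>featurePaths[i]</tt>
+	 * @param featurePaths the feature paths, parallel to <tt>fss</tt>; must have at least <tt>fss.length</tt> elements
+	 * @param filters the filters, parallel to <tt>fss</tt>; may be null or shorter, missing filters are passed as null
+	 * @param overallFilter if not null, it is reset once and then applied to the string form of each created value
+	 * @return the flattened values of all tuples; if <tt>overallFilter</tt> is given, the filtered tokens as raw tokens
+	 * @throws CASException presumably if creating the value for one of the tuples fails (TODO confirm)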
+	 */
+	public ArrayFieldValue createRawFieldValueForParallelAnnotations(FeatureStructure[] fss, String[] featurePaths,
+															 Filter[] filters, Filter overallFilter) throws CASException {
+		ArrayFieldValue arrayFieldValue = new ArrayFieldValue();
+		for (int i = 0; i < fss.length; i++) {
+			FeatureStructure annotation = fss[i];
+			IFieldValue fieldValueForAnnotation = createRawFieldValueForAnnotation(annotation, featurePaths[i],
+					filters != null && i < filters.length ? filters[i] : null);
+			arrayFieldValue.addFlattened(fieldValueForAnnotation);
+		}
+		if (null != overallFilter) {
+			overallFilter.reset();
+			ArrayFieldValue filteredArrayFieldValue = new ArrayFieldValue();
+			for (IFieldValue fieldValue : arrayFieldValue) {
+				RawToken token = (RawToken) fieldValue;
+				String tokenString = String.valueOf(token.token);
+				List<String> filteredTokens = overallFilter.filter(tokenString);
+				if (!filteredTokens.isEmpty()) {
+					for (String filteredToken : filteredTokens)
+						filteredArrayFieldValue.add(new RawToken(filteredToken));
+				}
+			}
+			arrayFieldValue = filteredArrayFieldValue;
+		}
+		return arrayFieldValue;
+	}
+
 	/**
 	 * Creates a single array of all field values derived by the given feature paths
 	 * and filters. The <tt>filters</tt> array is taken to be parallel to
diff --git a/pom.xml b/pom.xml
index 84fad31a2..4b6553f98 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,550 +1,350 @@
 <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
-
-
-
-
+      
+  
   <modelVersion>4.0.0</modelVersion>
-
-
-
-
+      
+  
   <parent>
-
-
-
-
+            
+    
     <groupId>de.julielab</groupId>
-
-
-
-
+            
+    
     <artifactId>jcore-parent</artifactId>
-
-
-
-
+            
+    
     <version>2.5.2-SNAPSHOT</version>
-
-
-
-
+          
+  
   </parent>
-
-
-
-
+      
+  
   <artifactId>jcore-base</artifactId>
-
-
-
-
+      
+  
   <packaging>pom</packaging>
-
-
-
-
+      
+  
   <name>JCoRe Base</name>
-
-
-
-
+      
+  
   <description>The POM for the JCoRe Base projects.</description>
-
-
-
-
+      
+  
   <version>2.6.0-SNAPSHOT</version>
-
-
-
-
+      
+  
   <organization>
-
-
-
-
+            
+    
     <name>JULIE Lab, Germany</name>
-
-
-
-
+            
+    
     <url>http://www.julielab.de</url>
-
-
-
-
+          
+  
   </organization>
-
-
-
-
+      
+  
   <licenses>
-
-
-
-
+            
+    
     <license>
-
-
-
-
+                  
+      
       <name>BSD-2-Clause</name>
-
-
-
-
+                  
+      
       <url>https://opensource.org/licenses/BSD-2-Clause</url>
-
-
-
-
+                
+    
     </license>
-
-
-
-
+          
+  
   </licenses>
-
-
-
-
+      
+  
   <url>https://github.com/JULIELab/jcore-base</url>
-
-
-
-
+      
+  
   <dependencies>
-
-
-
-
+            
+    
     <dependency>
-
-
-
-
+                  
+      
       <groupId>org.apache.uima</groupId>
-
-
-
-
+                  
+      
       <artifactId>uimaj-core</artifactId>
-
-
-
-
+                  
+      
       <version>${uima-version}</version>
-
-
-
-
+                
+    
     </dependency>
-
-
-
-
+            
+    
     <dependency>
-
-
-
-
+                  
+      
       <groupId>org.apache.uima</groupId>
-
-
-
-
+                  
+      
       <artifactId>uimafit-core</artifactId>
-
-
-
-
+                  
+      
       <version>${uimafit-version}</version>
-
-
-
-
+                
+    
     </dependency>
-
-
-
-
+          
+  
   </dependencies>
-
-
-
-
+      
+  
   <modules>
-
-
-
+            
+    
     <module>jcore-annotation-adder-ae</module>
-
-
-
+            
+    
     <module>jcore-ace-reader</module>
-
-
-
-
+            
+    
     <module>jcore-acronym-ae</module>
-
-
-
+            
+    
     <module>jcore-acronym-writer</module>
-
-
-
-
+            
+    
     <module>jcore-banner-ae</module>
-
-
-
+            
+    
     <module>jcore-bc2gm-reader</module>
-
-
-
+            
+    
     <module>jcore-bc2gmformat-writer</module>
-
-
-
+            
+    
     <module>jcore-biolemmatizer-ae</module>
-
-
-
-
+            
+    
     <module>jcore-bionlpformat-consumer</module>
-
-
-
-
+            
+    
     <module>jcore-bionlpformat-reader</module>
-
-
-
-
+            
+    
     <module>jcore-biosem-ae</module>
-
-
-
-
+            
+    
     <module>jcore-conll-consumer</module>
-
-
-
-
+            
+    
     <module>jcore-coordination-baseline-ae</module>
-
-
-
+            
+    
     <module>jcore-cord19-reader</module>
-
-
-
+            
+    
     <module>jcore-coreference-writer</module>
-
-
-
+            
+    
     <module>jcore-ct-reader</module>
-
-
-
+            
+    
     <module>jcore-db-checkpoint-ae</module>
-
-
-
+            
+    
     <module>jcore-descriptor-creator</module>
-
-
-
+            
+    
     <module>jcore-dta-reader</module>
-
-
-
-
+            
+    
     <module>jcore-ec-code-ae</module>
-
-
-
-
+            
+    
     <module>jcore-elasticsearch-consumer</module>
-
-
-
-
+            
+    
     <module>jcore-embedding-writer</module>
-
-
-
-
+            
+    
     <module>jcore-event-flattener-ae</module>
-
-
-
-
+            
+    
     <module>jcore-feature-value-replacement-ae</module>
-
-
-
-
+            
+    
     <module>jcore-file-reader</module>
-
-
-
-
+            
+    
     <module>jcore-flair-ner-ae</module>
-
-
-
+            
+    
     <module>jcore-flair-token-embedding-ae</module>
-
-
-
+            
+    
     <module>jcore-flow-controllers</module>
-
+            
+    
     <module>jcore-gnp-bioc-reader</module>
-
+            
+    
     <module>jcore-gnp-bioc-writer</module>
-
-
+            
+    
     <module>jcore-iexml-consumer</module>
-
-
-
-
+            
+    
     <module>jcore-iexml-reader</module>
-
-
-
-
+            
+    
     <module>jcore-ign-reader</module>
-
-
-
-
+            
+    
     <module>jcore-iob-consumer</module>
-
-
-
-
+            
+    
     <module>jcore-jnet-ae</module>
-
-
-
-
+            
+    
     <module>jcore-jpos-ae</module>
-
-
-
-
+            
+    
     <module>jcore-jsbd-ae</module>
-
-
-
-
+            
+    
     <module>jcore-jtbd-ae</module>
-
-
-
-
+            
+    
     <module>jcore-julielab-entity-evaluator-consumer</module>
-
-
-
-
+            
+    
     <module>jcore-likelihood-assignment-ae</module>
-
-
-
-
+            
+    
     <module>jcore-likelihood-detection-ae</module>
-
-
-
+            
+    
     <module>jcore-line-multiplier</module>
-
-
-
+            
+    
     <module>jcore-lingpipegazetteer-ae</module>
-
-
-
-
+            
+    
     <module>jcore-lingpipe-porterstemmer-ae</module>
-
-
-
-
+            
+    
     <module>jcore-lingscope-ae</module>
-
-
-
-
+            
+    
     <module>jcore-linnaeus-species-ae</module>
-
-
-
-
+            
+    
     <module>jcore-mantra-xml-types</module>
-
-
-
-
+            
+    
     <module>jcore-medxn-ae</module>
-
-
-
-
+            
+    
     <module>jcore-msdoc-reader</module>
-
-
-
-
+            
+    
     <module>jcore-mstparser-ae</module>
-
-
-
-
+            
+    
     <module>jcore-muc7-reader</module>
-
-
-
-
+            
+    
     <module>jcore-mutationfinder-ae</module>
-
-
-
+            
+    
     <module>jcore-neo4j-relations-consumer</module>
-
-
-
-
+            
+    
     <module>jcore-opennlp-chunk-ae</module>
-
-
-
-
+            
+    
     <module>jcore-opennlp-parser-ae</module>
-
-
-
-
+            
+    
     <module>jcore-opennlp-postag-ae</module>
-
-
-
-
+            
+    
     <module>jcore-opennlp-sentence-ae</module>
-
-
-
-
+            
+    
     <module>jcore-opennlp-token-ae</module>
-
-
-
+            
+    
     <module>jcore-ppd-writer</module>
-
-
-
+            
+    
     <module>jcore-pmc-reader</module>
-
-
-
-
+            
+    
     <module>jcore-pubtator-reader</module>
-
-
-
-
+            
+    
     <module>jcore-stanford-lemmatizer-ae</module>
-
-
-
-
+            
+    
     <module>jcore-topic-indexing-ae</module>
-
-
-
-
+            
+    
     <module>jcore-topics-writer</module>
-
-
-
-
+            
+    
     <module>jcore-txt-consumer</module>
-
-
-
-
+            
+    
     <module>jcore-types</module>
-
-
-
-
+            
+    
     <module>jcore-utilities</module>
-
-
-
-
+            
+    
     <module>jcore-xml-mapper</module>
-
-
-
-
+            
+    
     <module>jcore-xml-reader</module>
-
-
-
-
+            
+    
     <module>jcore-xmi-reader</module>
-
-
-
-
+            
+    
     <module>jcore-xmi-writer</module>
-
-
-
-
+            
+    
     <module>jedis-parent</module>
-
-
-
+            
+    
     <module>jcore-jedis-integration-tests</module>
-
-
-
-
+            
+    
+    <module>jcore-mmax2-reader</module>
+      
   </modules>
-
-
-
-
+      
+  
   <scm>
-
-
-
-
+            
+    
     <connection>scm:git:https://github.com/JULIELab/jcore-base
         </connection>
-
-
-
-
+            
+    
     <developerConnection>scm:git:https://github.com/JULIELab/jcore-base</developerConnection>
-
-
-
-
+            
+    
     <url>scm:git:https://github.com/JULIELab/jcore-base</url>
-
-
-
-
+          
+  
   </scm>
-
-
-
+    
 
 </project>

From 2d9b2fd84aaad1df7b1614194600920d7052b7c4 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 20 Apr 2022 12:07:54 +0200
Subject: [PATCH 182/269] Add a test for the correct handling of multiple gene
 IDs per gene mention.

---
 .../jcore/reader/BioCCasPopulatorTest.java    |  24 +++-
 .../resources/multipleGeneIdsDocument.xml     | 136 ++++++++++++++++++
 2 files changed, 159 insertions(+), 1 deletion(-)
 create mode 100644 jcore-gnp-bioc-reader/src/test/resources/multipleGeneIdsDocument.xml

diff --git a/jcore-gnp-bioc-reader/src/test/java/de/julielab/jcore/reader/BioCCasPopulatorTest.java b/jcore-gnp-bioc-reader/src/test/java/de/julielab/jcore/reader/BioCCasPopulatorTest.java
index b93ad6c46..e1ffdc7e4 100644
--- a/jcore-gnp-bioc-reader/src/test/java/de/julielab/jcore/reader/BioCCasPopulatorTest.java
+++ b/jcore-gnp-bioc-reader/src/test/java/de/julielab/jcore/reader/BioCCasPopulatorTest.java
@@ -4,6 +4,7 @@
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSArray;
 import org.assertj.core.api.Condition;
 import org.junit.jupiter.api.Test;
 
@@ -62,7 +63,7 @@ public void populateWithNextDocument() throws Exception {
 
     @Test
     public void addFamilyNames() throws Exception {
-        BioCCasPopulator bioCCasPopulator = new BioCCasPopulator(Path.of("src", "test", "resources","bioc_collection_0_0.xml"), null, null);
+        BioCCasPopulator bioCCasPopulator = new BioCCasPopulator(Path.of("src", "test", "resources", "bioc_collection_0_0.xml"), null, null);
         JCas jCas = getJCas();
         bioCCasPopulator.populateWithNextDocument(jCas);
 
@@ -75,4 +76,25 @@ public void addFamilyNames() throws Exception {
             }
         }
     }
+
+    @Test
+    public void multipleGeneIds() throws Exception {
+        // Check that gene mentions with multiple IDs (enumerations, alternatives, ranges...) result in multiple ResourceEntries in a Gene annotation
+        BioCCasPopulator bioCCasPopulator = new BioCCasPopulator(Path.of("src", "test", "resources", "multipleGeneIdsDocument.xml"), null, null);
+        JCas jCas = getJCas();
+        bioCCasPopulator.populateWithNextDocument(jCas);
+
+        Collection<Gene> genes = JCasUtil.select(jCas, Gene.class);
+        boolean multipleIdGeneFound = false;
+        for (Gene o : genes) {
+            if (o.getBegin() == 805) {
+                multipleIdGeneFound = true;
+                FSArray resourceEntryList = o.getResourceEntryList();
+                assertThat(resourceEntryList).hasSize(2);
+                assertThat(o.getResourceEntryList(0).getEntryId()).isEqualTo("12519");
+                assertThat(o.getResourceEntryList(1).getEntryId()).isEqualTo("12524");
+            }
+        }
+        assertThat(multipleIdGeneFound).isTrue();
+    }
 }
\ No newline at end of file
diff --git a/jcore-gnp-bioc-reader/src/test/resources/multipleGeneIdsDocument.xml b/jcore-gnp-bioc-reader/src/test/resources/multipleGeneIdsDocument.xml
new file mode 100644
index 000000000..1a26ceb19
--- /dev/null
+++ b/jcore-gnp-bioc-reader/src/test/resources/multipleGeneIdsDocument.xml
@@ -0,0 +1,136 @@
+<?xml version="1.0" ?>
+<!DOCTYPE collection
+        SYSTEM 'BioC.dtd'>
+<collection>
+    <source>JCoRe GNormPlus BioC Writer</source>
+    <date>Wed Mar 02 14:58:28 CET 2022</date>
+    <key>PubTator.key</key>
+    <document>
+        <id>16177354</id>
+        <passage>
+            <infon key="type">title</infon>
+            <offset>0</offset>
+            <text>Cellular mechanisms of the adjuvant activity of the flagellin component FljB of Salmonella enterica
+                Serovar Typhimurium to potentiate mucosal and systemic responses.
+            </text>
+            <annotation id="0">
+                <infon key="FocusSpecies">NCBITaxonomyID:90371</infon>
+                <infon key="type">FamilyName</infon>
+                <location offset="52" length="9"/>
+                <text>flagellin</text>
+            </annotation>
+            <annotation id="2">
+                <infon key="NCBI Taxonomy">90371</infon>
+                <infon key="type">Species</infon>
+                <location offset="80" length="39"/>
+                <text>Salmonella enterica Serovar Typhimurium</text>
+            </annotation>
+        </passage>
+        <passage>
+            <infon key="type">abstract</infon>
+            <offset>166</offset>
+            <text>An expanding area of interest is the utilization of microbe-based components to augment mucosal and
+                systemic immune responses to target antigens. Thus, the aim of the present study was to assess if the
+                flagellin component FljB from Salmonella enterica serovar Typhimurium could act as a mucosal adjuvant
+                and then to determine the cellular mechanism(s) by which FljB mediates its adjuvant properties. To
+                determine if FljB could act as a mucosal adjuvant, mice were immunized by the intranasal (i.n.) route
+                with antigen alone or in conjunction with FljB. Additionally, we assessed how FljB affected the levels
+                of the costimulatory molecules B7-1 and B7-2 on dendritic cells by flow cytometry and determined the
+                functional role these costimulatory molecules played in the adjuvant properties of FljB in vivo. Mice
+                immunized by the i.n. route with antigen and FljB exhibited significantly elevated levels of mucosal and
+                systemic antibody and CD4(+)-T-cell responses compared to mice given antigen only. Stimulation of
+                dendritic cells in vitro with FljB resulted in a pronounced increase in the surface expression of B7-1
+                and B7-2. The percentage of dendritic cells expressing B7-2 but not B7-1 increased significantly when
+                stimulated with FljB over a concentration range of 10 to 10,000 ng/ml. Immunization of wild-type and
+                B7-1, B7-2, and B7-1/2 knockout mice by the i.n. route revealed that the ability of FljB to increase
+                B7-2 expression is largely responsible for its adjuvant effect in vivo. These findings demonstrate that
+                FljB can act as an effective mucosal adjuvant and that its ability to enhance the level of B7-2
+                expression is predominantly responsible for its adjuvant properties.
+            </text>
+            <annotation id="4">
+                <infon key="NCBI Gene">12519;12524</infon>
+                <infon key="type">Gene</infon>
+                <location offset="805" length="13"/>
+                <text>B7-1 and B7-2</text>
+            </annotation>
+            <annotation id="8">
+                <infon key="NCBI Gene">12519;12524</infon>
+                <infon key="type">Gene</infon>
+                <location offset="1278" length="13"/>
+                <text>B7-1 and B7-2</text>
+            </annotation>
+            <annotation id="9">
+                <infon key="NCBI Gene">12519;12524</infon>
+                <infon key="type">Gene</infon>
+                <location offset="1502" length="6"/>
+                <text>B7-1/2</text>
+            </annotation>
+            <annotation id="11">
+                <infon key="NCBI Gene">12524</infon>
+                <infon key="type">Gene</infon>
+                <location offset="1782" length="4"/>
+                <text>B7-2</text>
+            </annotation>
+            <annotation id="12">
+                <infon key="NCBI Gene">12524</infon>
+                <infon key="type">Gene</infon>
+                <location offset="1587" length="4"/>
+                <text>B7-2</text>
+            </annotation>
+            <annotation id="13">
+                <infon key="NCBI Gene">12524</infon>
+                <infon key="type">Gene</infon>
+                <location offset="1492" length="4"/>
+                <text>B7-2</text>
+            </annotation>
+            <annotation id="14">
+                <infon key="NCBI Gene">12519</infon>
+                <infon key="type">Gene</infon>
+                <location offset="1486" length="4"/>
+                <text>B7-1</text>
+            </annotation>
+            <annotation id="15">
+                <infon key="NCBI Gene">12519</infon>
+                <infon key="type">Gene</infon>
+                <location offset="1351" length="4"/>
+                <text>B7-1</text>
+            </annotation>
+            <annotation id="16">
+                <infon key="NCBI Gene">12524</infon>
+                <infon key="type">Gene</infon>
+                <location offset="1338" length="4"/>
+                <text>B7-2</text>
+            </annotation>
+            <annotation id="23">
+                <infon key="FocusSpecies">NCBITaxonomyID:90371</infon>
+                <infon key="type">FamilyName</infon>
+                <location offset="368" length="9"/>
+                <text>flagellin</text>
+            </annotation>
+            <annotation id="24">
+                <infon key="NCBI Taxonomy">90371</infon>
+                <infon key="type">Species</infon>
+                <location offset="398" length="39"/>
+                <text>Salmonella enterica serovar Typhimurium</text>
+            </annotation>
+            <annotation id="25">
+                <infon key="NCBI Taxonomy">10090</infon>
+                <infon key="type">Species</infon>
+                <location offset="620" length="4"/>
+                <text>mice</text>
+            </annotation>
+            <annotation id="26">
+                <infon key="NCBI Taxonomy">10090</infon>
+                <infon key="type">Species</infon>
+                <location offset="972" length="4"/>
+                <text>Mice</text>
+            </annotation>
+            <annotation id="27">
+                <infon key="NCBI Taxonomy">10090</infon>
+                <infon key="type">Species</infon>
+                <location offset="1518" length="4"/>
+                <text>mice</text>
+            </annotation>
+        </passage>
+    </document>
+</collection>
\ No newline at end of file

From d716980a843cce6412d868373f339977b42cdf83 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 20 Apr 2022 12:13:21 +0200
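Subject: [PATCH 183/269] Add component.meta file for the jcore-mmax2-reader.

Also reformats StructuredAbstractParser (tabs to spaces) and makes it prepend
the section label (e.g. "AIMS: ") to the text of each abstract section whose
label is present and not "unlabelled".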

---
 jcore-mmax2-reader/component.meta             |  20 ++
 .../mapper/StructuredAbstractParser.java      | 176 +++++++++---------
 2 files changed, 109 insertions(+), 87 deletions(-)
 create mode 100644 jcore-mmax2-reader/component.meta

diff --git a/jcore-mmax2-reader/component.meta b/jcore-mmax2-reader/component.meta
new file mode 100644
index 000000000..386acc60b
--- /dev/null
+++ b/jcore-mmax2-reader/component.meta
@@ -0,0 +1,20 @@
+{
+    "categories": [
+        "reader"
+    ],
+    "description": "Collection reader for MMAX2 annotation projects.",
+    "descriptors": [
+        {
+            "category": "reader",
+            "location": "de.julielab.jcore.cr.mmax2.desc.jcore-mmax2-reader"
+        }
+    ],
+    "exposable": true,
+    "group": "general",
+    "maven-artifact": {
+        "artifactId": "jcore-mmax2-reader",
+        "groupId": "de.julielab",
+        "version": "2.6.0-SNAPSHOT"
+    },
+    "name": "JCoRe MMAX2 reader."
+}
diff --git a/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/mapper/StructuredAbstractParser.java b/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/mapper/StructuredAbstractParser.java
index 5881ab36a..08f79a85d 100644
--- a/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/mapper/StructuredAbstractParser.java
+++ b/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/mapper/StructuredAbstractParser.java
@@ -23,106 +23,108 @@
  * component, if required. <br>
  * <em>NOTE</em>: Using this parser, the AbstractText annotation is already
  * created and should not be set in the mapping file.
- * 
+ *
  * @author faessler
- * 
  */
 public class StructuredAbstractParser implements DocumentTextPartParser {
 
-	private static final boolean newlineBetweenSections = true;
+    private static final boolean newlineBetweenSections = true;
 
-	public List<String> parseDocumentPart(VTDNav vn, PartOfDocument docTextPart, int offset, JCas jCas,
-			byte[] identifier)  {
-		String baseXPath = docTextPart.getXPath();
+    public List<String> parseDocumentPart(VTDNav vn, PartOfDocument docTextPart, int offset, JCas jCas,
+                                          byte[] identifier) {
+        String baseXPath = docTextPart.getXPath();
 
-		List<Map<String, String>> fields = new ArrayList<>();
-		Map<String, String> field = new HashMap<>();
-		field.put(JulieXMLConstants.NAME, "Label");
-		field.put(JulieXMLConstants.XPATH, "@Label");
-		fields.add(field);
+        List<Map<String, String>> fields = new ArrayList<>();
+        Map<String, String> field = new HashMap<>();
+        field.put(JulieXMLConstants.NAME, "Label");
+        field.put(JulieXMLConstants.XPATH, "@Label");
+        fields.add(field);
 
-		field = new HashMap<>();
-		field.put(JulieXMLConstants.NAME, "NlmCategory");
-		field.put(JulieXMLConstants.XPATH, "@NlmCategory");
-		fields.add(field);
+        field = new HashMap<>();
+        field.put(JulieXMLConstants.NAME, "NlmCategory");
+        field.put(JulieXMLConstants.XPATH, "@NlmCategory");
+        fields.add(field);
 
-		field = new HashMap<>();
-		field.put(JulieXMLConstants.NAME, "AbstractText");
-		field.put(JulieXMLConstants.XPATH, ".");
-		fields.add(field);
-		Iterator<Map<String, Object>> rowIterator = JulieXMLTools.constructRowIterator(vn, baseXPath + "/AbstractText",
-				fields, new String(identifier));
-		List<AbstractSection> abstractParts = new ArrayList<>();
-		// for the text contents
-		StringBuilder sb = new StringBuilder();
+        field = new HashMap<>();
+        field.put(JulieXMLConstants.NAME, "AbstractText");
+        field.put(JulieXMLConstants.XPATH, ".");
+        fields.add(field);
+        Iterator<Map<String, Object>> rowIterator = JulieXMLTools.constructRowIterator(vn, baseXPath + "/AbstractText",
+                fields, new String(identifier));
+        List<AbstractSection> abstractParts = new ArrayList<>();
+        // for the text contents
+        StringBuilder sb = new StringBuilder();
 
-		int sectionOffset = offset;
-		while (rowIterator.hasNext()) {
-			Map<String, Object> abstractSectionData = rowIterator.next();
-			String label = (String) abstractSectionData.get("Label");
-			String nlmCategory = (String) abstractSectionData.get("NlmCategory");
-			String abstractSectionText = (String) abstractSectionData.get("AbstractText");
-			if (newlineBetweenSections) {
-				// in case the last section was empty, we delete the trailing
-				// newline
-				if (sb.length() > 0 && StringUtils.isBlank(abstractSectionText)) {
-					sb.deleteCharAt(sb.length() - 1);
-					--sectionOffset;
-				}
-			}
-			sb.append(abstractSectionText);
+        int sectionOffset = offset;
+        while (rowIterator.hasNext()) {
+            Map<String, Object> abstractSectionData = rowIterator.next();
+            String label = (String) abstractSectionData.get("Label");
+            String nlmCategory = (String) abstractSectionData.get("NlmCategory");
+            String abstractSectionText = (String) abstractSectionData.get("AbstractText");
+            if (newlineBetweenSections) {
+                // in case the last section was empty, we delete the trailing
+                // newline
+                if (sb.length() > 0 && StringUtils.isBlank(abstractSectionText)) {
+                    sb.deleteCharAt(sb.length() - 1);
+                    --sectionOffset;
+                }
+            }
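+            // prepend the structured abstract section label to the text, e.g. "AIMS: ...", "BACKGROUND: ..."; NOTE(review): sectionOffset below advances only by the section text length, not by the label - confirm the annotation offsets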
+            if (null != label && !"unlabelled".equalsIgnoreCase(label))
+                sb.append(label).append(": ");
+            sb.append(abstractSectionText);
 
-			// if label and nlmCategory are null, there is no section heading;
-			// most probably this just isn't a structured abstract
-			if (null != label || null != nlmCategory) {
-				AbstractSectionHeading abstractPartHeading = new AbstractSectionHeading(jCas);
-				abstractPartHeading.setLabel(label);
-				abstractPartHeading.setNlmCategory(nlmCategory);
-				abstractPartHeading.setTitleType("abstractSection");
-				abstractPartHeading.addToIndexes();
+            // if label and nlmCategory are null, there is no section heading;
+            // most probably this just isn't a structured abstract
+            if (null != label || null != nlmCategory) {
+                AbstractSectionHeading abstractPartHeading = new AbstractSectionHeading(jCas);
+                abstractPartHeading.setLabel(label);
+                abstractPartHeading.setNlmCategory(nlmCategory);
+                abstractPartHeading.setTitleType("abstractSection");
+                abstractPartHeading.addToIndexes();
 
-				AbstractSection abstractPart = new AbstractSection(jCas);
-				abstractPart.setBegin(sectionOffset);
-				sectionOffset += abstractSectionText.length();
-				abstractPart.setEnd(sectionOffset);
-				abstractPart.setAbstractSectionHeading(abstractPartHeading);
-				abstractPart.addToIndexes();
+                AbstractSection abstractPart = new AbstractSection(jCas);
+                abstractPart.setBegin(sectionOffset);
+                sectionOffset += abstractSectionText.length();
+                abstractPart.setEnd(sectionOffset);
+                abstractPart.setAbstractSectionHeading(abstractPartHeading);
+                abstractPart.addToIndexes();
 
-				abstractParts.add(abstractPart);
-			} else {
-				sectionOffset += abstractSectionText.length();
-			}
+                abstractParts.add(abstractPart);
+            } else {
+                sectionOffset += abstractSectionText.length();
+            }
 
-			// let's insert a line break after each section text
-			if (newlineBetweenSections && sb.length() > 0 && rowIterator.hasNext()) {
-				sb.append("\n");
-				++sectionOffset;
-			}
-		}
+            // let's insert a line break after each section text
+            if (newlineBetweenSections && sb.length() > 0 && rowIterator.hasNext()) {
+                sb.append("\n");
+                ++sectionOffset;
+            }
+        }
 
-		// only create an abstract annotation if there actually is an abstract
-		if (!abstractParts.isEmpty() || sectionOffset > offset) {
-			if (sectionOffset == offset) {
-				// there was no abstract but just empty abstract sections; decrement the offsets so we stay with existing document text
-				--offset;
-				--sectionOffset;
-				for (AbstractSection section : abstractParts) {
-					section.setBegin(offset);
-					section.setEnd(offset);
-				}
-			}
-			AbstractText abstractText = new AbstractText(jCas, offset, sectionOffset);
-			abstractText.setAbstractType("main");
-			if (abstractParts.size() > 0) {
-				FSArray sectionsArray = new FSArray(jCas, abstractParts.size());
-				for (int i = 0; i < abstractParts.size(); ++i)
-					sectionsArray.set(i, abstractParts.get(i));
-				abstractText.setStructuredAbstractParts(sectionsArray);
-			}
-			abstractText.addToIndexes();
-			return Collections.singletonList(sb.toString());
-		}
-		return Collections.emptyList();
-	}
+        // only create an abstract annotation if there actually is an abstract
+        if (!abstractParts.isEmpty() || sectionOffset > offset) {
+            if (sectionOffset == offset) {
+                // there was no abstract but just empty abstract sections; decrement the offsets so we stay with existing document text
+                --offset;
+                --sectionOffset;
+                for (AbstractSection section : abstractParts) {
+                    section.setBegin(offset);
+                    section.setEnd(offset);
+                }
+            }
+            AbstractText abstractText = new AbstractText(jCas, offset, sectionOffset);
+            abstractText.setAbstractType("main");
+            if (abstractParts.size() > 0) {
+                FSArray sectionsArray = new FSArray(jCas, abstractParts.size());
+                for (int i = 0; i < abstractParts.size(); ++i)
+                    sectionsArray.set(i, abstractParts.get(i));
+                abstractText.setStructuredAbstractParts(sectionsArray);
+            }
+            abstractText.addToIndexes();
+            return Collections.singletonList(sb.toString());
+        }
+        return Collections.emptyList();
+    }
 
 }

From 90ac8b499941ac28daf7e83dd8933eaf9ea490f4 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 20 Apr 2022 13:16:36 +0200
Subject: [PATCH 184/269] Bump neo4j plugins version.

---
 jcore-neo4j-relations-consumer/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jcore-neo4j-relations-consumer/pom.xml b/jcore-neo4j-relations-consumer/pom.xml
index dca3293f1..3c5d394e3 100644
--- a/jcore-neo4j-relations-consumer/pom.xml
+++ b/jcore-neo4j-relations-consumer/pom.xml
@@ -53,7 +53,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>julielab-neo4j-plugins-concepts</artifactId>
-            <version>3.0.1-SNAPSHOT</version>
+            <version>3.1.0-SNAPSHOT</version>
             <scope>test</scope>
         </dependency>
         <dependency>

From bc1024a4f1d6864426af3b1dacfb63d720267581 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 20 Apr 2022 13:44:23 +0200
Subject: [PATCH 185/269] Add IDs to sentences.

---
 jcore-mmax2-reader/component.meta                          | 2 +-
 .../main/java/de/julielab/jcore/cr/mmax2/MMAX2Reader.java  | 7 ++++++-
 .../java/de/julielab/jcore/cr/mmax2/MMAX2ReaderTest.java   | 7 +++++++
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/jcore-mmax2-reader/component.meta b/jcore-mmax2-reader/component.meta
index 386acc60b..d0e5293fb 100644
--- a/jcore-mmax2-reader/component.meta
+++ b/jcore-mmax2-reader/component.meta
@@ -16,5 +16,5 @@
         "groupId": "de.julielab",
         "version": "2.6.0-SNAPSHOT"
     },
-    "name": "JCoRe MMAX2 reader."
+    "name": "JCoRe MMAX2 reader"
 }
diff --git a/jcore-mmax2-reader/src/main/java/de/julielab/jcore/cr/mmax2/MMAX2Reader.java b/jcore-mmax2-reader/src/main/java/de/julielab/jcore/cr/mmax2/MMAX2Reader.java
index fa09f4c69..ac4e78c59 100644
--- a/jcore-mmax2-reader/src/main/java/de/julielab/jcore/cr/mmax2/MMAX2Reader.java
+++ b/jcore-mmax2-reader/src/main/java/de/julielab/jcore/cr/mmax2/MMAX2Reader.java
@@ -2,6 +2,7 @@
 
 import de.julielab.jcore.types.ConceptMention;
 import de.julielab.jcore.types.Gene;
+import de.julielab.jcore.types.Sentence;
 import de.julielab.jcore.types.Token;
 import de.julielab.jcore.utility.JCoReAnnotationTools;
 import de.julielab.jules.mmax.MarkableContainer;
@@ -29,7 +30,7 @@
 import java.util.stream.Collectors;
 import java.util.stream.IntStream;
 
-@ResourceMetaData(name = "JCoRe MMAX2 reader.", description = "Collection reader for MMAX2 annotation projects.", vendor = "JULIE Lab Jena, Germany")
+@ResourceMetaData(name = "JCoRe MMAX2 reader", description = "Collection reader for MMAX2 annotation projects.", vendor = "JULIE Lab Jena, Germany")
 public class MMAX2Reader extends JCasCollectionReader_ImplBase {
 
     public static final String PARAM_INPUT_DIR = "InputDir";
@@ -211,6 +212,7 @@ private void produceOutput(MMAX2Discourse discourse, WordInformation[] words, JC
         }
         for (int i = 0; i < annotationLevels.length; ++i) {
             Iterator<Markable> iterator = discourse.getMarkableLevelByName(annotationLevels[i], false).getMarkables().stream().map(Markable.class::cast).filter(Predicate.not(Markable::isDiscontinuous)).iterator();
+            int id = 0;
             while (iterator.hasNext()) {
                 Markable markable = iterator.next();
                 int beginPosition = markable.getLeftmostDiscoursePosition();
@@ -227,7 +229,10 @@ private void produceOutput(MMAX2Discourse discourse, WordInformation[] words, JC
                 a.setEnd(endOffset);
                 if (a instanceof ConceptMention)
                     ((ConceptMention) a).setSpecificType(markable.getAttributeValue(markable.getMarkableLevelName()));
+                else if (a instanceof Sentence)
+                    ((Sentence)a).setId(String.valueOf(id));
                 a.addToIndexes();
+                ++id;
             }
         }
         for (WordInformation word : words) {
diff --git a/jcore-mmax2-reader/src/test/java/de/julielab/jcore/cr/mmax2/MMAX2ReaderTest.java b/jcore-mmax2-reader/src/test/java/de/julielab/jcore/cr/mmax2/MMAX2ReaderTest.java
index 410b42ed1..9f8c59f56 100644
--- a/jcore-mmax2-reader/src/test/java/de/julielab/jcore/cr/mmax2/MMAX2ReaderTest.java
+++ b/jcore-mmax2-reader/src/test/java/de/julielab/jcore/cr/mmax2/MMAX2ReaderTest.java
@@ -30,13 +30,20 @@ public void testReader() throws Exception {
                 MMAX2Reader.PARAM_UIMA_ANNOTATION_TYPES, new String[]{"de.julielab.jcore.types.Protein", "de.julielab.jcore.types.Sentence"});
         assertThat(reader.hasNext()).isTrue();
         reader.getNext(jCas.getCas());
+
         // the text should be tokenized because we did not provide the original text
         assertThat(jCas.getDocumentText()).startsWith("Characterization of antihuman IFNAR-1 monoclonal antibodies : epitope localization and functional analysis .");
         Collection<Protein> proteins = JCasUtil.select(jCas, Protein.class);
         assertThat(proteins).hasSize(16);
+
         assertThat(proteins).map(Protein::getCoveredText).contains("IFNAR-1", "type I interferon receptor", "HuIFNAR-1", "Stat");
         Collection<Sentence> sentences = JCasUtil.select(jCas, Sentence.class);
         assertThat(sentences).hasSize(10);
+
+        assertThat(proteins).extracting(Protein::getSpecificType).filteredOn(type -> type.equals("protein")).hasSize(13);
+        assertThat(proteins).extracting(Protein::getSpecificType).filteredOn(type -> type.equals("protein_complex")).hasSize(2);
+        assertThat(proteins).extracting(Protein::getSpecificType).filteredOn(type -> type.equals("protein_familiy_or_group")).hasSize(1);
+
         Collection<Token> tokens = JCasUtil.select(jCas, Token.class);
         // check a small sample of tokens that should have been created
        assertThat(tokens).map(Token::getCoveredText).contains("Characterization", "IFNAR-1", ":", "(", "subunits", "recognition", ".", "HuIFNAR-1");

From f6a8ede41f602e7c68c332cdadfea0ffc72b6d05 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 20 Apr 2022 13:44:31 +0200
Subject: [PATCH 186/269] Add required dependency.

---
 jcore-bc2gmformat-writer/pom.xml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/jcore-bc2gmformat-writer/pom.xml b/jcore-bc2gmformat-writer/pom.xml
index 37c5a1de0..75acd4004 100644
--- a/jcore-bc2gmformat-writer/pom.xml
+++ b/jcore-bc2gmformat-writer/pom.xml
@@ -36,6 +36,10 @@
             <artifactId>jcore-utilities</artifactId>
             <version>${jcore-utilities-version}</version>
         </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>julielab-java-utilities</artifactId>
+        </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-descriptor-creator</artifactId>

From fd57ed03176aebf3d149908e75eb374af3efe98f Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 20 Apr 2022 15:47:47 +0200
Subject: [PATCH 187/269] Fix an issue where the sentence ID column required
 the document ID column to be output.

---
 .../EntityEvaluatorConsumer.java              | 14 ++++----
 .../EntityEvaluatorConsumerTest.java          | 36 +++++++++++++++++++
 2 files changed, 42 insertions(+), 8 deletions(-)

diff --git a/jcore-julielab-entity-evaluator-consumer/src/main/java/de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumer.java b/jcore-julielab-entity-evaluator-consumer/src/main/java/de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumer.java
index bffd2311d..16ad3fff3 100644
--- a/jcore-julielab-entity-evaluator-consumer/src/main/java/de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumer.java
+++ b/jcore-julielab-entity-evaluator-consumer/src/main/java/de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumer.java
@@ -157,13 +157,11 @@ private void addOffsetsColumn(JCas aJCas) {
     }
 
     private void addDocumentIdColumn(JCas aJCas) throws CASException {
-        if (outputColumnNames.contains(DOCUMENT_ID_COLUMN)) {
-            Column c = columns.get(DOCUMENT_ID_COLUMN);
-            if (c == null)
-                c = new Column(DOCUMENT_ID_COLUMN + ":" + Header.class.getCanonicalName() + "=/docId", null, aJCas.getTypeSystem());
-            c = new DocumentIdColumn(c);
-            columns.put(DOCUMENT_ID_COLUMN, c);
-        }
+        Column c = columns.get(DOCUMENT_ID_COLUMN);
+        if (c == null)
+            c = new Column(DOCUMENT_ID_COLUMN + ":" + Header.class.getCanonicalName() + "=/docId", null, aJCas.getTypeSystem());
+        c = new DocumentIdColumn(c);
+        columns.put(DOCUMENT_ID_COLUMN, c);
     }
 
     private void addDocumentTextSha256Column() {
@@ -183,7 +181,7 @@ private void addSentenceIdColumn(JCas aJCas) throws CASException {
             Column docIdColumn = columns.get(DOCUMENT_ID_COLUMN);
             String documentId = null;
             if (docIdColumn != null)
-                documentId = docIdColumn.getValue(aJCas.getDocumentAnnotationFs(), aJCas).getFirst();
+                documentId = docIdColumn.getValue(null, aJCas).getFirst();
             Type sentenceType = c.getSingleType();
             // put all sentences into an index with an
             // overlap-comparator - this way the index can be
diff --git a/jcore-julielab-entity-evaluator-consumer/src/test/java/de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumerTest.java b/jcore-julielab-entity-evaluator-consumer/src/test/java/de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumerTest.java
index ca29657b9..b0589b592 100644
--- a/jcore-julielab-entity-evaluator-consumer/src/test/java/de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumerTest.java
+++ b/jcore-julielab-entity-evaluator-consumer/src/test/java/de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumerTest.java
@@ -75,6 +75,42 @@ public void testEntityEvaluatorConsumerSingleEntity() throws Exception {
 		assertEquals("document1	document1:0	23	gene", lines.get(0));
 	}
 
+	@Test
+	public void testEntityEvaluatorConsumerSingleEntity2() throws Exception {
+		// The same test as above but minus the DocumentId column
+		JCas jcas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-mention-types",
+				"de.julielab.jcore.types.jcore-semantics-biology-types",
+				"de.julielab.jcore.types.jcore-document-meta-types");
+		AnalysisEngine consumer = AnalysisEngineFactory.createEngine(EntityEvaluatorConsumer.class,
+				PARAM_COLUMN_DEFINITIONS,
+				new String[] { "geneid:Gene=/resourceEntryList[0]/entryId", "name:/:coveredText()" },
+				// We here use the default SentenceId column, we did not provide a definition!
+				PARAM_OUTPUT_COLUMNS, new String[] { SENTENCE_ID_COLUMN, "geneid", "name" },
+				PARAM_TYPE_PREFIX, "de.julielab.jcore.types", PARAM_OUTPUT_FILE, "src/test/resources/outfile-test.tsv");
+
+		jcas.setDocumentText("One gene one sentence.");
+		Header h = new Header(jcas);
+		h.setDocId("document1");
+		h.addToIndexes();
+		Sentence s = new Sentence(jcas, 0, jcas.getDocumentText().length());
+		s.setId("sentence1");
+		s.addToIndexes();
+		Gene g = new Gene(jcas, 4, 8);
+		GeneResourceEntry re = new GeneResourceEntry(jcas);
+		re.setEntryId("23");
+		FSArray array = new FSArray(jcas, 1);
+		array.set(0, re);
+		g.setResourceEntryList(array);
+		g.addToIndexes();
+
+		consumer.process(jcas.getCas());
+		consumer.collectionProcessComplete();
+
+		List<String> lines = Files.readLines(new File("src/test/resources/outfile-test.tsv"), Charset.forName("UTF-8"));
+		assertEquals(1, lines.size());
+		assertEquals("document1:0	23	gene", lines.get(0));
+	}
+
 	@Test
 	public void testEntityEvaluatorConsumerNoEntities() throws Exception {
 		JCas jcas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-mention-types",

From 49be6c89573ce03a1360f5a64cb086350c8844d7 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 20 Apr 2022 16:49:37 +0200
Subject: [PATCH 188/269] Add a detailed error message for missing sentences in
 the JCoReEntityDataset.

---
 .../de/julielab/jcore/banner/dataset/JCoReEntityDataset.java    | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/jcore-banner-ae/src/main/java/de/julielab/jcore/banner/dataset/JCoReEntityDataset.java b/jcore-banner-ae/src/main/java/de/julielab/jcore/banner/dataset/JCoReEntityDataset.java
index 1db2578c9..f22ed040c 100644
--- a/jcore-banner-ae/src/main/java/de/julielab/jcore/banner/dataset/JCoReEntityDataset.java
+++ b/jcore-banner-ae/src/main/java/de/julielab/jcore/banner/dataset/JCoReEntityDataset.java
@@ -65,6 +65,8 @@ public void load(File sentenceFile, File mentionsFile) {
 			mentReader.lines().forEach(ml -> {
 				String[] split = ml.split("\\t");
 				Sentence sentence = sentences.get(split[0]);
+				if (sentence == null)
+					throw new IllegalStateException("The gene mention '" + ml + "' is associated with sentence ID '" + split[0] + "' but such a sentence ID was not found in " + sentenceFile.getAbsolutePath());
 				int begin = Integer.parseInt(split[1]);
 				int end = Integer.parseInt(split[2]);
 				EntityType label = EntityType.getType(split[3]);

From f4a8953adc2dafab4ccf6cccce6c567bc5da6c24 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 20 Apr 2022 16:50:25 +0200
Subject: [PATCH 189/269] Set the default offset scope to documents even when
 sentence IDs are output.

---
 .../jcore/consumer/entityevaluator/EntityEvaluatorConsumer.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jcore-julielab-entity-evaluator-consumer/src/main/java/de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumer.java b/jcore-julielab-entity-evaluator-consumer/src/main/java/de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumer.java
index 16ad3fff3..413e8fa87 100644
--- a/jcore-julielab-entity-evaluator-consumer/src/main/java/de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumer.java
+++ b/jcore-julielab-entity-evaluator-consumer/src/main/java/de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumer.java
@@ -263,7 +263,7 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
 
         offsetMode = null == offsetModeStr ? OffsetMode.CharacterSpan : OffsetMode.valueOf(offsetModeStr);
         if (null == offsetScopeStr) {
-            offsetScope = outputColumnNames.contains(SENTENCE_ID_COLUMN) ? OffsetScope.Sentence : OffsetScope.Document;
+            offsetScope = OffsetScope.Document;
         } else {
             offsetScope = OffsetScope.valueOf(offsetScopeStr);
         }

From 4dea6a1a10bf2b7142fc9df0e28277a52f636454 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 20 Apr 2022 16:51:10 +0200
Subject: [PATCH 190/269] Revert the change that added the structured abstract
 section labels to the text.

This was only added briefly to re-create the original documents of the ProGene corpus where the headings are part of the texts.
---
 .../reader/xmlmapper/mapper/StructuredAbstractParser.java     | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/mapper/StructuredAbstractParser.java b/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/mapper/StructuredAbstractParser.java
index 08f79a85d..ce46c09f6 100644
--- a/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/mapper/StructuredAbstractParser.java
+++ b/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/mapper/StructuredAbstractParser.java
@@ -70,8 +70,8 @@ public List<String> parseDocumentPart(VTDNav vn, PartOfDocument docTextPart, int
                 }
             }
             // comment in to add the structured abstract section labels to the text, e.g. "AIMS: ...", "BACKGROUND: ..."
-            if (null != label && !"unlabelled".equalsIgnoreCase(label))
-                sb.append(label).append(": ");
+//            if (null != label && !"unlabelled".equalsIgnoreCase(label))
+//                sb.append(label).append(": ");
             sb.append(abstractSectionText);
 
             // if label and nlmCategory are null, there is no section heading;

From d311a6a92485cea02a0dfff5d40899b72fce15c3 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 20 Apr 2022 16:51:37 +0200
Subject: [PATCH 191/269] Add a Header annotation containing the document ID.

---
 .../julielab/jcore/cr/mmax2/MMAX2Reader.java  | 10 +++---
 .../jcore/cr/mmax2/MMAX2ReaderTest.java       | 31 +++++++++++++++++--
 2 files changed, 35 insertions(+), 6 deletions(-)

diff --git a/jcore-mmax2-reader/src/main/java/de/julielab/jcore/cr/mmax2/MMAX2Reader.java b/jcore-mmax2-reader/src/main/java/de/julielab/jcore/cr/mmax2/MMAX2Reader.java
index ac4e78c59..85634bb56 100644
--- a/jcore-mmax2-reader/src/main/java/de/julielab/jcore/cr/mmax2/MMAX2Reader.java
+++ b/jcore-mmax2-reader/src/main/java/de/julielab/jcore/cr/mmax2/MMAX2Reader.java
@@ -1,9 +1,6 @@
 package de.julielab.jcore.cr.mmax2;
 
-import de.julielab.jcore.types.ConceptMention;
-import de.julielab.jcore.types.Gene;
-import de.julielab.jcore.types.Sentence;
-import de.julielab.jcore.types.Token;
+import de.julielab.jcore.types.*;
 import de.julielab.jcore.utility.JCoReAnnotationTools;
 import de.julielab.jules.mmax.MarkableContainer;
 import de.julielab.jules.mmax.Statistics;
@@ -25,6 +22,7 @@
 import org.slf4j.LoggerFactory;
 
 import java.io.*;
+import java.util.List;
 import java.util.*;
 import java.util.function.Predicate;
 import java.util.stream.Collectors;
@@ -249,6 +247,10 @@ else if (a instanceof Sentence)
         }
         String textPlain = outPlain.toString();
         jCas.setDocumentText(textPlain);
+
+        Header h = new Header(jCas);
+        h.setDocId(pmid);
+        h.addToIndexes();
     }
 
     private void handleOriginalTextInformation(String pmid, WordInformation[] words) throws CollectionException {
diff --git a/jcore-mmax2-reader/src/test/java/de/julielab/jcore/cr/mmax2/MMAX2ReaderTest.java b/jcore-mmax2-reader/src/test/java/de/julielab/jcore/cr/mmax2/MMAX2ReaderTest.java
index 9f8c59f56..65a401a0b 100644
--- a/jcore-mmax2-reader/src/test/java/de/julielab/jcore/cr/mmax2/MMAX2ReaderTest.java
+++ b/jcore-mmax2-reader/src/test/java/de/julielab/jcore/cr/mmax2/MMAX2ReaderTest.java
@@ -1,5 +1,6 @@
 package de.julielab.jcore.cr.mmax2;
 
+import de.julielab.jcore.types.Header;
 import de.julielab.jcore.types.Protein;
 import de.julielab.jcore.types.Sentence;
 import de.julielab.jcore.types.Token;
@@ -23,7 +24,7 @@ public class MMAX2ReaderTest {
 
     @Test
     public void testReader() throws Exception {
-        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types", "de.julielab.jcore.types.jcore-semantics-biology-types");
+        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types", "de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-meta-types");
         CollectionReader reader = CollectionReaderFactory.createReader("de.julielab.jcore.cr.mmax2.desc.jcore-mmax2-reader",
                 MMAX2Reader.PARAM_INPUT_DIR, Path.of("src", "test", "resources", "input").toString(),
                 MMAX2Reader.PARAM_ANNOTATION_LEVELS, new String[]{"proteins", "sentence"},
@@ -31,6 +32,9 @@ public void testReader() throws Exception {
         assertThat(reader.hasNext()).isTrue();
         reader.getNext(jCas.getCas());
 
+        Header h = JCasUtil.selectSingle(jCas, Header.class);
+        assertThat(h.getDocId()).isEqualTo("10048764");
+
         // the text should be tokenized because we did not provide the original text
         assertThat(jCas.getDocumentText()).startsWith("Characterization of antihuman IFNAR-1 monoclonal antibodies : epitope localization and functional analysis .");
         Collection<Protein> proteins = JCasUtil.select(jCas, Protein.class);
@@ -39,6 +43,7 @@ public void testReader() throws Exception {
         assertThat(proteins).map(Protein::getCoveredText).contains("IFNAR-1", "type I interferon receptor", "HuIFNAR-1", "Stat");
         Collection<Sentence> sentences = JCasUtil.select(jCas, Sentence.class);
         assertThat(sentences).hasSize(10);
+        assertThat(sentences).extracting(Sentence::getId).containsExactlyInAnyOrder("0", "1", "2", "3", "4", "5", "6", "7", "8", "9");
 
         assertThat(proteins).extracting(Protein::getSpecificType).filteredOn(type -> type.equals("protein")).hasSize(13);
         assertThat(proteins).extracting(Protein::getSpecificType).filteredOn(type -> type.equals("protein_complex")).hasSize(2);
@@ -51,7 +56,7 @@ public void testReader() throws Exception {
 
     @Test
     public void testReaderOriginalText() throws Exception {
-        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types", "de.julielab.jcore.types.jcore-semantics-biology-types");
+        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types", "de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-meta-types");
         CollectionReader reader = CollectionReaderFactory.createReader("de.julielab.jcore.cr.mmax2.desc.jcore-mmax2-reader",
                 MMAX2Reader.PARAM_INPUT_DIR, Path.of("src", "test", "resources", "input").toString(),
                 MMAX2Reader.PARAM_ORIGINAL_TEXT_FILES, Path.of("src", "test", "resources", "originalText").toString(),
@@ -68,4 +73,26 @@ public void testReaderOriginalText() throws Exception {
         // check a small sample of tokens that should have been created
         assertThat(tokens).map(Token::getCoveredText).contains("Characterization", "IFNAR-1", ":", "(", "subunits", "recognition", ".", "HuIFNAR-1");
     }
+
+    @Test
+    public void testReader2() throws Exception {
+        JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types", "de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-meta-types");
+        CollectionReader reader = CollectionReaderFactory.createReader("de.julielab.jcore.cr.mmax2.desc.jcore-mmax2-reader",
+                MMAX2Reader.PARAM_INPUT_DIR, Path.of("src", "test", "resources", "input2").toString(),
+                MMAX2Reader.PARAM_ANNOTATION_LEVELS, new String[]{"proteins", "sentence"},
+                MMAX2Reader.PARAM_UIMA_ANNOTATION_TYPES, new String[]{"de.julielab.jcore.types.Protein", "de.julielab.jcore.types.Sentence"});
+        assertThat(reader.hasNext()).isTrue();
+        reader.getNext(jCas.getCas());
+
+        Header h = JCasUtil.selectSingle(jCas, Header.class);
+        assertThat(h.getDocId()).isEqualTo("10471746");
+
+        Collection<Protein> proteins = JCasUtil.select(jCas, Protein.class);
+        for (var p : proteins) {
+            System.out.println(p.getCoveredText() + ": " + p.getBegin() + "-"+p.getEnd());
+        }
+        Collection<Sentence> sentences = JCasUtil.select(jCas, Sentence.class);
+        for (var s : sentences)
+            System.out.println(s.getBegin() + " - " + s.getEnd());
+    }
 }

From d2f432628249282b4bc4e486c6ed62b45a71cb1c Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 20 Apr 2022 21:08:53 +0200
Subject: [PATCH 192/269] Add an option to remove overlapping annotations,
 keeping only the longest one.

---
 .../julielab/jcore/cr/mmax2/MMAX2Reader.java  | 45 +++++++++++++++++--
 .../jcore/cr/mmax2/MMAX2ReaderTest.java       | 33 ++++++++++----
 2 files changed, 67 insertions(+), 11 deletions(-)

diff --git a/jcore-mmax2-reader/src/main/java/de/julielab/jcore/cr/mmax2/MMAX2Reader.java b/jcore-mmax2-reader/src/main/java/de/julielab/jcore/cr/mmax2/MMAX2Reader.java
index 85634bb56..400c752f5 100644
--- a/jcore-mmax2-reader/src/main/java/de/julielab/jcore/cr/mmax2/MMAX2Reader.java
+++ b/jcore-mmax2-reader/src/main/java/de/julielab/jcore/cr/mmax2/MMAX2Reader.java
@@ -35,7 +35,10 @@ public class MMAX2Reader extends JCasCollectionReader_ImplBase {
     public static final String PARAM_ANNOTATION_LEVELS = "AnnotationLevels";
     public static final String PARAM_ORIGINAL_TEXT_FILES = "OriginalTextFiles";
     public static final String PARAM_UIMA_ANNOTATION_TYPES = "UimaAnnotationTypes";
+    public static final String PARAM_REMOVE_OVERLAPPING_SHORTER_ANNOTATIONS = "RemoveOverlappingShorterAnnotations";
     private final static Logger log = LoggerFactory.getLogger(MMAX2Reader.class);
+    @ConfigurationParameter(name = PARAM_REMOVE_OVERLAPPING_SHORTER_ANNOTATIONS, mandatory = false, defaultValue = "false", description = "If set to true, for all overlapping annotations only the longest is kept.")
+    boolean removeOverlappingShorterAnnotations;
     @ConfigurationParameter(name = PARAM_INPUT_DIR, description = "Should point to the directory of which the MMAX2 projects are sub directories of.")
     private String inputDir;
     @ConfigurationParameter(name = PARAM_ANNOTATION_LEVELS, description = "The names of the MMAX2 annotation levels to create annotations for.")
@@ -44,7 +47,6 @@ public class MMAX2Reader extends JCasCollectionReader_ImplBase {
     private String[] uimaTypeNames;
     @ConfigurationParameter(name = PARAM_ORIGINAL_TEXT_FILES, mandatory = false, description = "The MMAX2 base data consists of tokenized text and does not keep track of the original text. This parameter should point to a directory containing the original text files. The file names should match the MMAX2 project IDs.")
     private String originalTextFilesDir;
-
     private LinkedList<File> folderList;
     private String actualPath;
     private HashMap<String, String> levels2uimaNames;
@@ -62,6 +64,7 @@ public void initialize(UimaContext context) throws ResourceInitializationExcepti
         annotationLevels = (String[]) context.getConfigParameterValue(PARAM_ANNOTATION_LEVELS);
         uimaTypeNames = (String[]) getUimaContext().getConfigParameterValue(PARAM_UIMA_ANNOTATION_TYPES);
         originalTextFilesDir = (String) context.getConfigParameterValue(PARAM_ORIGINAL_TEXT_FILES);
+        removeOverlappingShorterAnnotations = Optional.ofNullable((Boolean) context.getConfigParameterValue(PARAM_REMOVE_OVERLAPPING_SHORTER_ANNOTATIONS)).orElse(false);
         actualPath = null;
         if (annotationLevels.length != uimaTypeNames.length)
             throw new IllegalArgumentException("The number of annotation levels and the number of UIMA type names must match. But the given annotation levels are '" + Arrays.toString(annotationLevels) + "' and the UIMA types names are '" + Arrays.toString(uimaTypeNames) + "'.");
@@ -208,8 +211,9 @@ private void produceOutput(MMAX2Discourse discourse, WordInformation[] words, JC
                 outPlain.append(" ");
             }
         }
+        Set<Markable> ignoredMarkables = getIgnoredMarkables(discourse);
         for (int i = 0; i < annotationLevels.length; ++i) {
-            Iterator<Markable> iterator = discourse.getMarkableLevelByName(annotationLevels[i], false).getMarkables().stream().map(Markable.class::cast).filter(Predicate.not(Markable::isDiscontinuous)).iterator();
+            Iterator<Markable> iterator = discourse.getMarkableLevelByName(annotationLevels[i], false).getMarkables().stream().map(Markable.class::cast).filter(Predicate.not(ignoredMarkables::contains)).filter(Predicate.not(Markable::isDiscontinuous)).iterator();
             int id = 0;
             while (iterator.hasNext()) {
                 Markable markable = iterator.next();
@@ -228,7 +232,7 @@ private void produceOutput(MMAX2Discourse discourse, WordInformation[] words, JC
                 if (a instanceof ConceptMention)
                     ((ConceptMention) a).setSpecificType(markable.getAttributeValue(markable.getMarkableLevelName()));
                 else if (a instanceof Sentence)
-                    ((Sentence)a).setId(String.valueOf(id));
+                    ((Sentence) a).setId(String.valueOf(id));
                 a.addToIndexes();
                 ++id;
             }
@@ -253,6 +257,41 @@ else if (a instanceof Sentence)
         h.addToIndexes();
     }
 
+    private Set<Markable> getIgnoredMarkables(MMAX2Discourse discourse) {
+        if (!removeOverlappingShorterAnnotations)
+            return Collections.emptySet();
+        Set<Markable> toIgnore = new HashSet<>();
+        for (int i = 0; i < annotationLevels.length; ++i) {
+        Map<Integer, Set<Markable>> markablesByPos = new HashMap<>();
+            Iterator<Markable> iterator = discourse.getMarkableLevelByName(annotationLevels[i], false).getMarkables().stream().map(Markable.class::cast).filter(Predicate.not(Markable::isDiscontinuous)).iterator();
+            while (iterator.hasNext()) {
+                Markable markable = iterator.next();
+                // associate the markable with all the word indices it covers
+                IntStream.rangeClosed(markable.getLeftmostDiscoursePosition(), markable.getRightmostDiscoursePosition()).forEach(j -> markablesByPos.compute(j, (k, v) -> v != null ? v : new HashSet<>()).add(markable));
+            }
+            // now, for each word index, keep only the longest markable
+            for (Integer pos : markablesByPos.keySet()) {
+                Set<Markable> markables = markablesByPos.get(pos);
+                if (markables.size() > 1) {
+                    int maxSize = 0;
+                    Markable longestMarkable = null;
+                    for (Markable markable : markables) {
+                        // first, we just add all markables to ignore
+                        toIgnore.add(markable);
+                        int markableLength = markable.getRightmostDiscoursePosition() - markable.getLeftmostDiscoursePosition() + 1;
+                        if (markableLength > maxSize) {
+                            maxSize = markableLength;
+                            longestMarkable = markable;
+                        }
+                    }
+                    // now remove only the longest markable - that we want to keep - from the set of ignores markables
+                    toIgnore.remove(longestMarkable);
+                }
+            }
+        }
+        return toIgnore;
+    }
+
     private void handleOriginalTextInformation(String pmid, WordInformation[] words) throws CollectionException {
         if (originalTextFilesDir.length() > 0 && !originalTextFilesDir.endsWith("/"))
             originalTextFilesDir += File.separator;
diff --git a/jcore-mmax2-reader/src/test/java/de/julielab/jcore/cr/mmax2/MMAX2ReaderTest.java b/jcore-mmax2-reader/src/test/java/de/julielab/jcore/cr/mmax2/MMAX2ReaderTest.java
index 65a401a0b..79b9bfb11 100644
--- a/jcore-mmax2-reader/src/test/java/de/julielab/jcore/cr/mmax2/MMAX2ReaderTest.java
+++ b/jcore-mmax2-reader/src/test/java/de/julielab/jcore/cr/mmax2/MMAX2ReaderTest.java
@@ -15,6 +15,7 @@
 import java.util.Collection;
 
 import static org.assertj.core.api.Assertions.assertThat;
+
 /**
  * Unit tests for jcore-mmax2-reader.
  *
@@ -51,7 +52,7 @@ public void testReader() throws Exception {
 
         Collection<Token> tokens = JCasUtil.select(jCas, Token.class);
         // check a small sample of tokens that should have been created
-       assertThat(tokens).map(Token::getCoveredText).contains("Characterization", "IFNAR-1", ":", "(", "subunits", "recognition", ".", "HuIFNAR-1");
+        assertThat(tokens).map(Token::getCoveredText).contains("Characterization", "IFNAR-1", ":", "(", "subunits", "recognition", ".", "HuIFNAR-1");
     }
 
     @Test
@@ -85,14 +86,30 @@ public void testReader2() throws Exception {
         reader.getNext(jCas.getCas());
 
         Header h = JCasUtil.selectSingle(jCas, Header.class);
-        assertThat(h.getDocId()).isEqualTo("10471746");
+        assertThat(h.getDocId()).isEqualTo("14731280");
 
         Collection<Protein> proteins = JCasUtil.select(jCas, Protein.class);
-        for (var p : proteins) {
-            System.out.println(p.getCoveredText() + ": " + p.getBegin() + "-"+p.getEnd());
-        }
-        Collection<Sentence> sentences = JCasUtil.select(jCas, Sentence.class);
-        for (var s : sentences)
-            System.out.println(s.getBegin() + " - " + s.getEnd());
+        // one protein is seemingly annotated twice at the same offsets; this is more of an annotation error than
+        // the real case to handle, but it was responsible for errors and serves well as a simple test
+        long overlappingProteinCount = proteins.stream().filter(p -> p.getBegin() == 95 && p.getEnd() == 99).count();
+        assertThat(overlappingProteinCount).isEqualTo(2);
+
+        // now activate the parameter to avoid overlapping annotations
+        jCas.reset();
+        reader = CollectionReaderFactory.createReader("de.julielab.jcore.cr.mmax2.desc.jcore-mmax2-reader",
+                MMAX2Reader.PARAM_INPUT_DIR, Path.of("src", "test", "resources", "input2").toString(),
+                MMAX2Reader.PARAM_ANNOTATION_LEVELS, new String[]{"proteins", "sentence"},
+                MMAX2Reader.PARAM_UIMA_ANNOTATION_TYPES, new String[]{"de.julielab.jcore.types.Protein", "de.julielab.jcore.types.Sentence"},
+                MMAX2Reader.PARAM_REMOVE_OVERLAPPING_SHORTER_ANNOTATIONS, true);
+        assertThat(reader.hasNext()).isTrue();
+        reader.getNext(jCas.getCas());
+
+
+        proteins = JCasUtil.select(jCas, Protein.class);
+        // there shouldn't be an overlap any more
+        overlappingProteinCount = proteins.stream().filter(p -> p.getBegin() == 95 && p.getEnd() == 99).count();
+        assertThat(overlappingProteinCount).isEqualTo(1);
     }
+
+
 }

From abccb002d8b3cbce471a97cf7ae0a89eb98fd055 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 21 Apr 2022 08:11:44 +0200
Subject: [PATCH 193/269] Add test data to git.

---
 .../cr/mmax2/desc/jcore-mmax2-reader.xml      |  18 +-
 .../resources/input2/mmax_23647/Basedata.uri  |   1 +
 .../input2/mmax_23647/Basedata/Basedata.xml   | 299 ++++++++++++++++++
 .../input2/mmax_23647/Basedata/words.dtd      |   3 +
 .../mmax_23647/Customizations/proteins.xml    |  72 +++++
 .../mmax_23647/Customizations/sentence.xml    |   3 +
 .../input2/mmax_23647/Markables/markables.dtd |   2 +
 .../input2/mmax_23647/Markables/proteins.xml  |  29 ++
 .../input2/mmax_23647/Markables/sentence.xml  |  11 +
 .../input2/mmax_23647/Schemes/proteins.xml    |  16 +
 .../input2/mmax_23647/Schemes/sentence.xml    |   3 +
 .../mmax_23647/Styles/default_style.xsl       |  58 ++++
 .../input2/mmax_23647/common_paths.xml        |  17 +
 .../resources/input2/mmax_23647/project.mmax  |   7 +
 14 files changed, 537 insertions(+), 2 deletions(-)
 create mode 100644 jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Basedata.uri
 create mode 100644 jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Basedata/Basedata.xml
 create mode 100644 jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Basedata/words.dtd
 create mode 100644 jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Customizations/proteins.xml
 create mode 100644 jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Customizations/sentence.xml
 create mode 100644 jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Markables/markables.dtd
 create mode 100644 jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Markables/proteins.xml
 create mode 100644 jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Markables/sentence.xml
 create mode 100644 jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Schemes/proteins.xml
 create mode 100644 jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Schemes/sentence.xml
 create mode 100644 jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Styles/default_style.xsl
 create mode 100644 jcore-mmax2-reader/src/test/resources/input2/mmax_23647/common_paths.xml
 create mode 100644 jcore-mmax2-reader/src/test/resources/input2/mmax_23647/project.mmax

diff --git a/jcore-mmax2-reader/src/main/resources/de/julielab/jcore/cr/mmax2/desc/jcore-mmax2-reader.xml b/jcore-mmax2-reader/src/main/resources/de/julielab/jcore/cr/mmax2/desc/jcore-mmax2-reader.xml
index 8f3289029..6d5978b54 100644
--- a/jcore-mmax2-reader/src/main/resources/de/julielab/jcore/cr/mmax2/desc/jcore-mmax2-reader.xml
+++ b/jcore-mmax2-reader/src/main/resources/de/julielab/jcore/cr/mmax2/desc/jcore-mmax2-reader.xml
@@ -3,10 +3,17 @@
   <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
   <implementationName>de.julielab.jcore.cr.mmax2.MMAX2Reader</implementationName>
   <processingResourceMetaData>
-    <name>JCoRe MMAX2 reader.</name>
+    <name>JCoRe MMAX2 reader</name>
     <description>Collection reader for MMAX2 annotation projects.</description>
     <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
+      <configurationParameter>
+        <name>RemoveOverlappingShorterAnnotations</name>
+        <description>If set to true, for all overlapping annotations only the longest is kept.</description>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
       <configurationParameter>
         <name>InputDir</name>
         <description>Should point to the directory of which the MMAX2 projects are sub directories of.</description>
@@ -36,7 +43,14 @@
         <mandatory>false</mandatory>
       </configurationParameter>
     </configurationParameters>
-    <configurationParameterSettings/>
+    <configurationParameterSettings>
+      <nameValuePair>
+        <name>RemoveOverlappingShorterAnnotations</name>
+        <value>
+          <boolean>false</boolean>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
     <typeSystemDescription>
       <imports>
         <import name="de.julielab.jcore.types.jcore-morpho-syntax-types"/>
diff --git a/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Basedata.uri b/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Basedata.uri
new file mode 100644
index 000000000..4e6d1a1f3
--- /dev/null
+++ b/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Basedata.uri
@@ -0,0 +1 @@
+14731280
diff --git a/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Basedata/Basedata.xml b/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Basedata/Basedata.xml
new file mode 100644
index 000000000..90e494de3
--- /dev/null
+++ b/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Basedata/Basedata.xml
@@ -0,0 +1,299 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<!DOCTYPE words SYSTEM "words.dtd">
+<words>
+<word id="word_1">Multiple</word>
+<word id="word_2">stress</word>
+<word id="word_3">signal</word>
+<word id="word_4">integration</word>
+<word id="word_5">in</word>
+<word id="word_6">the</word>
+<word id="word_7">regulation</word>
+<word id="word_8">of</word>
+<word id="word_9">the</word>
+<word id="word_10">complex</word>
+<word id="word_11">sigma</word>
+<word id="word_12">S</word>
+<word id="word_13">-</word>
+<word id="word_14">dependent</word>
+<word id="word_15">csiD</word>
+<word id="word_16">-</word>
+<word id="word_17">ygaF</word>
+<word id="word_18">-</word>
+<word id="word_19">gabDTP</word>
+<word id="word_20">operon</word>
+<word id="word_21">in</word>
+<word id="word_22">Escherichia</word>
+<word id="word_23">coli</word>
+<word id="word_24">.</word>
+<word id="word_25">The</word>
+<word id="word_26">csiD</word>
+<word id="word_27">-</word>
+<word id="word_28">ygaF</word>
+<word id="word_29">-</word>
+<word id="word_30">gabDTP</word>
+<word id="word_31">region</word>
+<word id="word_32">in</word>
+<word id="word_33">the</word>
+<word id="word_34">Escherichia</word>
+<word id="word_35">coli</word>
+<word id="word_36">genome</word>
+<word id="word_37">represents</word>
+<word id="word_38">a</word>
+<word id="word_39">cluster</word>
+<word id="word_40">of</word>
+<word id="word_41">sigma</word>
+<word id="word_41.5">S</word>
+<word id="word_41.75">-</word>
+<word id="word_42">controlled</word>
+<word id="word_43">genes</word>
+<word id="word_44">.</word>
+<word id="word_45">Here</word>
+<word id="word_46">,</word>
+<word id="word_47">we</word>
+<word id="word_48">investigated</word>
+<word id="word_49">promoter</word>
+<word id="word_50">structures</word>
+<word id="word_51">,</word>
+<word id="word_52">sigma</word>
+<word id="word_53">factor</word>
+<word id="word_54">dependencies</word>
+<word id="word_55">,</word>
+<word id="word_56">potential</word>
+<word id="word_57">co</word>
+<word id="word_58">-</word>
+<word id="word_59">regulation</word>
+<word id="word_60">and</word>
+<word id="word_61">environmental</word>
+<word id="word_62">regulatory</word>
+<word id="word_63">patterns</word>
+<word id="word_64">for</word>
+<word id="word_65">all</word>
+<word id="word_66">of</word>
+<word id="word_67">these</word>
+<word id="word_68">genes</word>
+<word id="word_69">.</word>
+<word id="word_70">We</word>
+<word id="word_71">find</word>
+<word id="word_72">that</word>
+<word id="word_73">this</word>
+<word id="word_74">region</word>
+<word id="word_75">constitutes</word>
+<word id="word_76">a</word>
+<word id="word_77">complex</word>
+<word id="word_78">operon</word>
+<word id="word_79">with</word>
+<word id="word_80">expression</word>
+<word id="word_81">being</word>
+<word id="word_82">controlled</word>
+<word id="word_83">by</word>
+<word id="word_84">three</word>
+<word id="word_85">differentially</word>
+<word id="word_86">regulated</word>
+<word id="word_87">promoters</word>
+<word id="word_88">:</word>
+<word id="word_89">(i)</word>
+<word id="word_90">csiDp</word>
+<word id="word_91">,</word>
+<word id="word_92">which</word>
+<word id="word_93">affects</word>
+<word id="word_94">the</word>
+<word id="word_95">expression</word>
+<word id="word_96">of</word>
+<word id="word_97">all</word>
+<word id="word_98">five</word>
+<word id="word_99">genes</word>
+<word id="word_100">,</word>
+<word id="word_101">is</word>
+<word id="word_102">cAMP</word>
+<word id="word_103">-</word>
+<word id="word_104">CRP</word>
+<word id="word_105">/</word>
+<word id="word_106">sigma</word>
+<word id="word_107">S</word>
+<word id="word_108">-</word>
+<word id="word_109">dependent</word>
+<word id="word_110">and</word>
+<word id="word_111">activated</word>
+<word id="word_112">exclusively</word>
+<word id="word_113">upon</word>
+<word id="word_114">carbon</word>
+<word id="word_115">starvation</word>
+<word id="word_116">and</word>
+<word id="word_117">stationary</word>
+<word id="word_118">phase</word>
+<word id="word_119">;</word>
+<word id="word_120">(ii)</word>
+<word id="word_121">gabDp1</word>
+<word id="word_122">,</word>
+<word id="word_123">which</word>
+<word id="word_124">is</word>
+<word id="word_125">sigma</word>
+<word id="word_126">S</word>
+<word id="word_127">-</word>
+<word id="word_128">dependent</word>
+<word id="word_129">and</word>
+<word id="word_130">exhibits</word>
+<word id="word_131">multiple</word>
+<word id="word_132">stress</word>
+<word id="word_133">induction</word>
+<word id="word_134">like</word>
+<word id="word_135">sigma</word>
+<word id="word_136">S</word>
+<word id="word_137">itself</word>
+<word id="word_138">;</word>
+<word id="word_139">and</word>
+<word id="word_140">(iii)</word>
+<word id="word_141">gabDp2</word>
+<word id="word_142">[</word>
+<word id="word_143">previously</word>
+<word id="word_144">suggested</word>
+<word id="word_145">by</word>
+<word id="word_146">Schneider</word>
+<word id="word_147">,</word>
+<word id="word_148">B.L.</word>
+<word id="word_149">,</word>
+<word id="word_150">Ruback</word>
+<word id="word_151">,</word>
+<word id="word_152">S.</word>
+<word id="word_153">,</word>
+<word id="word_154">Kiupakis</word>
+<word id="word_155">,</word>
+<word id="word_156">A.K.</word>
+<word id="word_157">,</word>
+<word id="word_158">Kasbarian</word>
+<word id="word_159">,</word>
+<word id="word_160">H.</word>
+<word id="word_161">,</word>
+<word id="word_162">Pybus</word>
+<word id="word_163">,</word>
+<word id="word_164">C.</word>
+<word id="word_165">,</word>
+<word id="word_166">and</word>
+<word id="word_167">Reitzer</word>
+<word id="word_168">,</word>
+<word id="word_169">L.</word>
+<word id="word_170">(</word>
+<word id="word_171">2002</word>
+<word id="word_172">)</word>
+<word id="word_173">J.</word>
+<word id="word_174">Bacteriol.</word>
+<word id="word_175">184</word>
+<word id="word_176">:</word>
+<word id="word_177">6976-6986</word>
+<word id="word_178">]</word>
+<word id="word_179">,</word>
+<word id="word_180">which</word>
+<word id="word_181">appears</word>
+<word id="word_182">to</word>
+<word id="word_183">be</word>
+<word id="word_184">Nac</word>
+<word id="word_185">/</word>
+<word id="word_186">sigma</word>
+<word id="word_187">70</word>
+<word id="word_188">-</word>
+<word id="word_189">controlled</word>
+<word id="word_190">and</word>
+<word id="word_191">to</word>
+<word id="word_192">respond</word>
+<word id="word_193">to</word>
+<word id="word_194">poor</word>
+<word id="word_195">nitrogen</word>
+<word id="word_196">sources</word>
+<word id="word_197">.</word>
+<word id="word_198">In</word>
+<word id="word_199">addition</word>
+<word id="word_200">,</word>
+<word id="word_201">we</word>
+<word id="word_202">identify</word>
+<word id="word_203">a</word>
+<word id="word_204">novel</word>
+<word id="word_205">repressor</word>
+<word id="word_206">,</word>
+<word id="word_207">CsiR</word>
+<word id="word_208">,</word>
+<word id="word_209">which</word>
+<word id="word_210">modulates</word>
+<word id="word_211">csiDp</word>
+<word id="word_212">activity</word>
+<word id="word_213">in</word>
+<word id="word_214">a</word>
+<word id="word_215">temporal</word>
+<word id="word_216">manner</word>
+<word id="word_217">during</word>
+<word id="word_218">early</word>
+<word id="word_219">stationary</word>
+<word id="word_220">phase</word>
+<word id="word_221">.</word>
+<word id="word_222">Finally</word>
+<word id="word_223">,</word>
+<word id="word_224">we</word>
+<word id="word_225">propose</word>
+<word id="word_226">a</word>
+<word id="word_227">physiological</word>
+<word id="word_228">role</word>
+<word id="word_229">for</word>
+<word id="word_230">sigma</word>
+<word id="word_230.5">S</word>
+<word id="word_230.75">-</word>
+<word id="word_231">controlled</word>
+<word id="word_232">GabT</word>
+<word id="word_233">/</word>
+<word id="word_234">D</word>
+<word id="word_235">-</word>
+<word id="word_236">mediated</word>
+<word id="word_237">gamma-aminobutyrate</word>
+<word id="word_238">(</word>
+<word id="word_239">GABA</word>
+<word id="word_240">)</word>
+<word id="word_241">catabolism</word>
+<word id="word_242">and</word>
+<word id="word_243">glutamate</word>
+<word id="word_244">accumulation</word>
+<word id="word_245">in</word>
+<word id="word_246">general</word>
+<word id="word_247">stress</word>
+<word id="word_248">adaptation</word>
+<word id="word_249">.</word>
+<word id="word_250">This</word>
+<word id="word_251">physiological</word>
+<word id="word_252">role</word>
+<word id="word_253">is</word>
+<word id="word_254">reflected</word>
+<word id="word_255">by</word>
+<word id="word_256">the</word>
+<word id="word_257">activation</word>
+<word id="word_258">of</word>
+<word id="word_259">the</word>
+<word id="word_260">operon</word>
+<word id="word_261">-</word>
+<word id="word_262">internal</word>
+<word id="word_263">gabDp1</word>
+<word id="word_264">promoter</word>
+<word id="word_265">under</word>
+<word id="word_266">the</word>
+<word id="word_267">different</word>
+<word id="word_268">conditions</word>
+<word id="word_269">that</word>
+<word id="word_270">also</word>
+<word id="word_271">induce</word>
+<word id="word_272">sigma</word>
+<word id="word_273">S</word>
+<word id="word_274">,</word>
+<word id="word_275">which</word>
+<word id="word_276">include</word>
+<word id="word_277">shifts</word>
+<word id="word_278">to</word>
+<word id="word_279">acidic</word>
+<word id="word_280">pH</word>
+<word id="word_281">or</word>
+<word id="word_282">high</word>
+<word id="word_283">osmolarity</word>
+<word id="word_284">as</word>
+<word id="word_285">well</word>
+<word id="word_286">as</word>
+<word id="word_287">starvation</word>
+<word id="word_288">or</word>
+<word id="word_289">stationary</word>
+<word id="word_290">phase</word>
+<word id="word_291">.</word>
+</words>
\ No newline at end of file
diff --git a/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Basedata/words.dtd b/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Basedata/words.dtd
new file mode 100644
index 000000000..a02b470f1
--- /dev/null
+++ b/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Basedata/words.dtd
@@ -0,0 +1,3 @@
+<!ELEMENT words (word*)>
+<!ELEMENT word (#PCDATA)>
+<!ATTLIST word id ID #REQUIRED>
\ No newline at end of file
diff --git a/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Customizations/proteins.xml b/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Customizations/proteins.xml
new file mode 100644
index 000000000..0f4bd71f8
--- /dev/null
+++ b/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Customizations/proteins.xml
@@ -0,0 +1,72 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<customization>
+
+<rule pattern="proteins={protein_familiy_or_group}" style="background=d:212212212"/>
+<rule pattern="proteins={protein_complex}" style="background=green"/>
+<rule pattern="proteins={protein_enum}" style="background=red"/>
+<rule pattern="proteins={protein}" style="background=orange"/>
+<rule pattern="proteins={protein_variant}" style="background=d:064224208"/>
+
+
+
+
+<!--
+
+<rule pattern="t_cells={memory_t_cell}" style="background=black"/>
+<rule pattern="t_cells={memory_t_cell}" style="foreground=white"/>
+<rule pattern="t_cells={regulatory_t_cell}" style="background=d:072061139"/>
+<rule pattern="t_cells={regulatory_t_cell}" style="foreground=white"/>
+<rule pattern="t_cells={cytotoxic_t_cell}" style="background=d:064224208"/>
+<rule pattern="t_cells={naive_t_cell}" style="background=d:255140000"/>
+<rule pattern="t_cells={naive_t_cell}" style="foreground=blue"/>
+<rule pattern="t_cells={nk_t_cell}" style="background=d:255140000"/>
+<rule pattern="t_cells={nk_t_cell}" style="foreground=green"/>
+
+
+<rule pattern="thymocytes={thymocyte}" style="background=d:255140000"/>
+<rule pattern="thymocytes={pro_t_cell}" style="background=green"/>
+<rule pattern="thymocytes={dn2_immature_t_cell}" style="background=green"/>
+<rule pattern="thymocytes={dn2_immature_t_cell}" style="foreground=blue"/>
+<rule pattern="thymocytes={dn3_immature_t_cell}" style="background=green"/>
+<rule pattern="thymocytes={dn3_immature_t_cell}" style="foreground=white"/>
+<rule pattern="thymocytes={dn4_alpha_beta_immature_t_cell}" style="background=green"/>
+<rule pattern="thymocytes={dn4_alpha_beta_immature_t_cell}" style="foreground=red"/>
+
+
+<rule pattern="t_helper_cells={t_helper_cell}" style="background=d:034139034"/>
+<rule pattern="t_helper_cells={t_helper_1_cell}" style="background=d:034139034"/>
+<rule pattern="t_helper_cells={t_helper_1_cell}" style="foreground=blue"/>
+<rule pattern="t_helper_cells={t_helper_2_cell}" style="background=d:034139034"/>
+<rule pattern="t_helper_cells={t_helper_2_cell}" style="foreground=red"/>
+
+
+<rule pattern="transplant_status={donor}" style="background=d:128000128"/>
+<rule pattern="transplant_status={donor}" style="foreground=blue"/>
+<rule pattern="transplant_status={recipient}" style="background=d:128000128"/>
+<rule pattern="transplant_status={recipient}" style="foreground=red"/>
+<rule pattern="age={embryonic}" style="background=green"/>
+<rule pattern="age={fetal}" style="background=green"/>
+<rule pattern="age={fetal}" style="foreground=blue"/>
+<rule pattern="age={infant}" style="background=green"/>
+<rule pattern="age={infant}" style="foreground=white"/>
+<rule pattern="age={adult}" style="background=green"/>
+<rule pattern="age={adult}" style="foreground=red"/>
+<rule pattern="organism={human}" style="background=d:255215000"/>
+<rule pattern="organism={mouse}" style="background=blue"/>
+<rule pattern="organism={mouse}" style="foreground=white"/>
+<rule pattern="organism={rat}" style="background=red"/>
+<rule pattern="organism={dog}" style="background=black"/>
+<rule pattern="organism={dog}" style="foreground=white"/>
+<rule pattern="organism={animal}" style="background=d:072061139"/>
+<rule pattern="organism={animal}" style="foreground=white"/>
+<rule pattern="organism={bacterium}" style="background=d:064224208"/>
+<rule pattern="organism={ebv_virus}" style="background=d:255140000"/>
+<rule pattern="organism={ebv_virus}" style="foreground=blue"/>
+<rule pattern="organism={cmv_virus}" style="background=d:255140000"/>
+<rule pattern="organism={cmv_virus}" style="foreground=green"/>
+<rule pattern="organism={virus}" style="background=d:255140000"/>
+-->
+
+</customization>
+
+
diff --git a/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Customizations/sentence.xml b/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Customizations/sentence.xml
new file mode 100644
index 000000000..6fbf9d136
--- /dev/null
+++ b/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Customizations/sentence.xml
@@ -0,0 +1,3 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<customization>
+</customization>
diff --git a/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Markables/markables.dtd b/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Markables/markables.dtd
new file mode 100644
index 000000000..220e8b3c8
--- /dev/null
+++ b/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Markables/markables.dtd
@@ -0,0 +1,2 @@
+<!ELEMENT markables (markable*)>
+<!ATTLIST markable id ID #REQUIRED>
\ No newline at end of file
diff --git a/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Markables/proteins.xml b/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Markables/proteins.xml
new file mode 100644
index 000000000..1a5bd6616
--- /dev/null
+++ b/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Markables/proteins.xml
@@ -0,0 +1,29 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE markables SYSTEM "markables.dtd">
+<markables xmlns="www.eml.org/NameSpaces/proteins">
+<markable id="markable_8" span="word_11..word_12" proteins="protein" />
+<markable id="markable_9" span="word_19..word_20" proteins="protein_familiy_or_group" />
+<markable id="markable_16" span="word_104" proteins="protein" />
+<markable id="markable_15" span="word_90" proteins="protein" />
+<markable id="markable_18" span="word_121" proteins="protein" />
+<markable id="markable_17" span="word_106..word_107" proteins="protein" />
+<markable id="markable_12" span="word_30" proteins="protein_familiy_or_group" />
+<markable id="markable_11" span="word_28" proteins="protein" />
+<markable id="markable_14" span="word_52..word_53" proteins="protein_familiy_or_group" />
+<markable id="markable_13" span="word_41..word_41.5" proteins="protein" />
+<markable id="markable_10" span="word_26" proteins="protein" />
+<markable id="markable_30" span="word_17" proteins="protein_familiy_or_group" />
+<markable id="markable_32" span="word_15" proteins="protein" />
+<markable id="markable_31" span="word_17" proteins="protein" />
+<markable id="markable_19" span="word_125..word_126" proteins="protein" />
+<markable id="markable_25" span="word_211" proteins="protein" />
+<markable id="markable_24" span="word_207" proteins="protein" />
+<markable id="markable_23" span="word_186..word_187" proteins="protein" />
+<markable id="markable_22" span="word_184" proteins="protein" />
+<markable id="markable_29" span="word_272..word_273" proteins="protein" />
+<markable id="markable_28" span="word_263" proteins="protein" />
+<markable id="markable_27" span="word_232..word_234" proteins="protein_enum" />
+<markable id="markable_26" span="word_230..word_230.5" proteins="protein" />
+<markable id="markable_21" span="word_141" proteins="protein" />
+<markable id="markable_20" span="word_135..word_136" proteins="protein" />
+</markables>
\ No newline at end of file
diff --git a/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Markables/sentence.xml b/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Markables/sentence.xml
new file mode 100644
index 000000000..c35553af7
--- /dev/null
+++ b/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Markables/sentence.xml
@@ -0,0 +1,11 @@
+<?xml version="1.0"?>
+<!DOCTYPE markables SYSTEM "markables.dtd">
+<markables xmlns="www.eml.org/NameSpaces/sentence">
+<markable id="markable_1" span="word_1..word_24"/>
+<markable id="markable_2" span="word_25..word_44"/>
+<markable id="markable_3" span="word_45..word_69"/>
+<markable id="markable_4" span="word_70..word_197"/>
+<markable id="markable_5" span="word_198..word_221"/>
+<markable id="markable_6" span="word_222..word_249"/>
+<markable id="markable_7" span="word_250..word_291"/>
+</markables>
diff --git a/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Schemes/proteins.xml b/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Schemes/proteins.xml
new file mode 100644
index 000000000..1045dc27e
--- /dev/null
+++ b/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Schemes/proteins.xml
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<annotationscheme>
+
+  <attribute id="proteins" name="proteins" text="" type="nominal_button">
+
+<value id="protein_familiy_or_group" name="protein_familiy_or_group"/>
+    <value id="protein_complex" name="protein_complex"/>
+    <value id="protein_enum" name="protein_enum"/>
+    <value id="protein" name="protein"/>
+   <value id="protein_variant" name="protein_variant"/>
+
+   </attribute>
+
+
+
+</annotationscheme>
diff --git a/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Schemes/sentence.xml b/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Schemes/sentence.xml
new file mode 100644
index 000000000..f37fbc936
--- /dev/null
+++ b/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Schemes/sentence.xml
@@ -0,0 +1,3 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<annotationscheme>
+</annotationscheme>
diff --git a/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Styles/default_style.xsl b/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Styles/default_style.xsl
new file mode 100644
index 000000000..ab671aa34
--- /dev/null
+++ b/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/Styles/default_style.xsl
@@ -0,0 +1,58 @@
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
+xmlns:mmax="org.eml.MMAX2.discourse.MMAX2DiscourseLoader"
+xmlns:proteins="www.eml.org/NameSpaces/proteins"
+xmlns:sentence="www.eml.org/NameSpaces/sentence">
+<xsl:output method="text" indent="no" omit-xml-declaration="yes"/>
+<xsl:strip-space elements="*"/>
+
+
+<xsl:template match="words">
+
+  <xsl:apply-templates/>
+
+</xsl:template>
+
+<xsl:template match="word">
+
+  <xsl:value-of select="mmax:registerDiscourseElement(@id)"/>
+
+  <xsl:apply-templates select="mmax:getStartedMarkables(@id)" mode="opening"/>
+
+<xsl:value-of select="mmax:setDiscourseElementStart()"/>
+<xsl:value-of select="mmax:startBold()"/>
+   <xsl:apply-templates/>
+<xsl:value-of select="mmax:endBold()"/>
+  <xsl:value-of select="mmax:setDiscourseElementEnd()"/>
+
+  <xsl:apply-templates select="mmax:getEndedMarkables(@id)" mode="closing"/>
+
+<xsl:text> </xsl:text>
+
+</xsl:template>
+
+<xsl:template match="proteins:markable" mode="opening">
+<xsl:value-of select="mmax:startBold()"/>
+<xsl:value-of select="mmax:addLeftMarkableHandle(@mmax_level, @id, '[')"/>
+<xsl:value-of select="mmax:endBold()"/>
+</xsl:template>
+
+<xsl:template match="proteins:markable" mode="closing">
+<xsl:value-of select="mmax:startBold()"/>
+<xsl:value-of select="mmax:addRightMarkableHandle(@mmax_level, @id, ']')"/>
+<xsl:value-of select="mmax:endBold()"/>
+</xsl:template>
+
+
+<xsl:template match="sentence:markable" mode="closing">
+
+<xsl:value-of select="mmax:startSubscript()"/>
+ <xsl:text>
+</xsl:text>
+<xsl:value-of select="mmax:endSubscript()"/>
+
+</xsl:template>
+
+
+</xsl:stylesheet>
+
+
diff --git a/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/common_paths.xml b/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/common_paths.xml
new file mode 100644
index 000000000..8f55971b4
--- /dev/null
+++ b/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/common_paths.xml
@@ -0,0 +1,17 @@
+<?xml version="1.0"?>
+<!DOCTYPE common_paths>
+<common_paths>
+<basedata_path>Basedata/</basedata_path>
+<markable_path>Markables/</markable_path>
+<scheme_path>Schemes/</scheme_path>
+<style_path>Styles/</style_path>
+<customization_path>Customizations/</customization_path>
+<views><stylesheet>default_style.xsl</stylesheet>
+</views>
+<annotations>
+<level name="proteins" schemefile="proteins.xml" customization_file="proteins.xml">proteins.xml</level>
+<level name="sentence" schemefile="sentence.xml" customization_file="sentence.xml" at_startup="visible">sentence.xml</level>
+</annotations>
+<user_switches>
+</user_switches>
+</common_paths>
\ No newline at end of file
diff --git a/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/project.mmax b/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/project.mmax
new file mode 100644
index 000000000..52fc0b1c1
--- /dev/null
+++ b/jcore-mmax2-reader/src/test/resources/input2/mmax_23647/project.mmax
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<mmax_project>
+<turns></turns>
+<words>Basedata.xml</words>
+<gestures></gestures>
+<keyactions></keyactions>
+</mmax_project>
\ No newline at end of file

From 12664a548e8bbae79da694f1911af45f80db6646 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 4 May 2022 11:42:29 +0200
Subject: [PATCH 194/269] Allow the BANNERAnnotator to set the componentId via
 a parameter.

---
 jcore-banner-ae/pom.xml                       |  4 ++
 .../jcore/ae/banner/BANNERAnnotator.java      |  6 ++-
 .../jcore/ae/banner/desc/jcore-banner-ae.xml  | 38 +++++++++++--------
 3 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/jcore-banner-ae/pom.xml b/jcore-banner-ae/pom.xml
index ec5a25e53..26e4df8e2 100644
--- a/jcore-banner-ae/pom.xml
+++ b/jcore-banner-ae/pom.xml
@@ -58,6 +58,10 @@
             <artifactId>jcore-mallet-2.0.9</artifactId>
             <version>2.1.2</version>
         </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-descriptor-creator</artifactId>
+        </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>julielab-java-utilities</artifactId>
diff --git a/jcore-banner-ae/src/main/java/de/julielab/jcore/ae/banner/BANNERAnnotator.java b/jcore-banner-ae/src/main/java/de/julielab/jcore/ae/banner/BANNERAnnotator.java
index a29132d5c..43b29b9fd 100644
--- a/jcore-banner-ae/src/main/java/de/julielab/jcore/ae/banner/BANNERAnnotator.java
+++ b/jcore-banner-ae/src/main/java/de/julielab/jcore/ae/banner/BANNERAnnotator.java
@@ -50,6 +50,7 @@ public class BANNERAnnotator extends JCasAnnotator_ImplBase {
 
     public static final String PARAM_CONFIG_FILE = "ConfigFile";
     public static final String PARAM_TYPE_MAPPING = "TypeMapping";
+    public static final String PARAM_COMPONENT_ID = "ComponentId";
     private final static Logger log = LoggerFactory.getLogger(BANNERAnnotator.class);
     private Tokenizer tokenizer;
     private DictionaryTagger dictionary;
@@ -64,6 +65,8 @@ public class BANNERAnnotator extends JCasAnnotator_ImplBase {
     private String configFilePath;
     @ConfigurationParameter(name = PARAM_TYPE_MAPPING, mandatory = false, description = "A list of mappings from entity labels to UIMA types in the form <label>=<fully qualified type name>. If not given, all entities will be realized as EntityMention instances.")
     private String[] typeMappings;
+    @ConfigurationParameter(name = PARAM_COMPONENT_ID, mandatory = false, description = "Specifies the value of the 'componentId' feature for created entity annotations. Defaults to the fully qualified name of this class.")
+    private String componentId;
 
     private Map<String, String> typeMap;
     private InputStream modelIs;
@@ -77,6 +80,7 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
             configFilePath = (String) aContext.getConfigParameterValue(PARAM_CONFIG_FILE);
             typeMappings = (String[]) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_TYPE_MAPPING))
                     .orElse(new String[0]);
+            componentId = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_COMPONENT_ID)).orElse(BANNERAnnotator.class.getCanonicalName());
             File configFile = new File(configFilePath);
             if (configFile.exists()) {
                 log.debug("Found configuration file {}", configFile);
@@ -183,7 +187,7 @@ public void process(JCas jcas) throws AnalysisEngineProcessException {
                     if (a instanceof de.julielab.jcore.types.Annotation) {
                         de.julielab.jcore.types.Annotation jcoreA = (de.julielab.jcore.types.Annotation) a;
                         jcoreA.setId("BANNER, " + docId + ": " + geneCount++);
-                        jcoreA.setComponentId(BANNERAnnotator.class.getCanonicalName());
+                        jcoreA.setComponentId(componentId);
                         jcoreA.setConfidence(String.valueOf(mention.getProbability()));
                     }
                     if (a instanceof EntityMention) {
diff --git a/jcore-banner-ae/src/main/resources/de/julielab/jcore/ae/banner/desc/jcore-banner-ae.xml b/jcore-banner-ae/src/main/resources/de/julielab/jcore/ae/banner/desc/jcore-banner-ae.xml
index 6eddce439..1b6be9026 100644
--- a/jcore-banner-ae/src/main/resources/de/julielab/jcore/ae/banner/desc/jcore-banner-ae.xml
+++ b/jcore-banner-ae/src/main/resources/de/julielab/jcore/ae/banner/desc/jcore-banner-ae.xml
@@ -1,29 +1,37 @@
-<?xml version='1.0' encoding='UTF-8'?>
+<?xml version="1.0" encoding="UTF-8"?>
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
-  <primitive>true</primitive>  <annotatorImplementationName>de.julielab.jcore.ae.banner.BANNERAnnotator</annotatorImplementationName>
+  <primitive>true</primitive>
+  <annotatorImplementationName>de.julielab.jcore.ae.banner.BANNERAnnotator</annotatorImplementationName>
   <analysisEngineMetaData>
-    <name>jcore-banner-ae</name>
-    <description />
-    <version>2.6.0-SNAPSHOT</version>
-        <vendor />
+    <name>de.julielab.jcore.ae.banner.BANNERAnnotator</name>
+    <description>Descriptor automatically generated by uimaFIT</description>
+    <version>unknown</version>
+    <vendor>de.julielab.jcore.ae.banner</vendor>
     <configurationParameters>
       <configurationParameter>
         <name>ConfigFile</name>
-        <description>An XML configuration file that does all settings for the BANNER AnalysisEngine. May point to a regular file or a classpath resource.</description>
+        <description>The XML configuration file for BANNER.</description>
         <type>String</type>
         <multiValued>false</multiValued>
         <mandatory>true</mandatory>
       </configurationParameter>
-    <configurationParameter>
+      <configurationParameter>
         <name>TypeMapping</name>
         <description>A list of mappings from entity labels to UIMA types in the form &lt;label&gt;=&lt;fully qualified type name&gt;. If not given, all entities will be realized as EntityMention instances.</description>
         <type>String</type>
         <multiValued>true</multiValued>
         <mandatory>false</mandatory>
       </configurationParameter>
+      <configurationParameter>
+        <name>ComponentId</name>
+        <description>Specifies the value of the 'componentId' feature for created entity annotations. Defaults to the fully qualified name of this class.</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
     </configurationParameters>
-    <configurationParameterSettings />
+    <configurationParameterSettings/>
     <typeSystemDescription>
       <imports>
         <import name="de.julielab.jcore.types.jcore-document-structure-types" />
@@ -32,24 +40,22 @@
         <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types" />
       </imports>
     </typeSystemDescription>
-    <typePriorities />
-    <fsIndexCollection />
+    <fsIndexCollection/>
     <capabilities>
       <capability>
         <inputs>
-          <type allAnnotatorFeatures="true">de.julielab.jcore.types.Sentence</type>
+          <type>de.julielab.jcore.types.Sentence</type>
         </inputs>
         <outputs>
-          <type allAnnotatorFeatures="true">de.julielab.jcore.types.Gene</type>
+          <type>de.julielab.jcore.types.Gene</type>
         </outputs>
-        <languagesSupported />
+        <languagesSupported/>
       </capability>
     </capabilities>
-  <operationalProperties>
+    <operationalProperties>
       <modifiesCas>true</modifiesCas>
       <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
       <outputsNewCASes>false</outputsNewCASes>
     </operationalProperties>
   </analysisEngineMetaData>
-  <resourceManagerConfiguration />
 </analysisEngineDescription>
\ No newline at end of file

From 7da3af4df10586e98d79ab5638f2cfd43359bd2d Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 4 May 2022 11:43:38 +0200
Subject: [PATCH 195/269] Be more lenient for cases where GNormPlus did not
 specify a species for families.

If they are not given, just don't set them.
---
 .../de/julielab/jcore/reader/BioCCasPopulator.java   | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
index 553224c3a..22746e634 100644
--- a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
+++ b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
@@ -253,12 +253,12 @@ private void addFamilyAnnotation(BioCAnnotation annotation, JCas jCas) {
         gene.setSpecificType("FamilyName");
         // e.g.  <infon key="FocusSpecies">NCBITaxonomyID:9606</infon>
         Optional<String> focusSpecies = annotation.getInfon("FocusSpecies");
-        if (!focusSpecies.isPresent())
-            throw new IllegalStateException("A FamilyName annotation does not specify its species: " + annotation);
-        String taxId = focusSpecies.get().substring(15);
-        StringArray speciesArray = new StringArray(jCas, 1);
-        speciesArray.set(0, taxId);
-        gene.setSpecies(speciesArray);
+        if (focusSpecies.isPresent()) {
+            String taxId = focusSpecies.get().substring(15);
+            StringArray speciesArray = new StringArray(jCas, 1);
+            speciesArray.set(0, taxId);
+            gene.setSpecies(speciesArray);
+        }
         gene.addToIndexes();
     }
 

From 548f8606390db6db12292e65d4164ca8f4020770 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 4 May 2022 11:44:04 +0200
Subject: [PATCH 196/269] Add document meta types to the GNP BioC Writer
 descriptor.

---
 .../de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java  | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
index c642193cf..23ca9529a 100644
--- a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
+++ b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
@@ -28,7 +28,8 @@ public BioCDocument populate(JCas jCas) {
         AnnotationIndex<Zone> zoneIndex = jCas.getAnnotationIndex(Zone.type);
         int annotationId = 0;
         for (Zone z : zoneIndex) {
-            if (z.getEnd() - z.getBegin() <= 0)
+            // skip empty zones
+            if (z.getCoveredText().isBlank())
                 continue;
             BioCPassage p = null;
             if (z instanceof Title) {

From 30288958dc17ec692f1d8e44a2bdc90185c3755c Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 4 May 2022 11:44:45 +0200
Subject: [PATCH 197/269] Fix an issue in the EntityEvaluatorConsumer where the
 document ID could not be retrieved.

---
 .../consumer/entityevaluator/OffsetsColumn.java   | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/jcore-julielab-entity-evaluator-consumer/src/main/java/de/julielab/jcore/consumer/entityevaluator/OffsetsColumn.java b/jcore-julielab-entity-evaluator-consumer/src/main/java/de/julielab/jcore/consumer/entityevaluator/OffsetsColumn.java
index 44d08b055..0b5c599d5 100644
--- a/jcore-julielab-entity-evaluator-consumer/src/main/java/de/julielab/jcore/consumer/entityevaluator/OffsetsColumn.java
+++ b/jcore-julielab-entity-evaluator-consumer/src/main/java/de/julielab/jcore/consumer/entityevaluator/OffsetsColumn.java
@@ -15,11 +15,13 @@
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.cas.TOP;
 import org.apache.uima.jcas.tcas.Annotation;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.util.*;
 
 public class OffsetsColumn extends Column {
-
+private final static Logger log = LoggerFactory.getLogger(OffsetsColumn.class);
     private OffsetMode offsetMode;
     private JCoReTreeMapAnnotationIndex<Long, ? extends Annotation> sentenceIndex;
     private OffsetScope offsetScope;
@@ -61,9 +63,14 @@ public Deque<String> getValue(TOP a, JCas aJCas) {
 
             if (offsetScope == OffsetScope.Sentence) {
                 Annotation s = sentenceIndex.get(an);
-                if (this.offsetMode == OffsetMode.NonWsCharacters)
-                    numWsMap = getNumWsMapForSentence(s);
-                annotationOffset = s.getBegin();
+                if (s != null) {
+                    if (this.offsetMode == OffsetMode.NonWsCharacters)
+                        numWsMap = getNumWsMapForSentence(s);
+                    annotationOffset = s.getBegin();
+                } else {
+                    log.warn("There was no sentence for annotation {}, returning begin offset as -1.", an);
+                    annotationOffset = -1;
+                }
             }
 
             final String offsets = getOffsets(an, numWsMap, annotationOffset);

From a9609e5ffc1213361f1442291a136cf3451ee84c Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 4 May 2022 11:45:32 +0200
Subject: [PATCH 198/269] Add a test to better understand how stopwords
 influence the tagging result.

---
 .../uima/GazetteerAnnotatorTest.java          | 57 +++++++++++--------
 ...tteer.eg.testdict.teststopwords.properties |  8 +++
 .../src/test/resources/stopwords.test         |  1 +
 3 files changed, 43 insertions(+), 23 deletions(-)
 create mode 100644 jcore-lingpipegazetteer-ae/src/test/resources/normalizegazetteer.eg.testdict.teststopwords.properties
 create mode 100644 jcore-lingpipegazetteer-ae/src/test/resources/stopwords.test

diff --git a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java
index 7e3197bd6..329a08d32 100644
--- a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java
+++ b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java
@@ -456,34 +456,13 @@ public void testAnnotateAcronymsWithFullFormEntity() throws Exception {
 			}
 			assertEquals("GENE", next.getSpecificType());
 		}
-		assertEquals( new Integer(1),  counter, "Wrong entity count: ");
+		assertEquals( Integer.valueOf(1),  counter, "Wrong entity count: ");
 	}
 
-	@Test
-	public void testAnnotatorWithTextNormalizationMuh()
-			throws ResourceInitializationException, AnalysisEngineProcessException {
-		ExternalResourceDescription extDesc = ExternalResourceFactory.createExternalResourceDescription(
-				ChunkerProviderImplAlt.class, new File("src/test/resources/normalizegazetteer.properties"));
-		TypeSystemDescription tsDesc = TypeSystemDescriptionFactory
-				.createTypeSystemDescription("de.julielab.jcore.types.jcore-semantics-mention-types");
-
-		AnalysisEngine annotator = AnalysisEngineFactory.createEngine(GazetteerAnnotator.class, tsDesc,
-				GazetteerAnnotator.PARAM_OUTPUT_TYPE, "de.julielab.jcore.types.EntityMention",
-				GazetteerAnnotator.CHUNKER_RESOURCE_NAME, extDesc);
-		JCas jCas = annotator.newJCas();
-
-		jCas.setDocumentText("We shall now describe our system setup followed by our proposed solution, which is a fully distributed and absolute localization solution specifically designed for both one-hop and multi-hop WSNs. Our considered WSN consists of Ns number of sensors randomly placed onto a map of predefined size with Nb number of beacons. Let 𝕊 and 𝔹 be the sets describing all sensors and beacons respectively, where each sensor is noted as Sensori, i ∈ 𝕊 and each beacon is noted as Beaconj, j ∈ 𝔹. Each node either a sensor or a beacon is noted as Nodep, p ∈ 𝕊 ∪ 𝔹, and vector V⃗p is used to represent the coordinate of Nodep. Beacons are placed onto the map with fixed coordinates V⃗j, where j ∈ 𝔹. We assume that each beacon is aware of its own absolute location. Whereas each sensor is unaware of its own location, and is configured with an initial guess of location unrelated to its actual deployed location. The two-dimensional (2-D) localization problem is the estimation of Ns unknown-location coordinates V⃗i, where i ∈ 𝕊.\n");
-		annotator.process(jCas);
 
-		FSIterator<org.apache.uima.jcas.tcas.Annotation> it = jCas.getAnnotationIndex(EntityMention.type).iterator();
-while (it.hasNext()) {
-	Annotation annotation = (Annotation) it.next();
-	System.out.println(annotation.getCoveredText());
-}
-	}
 
 	@Test
-	public void testSontesthalt() throws Exception {
+	public void testGeneRecognition() throws Exception {
 		ExternalResourceDescription extDesc = ExternalResourceFactory.createExternalResourceDescription(
 				ChunkerProviderImplAlt.class, new File("src/test/resources/normalizegazetteer.eg.testdict.properties"));
 		TypeSystemDescription tsDesc = TypeSystemDescriptionFactory
@@ -536,6 +515,38 @@ public void testSontesthalt() throws Exception {
 		assertEquals("Yak1", it.next().getCoveredText());
 	}
 
+	@Test
+	public void testStopwords() throws Exception {
+		ExternalResourceDescription extDesc = ExternalResourceFactory.createExternalResourceDescription(
+				ChunkerProviderImplAlt.class, new File("src/test/resources/normalizegazetteer.eg.testdict.teststopwords.properties"));
+		TypeSystemDescription tsDesc = TypeSystemDescriptionFactory
+				.createTypeSystemDescription("de.julielab.jcore.types.jcore-semantics-mention-types");
+
+		AnalysisEngine annotator = AnalysisEngineFactory.createEngine(GazetteerAnnotator.class, tsDesc,
+				GazetteerAnnotator.PARAM_OUTPUT_TYPE, "de.julielab.jcore.types.EntityMention",
+				GazetteerAnnotator.CHUNKER_RESOURCE_NAME, extDesc);
+
+		JCas jCas = annotator.newJCas();
+
+		// Warning: This text does not make sense ;-)
+		jCas.setDocumentText(
+				"Identification of cDNAs encoding two human alpha class microsomal glutathione and the heterologous expression of glutathione S-transferase alpha-4.");
+
+		annotator.process(jCas);
+
+		Set<String> extractedGenes = new HashSet<>();
+		for (var e : JCasUtil.select(jCas, EntityMention.class)) {
+			extractedGenes.add(e.getCoveredText());
+		}
+		// The stop word list contains the term "glutathione"
+		// The current algorithm in GazetteerAnnotator#filterStopwords(String) computes the fraction that the
+		// stop word has on the whole entity and only rejects it if it exceeds some threshold. For this reason,
+		// the shorter mention is excluded while the longer is retained.
+		assertThat(extractedGenes).doesNotContain("microsomal glutathione");
+		// The whole "glutathione S-transferase alpha-4" is on the stop word list.
+		assertThat(extractedGenes).contains("glutathione S-transferase alpha-4");
+	}
+
 	@Test
 	public void testApproximate() throws Exception {
 		ExternalResourceDescription extDesc = ExternalResourceFactory.createExternalResourceDescription(
diff --git a/jcore-lingpipegazetteer-ae/src/test/resources/normalizegazetteer.eg.testdict.teststopwords.properties b/jcore-lingpipegazetteer-ae/src/test/resources/normalizegazetteer.eg.testdict.teststopwords.properties
new file mode 100644
index 000000000..3eda6cdbb
--- /dev/null
+++ b/jcore-lingpipegazetteer-ae/src/test/resources/normalizegazetteer.eg.testdict.teststopwords.properties
@@ -0,0 +1,8 @@
+DictionaryFile=src/test/resources/eg.testdict
+StopWordFile=src/test/resources/stopwords.test
+NormalizeText=true
+TransliterateText=true
+UseApproximateMatching=true
+MakeVariants=false
+CaseSensitive=false
+
diff --git a/jcore-lingpipegazetteer-ae/src/test/resources/stopwords.test b/jcore-lingpipegazetteer-ae/src/test/resources/stopwords.test
new file mode 100644
index 000000000..b2da0a5ec
--- /dev/null
+++ b/jcore-lingpipegazetteer-ae/src/test/resources/stopwords.test
@@ -0,0 +1 @@
+glutathione

From 0abf57b81c1c2a330b5f98fe18efbc1d8dd38a55 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 4 May 2022 11:45:58 +0200
Subject: [PATCH 199/269] Add document meta pubmed types to the XMI DB reader
 descriptor.

---
 .../julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml   | 1 +
 1 file changed, 1 insertion(+)

diff --git a/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml b/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml
index fa909d57c..6fe2b6a03 100644
--- a/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml
+++ b/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml
@@ -28,6 +28,7 @@
         </configurationParameterSettings>
         <typeSystemDescription>
             <imports>
+                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
                 <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types" />
                 <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types" />
             </imports>

From b42c59d5168c0f79afd043bf700b9ce8d04e11a5 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 4 May 2022 17:09:14 +0200
Subject: [PATCH 200/269] Set the componentId for Genes in the GNP BioC reader.

---
 .../src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java | 1 +
 1 file changed, 1 insertion(+)

diff --git a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
index 22746e634..9788261de 100644
--- a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
+++ b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
@@ -219,6 +219,7 @@ private void addGeneAnnotation(BioCAnnotation annotation, JCas jCas) throws Miss
         // for GNormPlus, there are no discontinuing annotations anyway
         BioCLocation location = annotation.getTotalLocation();
         Gene gene = new Gene(jCas, location.getOffset(), location.getOffset() + location.getLength());
+        gene.setComponentId(GNormPlusFormatMultiplierReader.class.getCanonicalName());
         gene.setSpecificType("Gene");
         // one gene mention might have multiple IDs when there are ranges or enumerations, e.g. "IL2-5", "B7-1 and B7-2" or "B7-1/2"
         String[] geneIds = geneId.get().split(";");

From b5177156b1e93d65f7d75cc5e9d49d9612778f57 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 5 May 2022 08:00:47 +0200
Subject: [PATCH 201/269] Push dependency versions to fix the tests.

---
 jcore-neo4j-relations-consumer/pom.xml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/jcore-neo4j-relations-consumer/pom.xml b/jcore-neo4j-relations-consumer/pom.xml
index 3c5d394e3..8dbc00784 100644
--- a/jcore-neo4j-relations-consumer/pom.xml
+++ b/jcore-neo4j-relations-consumer/pom.xml
@@ -42,13 +42,13 @@
         <dependency>
             <groupId>org.neo4j.test</groupId>
             <artifactId>neo4j-harness</artifactId>
-            <version>4.0.4</version>
+            <version>4.4.2</version>
             <scope>test</scope>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>julielab-neo4j-plugins-concepts-representation</artifactId>
-            <version>3.0.1-SNAPSHOT</version>
+            <version>3.1.0-SNAPSHOT</version>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>

From ecd86ffef66fb32570f250b0ad54bceeeb741842 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 5 May 2022 16:51:35 +0200
Subject: [PATCH 202/269] Support boolean values in RawToken#write.

---
 .../de/julielab/jcore/consumer/es/preanalyzed/RawToken.java     | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/preanalyzed/RawToken.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/preanalyzed/RawToken.java
index 4c73f5b9a..4c4b08e58 100644
--- a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/preanalyzed/RawToken.java
+++ b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/preanalyzed/RawToken.java
@@ -64,6 +64,8 @@ public void write(JsonWriter out, RawToken token) throws IOException {
 				out.value((String) tokenValue);
 			else if (tokenValue instanceof Number)
 				out.value((Number) tokenValue);
+			else if (tokenValue instanceof Boolean)
+				out.value((boolean) tokenValue);
 			else if (tokenValue == null)
 				out.nullValue();
 			else

From 5031fe6c2762427708b260c6176b34acc743ff9c Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 10 May 2022 15:31:01 +0200
Subject: [PATCH 203/269] Exclude the commons-cli library from the biosem-ae.

It collides with the JCoRe pipeline runner and is not needed anyway.
---
 jcore-biosem-ae/pom.xml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/jcore-biosem-ae/pom.xml b/jcore-biosem-ae/pom.xml
index 42cfd7194..321563826 100644
--- a/jcore-biosem-ae/pom.xml
+++ b/jcore-biosem-ae/pom.xml
@@ -39,6 +39,12 @@
             <groupId>de.julielab</groupId>
             <artifactId>biosem-event-extractor</artifactId>
             <version>1.1.7</version>
+            <exclusions>
+                <exclusion>
+                    <groupId>commons-cli</groupId>
+                    <artifactId>commons-cli</artifactId>
+                </exclusion>
+            </exclusions>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>

From ea99de99d284cde71cf5d3c00796d71b79fa3817 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 10 May 2022 15:32:38 +0200
Subject: [PATCH 204/269] Minor changes.

---
 .../jcore/ae/eventflattener/EventFlattener.java    | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/jcore-event-flattener-ae/src/main/java/de/julielab/jcore/ae/eventflattener/EventFlattener.java b/jcore-event-flattener-ae/src/main/java/de/julielab/jcore/ae/eventflattener/EventFlattener.java
index 5a7e09497..10cca7a88 100644
--- a/jcore-event-flattener-ae/src/main/java/de/julielab/jcore/ae/eventflattener/EventFlattener.java
+++ b/jcore-event-flattener-ae/src/main/java/de/julielab/jcore/ae/eventflattener/EventFlattener.java
@@ -40,7 +40,7 @@
  * roles for arguments of those event types. For more information, please refer
  * to http://www.nactem.ac.uk/tsujii/GENIA/SharedTask/detail.shtml#event.
  * 
- * @see http://www.nactem.ac.uk/tsujii/GENIA/SharedTask/detail.shtml#event
+ * @see <url>http://www.nactem.ac.uk/tsujii/GENIA/SharedTask/detail.shtml#event</url>
  *      </p>
  * 
  * @author faessler
@@ -59,9 +59,9 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException {
 			List<EventMention> topEvents = determineTopEvents(aJCas);
 			for (EventMention topEvent : topEvents) {
 				List<EventMention> events = collectEventsInTree(topEvent,
-						new ArrayList<EventMention>());
+						new ArrayList<>());
 				List<ArgumentMention> arguments = collectPrimitiveArguments(
-						topEvent, new ArrayList<ArgumentMention>());
+						topEvent, new ArrayList<>());
 				List<ArgumentMention> agentArguments = null;
 				List<ArgumentMention> patientArguments = null;
 				switch (topEvent.getSpecificType()) {
@@ -69,9 +69,9 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException {
 				case "Positive_regulation":
 				case "Negative_regulation":
 					agentArguments = collectAgentArguments(topEvent,
-							new ArrayList<ArgumentMention>());
+							new ArrayList<>());
 					patientArguments = collectPatientArguments(topEvent,
-							new ArrayList<ArgumentMention>());
+							new ArrayList<>());
 					break;
 				default:
 					break;
@@ -98,7 +98,7 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException {
 		} catch (Exception e) {
 			Header header = (Header) aJCas.getAnnotationIndex(Header.type)
 					.iterator().next();
-			log.error("Exception occurred in document {}: {}",
+			log.error("Exception occurred in document {}:",
 					header.getDocId(), e);
 			throw new AnalysisEngineProcessException(e);
 		}
@@ -178,7 +178,7 @@ private List<ArgumentMention> collectPrimitiveArguments(
 	 * Returns the <tt>EventMention</tt>s in the CAS that are not the argument
 	 * of another event.
 	 * 
-	 * @param events
+	 * @param aJCas
 	 * @return
 	 */
 	private List<EventMention> determineTopEvents(JCas aJCas) {

From 651df2909877bc32cd0506b09d58993e5e850350 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 11 May 2022 18:09:25 +0200
Subject: [PATCH 205/269] Add a RegExReplaceFilter to the Es consumer.

---
 .../es/filter/RegExReplaceFilter.java         | 36 +++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/filter/RegExReplaceFilter.java

diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/filter/RegExReplaceFilter.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/filter/RegExReplaceFilter.java
new file mode 100644
index 000000000..5eb554b76
--- /dev/null
+++ b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/filter/RegExReplaceFilter.java
@@ -0,0 +1,36 @@
+package de.julielab.jcore.consumer.es.filter;
+
+import java.util.List;
+
+/**
+ * Replaces portions of terms according to the given regular expression and replacement string.
+ * @author faessler
+ *
+ */
+public class RegExReplaceFilter extends AbstractFilter {
+
+	private String regex;
+	private String replacement;
+	private boolean replaceAll;
+
+	public RegExReplaceFilter(String regex, String replacement, boolean replaceAll) {
+		this.regex = regex;
+		this.replacement = replacement;
+		this.replaceAll = replaceAll;
+	}
+	
+	@Override
+	public List<String> filter(String input) {
+		newOutput();
+		if (input != null) {
+			output.add(replaceAll ? input.replaceAll(regex, replacement) : input.replaceFirst(regex, replacement));
+		}
+		return output;
+	}
+
+	@Override
+	public Filter copy() {
+		return new RegExReplaceFilter(regex, replacement, replaceAll);
+	}
+
+}

From a586d0082954d57aafeda5b29a862b8373a14edd Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 17 May 2022 11:05:10 +0200
Subject: [PATCH 206/269] Skip lines beginning with '#' in LingpipeGazetteer,
 ChunkerProviderImplAlt.

---
 .../ae/lingpipegazetteer/chunking/ChunkerProviderImplAlt.java   | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ChunkerProviderImplAlt.java b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ChunkerProviderImplAlt.java
index 23f4800d6..302621969 100644
--- a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ChunkerProviderImplAlt.java
+++ b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ChunkerProviderImplAlt.java
@@ -279,6 +279,8 @@ private void readDictionary(InputStream dictFileStream) throws IOException, Anal
 			if (normalize)
 				tokenizerFactory = new IndoEuropeanTokenizerFactory();
 			while ((line = bf.readLine()) != null) {
+				if (line.startsWith("#"))
+					continue;
 				String[] values = line.split("\t");
 				if (values.length != 2) {
 					LOGGER.error("readDictionary() - wrong format of line: " + line);

From 2c9f7b7e003a526d6497b872c265cbb95b0bbb3b Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 17 May 2022 11:14:17 +0200
Subject: [PATCH 207/269] Skip lines beginning with '#' in LingpipeGazetteer,
 ConfigurableChunkerProviderImplAlt.

---
 .../chunking/ConfigurableChunkerProviderImplAlt.java            | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ConfigurableChunkerProviderImplAlt.java b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ConfigurableChunkerProviderImplAlt.java
index 8d9e63b44..f0809f759 100644
--- a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ConfigurableChunkerProviderImplAlt.java
+++ b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ConfigurableChunkerProviderImplAlt.java
@@ -239,6 +239,8 @@ private void readDictionary(InputStream dictFileStream) throws IOException, Anal
             if (normalize)
                 tokenizerFactory = new IndoEuropeanTokenizerFactory();
             while ((line = bf.readLine()) != null) {
+                if (line.startsWith("#"))
+                    continue;
                 String[] values = line.split("\t");
                 if (values.length != 2) {
                     LOGGER.error("readDictionary() - wrong format of line: " + line);

From 43958b30337818a2245305c396fedeab87fe468d Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 23 May 2022 18:47:37 +0200
Subject: [PATCH 208/269] Adding a feature to first delete index documents
 associated with the current CAS document and then index anew.

In the relation index there is the issue that, depending on some non-deterministic annotation orderings, the relation annotation IDs differ from pipeline run to pipeline run. This causes duplicate index documents to appear merely because their IDs differ due to these side effects. The new feature should allow first deleting all index documents associated with the PubMed document to avoid this issue.
Currently working on a test for this.
---
 .../consumer/es/ElasticSearchConsumer.java    | 85 ++++++++++++++++++-
 .../consumer/es/ElasticSearchConsumerIT.java  | 44 +++++++++-
 2 files changed, 126 insertions(+), 3 deletions(-)

diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/ElasticSearchConsumer.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/ElasticSearchConsumer.java
index e92be2e6a..8bef9330a 100644
--- a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/ElasticSearchConsumer.java
+++ b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/ElasticSearchConsumer.java
@@ -35,18 +35,26 @@ public class ElasticSearchConsumer extends AbstractCasToJsonConsumer {
      */
     public static final String PARAM_TYPE = "type";
     public static final String PARAM_BATCH_SIZE = "batchSize";
+    public static final String PARAM_DELETE_DOCS_BEFORE_INDEXING = "deleteDocumentsBeforeIndexing";
+    public static final String PARAM_DOC_ID_FIELD = "documentIdField";
     final Logger log = LoggerFactory.getLogger(ElasticSearchConsumer.class);
     @ConfigurationParameter(name = PARAM_URLS, description = "A list of URLs pointing to different nodes of the ElasticSearch cluster, e.g. http://localhost:9300/. Documents will be sent bulk-wise to the nodes in a round-robin fashion.")
     private String[] urls;
     @ConfigurationParameter(name = PARAM_INDEX_NAME, description = "The ElasticSearch index name to send the created documents to.")
     private String indexName;
-    @ConfigurationParameter(name = PARAM_TYPE, mandatory = false, description = "The index type the generated documents should have. The types are removed from ElasticSearch with version 7 and should omitted for ES >= 7.")
+    @ConfigurationParameter(name = PARAM_TYPE, mandatory = false, description = "The index type the generated documents should have. The types are removed from ElasticSearch with version 7 and should be omitted for ES >= 7.")
     private String type;
     @ConfigurationParameter(name = PARAM_BATCH_SIZE, mandatory = false, description = "The number of documents to be sent to ElasticSearch in a single batch. Defaults to 50.")
     private int batchSize;
+    @ConfigurationParameter(name = PARAM_DELETE_DOCS_BEFORE_INDEXING, mandatory = false, description = "Whether or not to delete documents with the docId of the UIMA CASes in ElasticSearch prior to indexing. This is useful when parts of the document are indexed whose IDs are not stable or that might change after document updates and would not just be overwritten when indexing anew. Defaults to false.")
+    private boolean deleteDocsBeforeIndexing;
+    @ConfigurationParameter(name = PARAM_DOC_ID_FIELD, mandatory = false, description = "Required when " + PARAM_DELETE_DOCS_BEFORE_INDEXING + " is set to true. This should be an existing index field that contains the document ID of each CAS. It is used to remove existing index documents related to the CAS document ID prior to indexing.")
+    private String docIdField;
 
     private List<String> bulkCommand;
+    private List<String> docIdsToDelete;
     private HttpPost[] indexPosts;
+    private HttpPost[] indexDeletes;
 
     private int urlIndex = 0;
 
@@ -62,6 +70,8 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
         type = (String) getContext().getConfigParameterValue(PARAM_TYPE);
         batchSize = Optional.ofNullable((Integer) getContext().getConfigParameterValue(PARAM_BATCH_SIZE)).orElse(50);
         bulkCommand = new ArrayList<>(4000);
+        deleteDocsBeforeIndexing = (boolean) Optional.ofNullable(getContext().getConfigParameterValue(PARAM_DELETE_DOCS_BEFORE_INDEXING)).orElse(false);
+        docIdField = (String) getContext().getConfigParameterValue(PARAM_DOC_ID_FIELD);
 
         httpclient = HttpClientBuilder.create().build();
         if (urls != null) {
@@ -75,10 +85,24 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
             }
         }
 
+        if (deleteDocsBeforeIndexing) {
+            indexDeletes = new HttpPost[urls.length];
+            for (int i = 0; i < urls.length; i++) {
+                String url = urls[i];
+                if (null != url && url.endsWith("/_bulk"))
+                    url = url.replace("/_bulk/?", "") + "/" + indexName + "/" + "_delete_by_query";
+                indexDeletes[i] = new HttpPost(url);
+                indexDeletes[i].addHeader("Content-Type", "application/x-ndjson");
+            }
+            docIdsToDelete = new ArrayList<>();
+        }
+
         if (log.isInfoEnabled()) {
             log.info("{}: {}", PARAM_URLS, Arrays.toString(urls));
             log.info("{}: {}", PARAM_INDEX_NAME, indexName);
             log.info("{}: {}", PARAM_TYPE, type);
+            log.info("{}: {}", PARAM_DELETE_DOCS_BEFORE_INDEXING, deleteDocsBeforeIndexing);
+            log.info("{}: {}", PARAM_DOC_ID_FIELD, docIdField);
         }
     }
 
@@ -89,6 +113,10 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException {
             w.start();
             Gson gson = new Gson();
 
+            if (deleteDocsBeforeIndexing) {
+                docIdsToDelete.add(JCoReTools.getDocId(aJCas));
+            }
+
             // This is the default case: For each CAS, create one document. This
             // document is populated with fields by field generators. The field
             // generator classes are delivered by the user.
@@ -157,6 +185,7 @@ public void customBatchProcessComplete() throws AnalysisEngineProcessException {
         super.batchProcessComplete();
         log.debug("Batch of {} documents is sent to ElasticSearch.", docNum);
         docNum = 0;
+        deleteDocuments();
         postBulkIndexAction();
     }
 
@@ -164,9 +193,61 @@ public void customBatchProcessComplete() throws AnalysisEngineProcessException {
     public void collectionProcessComplete() throws AnalysisEngineProcessException {
         super.collectionProcessComplete();
         log.info("Collection complete.");
+        deleteDocuments();
         postBulkIndexAction();
     }
 
+    private void deleteDocuments() throws AnalysisEngineProcessException {
+        if (deleteDocsBeforeIndexing) {
+            // Post to all the ElasticSearch nodes in a round-robin fashion.
+            HttpPost indexDelete = indexDeletes[urlIndex];
+            urlIndex = (urlIndex + 1) % indexDeletes.length;
+            try {
+                int lastIndex = 0;
+                List<String> subList;
+                do {
+                    subList = docIdsToDelete.subList(lastIndex, Math.min(bulkCommand.size(), lastIndex + 1000));
+                    if (subList.isEmpty())
+                        continue;
+                    lastIndex += subList.size();
+                    log.debug("Delete {} documents in index {}.", subList.size() / 2, indexName);
+                    long time = System.currentTimeMillis();
+                    StringBuilder deleteQuery = new StringBuilder();
+                    deleteQuery.append("{\"query\":{\"terms\":{\"").append(docIdField).append("\":[");
+                    for (int i = 0; i < subList.size(); i++) {
+                        String docId = subList.get(i);
+                        deleteQuery.append("\"").append(docId).append("\"");
+                        if (i < subList.size() - 1)
+                            deleteQuery.append(",");
+                    }
+                    deleteQuery.append("]}}}");
+                    StringEntity deleteByQueryEntity = new StringEntity(deleteQuery.toString(), "UTF-8");
+                    indexDelete.setEntity(deleteByQueryEntity);
+                    HttpResponse response = httpclient.execute(indexDelete);
+                    int statusCode = response.getStatusLine().getStatusCode();
+                    HttpEntity responseEntity = response.getEntity();
+                    if (statusCode > 200) {
+                        log.error("The server responded with a non-OK status code: {}", statusCode);
+                        log.error("Response status line: {}", response.getStatusLine());
+                        log.error("Response body: {}", EntityUtils.toString(responseEntity));
+                        log.error("Delete-by-query command was: {}", deleteQuery);
+                    }
+                    EntityUtils.consume(responseEntity);
+                    time = System.currentTimeMillis() - time;
+                    log.debug("Sending took {}ms ({}s) and returned status code {}", time, time / 1000, statusCode);
+                } while (null != subList && !subList.isEmpty());
+            } catch (IOException e) {
+                log.error("Error when sending data to ElasticSearch:", e);
+                throw new AnalysisEngineProcessException(e);
+            } finally {
+                indexDelete.reset();
+                bulkCommand.clear();
+            }
+
+            docIdsToDelete.clear();
+        }
+    }
+
     private void postBulkIndexAction() throws AnalysisEngineProcessException {
         if (bulkCommand.isEmpty())
             return;
@@ -175,7 +256,7 @@ private void postBulkIndexAction() throws AnalysisEngineProcessException {
         urlIndex = (urlIndex + 1) % indexPosts.length;
         try {
             int lastIndex = 0;
-            List<String> subList = null;
+            List<String> subList;
             do {
                 subList = bulkCommand.subList(lastIndex, Math.min(bulkCommand.size(), lastIndex + 1000));
                 if (subList.isEmpty())
diff --git a/jcore-elasticsearch-consumer/src/test/java/de/julielab/jcore/consumer/es/ElasticSearchConsumerIT.java b/jcore-elasticsearch-consumer/src/test/java/de/julielab/jcore/consumer/es/ElasticSearchConsumerIT.java
index c780ee2f9..4e0e87a49 100644
--- a/jcore-elasticsearch-consumer/src/test/java/de/julielab/jcore/consumer/es/ElasticSearchConsumerIT.java
+++ b/jcore-elasticsearch-consumer/src/test/java/de/julielab/jcore/consumer/es/ElasticSearchConsumerIT.java
@@ -1,8 +1,10 @@
 package de.julielab.jcore.consumer.es;
 
+import de.julielab.java.utilities.IOStreamUtilities;
 import de.julielab.jcore.consumer.es.preanalyzed.Document;
 import de.julielab.jcore.consumer.es.preanalyzed.RawToken;
 import de.julielab.jcore.types.Header;
+import de.julielab.jcore.utility.JCoReTools;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
 import org.apache.uima.fit.factory.JCasFactory;
@@ -18,9 +20,13 @@
 import org.testcontainers.junit.jupiter.Testcontainers;
 import org.testcontainers.shaded.com.fasterxml.jackson.databind.ObjectMapper;
 
+import java.io.BufferedWriter;
+import java.io.OutputStreamWriter;
+import java.net.HttpURLConnection;
 import java.net.URL;
 import java.time.Duration;
 import java.util.Map;
+import java.util.Random;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 
@@ -31,7 +37,7 @@ public class ElasticSearchConsumerIT {
     private final static Logger log = LoggerFactory.getLogger(ElasticSearchConsumerIT.class);
     // in case we need to disable X-shield: https://stackoverflow.com/a/51172136/1314955
     @Container
-    public static GenericContainer es = new GenericContainer("docker.elastic.co/elasticsearch/elasticsearch:7.0.1")
+    public static GenericContainer es = new GenericContainer("docker.elastic.co/elasticsearch/elasticsearch:7.17.0")
             .withEnv("xpack.security.enabled", "false")
             .withEnv("discovery.type", "single-node")
             .withExposedPorts(9200)
@@ -63,6 +69,41 @@ public void testMinimal() throws Exception {
         assertEquals(jCas.getDocumentText(), ((Map) map.get("_source")).get("text"));
     }
 
+    @Test
+    public void testDeleteDocumentsBeforeIndexing() throws Exception {
+        final Random r = new Random();
+        final URL countUrl = new URL("http://localhost:" + es.getMappedPort(9200) + "/" + TEST_INDEX + "/_count");
+        final HttpURLConnection urlConnection = (HttpURLConnection) countUrl.openConnection();
+        urlConnection.setRequestMethod("POST");
+        urlConnection.setDoOutput(true);
+        urlConnection.setRequestProperty("Content-Type", "application/json");
+        final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-document-meta-types");
+        final AnalysisEngine consumer = AnalysisEngineFactory.createEngine(ElasticSearchConsumer.class,
+                ElasticSearchConsumer.PARAM_INDEX_NAME, TEST_INDEX,
+                ElasticSearchConsumer.PARAM_URLS, "http://localhost:" + es.getMappedPort(9200),
+                ElasticSearchConsumer.PARAM_FIELD_GENERATORS, new String[]{"de.julielab.jcore.consumer.es.ElasticSearchConsumerIT$TestFieldGenerator"});
+        for (int i = 0; i < 10; i++) {
+            jCas.setDocumentText("Some text.");
+            final Header header = new Header(jCas);
+            // get some random ID; this allows documents to exist multiple times in the index
+            header.setDocId(String.valueOf(r.nextInt()));
+            header.addToIndexes();
+            consumer.process(jCas);
+            jCas.reset();
+        }
+        consumer.collectionProcessComplete();
+        Thread.sleep(3000);
+        try(BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(urlConnection.getOutputStream()))){
+            bw.write("{\"query\":{\"match_all\":{}}}");
+        }
+        System.out.println(IOStreamUtilities.getStringFromInputStream(urlConnection.getInputStream()));
+
+        final URL url = new URL("http://localhost:" + es.getMappedPort(9200) + "/" + TEST_INDEX + "/_doc/987");
+        final ObjectMapper om = new ObjectMapper();
+        final Map<?, ?> map = om.readValue(url.openStream(), Map.class);
+        assertEquals(jCas.getDocumentText(), ((Map) map.get("_source")).get("text"));
+    }
+
     /**
      * This class is passed by name as parameter to the test consumer AE.
      */
@@ -74,6 +115,7 @@ public TestFieldGenerator(FilterRegistry filterRegistry) {
         @Override
         public Document addFields(JCas aJCas, Document doc) {
             doc.addField("text", new RawToken(aJCas.getDocumentText()));
+            doc.addField("docId", new RawToken(JCoReTools.getDocId(aJCas)));
             return doc;
         }
     }

From 7dcb5352a2010650473bca5539ba0559ca0e2e6b Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 24 May 2022 16:27:48 +0200
Subject: [PATCH 209/269] Add a `getAnnotationsBetween` method.

To find the tokens between two entities, for example.
---
 .../jcore/utility/JCoReAnnotationTools.java   | 39 +++++++++++++++++--
 .../utility/JCoReAnnotationToolsTest.java     | 29 ++++++++++++++
 2 files changed, 64 insertions(+), 4 deletions(-)

diff --git a/jcore-utilities/src/main/java/de/julielab/jcore/utility/JCoReAnnotationTools.java b/jcore-utilities/src/main/java/de/julielab/jcore/utility/JCoReAnnotationTools.java
index 04eb42a78..a580bcd94 100644
--- a/jcore-utilities/src/main/java/de/julielab/jcore/utility/JCoReAnnotationTools.java
+++ b/jcore-utilities/src/main/java/de/julielab/jcore/utility/JCoReAnnotationTools.java
@@ -18,6 +18,7 @@
  **/
 package de.julielab.jcore.utility;
 
+import de.julielab.jcore.utility.index.JCoReOverlapAnnotationIndex;
 import org.apache.commons.lang3.Range;
 import org.apache.uima.cas.FSIterator;
 import org.apache.uima.jcas.JCas;
@@ -26,10 +27,9 @@
 
 import java.lang.reflect.Constructor;
 import java.lang.reflect.InvocationTargetException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.List;
+import java.util.*;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
 
 // import de.julielab.jcore.types.Annotation;
 
@@ -588,4 +588,35 @@ public static <T extends Annotation> T getLastOverlappingAnnotation(JCas aJCas,
         return null;
     }
 
+    /**
+     * <p>Determines and returns the annotations in an annotation sequence (e.g. tokens) whose offsets are between the two input annotations (e.g. some entities we want to get their token distance of).</p>
+     * @param a1 An annotation.
+     * @param a2 Another annotation.
+     * @param underlyingAnnotationIndex An overlap index to efficiently find the border the underlying annotations of the sequence to return.
+     * @return The annotations from <tt>underlyingAnnotationSequence</tt> between a1 and a2, excluding annotations overlapping either.
+     */
+    public static <T extends Annotation> List<T> getAnnotationsBetween(Annotation a1, Annotation a2, JCoReOverlapAnnotationIndex<T> underlyingAnnotationIndex) {
+        List<T> tokensInBetween = Collections.emptyList();
+        List<T> underlyingAnnotationSequence = underlyingAnnotationIndex.getBeginIndex();
+        Annotation firstAnnotation = a1.getBegin() <= a2.getBegin() ? a1 : a2;
+        Annotation secondAnnotation = a1.getBegin() <= a2.getBegin() ? a2 : a1;
+        final Optional<T> firstSequenceAnnotation = underlyingAnnotationIndex.search(firstAnnotation).stream().findFirst();
+        final Optional<T> secondSequenceAnnotation = underlyingAnnotationIndex.search(secondAnnotation).stream().findFirst();
+        if (firstSequenceAnnotation.isPresent() && secondSequenceAnnotation.isPresent()) {
+             int firstSequenceAnnotationIndex = Collections.binarySearch(underlyingAnnotationSequence, firstSequenceAnnotation.get(), Comparator.comparingInt(Annotation::getBegin));
+             int secondSequenceAnnotationIndex = Collections.binarySearch(underlyingAnnotationSequence, secondSequenceAnnotation.get(), Comparator.comparingInt(Annotation::getBegin));
+            if (firstSequenceAnnotationIndex != -1 && secondSequenceAnnotationIndex != -1) {
+                // move the first and second token outside of the spans of the input annotations
+                while(firstSequenceAnnotationIndex < secondSequenceAnnotationIndex && underlyingAnnotationSequence.get(firstSequenceAnnotationIndex).getBegin() < firstAnnotation.getEnd())
+                    ++firstSequenceAnnotationIndex;
+                while(secondSequenceAnnotationIndex > firstSequenceAnnotationIndex && underlyingAnnotationSequence.get(secondSequenceAnnotationIndex).getEnd() > secondAnnotation.getBegin())
+                    --secondSequenceAnnotationIndex;
+                if (firstSequenceAnnotationIndex != secondSequenceAnnotationIndex) {
+                    tokensInBetween = IntStream.rangeClosed(firstSequenceAnnotationIndex, secondSequenceAnnotationIndex).mapToObj(underlyingAnnotationSequence::get).collect(Collectors.toList());
+                }
+            }
+        }
+        return tokensInBetween;
+    }
+
 }
diff --git a/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReAnnotationToolsTest.java b/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReAnnotationToolsTest.java
index 9a101452b..37e3ffbff 100644
--- a/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReAnnotationToolsTest.java
+++ b/jcore-utilities/src/test/java/de/julielab/jcore/utility/JCoReAnnotationToolsTest.java
@@ -18,6 +18,7 @@
 package de.julielab.jcore.utility;
 
 import de.julielab.jcore.types.*;
+import de.julielab.jcore.utility.index.JCoReOverlapAnnotationIndex;
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.cas.CAS;
 import org.apache.uima.cas.FSIterator;
@@ -33,6 +34,7 @@
 import org.slf4j.LoggerFactory;
 
 import java.io.FileInputStream;
+import java.util.ArrayList;
 import java.util.List;
 
 import static org.junit.jupiter.api.Assertions.*;
@@ -470,4 +472,31 @@ public void testGetLastOverlappingAnnotation() throws Exception {
 		Token result = JCoReAnnotationTools.getLastOverlappingAnnotation(jcas, em, Token.class);
 		assertEquals(t4, result);
 	}
+
+	@Test
+	public void testGetAnnotationsBetween() throws Exception{
+		final JCas jcas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types");
+		// create some token sequence; omit white spaces for simplicity
+		List<Token> tokenList = new ArrayList<>();
+		JCoReOverlapAnnotationIndex<Token> tokenIndex = new JCoReOverlapAnnotationIndex<>();
+		for (int i = 0; i < 100; i++) {
+			final Token token = new Token(jcas, i * 5, i * 5 + 5);
+			tokenList.add(token);
+			tokenIndex.index(token);
+		}
+		tokenIndex.freeze();
+		final List<Token> between1 = JCoReAnnotationTools.getAnnotationsBetween(new Annotation(jcas, 0, 2), new Annotation(jcas, 497, 500), tokenIndex);
+		assertEquals(98, between1.size());
+		// the same setup as above but with switched annotations
+		final List<Token> between2 = JCoReAnnotationTools.getAnnotationsBetween(new Annotation(jcas, 497, 500), new Annotation(jcas, 0, 2), tokenIndex);
+		assertEquals(98, between2.size());
+		// the input annotations overlap, there should be no output
+		final List<Token> between3 = JCoReAnnotationTools.getAnnotationsBetween(new Annotation(jcas, 1, 10), new Annotation(jcas, 0, 2), tokenIndex);
+		assertEquals(0, between3.size());
+		final List<Token> between4 = JCoReAnnotationTools.getAnnotationsBetween(new Annotation(jcas, 255, 260), new Annotation(jcas, 235, 240), tokenIndex);
+		assertEquals(3, between4.size());
+		// the annotations are out of the token span
+		final List<Token> between5 = JCoReAnnotationTools.getAnnotationsBetween(new Annotation(jcas, 1000, 1005), new Annotation(jcas, 600, 6005), tokenIndex);
+		assertEquals(0, between5.size());
+	}
 }

From 52bb3b9b6cc79b3c7f97cfefbb9ae8df870ba734 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 25 May 2022 12:31:05 +0200
Subject: [PATCH 210/269] Add a function to delete index documents before
 sending the current documents to the index.

This is useful when the index documents have IDs that are not the document ID itself. For example, when indexing entities, the index document IDs will be something like the document ID plus the entity index in the document. If those entity index documents have a field disclosing the original document ID, this field can be used to delete all index documents associated with that document first. If the entity tagger is then changed, for example, old, deprecated documents can be removed instead of staying in the index, e.g. when the new tagger recognizes fewer entities than the old one.
---
 .../es/AbstractCasToJsonConsumer.java         | 38 +++++----
 .../consumer/es/ElasticSearchConsumer.java    | 17 ++--
 .../consumer/es/ElasticSearchConsumerIT.java  | 85 +++++++++++++------
 3 files changed, 89 insertions(+), 51 deletions(-)

diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/AbstractCasToJsonConsumer.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/AbstractCasToJsonConsumer.java
index 7cd73ef1b..ecbde6219 100644
--- a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/AbstractCasToJsonConsumer.java
+++ b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/AbstractCasToJsonConsumer.java
@@ -124,25 +124,27 @@ protected Document convertCasToDocument(JCas aJCas) throws AnalysisEngineProcess
 			if (doc.isEmpty())
 				log.debug("Document for document with ID {} does not contain any non-empty fields.",
 						JCoReTools.getDocId(aJCas));
-			String docId = JCoReTools.getDocId(aJCas);
-			if (null != idField) {
-				IFieldValue idFieldValue = doc.get(idField);
-				if (idFieldValue instanceof RawToken) {
-					docId = String.valueOf(((RawToken) idFieldValue).token);
-				} else if (idFieldValue instanceof PreanalyzedFieldValue) {
-					PreanalyzedFieldValue preAnalyzedIdValue = (PreanalyzedFieldValue) idFieldValue;
-					docId = preAnalyzedIdValue.fieldString;
-				} else
-					throw new IllegalArgumentException("Class " + idFieldValue.getClass() + " for value of field "
-							+ idField + " is not supported as ID field value");
+			if (doc.getId() == null || doc.getId().isBlank()) {
+				String docId = JCoReTools.getDocId(aJCas);
+				if (null != idField) {
+					IFieldValue idFieldValue = doc.get(idField);
+					if (idFieldValue instanceof RawToken) {
+						docId = String.valueOf(((RawToken) idFieldValue).token);
+					} else if (idFieldValue instanceof PreanalyzedFieldValue) {
+						PreanalyzedFieldValue preAnalyzedIdValue = (PreanalyzedFieldValue) idFieldValue;
+						docId = preAnalyzedIdValue.fieldString;
+					} else
+						throw new IllegalArgumentException("Class " + idFieldValue.getClass() + " for value of field "
+								+ idField + " is not supported as ID field value");
+				}
+				if (null != idPrefix)
+					docId = idPrefix + docId;
+				if (docId == null)
+					throw new AnalysisEngineProcessException(new IllegalStateException(
+							"Could neither get a document ID from the generated document nor from the CAS directly. The generated document is: "
+									+ gson.toJson(doc)));
+				doc.setId(docId);
 			}
-			if (null != idPrefix)
-				docId = idPrefix + docId;
-			if (docId == null)
-				throw new AnalysisEngineProcessException(new IllegalStateException(
-						"Could neither get a document ID from the generated document nor from the CAS directly. The generated document is: "
-								+ gson.toJson(doc)));
-			doc.setId(docId);
 			return doc;
 		} catch (Exception e) {
 			log.error("Error with document ID {}.", JCoReTools.getDocId(aJCas));
diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/ElasticSearchConsumer.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/ElasticSearchConsumer.java
index 8bef9330a..94d8ba622 100644
--- a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/ElasticSearchConsumer.java
+++ b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/ElasticSearchConsumer.java
@@ -73,6 +73,9 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
         deleteDocsBeforeIndexing = (boolean) Optional.ofNullable(getContext().getConfigParameterValue(PARAM_DELETE_DOCS_BEFORE_INDEXING)).orElse(false);
         docIdField = (String) getContext().getConfigParameterValue(PARAM_DOC_ID_FIELD);
 
+        if (deleteDocsBeforeIndexing && docIdField == null)
+            throw new ResourceInitializationException(new IllegalArgumentException(PARAM_DELETE_DOCS_BEFORE_INDEXING + " is true but no " + PARAM_DOC_ID_FIELD + " was specified."));
+
         httpclient = HttpClientBuilder.create().build();
         if (urls != null) {
             indexPosts = new HttpPost[urls.length];
@@ -90,9 +93,11 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
             for (int i = 0; i < urls.length; i++) {
                 String url = urls[i];
                 if (null != url && url.endsWith("/_bulk"))
-                    url = url.replace("/_bulk/?", "") + "/" + indexName + "/" + "_delete_by_query";
+                    url = url.replace("/_bulk/?", "");
+                url += "/" + indexName + "/" + "_delete_by_query";
                 indexDeletes[i] = new HttpPost(url);
                 indexDeletes[i].addHeader("Content-Type", "application/x-ndjson");
+
             }
             docIdsToDelete = new ArrayList<>();
         }
@@ -206,11 +211,11 @@ private void deleteDocuments() throws AnalysisEngineProcessException {
                 int lastIndex = 0;
                 List<String> subList;
                 do {
-                    subList = docIdsToDelete.subList(lastIndex, Math.min(bulkCommand.size(), lastIndex + 1000));
+                    subList = docIdsToDelete.subList(lastIndex, Math.min(docIdsToDelete.size(), lastIndex + 1000));
                     if (subList.isEmpty())
                         continue;
                     lastIndex += subList.size();
-                    log.debug("Delete {} documents in index {}.", subList.size() / 2, indexName);
+                    log.debug("Delete {} documents in index {}.", subList.size(), indexName);
                     long time = System.currentTimeMillis();
                     StringBuilder deleteQuery = new StringBuilder();
                     deleteQuery.append("{\"query\":{\"terms\":{\"").append(docIdField).append("\":[");
@@ -241,10 +246,8 @@ private void deleteDocuments() throws AnalysisEngineProcessException {
                 throw new AnalysisEngineProcessException(e);
             } finally {
                 indexDelete.reset();
-                bulkCommand.clear();
+                docIdsToDelete.clear();
             }
-
-            docIdsToDelete.clear();
         }
     }
 
@@ -262,7 +265,7 @@ private void postBulkIndexAction() throws AnalysisEngineProcessException {
                 if (subList.isEmpty())
                     continue;
                 lastIndex += subList.size();
-                log.debug("Sending {} documents to index {}.", subList.size() / 2, indexName);
+                log.debug("Sending {} documents to index {}.", subList.size(), indexName);
                 long time = System.currentTimeMillis();
                 // The bulk format requires us to have a newline also after the
                 // last
diff --git a/jcore-elasticsearch-consumer/src/test/java/de/julielab/jcore/consumer/es/ElasticSearchConsumerIT.java b/jcore-elasticsearch-consumer/src/test/java/de/julielab/jcore/consumer/es/ElasticSearchConsumerIT.java
index 4e0e87a49..a3b6507c9 100644
--- a/jcore-elasticsearch-consumer/src/test/java/de/julielab/jcore/consumer/es/ElasticSearchConsumerIT.java
+++ b/jcore-elasticsearch-consumer/src/test/java/de/julielab/jcore/consumer/es/ElasticSearchConsumerIT.java
@@ -6,6 +6,7 @@
 import de.julielab.jcore.types.Header;
 import de.julielab.jcore.utility.JCoReTools;
 import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
 import org.apache.uima.fit.factory.JCasFactory;
 import org.apache.uima.jcas.JCas;
@@ -21,12 +22,15 @@
 import org.testcontainers.shaded.com.fasterxml.jackson.databind.ObjectMapper;
 
 import java.io.BufferedWriter;
+import java.io.IOException;
 import java.io.OutputStreamWriter;
 import java.net.HttpURLConnection;
 import java.net.URL;
 import java.time.Duration;
 import java.util.Map;
-import java.util.Random;
+import java.util.function.Supplier;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 
@@ -71,43 +75,70 @@ public void testMinimal() throws Exception {
 
     @Test
     public void testDeleteDocumentsBeforeIndexing() throws Exception {
-        final Random r = new Random();
-        final URL countUrl = new URL("http://localhost:" + es.getMappedPort(9200) + "/" + TEST_INDEX + "/_count");
-        final HttpURLConnection urlConnection = (HttpURLConnection) countUrl.openConnection();
-        urlConnection.setRequestMethod("POST");
-        urlConnection.setDoOutput(true);
-        urlConnection.setRequestProperty("Content-Type", "application/json");
         final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-document-meta-types");
         final AnalysisEngine consumer = AnalysisEngineFactory.createEngine(ElasticSearchConsumer.class,
                 ElasticSearchConsumer.PARAM_INDEX_NAME, TEST_INDEX,
                 ElasticSearchConsumer.PARAM_URLS, "http://localhost:" + es.getMappedPort(9200),
                 ElasticSearchConsumer.PARAM_FIELD_GENERATORS, new String[]{"de.julielab.jcore.consumer.es.ElasticSearchConsumerIT$TestFieldGenerator"});
-        for (int i = 0; i < 10; i++) {
-            jCas.setDocumentText("Some text.");
-            final Header header = new Header(jCas);
-            // get some random ID; this allows documents to exist multiple times in the index
-            header.setDocId(String.valueOf(r.nextInt()));
-            header.addToIndexes();
-            consumer.process(jCas);
-            jCas.reset();
-        }
-        consumer.collectionProcessComplete();
-        Thread.sleep(3000);
-        try(BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(urlConnection.getOutputStream()))){
-            bw.write("{\"query\":{\"match_all\":{}}}");
-        }
-        System.out.println(IOStreamUtilities.getStringFromInputStream(urlConnection.getInputStream()));
+        // The indexing code is put into a lambda so we don't have to repeat ourselves
+        Runnable doIndex = () -> {
+            try {
+//                for (int j = 0; j < 2; ++j) {
+                    for (int i = 0; i < 10; i++) {
+                        jCas.setDocumentText("Some text.");
+                        final Header header = new Header(jCas);
+                        header.setDocId(String.valueOf(i));
+                        header.addToIndexes();
+                        consumer.process(jCas);
+                        jCas.reset();
+                    }
+//                }
+                consumer.collectionProcessComplete();
+            } catch (AnalysisEngineProcessException e) {
+                throw new RuntimeException(e);
+            }
+        };
+        Supplier<Integer> getNumDocuments = () -> {
+            try {
+                Thread.sleep(3000);
+                final URL countUrl = new URL("http://localhost:" + es.getMappedPort(9200) + "/" + TEST_INDEX + "/_count");
+                final HttpURLConnection urlConnection = (HttpURLConnection) countUrl.openConnection();
+                urlConnection.setRequestMethod("POST");
+                urlConnection.setDoOutput(true);
+                urlConnection.setRequestProperty("Content-Type", "application/json");
+                try (BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(urlConnection.getOutputStream()))) {
+                    bw.write("{\"query\":{\"match_all\":{}}}");
+                }
+                final String response = IOStreamUtilities.getStringFromInputStream(urlConnection.getInputStream());
+                final Matcher matcher = Pattern.compile("count\":([0-9]+)").matcher(response);
+                matcher.find();
+                return Integer.parseInt(matcher.group(1));
+            } catch (InterruptedException| IOException e) {
+                throw new RuntimeException(e);
+            }
+        };
 
-        final URL url = new URL("http://localhost:" + es.getMappedPort(9200) + "/" + TEST_INDEX + "/_doc/987");
-        final ObjectMapper om = new ObjectMapper();
-        final Map<?, ?> map = om.readValue(url.openStream(), Map.class);
-        assertEquals(jCas.getDocumentText(), ((Map) map.get("_source")).get("text"));
+        doIndex.run();
+        doIndex.run();
+        // we expect 20 documents although we have indexed the same documents twice; the reason is that the index
+        // document ID is generated from a running counter (see TestFieldGenerator) to simulate the situation where we index individual entities or relations
+        // that have a document ID different from the main docId
+        assertEquals(20, getNumDocuments.get());
+
+        // now activate delete-before-index. After indexing anew, there should be only 10 documents in the index
+        consumer.setConfigParameterValue(ElasticSearchConsumer.PARAM_DELETE_DOCS_BEFORE_INDEXING, true);
+        consumer.setConfigParameterValue(ElasticSearchConsumer.PARAM_DOC_ID_FIELD, "docId");
+        consumer.reconfigure();
+        doIndex.run();
+        assertEquals(10, getNumDocuments.get());
     }
 
     /**
      * This class is passed by name as parameter to the test consumer AE.
      */
     public static class TestFieldGenerator extends FieldGenerator {
+        private int internalTestIdCounter = 0;
+
         public TestFieldGenerator(FilterRegistry filterRegistry) {
             super(filterRegistry);
         }
@@ -116,6 +147,8 @@ public TestFieldGenerator(FilterRegistry filterRegistry) {
         public Document addFields(JCas aJCas, Document doc) {
             doc.addField("text", new RawToken(aJCas.getDocumentText()));
             doc.addField("docId", new RawToken(JCoReTools.getDocId(aJCas)));
+            // some diverging index document ID; we use this to test if the delete-before-index function works
+            doc.setId("divergingid" + internalTestIdCounter++);
             return doc;
         }
     }

From 465e1356bf6e9a320d3f8479cc2b2946b735fa4e Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 26 May 2022 10:10:23 +0200
Subject: [PATCH 211/269] Bump Flair version to 0.11.3.

While 0.6.1 should still work, there is a dependency glitch to transformers which causes a missing library 'sacremoses'.
---
 .github/workflows/maven.yml                   |  2 +-
 .../es/desc/jcore-elasticsearch-consumer.xml  | 27 ++++++++++++++-----
 2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
index 2c3de94b2..1d065c4e8 100644
--- a/.github/workflows/maven.yml
+++ b/.github/workflows/maven.yml
@@ -22,7 +22,7 @@ jobs:
       - name: Install python dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install flair==0.6.1
+          pip install flair==0.11.3
       - uses: actions/checkout@v2
       - name: Set up JDK 11
         uses: actions/setup-java@v2
diff --git a/jcore-elasticsearch-consumer/src/main/resources/de/julielab/jcore/consumer/es/desc/jcore-elasticsearch-consumer.xml b/jcore-elasticsearch-consumer/src/main/resources/de/julielab/jcore/consumer/es/desc/jcore-elasticsearch-consumer.xml
index c2334321e..3b120a9d7 100644
--- a/jcore-elasticsearch-consumer/src/main/resources/de/julielab/jcore/consumer/es/desc/jcore-elasticsearch-consumer.xml
+++ b/jcore-elasticsearch-consumer/src/main/resources/de/julielab/jcore/consumer/es/desc/jcore-elasticsearch-consumer.xml
@@ -1,11 +1,10 @@
-<?xml version='1.0' encoding='UTF-8'?>
+<?xml version="1.0" encoding="UTF-8"?>
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <primitive>true</primitive>
     <annotatorImplementationName>de.julielab.jcore.consumer.es.ElasticSearchConsumer</annotatorImplementationName>
     <analysisEngineMetaData>
         <name>JCore ElasticSearch Consumer</name>
-        <version>2.6.0-SNAPSHOT</version>
         <configurationParameters>
             <configurationParameter>
                 <name>urls</name>
@@ -23,7 +22,7 @@
             </configurationParameter>
             <configurationParameter>
                 <name>type</name>
-                <description>The index type the generated documents should have. The types are removed from ElasticSearch with version 7 so this parameter is set to have the same value for all documents.</description>
+                <description>The index type the generated documents should have. The types are removed from ElasticSearch with version 7 and should be omitted for ES &gt;= 7.</description>
                 <type>String</type>
                 <multiValued>false</multiValued>
                 <mandatory>false</mandatory>
@@ -35,6 +34,20 @@
                 <multiValued>false</multiValued>
                 <mandatory>false</mandatory>
             </configurationParameter>
+            <configurationParameter>
+                <name>deleteDocumentsBeforeIndexing</name>
+                <description>Whether or not to delete documents with the docId of the UIMA CASes in ElasticSearch prior to indexing. This is useful when parts of the document are indexed whose IDs are not stable or that might change after document updates and would not just be overwritten when indexing anew. Defaults to false.</description>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>documentIdField</name>
+                <description>Required when deleteDocumentsBeforeIndexing is set to true. This should be an existing index field that contains the document ID of each CAS. It is used to remove existing index documents related to the CAS document ID prior to indexing.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
             <configurationParameter>
                 <name>FieldGenerators</name>
                 <description>An array of qualified Java class names. Each enumerated class must implement the FieldGenerator interface and is delivered by the user. These classes will be applied to the consumed CAS and populate Document instances with fields and thus determine the structure and content of the output documents. The field values are derived from CAS data. FieldGenerators always populate a single Document instance with fields. If multiple documents must be created for each CAS, refer to the DocumentGenerators parameter.</description>
@@ -71,10 +84,10 @@
                 <mandatory>false</mandatory>
             </configurationParameter>
         </configurationParameters>
-        <configurationParameterSettings />
-        <typeSystemDescription />
-        <fsIndexCollection />
-        <capabilities />
+        <configurationParameterSettings/>
+        <typeSystemDescription/>
+        <fsIndexCollection/>
+        <capabilities/>
         <operationalProperties>
             <modifiesCas>true</modifiesCas>
             <multipleDeploymentAllowed>true</multipleDeploymentAllowed>

From 21e860ff825432f6862c279d70fe319a811a4a2a Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Sun, 19 Jun 2022 10:39:22 +0200
Subject: [PATCH 212/269] Resolves #137.

---
 jcore-annotation-adder-ae/README.md           |   4 +-
 .../FeatureBasedTSVFormat.java                | 102 ++++++++++++++++++
 .../ExternalTextAnnotation.java               |   9 ++
 .../FeatureBasedTSVFormatTest.java            |  27 +++++
 4 files changed, 140 insertions(+), 2 deletions(-)
 create mode 100644 jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/FeatureBasedTSVFormat.java
 create mode 100644 jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/annotationformat/FeatureBasedTSVFormatTest.java

diff --git a/jcore-annotation-adder-ae/README.md b/jcore-annotation-adder-ae/README.md
index cf0a558ff..c3e1f9fe0 100644
--- a/jcore-annotation-adder-ae/README.md
+++ b/jcore-annotation-adder-ae/README.md
@@ -28,11 +28,11 @@ For document class annotations, no offset mode is required, obviously. Whether t
 
 **3. External Resource Dependencies**
 
-This component requires an external resource given with the `AnnotationSource` key. This dependency definition is pre-configured in the provided default descriptor and must be added to point to the correct annotation source.
+This component requires an external resource given with the `AnnotationSource` key. This dependency definition is pre-configured in the provided default descriptor and must be adapted to point to the correct annotation source.
 
 The external dependency may currently be a file which is read completely into an in-memory map by the `de.julielab.jcore.ae.annotationadder.annotationsources.InMemoryFileTextAnnotationProvider` class for textual annotations with offsets or by the `de.julielab.jcore.ae.annotationadder.annotationsources.InMemoryFileDocumentClassAnnotationProvider` class for document classes. Both provider classes implement the required external resource interface `de.julielab.jcore.ae.annotationadder.annotationsources.AnnotationProvider`.
 
-Other approaches, that are possible easier on the resources - might be implemented if necessary.
+Other approaches that are possibly easier on the resources might be implemented if necessary.
 
 Currently, the external resource definition looks as follows:
 
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/FeatureBasedTSVFormat.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/FeatureBasedTSVFormat.java
new file mode 100644
index 000000000..6b57a162a
--- /dev/null
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/FeatureBasedTSVFormat.java
@@ -0,0 +1,102 @@
+package de.julielab.jcore.ae.annotationadder.annotationformat;
+
+import de.julielab.jcore.ae.annotationadder.annotationrepresentations.ExternalTextAnnotation;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class FeatureBasedTSVFormat implements AnnotationFormat<ExternalTextAnnotation> {
+    private final static Logger log = LoggerFactory.getLogger(FeatureBasedTSVFormat.class);
+    private String[] header;
+    private boolean withHeader;
+    private Integer uimaTypeIndex;
+    private List<Class<?>> columnDataTypes;
+
+    @Override
+    public ExternalTextAnnotation parse(String data) {
+        if (data == null || data.startsWith("#"))
+            return null;
+        final String[] record = data.split("\t");
+        if (record.length < 3)
+            throw new IllegalArgumentException("Expected at least 3 column format providing document ID, begin and end offset for the annotation but got " + record.length + " columns: " + data);
+        if (withHeader && header == null) {
+            header = record;
+            return null;
+        }
+        if (columnDataTypes == null)
+            columnDataTypes = new ArrayList<>(header.length);
+        if (uimaTypeIndex == null) {
+            uimaTypeIndex = -1;
+            for (int i = 0; i < header.length; i++) {
+                if (header[i].equals("uima_type"))
+                    uimaTypeIndex = i;
+            }
+            if (uimaTypeIndex == 0)
+                throw new IllegalArgumentException("Found the uima_type column at index 0. However, the first column is reserved for the document ID.");
+        }
+        if (columnDataTypes.isEmpty())
+            determineColumnDataTypes(record);
+        String docId = record[0];
+        String type = uimaTypeIndex >= 0 ? record[uimaTypeIndex] : null;
+        ExternalTextAnnotation externalTextAnnotation = new ExternalTextAnnotation(docId, 0, 0, type);
+        externalTextAnnotation.setPayloadFeatureValues(true);
+        for (int i = 1; i < Math.min(header.length, record.length); i++) {
+            String featureName = header[i];
+            String columnValue = record[i];
+            if (!featureName.equals("uima_type"))
+                externalTextAnnotation.addPayload(featureName, convertValueToFieldDataType(columnValue, i));
+        }
+
+        return externalTextAnnotation;
+    }
+
+    private Object convertValueToFieldDataType(String columnValue, int columnIndex) {
+        final Class<?> columnDataType = columnDataTypes.get(columnIndex);
+        if (columnDataType.equals(Integer.class))
+            return Integer.parseInt(columnValue);
+        else if (columnDataType.equals(Double.class))
+            return Double.parseDouble(columnValue);
+        else if (columnDataType.equals(Boolean.class))
+            return Boolean.parseBoolean(columnValue);
+        return columnValue.intern();
+    }
+
+    private void determineColumnDataTypes(String[] record) {
+        for (int i = 0; i < record.length; i++) {
+            String value = record[i];
+            try {
+                Integer.parseInt(value);
+                columnDataTypes.add(Integer.class);
+                continue;
+            } catch (NumberFormatException e) {
+                // ignore
+            }
+            try {
+                Double.parseDouble(value);
+                columnDataTypes.add(Double.class);
+                continue;
+            } catch (NumberFormatException e) {
+                // ignore
+            }
+            if (value.equalsIgnoreCase("false") || value.equalsIgnoreCase("true")) {
+                columnDataTypes.add(Boolean.class);
+                continue;
+            }
+            // no other type detected, this seems to be an actual String
+            columnDataTypes.add(String.class);
+        }
+        log.info("Identified the data types of columns {} as {}", header, columnDataTypes);
+    }
+
+    @Override
+    public void hasHeader(boolean withHeader) {
+        this.withHeader = withHeader;
+    }
+
+    @Override
+    public void setColumnNames(String[] header) {
+        this.header = header;
+    }
+}
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationrepresentations/ExternalTextAnnotation.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationrepresentations/ExternalTextAnnotation.java
index 7c1dd7c03..bd91fafa8 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationrepresentations/ExternalTextAnnotation.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationrepresentations/ExternalTextAnnotation.java
@@ -10,6 +10,7 @@ public class ExternalTextAnnotation implements TextAnnotation {
     private int start;
     private int end;
     private String uimaType;
+    private boolean payloadFeatureValues;
     private Map<String, Object> payload;
 
     public ExternalTextAnnotation(String documentId, int start, int end, String uimaType) {
@@ -65,4 +66,12 @@ public Object getPayload(String key) {
     public Collection<String> getPayloadKeys() {
         return payload != null ? payload.keySet() : Collections.emptySet();
     }
+
+    public boolean isPayloadFeatureValues() {
+        return payloadFeatureValues;
+    }
+
+    public void setPayloadFeatureValues(boolean payloadFeatureValues) {
+        this.payloadFeatureValues = payloadFeatureValues;
+    }
 }
diff --git a/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/annotationformat/FeatureBasedTSVFormatTest.java b/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/annotationformat/FeatureBasedTSVFormatTest.java
new file mode 100644
index 000000000..ab216a64e
--- /dev/null
+++ b/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/annotationformat/FeatureBasedTSVFormatTest.java
@@ -0,0 +1,27 @@
+package de.julielab.jcore.ae.annotationadder.annotationformat;
+
+import de.julielab.jcore.ae.annotationadder.annotationrepresentations.ExternalTextAnnotation;
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;
+
+class FeatureBasedTSVFormatTest {
+
+    @Test
+    void parse() {
+        FeatureBasedTSVFormat format = new FeatureBasedTSVFormat();
+        format.hasHeader(true);
+        // should be ignored
+        assertNull(format.parse("# comment"));
+        // should be stored as header but not return something
+        assertNull(format.parse("docId\tbegin\tend\tcomponentId\tuima_type\tspecificType"));
+        ExternalTextAnnotation extAnnotation = format.parse("123\t0\t5\tGoldAnnotation\tde.julielab.jcore.types.Gene\tprotein");
+        assertEquals("123", extAnnotation.getDocumentId());
+        assertEquals(0, extAnnotation.getStart());
+        assertEquals(0, extAnnotation.getEnd());
+        assertEquals("de.julielab.jcore.types.Gene", extAnnotation.getUimaType());
+        assertEquals("protein", extAnnotation.getPayload("specificType"));
+        assertEquals("GoldAnnotation", extAnnotation.getPayload("componentId"));
+    }
+}
\ No newline at end of file

From a8dda5dc2fe4c8d2677666dd9cbcff416ea78dd4 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Sun, 19 Jun 2022 15:38:51 +0200
Subject: [PATCH 213/269] Resolves #138.

---
 jcore-annotation-adder-ae/pom.xml             |   5 +
 .../AnnotationAdderHelper.java                |  28 ++-
 .../annotationformat/AnnotationFormat.java    |  27 +++
 .../DocumentClassAnnotationFormat.java        |  21 +-
 .../FeatureBasedTSVFormat.java                |  46 ++--
 .../SimpleTSVEntityAnnotationFormat.java      |  43 +++-
 ...tyWithDocumentTextShaAnnotationFormat.java |  24 ++
 .../ExternalTextAnnotation.java               |  13 ++
 .../annotationsources/H2AnnotationSource.java | 219 ++++++++++++++++++
 .../H2TextAnnotationProvider.java             |  17 ++
 ...rce.java => InMemoryAnnotationSource.java} |   6 +-
 ...ryFileDocumentClassAnnotationProvider.java |   2 +-
 .../InMemoryFileTextAnnotationProvider.java   |  46 +---
 .../TextAnnotationProvider.java               |  58 +++++
 .../AnnotationAdderAnnotatorTest.java         |  33 ++-
 .../FeatureBasedTSVFormatTest.java            |   2 +-
 .../geneannotations_character_offsets.tsv     |   6 +-
 .../uima/GazetteerAnnotatorTest.java          |  25 ++
 18 files changed, 540 insertions(+), 81 deletions(-)
 create mode 100644 jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/H2AnnotationSource.java
 create mode 100644 jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/H2TextAnnotationProvider.java
 rename jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/{FileAnnotationSource.java => InMemoryAnnotationSource.java} (86%)
 create mode 100644 jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/TextAnnotationProvider.java

diff --git a/jcore-annotation-adder-ae/pom.xml b/jcore-annotation-adder-ae/pom.xml
index dc4379f04..88318f459 100644
--- a/jcore-annotation-adder-ae/pom.xml
+++ b/jcore-annotation-adder-ae/pom.xml
@@ -53,6 +53,11 @@
             <artifactId>commons-codec</artifactId>
             <version>1.13</version>
         </dependency>
+        <dependency>
+            <groupId>com.h2database</groupId>
+            <artifactId>h2</artifactId>
+            <version>2.1.214</version>
+        </dependency>
     </dependencies>
 
     <name>JCoRe Annotation Adder</name>
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderHelper.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderHelper.java
index a3c87e749..219d4d286 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderHelper.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderHelper.java
@@ -7,6 +7,8 @@
 import org.apache.commons.lang3.StringUtils;
 import org.apache.uima.cas.CASException;
 import org.apache.uima.cas.FSIterator;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.TypeSystem;
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
@@ -146,6 +148,7 @@ public List<Token> createTokenList(JCas jCas, AnnotationAdderConfiguration confi
     }
 
     public void setAnnotationPayloadsToFeatures(Annotation annotation, ExternalTextAnnotation a) {
+        final TypeSystem ts = annotation.getCAS().getTypeSystem();
         Collection<String> keys = a.getPayloadKeys();
         if (!keys.isEmpty())
             featureSetters = new HashMap<>();
@@ -154,13 +157,36 @@ public void setAnnotationPayloadsToFeatures(Annotation annotation, ExternalTextA
                 Object value = a.getPayload(key);
                 Method setter = featureSetters.get(key);
                 if (setter == null) {
-                    setter = annotation.getClass().getMethod("set" + StringUtils.capitalize(key), value.getClass());
+                    Class<?> valueClass = convertUimaTypeToJavaType(ts.getType(annotation.getClass().getCanonicalName()).getFeatureByBaseName(key).getRange());
+                    setter = annotation.getClass().getMethod("set" + StringUtils.capitalize(key), valueClass);
                     featureSetters.put(key, setter);
                 }
+                // We do this because it is possible that a string feature has values that are actually numbers.
+                // The automatic type detection of some formats will read those as numbers so we might need to
+                // convert here.
+                if (setter.getParameterTypes()[0].equals(String.class))
+                    value = String.valueOf(value);
                 setter.invoke(annotation, value);
             }
         } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException e) {
             e.printStackTrace();
         }
     }
+
+    private Class<?> convertUimaTypeToJavaType(Type type) {
+        switch (type.getName()) {
+            case "uima.cas.String":
+                return String.class;
+            case "uima.cas.Integer":
+                return int.class;
+            case "uima.cas.Double":
+                return double.class;
+            case "uima.cas.Boolean":
+                return boolean.class;
+            case "uima.cas.Long":
+                return long.class;
+            default:
+                throw new IllegalArgumentException("Unsupported type for arbitrary feature-based input columns: " + type);
+        }
+    }
 }
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/AnnotationFormat.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/AnnotationFormat.java
index a0c31a52f..d6d791256 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/AnnotationFormat.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/AnnotationFormat.java
@@ -2,10 +2,37 @@
 
 import de.julielab.jcore.ae.annotationadder.annotationrepresentations.AnnotationData;
 
+import java.util.List;
+
 public interface AnnotationFormat<T extends AnnotationData> {
     T parse(String data);
 
     void hasHeader(boolean withHeader);
 
+    String[] getHeader();
+
+    List<Class<?>> getColumnDataTypes();
+
     void setColumnNames(String[] header);
+
+    int getDocumentIdColumnIndex();
+
+    default Class<?> determineDataType(String value) {
+        Class<?> dataType = String.class;
+        try {
+            Integer.parseInt(value);
+            dataType = Integer.class;
+        } catch (NumberFormatException e) {
+            try {
+                Double.parseDouble(value);
+                dataType = Double.class;
+            } catch (NumberFormatException e2) {
+                if (value.equalsIgnoreCase("false") || value.equalsIgnoreCase("true")) {
+                    dataType = Boolean.class;
+                }
+            }
+        }
+        return dataType;
+    }
+
 }
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/DocumentClassAnnotationFormat.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/DocumentClassAnnotationFormat.java
index bc24816e3..115d8de94 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/DocumentClassAnnotationFormat.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/DocumentClassAnnotationFormat.java
@@ -2,6 +2,10 @@
 
 import de.julielab.jcore.ae.annotationadder.annotationrepresentations.ExternalDocumentClassAnnotation;
 
+import java.util.List;
+
+import static de.julielab.jcore.ae.annotationadder.annotationsources.TextAnnotationProvider.COL_DOC_ID;
+
 public class DocumentClassAnnotationFormat implements AnnotationFormat<ExternalDocumentClassAnnotation> {
     @Override
     public ExternalDocumentClassAnnotation parse(String data) {
@@ -14,7 +18,6 @@ public ExternalDocumentClassAnnotation parse(String data) {
         String docId = record[1];
         String documentClass = record[2].intern();
         String componentId = record[3].intern();
-        String type = null;
         return new ExternalDocumentClassAnnotation(docId, documentClass, confidence, componentId);
     }
 
@@ -23,8 +26,24 @@ public void hasHeader(boolean withHeader) {
         // does nothing right now
     }
 
+    @Override
+    public String[] getHeader() {
+        return new String[]{"confidence", COL_DOC_ID, "documentClass", "componentId"};
+    }
+
+    @Override
+    public List<Class<?>> getColumnDataTypes() {
+        return List.of(Double.class, String.class, String.class, String.class);
+    }
+
     @Override
     public void setColumnNames(String[] header) {
         // does nothing right now
     }
+
+    @Override
+    public int getDocumentIdColumnIndex() {
+        return 1;
+    }
+
 }
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/FeatureBasedTSVFormat.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/FeatureBasedTSVFormat.java
index 6b57a162a..1e83dc73d 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/FeatureBasedTSVFormat.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/FeatureBasedTSVFormat.java
@@ -7,6 +7,8 @@
 import java.util.ArrayList;
 import java.util.List;
 
+import static de.julielab.jcore.ae.annotationadder.annotationsources.TextAnnotationProvider.COL_UIMA_TYPE;
+
 public class FeatureBasedTSVFormat implements AnnotationFormat<ExternalTextAnnotation> {
     private final static Logger log = LoggerFactory.getLogger(FeatureBasedTSVFormat.class);
     private String[] header;
@@ -30,7 +32,7 @@ public ExternalTextAnnotation parse(String data) {
         if (uimaTypeIndex == null) {
             uimaTypeIndex = -1;
             for (int i = 0; i < header.length; i++) {
-                if (header[i].equals("uima_type"))
+                if (header[i].equals(COL_UIMA_TYPE))
                     uimaTypeIndex = i;
             }
             if (uimaTypeIndex == 0)
@@ -45,7 +47,7 @@ public ExternalTextAnnotation parse(String data) {
         for (int i = 1; i < Math.min(header.length, record.length); i++) {
             String featureName = header[i];
             String columnValue = record[i];
-            if (!featureName.equals("uima_type"))
+            if (!featureName.equals(COL_UIMA_TYPE))
                 externalTextAnnotation.addPayload(featureName, convertValueToFieldDataType(columnValue, i));
         }
 
@@ -66,37 +68,37 @@ else if (columnDataType.equals(Boolean.class))
     private void determineColumnDataTypes(String[] record) {
         for (int i = 0; i < record.length; i++) {
             String value = record[i];
-            try {
-                Integer.parseInt(value);
-                columnDataTypes.add(Integer.class);
-                continue;
-            } catch (NumberFormatException e) {
-                // ignore
-            }
-            try {
-                Double.parseDouble(value);
-                columnDataTypes.add(Double.class);
-                continue;
-            } catch (NumberFormatException e) {
-                // ignore
-            }
-            if (value.equalsIgnoreCase("false") || value.equalsIgnoreCase("true")) {
-                columnDataTypes.add(Boolean.class);
-                continue;
-            }
-            // no other type detected, this seems to be an actual String
-            columnDataTypes.add(String.class);
+            Class<?> dataType = determineDataType(value);
+            columnDataTypes.add(dataType);
         }
         log.info("Identified the data types of columns {} as {}", header, columnDataTypes);
     }
 
+
     @Override
     public void hasHeader(boolean withHeader) {
         this.withHeader = withHeader;
     }
 
+    @Override
+    public String[] getHeader() {
+        return header;
+    }
+
+    @Override
+    public List<Class<?>> getColumnDataTypes() {
+        if (columnDataTypes == null)
+            throw new IllegalStateException("The column data types are not yet set. This call must come after the first line of data has been read.");
+        return columnDataTypes;
+    }
+
     @Override
     public void setColumnNames(String[] header) {
         this.header = header;
     }
+
+    @Override
+    public int getDocumentIdColumnIndex() {
+        return 0;
+    }
 }
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityAnnotationFormat.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityAnnotationFormat.java
index bee28da11..a47bc5d55 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityAnnotationFormat.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityAnnotationFormat.java
@@ -2,13 +2,20 @@
 
 import de.julielab.jcore.ae.annotationadder.annotationrepresentations.ExternalTextAnnotation;
 
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+import static de.julielab.jcore.ae.annotationadder.annotationsources.TextAnnotationProvider.*;
+
 public class SimpleTSVEntityAnnotationFormat implements AnnotationFormat<ExternalTextAnnotation> {
     private String[] header;
     private boolean withHeader;
+    private List<Class<?>> columnDataTypes;
 
     @Override
     public ExternalTextAnnotation parse(String data) {
-            if (data == null || data.startsWith("#"))
+        if (data == null || data.startsWith("#"))
             return null;
         final String[] record = data.split("\t");
         if (record.length < 3)
@@ -17,17 +24,30 @@ public ExternalTextAnnotation parse(String data) {
             header = record;
             return null;
         }
+        boolean columnDataTypesWasNull = columnDataTypes == null;
+        if (columnDataTypesWasNull) {
+            columnDataTypes = Stream.of(String.class, Integer.class, Integer.class).collect(Collectors.toList());
+        }
         String docId = record[0];
         int begin = Integer.parseInt(record[1]);
         int end = Integer.parseInt(record[2]);
         String type = null;
-        if (record.length > 3)
+        if (record.length > 3) {
             type = record[3];
+            if (columnDataTypesWasNull)
+                columnDataTypes.add(String.class);
+        }
+        if (header == null && record.length <= 3)
+            header = new String[]{COL_DOC_ID, COL_BEGIN, COL_END, COL_UIMA_TYPE};
         ExternalTextAnnotation externalTextAnnotation = new ExternalTextAnnotation(docId, begin, end, type);
         if (record.length > 4) {
             if (header != null) {
-                for (int i = 4; i < record.length; i++)
+                for (int i = 4; i < record.length; i++) {
                     externalTextAnnotation.addPayload(header[i], record[i]);
+                    if (columnDataTypesWasNull) {
+                        columnDataTypes.add(determineDataType(record[i]));
+                    }
+                }
             }
         }
         return externalTextAnnotation;
@@ -38,8 +58,25 @@ public void hasHeader(boolean withHeader) {
         this.withHeader = withHeader;
     }
 
+    @Override
+    public String[] getHeader() {
+        return header;
+    }
+
+    @Override
+    public List<Class<?>> getColumnDataTypes() {
+        if (columnDataTypes == null)
+            throw new IllegalStateException("The column data types are not yet set. This call must come after the first line of data has been read.");
+        return columnDataTypes;
+    }
+
     @Override
     public void setColumnNames(String[] header) {
         this.header = header;
     }
+
+    @Override
+    public int getDocumentIdColumnIndex() {
+        return 0;
+    }
 }
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityWithDocumentTextShaAnnotationFormat.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityWithDocumentTextShaAnnotationFormat.java
index 0c1c10824..39bdf0016 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityWithDocumentTextShaAnnotationFormat.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationformat/SimpleTSVEntityWithDocumentTextShaAnnotationFormat.java
@@ -2,7 +2,12 @@
 
 import de.julielab.jcore.ae.annotationadder.annotationrepresentations.ExternalTextAnnotation;
 
+import java.util.List;
+
+import static de.julielab.jcore.ae.annotationadder.annotationsources.TextAnnotationProvider.*;
+
 public class SimpleTSVEntityWithDocumentTextShaAnnotationFormat implements AnnotationFormat<ExternalTextAnnotation> {
+    private List<Class<?>> columnDataTypes;
     @Override
     public ExternalTextAnnotation parse(String data) {
         if (data == null || data.startsWith("#"))
@@ -17,6 +22,8 @@ public ExternalTextAnnotation parse(String data) {
         String type = null;
         if (record.length > 4)
             type = record[4].intern();
+        if (columnDataTypes==null)
+            columnDataTypes = List.of(String.class, Integer.class, Integer.class, String.class, String.class);
         final ExternalTextAnnotation externalTextAnnotation = new ExternalTextAnnotation(docId, begin, end, type);
         externalTextAnnotation.addPayload("sha", sha);
         return externalTextAnnotation;
@@ -27,8 +34,25 @@ public void hasHeader(boolean withHeader) {
         // does nothing right now
     }
 
+    @Override
+    public String[] getHeader() {
+        return new String[]{COL_DOC_ID, COL_BEGIN, COL_END, "sha", COL_UIMA_TYPE};
+    }
+
+    @Override
+    public List<Class<?>> getColumnDataTypes() {
+        if (columnDataTypes == null)
+            throw new IllegalStateException("The column data types are not yet set. This call must come after the first line of data has been read.");
+        return columnDataTypes;
+    }
+
     @Override
     public void setColumnNames(String[] header) {
         // does nothing right now
     }
+
+    @Override
+    public int getDocumentIdColumnIndex() {
+        return 0;
+    }
 }
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationrepresentations/ExternalTextAnnotation.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationrepresentations/ExternalTextAnnotation.java
index bd91fafa8..cd43296f0 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationrepresentations/ExternalTextAnnotation.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationrepresentations/ExternalTextAnnotation.java
@@ -5,6 +5,8 @@
 import java.util.HashMap;
 import java.util.Map;
 
+import static de.julielab.jcore.ae.annotationadder.annotationsources.TextAnnotationProvider.*;
+
 public class ExternalTextAnnotation implements TextAnnotation {
     private String documentId;
     private int start;
@@ -59,6 +61,17 @@ public void addPayload(String key, Object value) {
         payload.put(key, value);
     }
 
+    public Map<String, Object> getAllFieldValuesAsMap() {
+        // The payload is copied last, so a payload key equal to a column name replaces that column's value.
+        final Map<String, Object> fieldValues = new HashMap<>();
+        fieldValues.put(COL_DOC_ID, documentId);
+        fieldValues.put(COL_UIMA_TYPE, uimaType);
+        fieldValues.put(COL_BEGIN, start);
+        fieldValues.put(COL_END, end);
+        if (payload != null) fieldValues.putAll(payload);
+        return fieldValues;
+    }
+
     public Object getPayload(String key) {
         return payload != null ? payload.get(key) : null;
     }
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/H2AnnotationSource.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/H2AnnotationSource.java
new file mode 100644
index 000000000..b1e7fbf02
--- /dev/null
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/H2AnnotationSource.java
@@ -0,0 +1,219 @@
+package de.julielab.jcore.ae.annotationadder.annotationsources;
+
+import de.julielab.java.utilities.UriUtilities;
+import de.julielab.jcore.ae.annotationadder.annotationformat.AnnotationFormat;
+import de.julielab.jcore.ae.annotationadder.annotationrepresentations.AnnotationData;
+import de.julielab.jcore.ae.annotationadder.annotationrepresentations.AnnotationList;
+import de.julielab.jcore.ae.annotationadder.annotationrepresentations.ExternalDocumentClassAnnotation;
+import de.julielab.jcore.ae.annotationadder.annotationrepresentations.ExternalTextAnnotation;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang3.NotImplementedException;
+import org.apache.uima.resource.DataResource;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.net.URI;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.sql.*;
+import java.util.*;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import static de.julielab.jcore.ae.annotationadder.annotationsources.TextAnnotationProvider.*;
+
+public class H2AnnotationSource<T extends AnnotationData> implements AnnotationSource<AnnotationList<T>> {
+    private final static Logger log = LoggerFactory.getLogger(H2AnnotationSource.class);
+    private AnnotationFormat<T> format;
+    private Path h2DbPath;
+    private Statement queryStmt;
+    private Class<?> annotationDataClass;
+
+    public H2AnnotationSource(AnnotationFormat<T> format) {
+        this.format = format;
+        if (format.getHeader() == null)
+            throw new IllegalArgumentException("To use the H2AnnotationSource, the input format must define the column headers. The employed format " + format + " does not specify them itself. Thus, the header must be specified in the component descriptor external resource definition.");
+        try {
+            Class.forName("org.h2.Driver");
+        } catch (ClassNotFoundException e) {
+            log.error("Could not load the h2 Driver through 'Class.forName(\"org.h2.Driver\").");
+            throw new IllegalStateException(e);
+        }
+    }
+
+    @Override
+    public void loadAnnotations(URI annotationUri) throws IOException {
+        final Path annotationFilePath = Path.of(annotationUri);
+        h2DbPath = Path.of(annotationFilePath + ".h2");
+        // NOTE(review): H2 presumably stores the database as '<h2DbPath>.mv.db', so this existence check may never succeed - confirm.
+        if (!Files.exists(h2DbPath) || Files.getLastModifiedTime(annotationFilePath).toMillis() > Files.getLastModifiedTime(h2DbPath).toMillis()) {
+            log.info("Source annotation file {} is newer than database file {}. Creating a new database.", annotationFilePath, h2DbPath);
+            try (java.util.stream.Stream<Path> oldDbFiles = Files.list(h2DbPath.getParent())) {
+                oldDbFiles.filter(p -> p.toString().startsWith(h2DbPath.toString())).forEach(p -> FileUtils.deleteQuietly(p.toFile()));
+            }
+            try (Connection conn = DriverManager.getConnection("jdbc:h2:" + h2DbPath, "sa", "")) {
+                conn.setAutoCommit(false);
+                PreparedStatement ps = null;
+                Map<String, Integer> columnIndexes = new HashMap<>();
+                try (BufferedReader br = UriUtilities.getReaderFromUri(annotationUri)) {
+                    final Iterator<T> iterator = br.lines().map(format::parse).filter(Objects::nonNull).iterator();
+                    int psSize = 0;
+                    while (iterator.hasNext()) {
+                        T annotationData = iterator.next();
+                        // The table can only be created after the first annotation item has been parsed because
+                        // the format parser derives the column data types from the data. ps is null until then.
+                        if (ps == null) {
+                            for (int i = 0; i < format.getHeader().length; i++) {
+                                if (format.getHeader()[i].equals("begin"))
+                                    format.getHeader()[i] = COL_BEGIN;
+                                else if (format.getHeader()[i].equals("end"))
+                                    format.getHeader()[i] = COL_END;
+                            }
+                            IntStream.range(0, format.getHeader().length).forEach(i -> columnIndexes.put(format.getHeader()[i], i));
+                            annotationDataClass = annotationData.getClass();
+                            createAnnotationTable(conn, annotationData);
+                            String insertionSql = "INSERT INTO annotations VALUES (" + IntStream.range(0, format.getHeader().length).mapToObj(i -> "?").collect(Collectors.joining(",")) + ")";
+                            ps = conn.prepareStatement(insertionSql);
+                        }
+                        if (annotationData instanceof ExternalDocumentClassAnnotation)
+                            throw new NotImplementedException("ExternalDocumentClassAnnotation data is currently not supprted by the H2AnnotationSource.");
+                        ExternalTextAnnotation textAnnotation = (ExternalTextAnnotation) annotationData;
+                        final Map<String, Object> fieldValues = textAnnotation.getAllFieldValuesAsMap();
+                        for (String columnName : format.getHeader()) {
+                            ps.setObject(columnIndexes.get(columnName) + 1, fieldValues.get(columnName));
+                        }
+                        ps.addBatch();
+                        ++psSize;
+                        if (psSize == 50) {
+                            ps.executeBatch();
+                            psSize = 0;
+                        }
+                    }
+                    if (psSize > 0)
+                        ps.executeBatch();
+                }
+                if (log.isTraceEnabled()) {
+                    int numRows = getCount(conn, "SELECT count(*) FROM annotations");
+                    int numDocIds = getCount(conn, "SELECT count(DISTINCT docId) FROM annotations");
+                    log.trace("Loaded {} entity annotations for {} document IDs.", numRows, numDocIds);
+                }
+                conn.commit();
+            } catch (SQLException e) {
+                log.error("Could not create H2 database at {}", h2DbPath, e);
+                throw new IllegalStateException(e);
+            }
+        }
+    }
+
+    private int getCount(Connection conn, String sql) {
+        // Runs a single-value count query; returns 0 if it yields no row. Statement and result set are closed here.
+        try (Statement stmt = conn.createStatement(); ResultSet rs = stmt.executeQuery(sql)) {
+            if (rs.next())
+                return rs.getInt(1);
+        } catch (SQLException e) {
+            log.error("Could not count rows via SQL query {}", sql, e);
+            throw new IllegalStateException(e);
+        }
+        return 0;
+    }
+
+    private void createAnnotationTable(Connection conn, T annotationData) throws SQLException {
+        final Statement stmt = conn.createStatement();
+        String tableCreationSql = getTableCreationSql(format.getHeader(), format.getColumnDataTypes(), annotationData);
+        try {
+            stmt.execute(tableCreationSql);
+        } catch (SQLException e) {
+            log.error("Could not create the annotation SQL table with command {}", tableCreationSql, e);
+            throw new IllegalStateException(e);
+        }
+        final String indexCreationSql = "CREATE INDEX annotations_doc_id_idx ON annotations (" + format.getHeader()[format.getDocumentIdColumnIndex()] + ")";
+        try {
+            stmt.execute(indexCreationSql);
+        } catch (SQLException e) {
+            log.error("Could not create index on document ID column which should be found at index {} of the header {} with SQL {}.", format.getDocumentIdColumnIndex(), format.getHeader(), indexCreationSql, e);
+            throw new IllegalStateException(e);
+        }
+    }
+
+    /**
+     * Builds the CREATE TABLE statement for the annotations table.
+     * @param header          the column names, in table order
+     * @param columnDataTypes the Java type of each column, read at the same index as its header entry
+     * @param annotationData  not used by this method
+     * @return the SQL text with one "name type" definition per header entry
+     */
+    private String getTableCreationSql(String[] header, List<Class<?>> columnDataTypes, T annotationData) {
+        // Each column definition is "<name> <SQL type>"; the definitions are joined with commas.
+        final String columnDefinitions = IntStream.range(0, header.length)
+                .mapToObj(i -> header[i] + " " + getDbDataType(columnDataTypes.get(i)))
+                .collect(Collectors.joining(","));
+        return "CREATE TABLE annotations (" + columnDefinitions + ")";
+    }
+
+    /** Translates a Java column type into the SQL column type used for the annotations table. */
+    private String getDbDataType(Class<?> dataType) {
+        if (dataType.equals(Integer.class)) {
+            return "INT";
+        }
+        final boolean isDouble = dataType.equals(Double.class);
+        final boolean isBoolean = !isDouble && dataType.equals(Boolean.class);
+        return isDouble ? "DOUBLE" : isBoolean ? "BOOL" : "VARCHAR";
+    }
+
+    @Override
+    public void initialize(DataResource dataResource) throws IOException {
+        log.info("Loading entity annotations from {}", dataResource.getUri());
+        loadAnnotations(dataResource.getUri());
+    }
+
+    /**
+     * Returns all annotations stored for the given document ID; the list is empty if there are none.
+     */
+    @Override
+    public AnnotationList<T> getAnnotations(String id) {
+        // The document ID is bound as a parameter so that quotes in it cannot alter the query.
+        final String sql = "SELECT * FROM annotations WHERE docId=?";
+        try {
+            if (queryStmt == null) {
+                Connection queryConn = DriverManager.getConnection("jdbc:h2:" + h2DbPath, "sa", "");
+                queryStmt = queryConn.prepareStatement(sql);
+            }
+        } catch (SQLException e) {
+            log.error("Could not connect to database at {}", h2DbPath, e);
+            throw new IllegalStateException(e);
+        }
+        // queryStmt is only ever assigned above, so it always holds the prepared query.
+        final PreparedStatement ps = (PreparedStatement) queryStmt;
+        final AnnotationList<T> annotationList = new AnnotationList<>();
+        try {
+            ps.setString(1, id);
+            try (ResultSet rs = ps.executeQuery()) {
+                while (rs.next()) {
+                    if (annotationDataClass == null)
+                        throw new IllegalStateException("The annotation data class should have been recorded when data was read from file but it is null.");
+                    if (!annotationDataClass.equals(ExternalTextAnnotation.class))
+                        throw new NotImplementedException("The annotation class " + annotationDataClass + " is currently not supported by the H2AnnotationSource.");
+                    final ExternalTextAnnotation a = new ExternalTextAnnotation(rs.getString(COL_DOC_ID), rs.getInt(COL_BEGIN), rs.getInt(COL_END), rs.getString(COL_UIMA_TYPE));
+                    for (String columnName : format.getHeader()) {
+                        final Object value = rs.getObject(columnName);
+                        if (value == null || columnName.equals(COL_UIMA_TYPE) || columnName.equals(COL_DOC_ID))
+                            continue;
+                        String payLoadKey = columnName;
+                        if (payLoadKey.equals(COL_BEGIN))
+                            payLoadKey = "begin";
+                        else if (payLoadKey.equals(COL_END))
+                            payLoadKey = "end";
+                        a.addPayload(payLoadKey, value);
+                    }
+                    annotationList.add((T) a);
+                }
+            }
+            return annotationList;
+        } catch (SQLException e) {
+            log.error("Could not retrieve annotation values from the H2 database via SQL query '{}'", sql, e);
+            throw new IllegalStateException(e);
+        }
+    }
+}
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/H2TextAnnotationProvider.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/H2TextAnnotationProvider.java
new file mode 100644
index 000000000..a70c3af5f
--- /dev/null
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/H2TextAnnotationProvider.java
@@ -0,0 +1,17 @@
+package de.julielab.jcore.ae.annotationadder.annotationsources;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class H2TextAnnotationProvider extends TextAnnotationProvider {
+    private final static Logger log = LoggerFactory.getLogger(H2TextAnnotationProvider.class);
+    @Override
+    void initializeAnnotationSource() {
+        annotationSource = new H2AnnotationSource<>(format);
+    }
+
+    @Override
+    Logger getLogger() {
+        return log;
+    }
+}
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/FileAnnotationSource.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/InMemoryAnnotationSource.java
similarity index 86%
rename from jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/FileAnnotationSource.java
rename to jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/InMemoryAnnotationSource.java
index 845c42c95..f82929792 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/FileAnnotationSource.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/InMemoryAnnotationSource.java
@@ -15,12 +15,12 @@
 import java.util.Objects;
 import java.util.stream.Collectors;
 
-public class FileAnnotationSource<T extends AnnotationData> implements AnnotationSource<AnnotationList<T>> {
-    private final static Logger log = LoggerFactory.getLogger(FileAnnotationSource.class);
+public class InMemoryAnnotationSource<T extends AnnotationData> implements AnnotationSource<AnnotationList<T>> {
+    private final static Logger log = LoggerFactory.getLogger(InMemoryAnnotationSource.class);
     private AnnotationFormat<T> format;
     private Map<String, AnnotationList<T>> entitiesByDocId;
 
-    public FileAnnotationSource(AnnotationFormat<T> format) {
+    public InMemoryAnnotationSource(AnnotationFormat<T> format) {
         this.format = format;
     }
 
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/InMemoryFileDocumentClassAnnotationProvider.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/InMemoryFileDocumentClassAnnotationProvider.java
index 731f114ce..69e91f14a 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/InMemoryFileDocumentClassAnnotationProvider.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/InMemoryFileDocumentClassAnnotationProvider.java
@@ -19,7 +19,7 @@ public AnnotationList<ExternalDocumentClassAnnotation> getAnnotations(String id)
     @Override
     public void load(DataResource dataResource) throws ResourceInitializationException {
         // This logic could be made configurable if required so in the future.
-        annotationSource = new FileAnnotationSource(new DocumentClassAnnotationFormat());
+        annotationSource = new InMemoryAnnotationSource(new DocumentClassAnnotationFormat());
         try {
             annotationSource.initialize(dataResource);
         } catch (IOException e) {
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/InMemoryFileTextAnnotationProvider.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/InMemoryFileTextAnnotationProvider.java
index ac89d5b1e..950069570 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/InMemoryFileTextAnnotationProvider.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/InMemoryFileTextAnnotationProvider.java
@@ -1,53 +1,17 @@
 package de.julielab.jcore.ae.annotationadder.annotationsources;
 
-import de.julielab.jcore.ae.annotationadder.annotationformat.AnnotationFormat;
-import de.julielab.jcore.ae.annotationadder.annotationformat.SimpleTSVEntityAnnotationFormat;
-import de.julielab.jcore.ae.annotationadder.annotationrepresentations.AnnotationList;
-import de.julielab.jcore.ae.annotationadder.annotationrepresentations.ExternalTextAnnotation;
-import org.apache.uima.resource.DataResource;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.metadata.ConfigurationParameterSettings;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.IOException;
-import java.lang.reflect.InvocationTargetException;
-import java.util.Optional;
-
-public class InMemoryFileTextAnnotationProvider implements AnnotationProvider<AnnotationList> {
-    public static final String PARAM_ANNOTATION_FORMAT = "AnnotationFormatClass";
-    public static final String PARAM_INPUT_HAS_HEADER = "InputHasHeader";
-    public static final String PARAM_COLUMN_NAMES = "ColumnNames";
+public class InMemoryFileTextAnnotationProvider extends TextAnnotationProvider {
     private final static Logger log = LoggerFactory.getLogger(InMemoryFileTextAnnotationProvider.class);
-    private AnnotationSource<AnnotationList> annotationSource;
-    private AnnotationFormat<ExternalTextAnnotation> format;
-
     @Override
-    public AnnotationList<ExternalTextAnnotation> getAnnotations(String id) {
-        return annotationSource.getAnnotations(id);
+    void initializeAnnotationSource() {
+        annotationSource = new InMemoryAnnotationSource<>(format);
     }
 
     @Override
-    public void load(DataResource dataResource) throws ResourceInitializationException {
-        final ConfigurationParameterSettings parameterSettings = dataResource.getMetaData().getConfigurationParameterSettings();
-        final String formatClassName = (String) Optional.ofNullable(parameterSettings.getParameterValue(PARAM_ANNOTATION_FORMAT)).orElse(SimpleTSVEntityAnnotationFormat.class.getCanonicalName());
-        final boolean hasHeader = (boolean) Optional.ofNullable(parameterSettings.getParameterValue(PARAM_INPUT_HAS_HEADER)).orElse(false);
-        final String[] columnNames = (String[])parameterSettings.getParameterValue(PARAM_COLUMN_NAMES);
-        try {
-            format = (AnnotationFormat<ExternalTextAnnotation>) Class.forName(formatClassName).getDeclaredConstructor().newInstance();
-            format.hasHeader(hasHeader);
-            format.setColumnNames(columnNames);
-        } catch (NoSuchMethodException | InvocationTargetException | InstantiationException | IllegalAccessException | ClassNotFoundException e) {
-            log.error("Could not instantiate class {}", formatClassName);
-            throw new ResourceInitializationException(e);
-        }
-        annotationSource = new FileAnnotationSource(format);
-        try {
-            annotationSource.initialize(dataResource);
-        } catch (IOException e) {
-            throw new ResourceInitializationException(e);
-        }
+    Logger getLogger() {
+        return log;
     }
-
-
 }
diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/TextAnnotationProvider.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/TextAnnotationProvider.java
new file mode 100644
index 000000000..007ac0bae
--- /dev/null
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/TextAnnotationProvider.java
@@ -0,0 +1,58 @@
+package de.julielab.jcore.ae.annotationadder.annotationsources;
+
+import de.julielab.jcore.ae.annotationadder.annotationformat.AnnotationFormat;
+import de.julielab.jcore.ae.annotationadder.annotationformat.SimpleTSVEntityAnnotationFormat;
+import de.julielab.jcore.ae.annotationadder.annotationrepresentations.AnnotationList;
+import de.julielab.jcore.ae.annotationadder.annotationrepresentations.ExternalTextAnnotation;
+import org.apache.uima.resource.DataResource;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.metadata.ConfigurationParameterSettings;
+import org.slf4j.Logger;
+
+import java.io.IOException;
+import java.lang.reflect.InvocationTargetException;
+import java.util.Optional;
+
+public abstract class TextAnnotationProvider implements AnnotationProvider<AnnotationList> {
+    public static final String PARAM_ANNOTATION_FORMAT = "AnnotationFormatClass";
+    public static final String PARAM_INPUT_HAS_HEADER = "InputHasHeader";
+    public static final String PARAM_COLUMN_NAMES = "ColumnNames";
+    public static final String COL_DOC_ID = "docId";
+    public static final String COL_BEGIN = "beginOffset";
+    public static final String COL_END = "endOffset";
+    public static final String COL_UIMA_TYPE = "uimaType";
+    protected Logger log;
+    protected AnnotationSource<AnnotationList<ExternalTextAnnotation>> annotationSource;
+    protected AnnotationFormat<ExternalTextAnnotation> format;
+
+    @Override
+    public AnnotationList<ExternalTextAnnotation> getAnnotations(String id) {
+        return annotationSource.getAnnotations(id);
+    }
+
+    abstract void initializeAnnotationSource();
+
+    abstract Logger getLogger();
+
+    @Override
+    public void load(DataResource dataResource) throws ResourceInitializationException {
+        final ConfigurationParameterSettings parameterSettings = dataResource.getMetaData().getConfigurationParameterSettings();
+        final String formatClassName = (String) Optional.ofNullable(parameterSettings.getParameterValue(PARAM_ANNOTATION_FORMAT)).orElse(SimpleTSVEntityAnnotationFormat.class.getCanonicalName());
+        final boolean hasHeader = (boolean) Optional.ofNullable(parameterSettings.getParameterValue(PARAM_INPUT_HAS_HEADER)).orElse(false);
+        final String[] columnNames = (String[])parameterSettings.getParameterValue(PARAM_COLUMN_NAMES);
+        try {
+            format = (AnnotationFormat<ExternalTextAnnotation>) Class.forName(formatClassName).getDeclaredConstructor().newInstance();
+            format.hasHeader(hasHeader);
+            format.setColumnNames(columnNames);
+        } catch (NoSuchMethodException | InvocationTargetException | InstantiationException | IllegalAccessException | ClassNotFoundException e) {
+            getLogger().error("Could not instantiate class {}", formatClassName);
+            throw new ResourceInitializationException(e);
+        }
+        initializeAnnotationSource();
+        try {
+            annotationSource.initialize(dataResource);
+        } catch (IOException e) {
+            throw new ResourceInitializationException(e);
+        }
+    }
+}
diff --git a/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotatorTest.java b/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotatorTest.java
index 83f2aa54d..d0be14929 100644
--- a/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotatorTest.java
+++ b/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/AnnotationAdderAnnotatorTest.java
@@ -1,9 +1,12 @@
 
 package de.julielab.jcore.ae.annotationadder;
 
+import de.julielab.jcore.ae.annotationadder.annotationsources.H2TextAnnotationProvider;
 import de.julielab.jcore.ae.annotationadder.annotationsources.InMemoryFileDocumentClassAnnotationProvider;
 import de.julielab.jcore.ae.annotationadder.annotationsources.InMemoryFileTextAnnotationProvider;
+import de.julielab.jcore.ae.annotationadder.annotationsources.TextAnnotationProvider;
 import de.julielab.jcore.types.*;
+import org.apache.commons.io.FileUtils;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.analysis_engine.AnalysisEngineDescription;
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
@@ -12,10 +15,13 @@
 import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.resource.ExternalResourceDescription;
+import org.apache.uima.resource.SharedResourceObject;
 import org.assertj.core.data.Offset;
+import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.Test;
 
 import java.io.File;
+import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -25,10 +31,27 @@
  *
  */
 public class AnnotationAdderAnnotatorTest{
+
+    @AfterEach
+    public void cleanup() {
+        Path h2DbPath = Path.of("src", "test", "resources", "geneannotations_character_offsets.tsv.h2.mv.db");
+        FileUtils.deleteQuietly(h2DbPath.toFile());
+    }
+
     @Test
-    public void testCharacterOffsets() throws Exception {
+    public void testCharacterOffsetsInMemory() throws Exception {
+        testCharacterOffsets(InMemoryFileTextAnnotationProvider.class);
+    }
+
+    @Test
+    public void testCharacterOffsetsH2DB() throws Exception {
+        testCharacterOffsets(H2TextAnnotationProvider.class);
+    }
+
+
+    public void testCharacterOffsets(Class<? extends SharedResourceObject> annotationProviderClass) throws Exception {
         final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types", "de.julielab.jcore.types.jcore-semantics-biology-types", "de.julielab.jcore.types.jcore-document-meta-types");
-        final ExternalResourceDescription externalResourceDescription = ExternalResourceFactory.createExternalResourceDescription(InMemoryFileTextAnnotationProvider.class, new File("src/test/resources/geneannotations_character_offsets.tsv"));
+        final ExternalResourceDescription externalResourceDescription = ExternalResourceFactory.createExternalResourceDescription(annotationProviderClass, new File("src/test/resources/geneannotations_character_offsets.tsv"), TextAnnotationProvider.PARAM_COLUMN_NAMES, new String[]{"docId", "begin", "end", "uimaType", "confidence", "specificType"});
         final AnalysisEngine engine = AnalysisEngineFactory.createEngine(AnnotationAdderAnnotator.class, AnnotationAdderAnnotator.KEY_ANNOTATION_SOURCE, externalResourceDescription);
         // Test doc1 (two gene annotations)
         jCas.setDocumentText("BRCA PRKII are the genes of this sentence.");
@@ -132,12 +155,12 @@ public void testHeaderParameter() throws Exception {
 
         assertThat(genes.get(0).getBegin()).isEqualTo(0);
         assertThat(genes.get(0).getEnd()).isEqualTo(4);
-        assertThat(genes.get(0).getSpecificType()).isEqualTo("additionalColumn1");
+        assertThat(genes.get(0).getSpecificType()).isEqualTo("0.1234");
         assertThat(genes.get(0).getComponentId()).isEqualTo("additionalColumn2");
 
         assertThat(genes.get(1).getBegin()).isEqualTo(5);
         assertThat(genes.get(1).getEnd()).isEqualTo(10);
-        assertThat(genes.get(1).getSpecificType()).isEqualTo("additionalColumn1");
+        assertThat(genes.get(1).getSpecificType()).isEqualTo("0.1234");
         assertThat(genes.get(1).getComponentId()).isEqualTo("additionalColumn2");
 
         // Test doc2 (no gene annotations, there will be a warning on DEBUG level)
@@ -159,7 +182,7 @@ public void testHeaderParameter() throws Exception {
         final Gene gene = JCasUtil.selectSingle(jCas, Gene.class);
         assertThat(gene.getBegin()).isEqualTo(0);
         assertThat(gene.getEnd()).isEqualTo(6);
-        assertThat(gene.getSpecificType()).isEqualTo("additionalColumn1");
+        assertThat(gene.getSpecificType()).isEqualTo("0.1234");
         assertThat(gene.getComponentId()).isEqualTo("additionalColumn2");
     }
 
diff --git a/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/annotationformat/FeatureBasedTSVFormatTest.java b/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/annotationformat/FeatureBasedTSVFormatTest.java
index ab216a64e..74e086220 100644
--- a/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/annotationformat/FeatureBasedTSVFormatTest.java
+++ b/jcore-annotation-adder-ae/src/test/java/de/julielab/jcore/ae/annotationadder/annotationformat/FeatureBasedTSVFormatTest.java
@@ -15,7 +15,7 @@ void parse() {
         // should be ignored
         assertNull(format.parse("# comment"));
         // should be stored as header but not return something
-        assertNull(format.parse("docId\tbegin\tend\tcomponentId\tuima_type\tspecificType"));
+        assertNull(format.parse("docId\tbegin\tend\tcomponentId\tuimaType\tspecificType"));
         ExternalTextAnnotation extAnnotation = format.parse("123\t0\t5\tGoldAnnotation\tde.julielab.jcore.types.Gene\tprotein");
         assertEquals("123", extAnnotation.getDocumentId());
         assertEquals(0, extAnnotation.getStart());
diff --git a/jcore-annotation-adder-ae/src/test/resources/geneannotations_character_offsets.tsv b/jcore-annotation-adder-ae/src/test/resources/geneannotations_character_offsets.tsv
index 33babd2dc..1f1f04a44 100644
--- a/jcore-annotation-adder-ae/src/test/resources/geneannotations_character_offsets.tsv
+++ b/jcore-annotation-adder-ae/src/test/resources/geneannotations_character_offsets.tsv
@@ -1,3 +1,3 @@
-doc1	0	4	de.julielab.jcore.types.Gene	additionalColumn1	additionalColumn2
-doc1	5	10	de.julielab.jcore.types.Gene	additionalColumn1	additionalColumn2
-doc3	0	6	de.julielab.jcore.types.Gene	additionalColumn1	additionalColumn2
\ No newline at end of file
+doc1	0	4	de.julielab.jcore.types.Gene	0.1234	additionalColumn2
+doc1	5	10	de.julielab.jcore.types.Gene	0.1234	additionalColumn2
+doc3	0	6	de.julielab.jcore.types.Gene	0.1234	additionalColumn2
\ No newline at end of file
diff --git a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java
index 329a08d32..1afb8c870 100644
--- a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java
+++ b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java
@@ -728,4 +728,29 @@ public void testEncoding() {
 		System.out.println(s2.length());
 	}
 
+	@Test
+	public void testpolar() throws Exception {
+		ExternalResourceDescription extDesc = ExternalResourceFactory.createExternalResourceDescription(
+				ChunkerProviderImplAlt.class, new File("src/test/resources/polartest.properties"));
+		TypeSystemDescription tsDesc = TypeSystemDescriptionFactory
+				.createTypeSystemDescription("de.julielab.jcore.types.jcore-semantics-mention-types");
+
+		AnalysisEngine annotator = AnalysisEngineFactory.createEngine(GazetteerAnnotator.class, tsDesc,
+				GazetteerAnnotator.PARAM_OUTPUT_TYPE, "de.julielab.jcore.types.EntityMention",
+				GazetteerAnnotator.CHUNKER_RESOURCE_NAME, extDesc);
+
+		JCas jCas = annotator.newJCas();
+
+		// Warning: This text does not make sense ;-)
+		jCas.setDocumentText(
+				"Der allgemeinchirurgische Operationsplan sah für den heutigen Mittwoch im Operationssaal 4 insgesamt drei laparoskopische Cholezystektomien vor. Anästhesist Dr. Sven dachte sich: »Das übliche Programm.« Als Ausbildungsassistent im 2. Jahr war Dr. Sven häufiger im Laparoskopiesaal eingeteilt. Hier fühlte er sich sicher, denn er war mit dem Ablauf und den anästhesiologischen und chirurgischen Besonderheiten dieser Eingriffe vertraut. Er kannte inzwischen auch die einzelnen Operateure und ihre speziellen Vorlieben. »Zum Glück operiert heute nicht Oberarzt Dr. Harald«, schwirrte ihm durch den Kopf. Dr. Harald war ein äußerst versierter und schnell operierender Chirurg, dem aber die Wechselzeiten der Anästhesie nie kurz genug waren und der die Anästhesie deshalb stets unter Druck setzte. Heute war Dr. Veikko eingeteilt, ein junger chirurgischer Kollege, der erst vor kurzem seine Facharztprüfung bestanden hatte. Dr. Sven kannte ihn aus der Zeit seines eigenen chirurgischen PJ-Tertials. Einmal pro Woche spielten die beiden außerdem zusammen Fußball in einer Hobbymannschaft. Der erste Eingriff des Tages verlief problemlos. Nachdem Dr. Sven den Patienten im Aufwachraum abgegeben hatte, kehrte er in die Einleitung vom OP 4 zurück. Hier begrüßte er Frau Tränkner, eine 41 Jahre alte, übergewichtige Patientin. Dr. Sven kannte sie von seiner gestrigen präoperativen Visite. Er sah nochmals in ihre Akte und auf das Anästhesieprotokoll. Frau Tränkner wog 96 kg bei 169 cm Körpergröße (BMI 33,6 kg/m2). Sie hatte vor ca. 2 Wochen eine akute Cholezystitis gehabt, war aber jetzt beschwerdefrei. Als Begleiterkrankung hatte Frau Tränkner einen arteriellen Hypertonus, der mit Lisinopril und Metoprolol behandelt wurde, und eine Epilepsie. Seit einem Jahr nahm sie Carbamazepin und war jetzt anfallsfrei. Die Beurteilung des Atemweges erbrachte einen Mallampati-Score von 3 und einen Arné-Score von 13. Alle bestimmten Laborparameter waren unauffällig.");
+
+		annotator.process(jCas);
+
+		Set<String> extractedGenes = new HashSet<>();
+		for (var e : JCasUtil.select(jCas, EntityMention.class)) {
+			System.out.println(e.getCoveredText() + "\t" + e.getSpecificType());
+		}
+	}
+
 }

From 700490f6b7105f7357e9614e5da2f940116fd116 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 20 Jul 2022 17:17:28 +0200
Subject: [PATCH 214/269] Resolves #144.

---
 .../annotationsources/H2AnnotationSource.java | 18 +++++++---
 jcore-biosem-ae/pom.xml                       |  2 +-
 .../jcore/ae/checkpoint/DBCheckpointAE.java   |  2 ++
 .../checkpoint/DocumentReleaseCheckpoint.java |  1 +
 .../jcore/reader/db/DBMultiplier.java         |  9 ++++-
 .../jcore/reader/db/DBMultiplierReader.java   | 34 +++++++++----------
 .../jcore/reader/xmi/XmiDBMultiplier.java     | 14 +++++++-
 7 files changed, 56 insertions(+), 24 deletions(-)

diff --git a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/H2AnnotationSource.java b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/H2AnnotationSource.java
index b1e7fbf02..326c7746c 100644
--- a/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/H2AnnotationSource.java
+++ b/jcore-annotation-adder-ae/src/main/java/de/julielab/jcore/ae/annotationadder/annotationsources/H2AnnotationSource.java
@@ -13,6 +13,7 @@
 import org.slf4j.LoggerFactory;
 
 import java.io.BufferedReader;
+import java.io.File;
 import java.io.IOException;
 import java.net.URI;
 import java.nio.file.Files;
@@ -45,8 +46,8 @@ public H2AnnotationSource(AnnotationFormat<T> format) {
 
     @Override
     public void loadAnnotations(URI annotationUri) throws IOException {
-        final Path annotationFilePath = Path.of(annotationUri);
-        h2DbPath = Path.of(annotationFilePath + ".h2");
+        final Path annotationFilePath = annotationUri.toString().contains("file:/") ? Path.of(annotationUri) : Path.of(annotationUri.toString().replace("file:", "")); // URIs always use '/', never the platform File.separator
+        h2DbPath = annotationFilePath.isAbsolute() ? Path.of(annotationFilePath + ".h2") : Path.of("."+ File.separator+annotationFilePath+".h2"); // "./" prefix gives relative paths a non-null parent
         if (!Files.exists(h2DbPath) || Files.getLastModifiedTime(annotationFilePath).toMillis() < Files.getLastModifiedTime(h2DbPath).toMillis()) {
             log.info("Source annotation file {} is newer than database file {}. Creating a new database.", annotationFilePath, h2DbPath);
             Files.list(h2DbPath.getParent()).filter(p -> p.toString().startsWith(h2DbPath.toString())).forEach(p -> FileUtils.deleteQuietly(p.toFile()));
@@ -59,7 +60,9 @@ public void loadAnnotations(URI annotationUri) throws IOException {
                     final Iterator<T> iterator = br.lines().map(format::parse).filter(Objects::nonNull).iterator();
                     boolean firstDataItem = true;
                     int psSize = 0;
+                    int linesRead = 0;
                     while (iterator.hasNext()) {
+                        ++linesRead;
                         T annotationData = iterator.next();
                         // We need to create the table after the retrieval of the first annotation item because the
                         // format parser derive the data types from the data
@@ -86,9 +89,16 @@ else if (format.getHeader()[i].equals("end"))
                         }
                         ps.addBatch();
                         ++psSize;
-                        if (psSize == 50) {
+                        if (psSize % 50 == 0) {
                             ps.executeBatch();
-                            psSize = 0;
+                        }
+                        if (psSize % 10000 == 0 && log.isTraceEnabled()) {
+                            int numRows = getCount(conn, "SELECT count(*) FROM annotations");
+                            int numDocIds = getCount(conn, "SELECT count(DISTINCT docId) FROM annotations");
+                            log.trace("Loaded {} entity annotations for {} document IDs.", numRows, numDocIds);
+                        }
+                        if (linesRead % 10000 == 0 && log.isTraceEnabled()) {
+                            log.trace("Read {} lines from input {}", linesRead, annotationUri);
                         }
                     }
                     if (psSize > 0)
diff --git a/jcore-biosem-ae/pom.xml b/jcore-biosem-ae/pom.xml
index 321563826..5e86a75ff 100644
--- a/jcore-biosem-ae/pom.xml
+++ b/jcore-biosem-ae/pom.xml
@@ -38,7 +38,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>biosem-event-extractor</artifactId>
-            <version>1.1.7</version>
+            <version>1.1.8-SNAPSHOT</version>
             <exclusions>
                 <exclusion>
                     <groupId>commons-cli</groupId>
diff --git a/jcore-db-checkpoint-ae/src/main/java/de/julielab/jcore/ae/checkpoint/DBCheckpointAE.java b/jcore-db-checkpoint-ae/src/main/java/de/julielab/jcore/ae/checkpoint/DBCheckpointAE.java
index cc9b29c8d..cf6f77e9e 100644
--- a/jcore-db-checkpoint-ae/src/main/java/de/julielab/jcore/ae/checkpoint/DBCheckpointAE.java
+++ b/jcore-db-checkpoint-ae/src/main/java/de/julielab/jcore/ae/checkpoint/DBCheckpointAE.java
@@ -136,6 +136,7 @@ private void customBatchProcessingComplete() throws AnalysisEngineProcessExcepti
      */
     @Override
     public void process(final JCas aJCas) throws AnalysisEngineProcessException {
+        log.trace("Processing jCas instance {}", aJCas); // placeholder defers toString() until TRACE is enabled
         DocumentId documentId;
         try {
             final DBProcessingMetaData dbProcessingMetaData = JCasUtil.selectSingle(aJCas, DBProcessingMetaData.class);
@@ -206,6 +207,7 @@ private void setLastComponent(CoStoSysConnection conn, String
         if (markIsProcessed) {
             log.debug("Marking {} documents to having been processed by component \"{}\".", processedDocumentIds.size(), componentDbName);
             log.debug("SQL: {}", sqlMarkIsProcessed);
+            log.trace("Marking the following document IDS as having been processed: {}", processedDocumentIds);
             updateSubsetTable(conn, processedDocumentIds, sqlMarkIsProcessed);
         }
         try {
diff --git a/jcore-db-checkpoint-ae/src/main/java/de/julielab/jcore/ae/checkpoint/DocumentReleaseCheckpoint.java b/jcore-db-checkpoint-ae/src/main/java/de/julielab/jcore/ae/checkpoint/DocumentReleaseCheckpoint.java
index fd40fa5e1..994063406 100644
--- a/jcore-db-checkpoint-ae/src/main/java/de/julielab/jcore/ae/checkpoint/DocumentReleaseCheckpoint.java
+++ b/jcore-db-checkpoint-ae/src/main/java/de/julielab/jcore/ae/checkpoint/DocumentReleaseCheckpoint.java
@@ -111,6 +111,7 @@ public Set<DocumentId> getReleasedDocumentIds() {
             log.trace("The following {} components are registered for document release: {}", getNumberOfRegisteredComponents(), registeredComponents);
             log.trace("Released document counts: {}", this.releasedDocuments);
             returnedIds = this.releasedDocuments.keySet().stream().filter(k -> this.releasedDocuments.get(k).containsAll(this.registeredComponents)).collect(Collectors.toSet());
+            log.trace("Final Document IDs to release: {}", returnedIds);
             // Remove the completely released documents from the pool of potentially not yet completely released documents.
             returnedIds.forEach(id -> this.releasedDocuments.remove(id));
         }
diff --git a/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplier.java b/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplier.java
index b52c111c5..2dcc1e0d9 100644
--- a/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplier.java
+++ b/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplier.java
@@ -15,10 +15,14 @@
 import org.apache.uima.jcas.cas.FSArray;
 import org.apache.uima.jcas.cas.StringArray;
 import org.apache.uima.resource.ResourceInitializationException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.FileNotFoundException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
+import java.util.stream.Collectors;
 
 /**
  * A multiplier retrieving feature structures of type of {@link RowBatch} in its {@link #process(JCas)} method.
@@ -37,7 +41,7 @@
 @OperationalProperties(outputsNewCases = true)
 @TypeCapability(inputs = {"de.julielab.jcore.types.casmultiplier.RowBatch"})
 public abstract class DBMultiplier extends JCasMultiplier_ImplBase {
-
+private final static Logger log = LoggerFactory.getLogger(DBMultiplier.class);
     protected DataBaseConnector dbc;
     protected DBCIterator<byte[][]> documentDataIterator;
     protected String[] tables;
@@ -104,6 +108,9 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException {
             StringArray primaryKey = (StringArray) identifiers.get(i);
             documentIdsForQuery.add(primaryKey.toArray());
         }
+        if (log.isTraceEnabled()) {
+            log.trace("Received document IDs: {}", documentIdsForQuery.stream().map(o -> Arrays.stream(o).map(Object::toString).collect(Collectors.joining(","))).collect(Collectors.joining(" ; ")));
+        }
         documentDataIterator = dbc.retrieveColumnsByTableSchema(documentIdsForQuery,
                 tables,
                 schemaNames);
diff --git a/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplierReader.java b/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplierReader.java
index 992c64a00..c41bfe4e0 100644
--- a/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplierReader.java
+++ b/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplierReader.java
@@ -67,10 +67,11 @@ public void initialize(UimaContext context) throws ResourceInitializationExcepti
 
     @Override
     public void getNext(JCas jCas) throws CollectionException, IOException {
+        log.trace("jCas instance: {}", jCas); // placeholder defers toString() until TRACE is enabled
         log.trace("Requesting next batch of document IDs from the database.");
         List<Object[]> idList = getNextDocumentIdBatch();
         if (idList.isEmpty())
-            throw new CollectionException(new IllegalStateException("There are no documents to read in the database. Please call hasNext() to check if there is more data to read."));
+            throw new CollectionException(new IllegalStateException("There are no documents to read in the database. Please call hasNext() to check if there is more data to read. Retriever: " + retriever));
         log.trace("Received a list of {} ID from the database.", idList.size());
         RowBatch rowbatch = new RowBatch(jCas);
         FSArray ids = new FSArray(jCas, idList.size());
@@ -171,7 +172,7 @@ private List<Object[]> getNextFromSubset() {
 
         // When this method is called for the first time, no retriever thread
         // will yet exist. Initialize it.
-        if (retriever == null) {
+        if (retriever == null || !fetchIdsProactively) {
             retriever = new DBMultiplierReader.RetrievingThread();
         }
         idList = retriever.getDocumentIds();
@@ -213,14 +214,18 @@ public void close() throws IOException {
      */
     protected class RetrievingThread extends Thread {
         private List<Object[]> ids;
+        private long timestamp = System.currentTimeMillis();
 
         public RetrievingThread() {
             // Only fetch ID batches in advance when the parameter is set to
             // true.
             if (fetchIdsProactively) {
-                log.debug("Fetching ID batches in a background thread.");
+                log.debug("[{}] Fetching ID batches in a background thread.", timestamp);
                 setName(DBMultiplierReader.class.getSimpleName() + " RetrievingThread (" + getName() + ")");
                 start();
+            } else {
+                log.debug("[{}] Fetching new documents (without employing a background thread).", timestamp);
+                run();
             }
         }
 
@@ -234,14 +239,14 @@ public void run() {
             int limit = Math.min(batchSize, totalDocumentCount - numberFetchedDocIDs);
             try {
                 try (CoStoSysConnection ignored = dbc.obtainOrReserveConnection()) {
-                    log.trace("Using connection {} to retrieveAndMark", ignored.getConnection());
+                    log.trace("[{}] Using connection {} to retrieveAndMark", timestamp, ignored.getConnection());
                     ids = dbc.retrieveAndMark(tableName, getClass().getSimpleName(), hostName, pid, limit, selectionOrder);
                     if (log.isTraceEnabled()) {
-                        log.trace("Retrieved the following IDs from the database: {}", ids.stream().map(Arrays::toString).collect(Collectors.joining(", ")));
+                        log.trace("[{}] Retrieved the following IDs from the database: {}", timestamp, ids.stream().map(Arrays::toString).collect(Collectors.joining(", ")));
                     }
                 }
                 numberFetchedDocIDs += ids.size();
-                log.debug("Retrieved {} document IDs to fetch from the database.", ids.size());
+                log.debug("[{}] Retrieved {} document IDs to fetch from the database.", timestamp, ids.size());
             } catch (TableSchemaMismatchException e) {
                 log.error("Table schema mismatch: The active table schema {} specified in the CoStoSys configuration" +
                                 " file {} does not match the columns in the subset table {}: {}", dbc.getActiveTableSchema(),
@@ -254,18 +259,13 @@ public void run() {
         }
 
         public List<Object[]> getDocumentIds() {
-            // If we don't use this as a background thread, we have to get the
-            // IDs now in a sequential manner.
-            if (!fetchIdsProactively) {
-                // Use run as we don't have a use for real threads anyway.
-                log.debug("Fetching new documents (without employing a background thread).");
-                run();
-            }
             try {
-                // If this is a background thread started with start(): Wait for
-                // the IDs to be retrieved, i.e. that run() ends.
-                log.debug("Waiting for the background thread to finish fetching documents to return them.");
-                join();
+                if (fetchIdsProactively) {// If this is a background thread started with start(): Wait for
+                    // the IDs to be retrieved, i.e. that run() ends.
+                    log.debug("[{}] Waiting for the background thread to finish fetching documents to return them.", timestamp);
+                    join();
+                }
+                log.debug("[{}] Delivering {} document IDs", timestamp, ids.size());
                 return ids;
             } catch (InterruptedException e) {
                 log.error("Background ID fetching thread was interrupted", e);
diff --git a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/XmiDBMultiplier.java b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/XmiDBMultiplier.java
index cb0306216..ba2f35eb9 100644
--- a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/XmiDBMultiplier.java
+++ b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/XmiDBMultiplier.java
@@ -46,6 +46,7 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
 
     @Override
     public void process(JCas aJCas) throws AnalysisEngineProcessException {
+        log.trace("Incoming jCas instance: {}", aJCas); // placeholder defers toString() until TRACE is enabled
         boolean initDone = super.initialized;
         RowBatch rowBatch = null;
         if (!initDone) {
@@ -94,6 +95,7 @@ public AbstractCas next() throws AnalysisEngineProcessException {
             jCas.release();
             throw new AnalysisEngineProcessException(throwable);
         }
+        log.trace("Outgoing multiplier jCas instance: {}", jCas); // placeholder defers toString() until TRACE is enabled
         return jCas;
     }
 
@@ -102,7 +104,17 @@ private void populateCas(JCas jCas) throws AnalysisEngineProcessException {
             throw new AnalysisEngineProcessException(new IllegalStateException("Initialization of the component was not finished. See previous errors to learn the reason. Cannot continue."));
         try {
             final byte[][] data = documentDataIterator.next();
-            log.trace("Populating CAS with {}", casPopulator);
+            if (log.isTraceEnabled() && data != null) { // data is null-checked further down; never dereference it just for logging
+                List<String> l = new ArrayList<>();
+                for (int i = 1; i < data.length; i++) { // starts at 1: data[0] is logged as the document ID below
+                    if (data[i] ==  null)
+                        continue;
+                    int length = data[i].length;
+                    double lengthInMb = (length/1024d)/1024d;
+                    l.add("col"+i+":"+lengthInMb + "MB");
+                }
+                log.trace("Populating CAS for document ID {} with column data of sizes {}", new String(data[0]), String.join(",", l));
+            }
             if (data != null)
                 casPopulator.populateCas(data, jCas);
         } catch (CasPopulationException e) {

From 3281b3047580b9f9863aebc3a87ed3c8330f25c5 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 21 Jul 2022 08:32:06 +0200
Subject: [PATCH 215/269] Remove a gazetteer test that malfunctioned.

And that was not meant to be a proper unit test anyway.
---
 .../uima/GazetteerAnnotatorTest.java          | 26 -------------------
 1 file changed, 26 deletions(-)

diff --git a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java
index 1afb8c870..2671605dc 100644
--- a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java
+++ b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java
@@ -727,30 +727,4 @@ public void testEncoding() {
 		System.out.println(s2.getBytes(UTF_8).length);
 		System.out.println(s2.length());
 	}
-
-	@Test
-	public void testpolar() throws Exception {
-		ExternalResourceDescription extDesc = ExternalResourceFactory.createExternalResourceDescription(
-				ChunkerProviderImplAlt.class, new File("src/test/resources/polartest.properties"));
-		TypeSystemDescription tsDesc = TypeSystemDescriptionFactory
-				.createTypeSystemDescription("de.julielab.jcore.types.jcore-semantics-mention-types");
-
-		AnalysisEngine annotator = AnalysisEngineFactory.createEngine(GazetteerAnnotator.class, tsDesc,
-				GazetteerAnnotator.PARAM_OUTPUT_TYPE, "de.julielab.jcore.types.EntityMention",
-				GazetteerAnnotator.CHUNKER_RESOURCE_NAME, extDesc);
-
-		JCas jCas = annotator.newJCas();
-
-		// Warning: This text does not make sense ;-)
-		jCas.setDocumentText(
-				"Der allgemeinchirurgische Operationsplan sah für den heutigen Mittwoch im Operationssaal 4 insgesamt drei laparoskopische Cholezystektomien vor. Anästhesist Dr. Sven dachte sich: »Das übliche Programm.« Als Ausbildungsassistent im 2. Jahr war Dr. Sven häufiger im Laparoskopiesaal eingeteilt. Hier fühlte er sich sicher, denn er war mit dem Ablauf und den anästhesiologischen und chirurgischen Besonderheiten dieser Eingriffe vertraut. Er kannte inzwischen auch die einzelnen Operateure und ihre speziellen Vorlieben. »Zum Glück operiert heute nicht Oberarzt Dr. Harald«, schwirrte ihm durch den Kopf. Dr. Harald war ein äußerst versierter und schnell operierender Chirurg, dem aber die Wechselzeiten der Anästhesie nie kurz genug waren und der die Anästhesie deshalb stets unter Druck setzte. Heute war Dr. Veikko eingeteilt, ein junger chirurgischer Kollege, der erst vor kurzem seine Facharztprüfung bestanden hatte. Dr. Sven kannte ihn aus der Zeit seines eigenen chirurgischen PJ-Tertials. Einmal pro Woche spielten die beiden außerdem zusammen Fußball in einer Hobbymannschaft. Der erste Eingriff des Tages verlief problemlos. Nachdem Dr. Sven den Patienten im Aufwachraum abgegeben hatte, kehrte er in die Einleitung vom OP 4 zurück. Hier begrüßte er Frau Tränkner, eine 41 Jahre alte, übergewichtige Patientin. Dr. Sven kannte sie von seiner gestrigen präoperativen Visite. Er sah nochmals in ihre Akte und auf das Anästhesieprotokoll. Frau Tränkner wog 96 kg bei 169 cm Körpergröße (BMI 33,6 kg/m2). Sie hatte vor ca. 2 Wochen eine akute Cholezystitis gehabt, war aber jetzt beschwerdefrei. Als Begleiterkrankung hatte Frau Tränkner einen arteriellen Hypertonus, der mit Lisinopril und Metoprolol behandelt wurde, und eine Epilepsie. Seit einem Jahr nahm sie Carbamazepin und war jetzt anfallsfrei. Die Beurteilung des Atemweges erbrachte einen Mallampati-Score von 3 und einen Arné-Score von 13. Alle bestimmten Laborparameter waren unauffällig.");
-
-		annotator.process(jCas);
-
-		Set<String> extractedGenes = new HashSet<>();
-		for (var e : JCasUtil.select(jCas, EntityMention.class)) {
-			System.out.println(e.getCoveredText() + "\t" + e.getSpecificType());
-		}
-	}
-
 }

From 3fe2999bf8ab58f9b0c2bfc49a55c0a72842dba9 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 25 Jul 2022 12:02:31 +0200
Subject: [PATCH 216/269] Resolves #145.

---
 jcore-nlmgene-reader/BioC.dtd                 | 158 ++++++++
 jcore-nlmgene-reader/LICENSE                  |  26 ++
 jcore-nlmgene-reader/README.md                |  34 ++
 jcore-nlmgene-reader/pom.xml                  |  64 ++++
 .../jcore/reader/nlmgene/NLMGeneReader.java   | 170 +++++++++
 .../jcore/reader/nlmgene/desc/PLACEHOLDER     |   4 +
 .../nlmgene/desc/jcore-nlmgene-reader.xml     |  43 +++
 .../reader/nlmgene/NLMGeneReaderTest.java     |  56 +++
 .../test/resources/input/12461077.BioC.XML    |   3 +
 pom.xml                                       | 348 ++++++++++++------
 10 files changed, 791 insertions(+), 115 deletions(-)
 create mode 100644 jcore-nlmgene-reader/BioC.dtd
 create mode 100644 jcore-nlmgene-reader/LICENSE
 create mode 100644 jcore-nlmgene-reader/README.md
 create mode 100644 jcore-nlmgene-reader/pom.xml
 create mode 100644 jcore-nlmgene-reader/src/main/java/de/julielab/jcore/reader/nlmgene/NLMGeneReader.java
 create mode 100644 jcore-nlmgene-reader/src/main/resources/de/julielab/jcore/reader/nlmgene/desc/PLACEHOLDER
 create mode 100644 jcore-nlmgene-reader/src/main/resources/de/julielab/jcore/reader/nlmgene/desc/jcore-nlmgene-reader.xml
 create mode 100644 jcore-nlmgene-reader/src/test/java/de/julielab/jcore/reader/nlmgene/NLMGeneReaderTest.java
 create mode 100644 jcore-nlmgene-reader/src/test/resources/input/12461077.BioC.XML

diff --git a/jcore-nlmgene-reader/BioC.dtd b/jcore-nlmgene-reader/BioC.dtd
new file mode 100644
index 000000000..8bd0d55ca
--- /dev/null
+++ b/jcore-nlmgene-reader/BioC.dtd
@@ -0,0 +1,158 @@
+<!-- BioC.dtd -->
+
+        <!--
+
+            BioC is designed to allow programs that process text and
+            annotations on that text to easily share data and work
+            together. This DTD describes how that data is represented in XML
+            files.
+
+            Some believe XML is easily read by humans and that should be
+            supported by clearly formatting the elements. In the long run,
+            this is distracting. While the only meaningful spaces are in text
+            elements and the other spaces can be ignored, current tools add no
+            additional space.  Formatters and editors may be used to make the
+            XML file appear more readable.
+
+            The possible variety of annotations that one might want to produce
+            or use is nearly countless. There is no guarantee that these are
+            organized in the nice nested structure required for XML
+            elements. Even if they were, it would be nice to easily ignore
+            unwanted annotations.  So annotations are recorded in a stand off
+            manner, external to the annotated text. The exceptions are
+            passages and sentences because of their fundamental place in text.
+
+            The text is expected to be encoded in Unicode, specifically
+            UTF-8. This is one of the encodings required to be implemented by
+            XML tools, is portable between big-endian and little-endian
+            machines and is a superset of 7-bit ASCII. Code points beyond 127
+            may be expressed directly in UTF-8 or indirectly using numeric
+            entities.  Since many tools today still only directly process
+            ASCII characters, conversion should be available and
+            standardized.  Offsets should be in 8 bit code units (bytes) for
+            easier processing by naive programs.
+
+            collection:  Group of documents, usually from a larger corpus. If
+            a group of documents is from several corpora, use several
+            collections.
+
+            source:  Name of the source corpus from which the documents were selected
+
+            date:  Date documents extracted from original source. Can be as
+            simple as yyyymmdd or an ISO timestamp.
+
+            key: Separate file describing the infons used and any other useful
+            information about the data in the file. For example, if a file
+            includes part-of-speech tags, this file should describe the set of
+            part-of-speech tags used.
+
+            infon: key-value pairs. Can record essentially arbitrary
+            information. "type" will be a particular common key in the major
+            sub elements below. For PubMed references, passage "type" might
+            signal "title" or "abstract". For annotations, it might indicate
+            "noun phrase", "gene", or "disease". In the programming language
+            data structures, infons are typically represented as a map from a
+            string to a string.  This means keys should be unique within each
+            parent element.
+
+            document: A document in the collection. A single, complete
+            stand-alone document as described by its parent source.
+
+            id:  Typically, the id of the document in the parent
+            source. Should at least be unique in the collection.
+
+            passage: One portion of the document.  In the sample collection of
+            PubMed documents, each document has a title and frequently an
+            abstract. Structured abstracts could have additional passages. For
+            a full text document, passages could be sections such as
+            Introduction, Materials and Methods, or Conclusion. Another option
+            would be paragraphs. Passages impose a linear structure on the
+            document. Further structure in the document can be described by
+            infon values.
+
+            offset: Where the passage occurs in the parent document. Depending
+            on the source corpus, this might be a very relevant number.  They
+            should be sequential and identify a passage's position in the
+            document.  Since the sample PubMed collection is extracted from an
+            XML file, literal offsets have little value. The title is given an
+            offset of zero, while the abstract is assumed to begin after the
+            title and one space.
+
+            text: The original text of the passage.
+
+            sentence:  One sentence of the passage.
+
+            offset: A document offset to where the sentence begins in the
+            passage. This value is the sum of the passage offset and the local
+            offset within the passage.
+
+            text: The original text of the sentence.
+
+            annotation:  Stand-off annotation
+
+            id: Used to refer to this annotation in relations. Should be
+            unique at whatever level relations appear. If relations appear
+            at the sentence level, annotation ids need to be unique within
+            each sentence. Similarly, if relations appear at the passage
+            level, annotation ids need to be unique within each passage.
+
+            location: Location of the annotated text. Multiple locations
+            indicate a multi-span annotation.
+
+            offset: Document offset to where the annotated text begins in
+            the passage or sentence. The value is the sum of the passage or
+            sentence offset and the local offset within the passage or
+            sentence.
+
+            length: Length of the annotated text. While unlikely, this could
+            be zero to describe an annotation that belongs between two
+            characters.
+
+            text:  Typically the annotated text.
+
+            relation: Relation between multiple annotations and / or other
+            relations. Relations are allowed to appear at several levels
+            (document, passage, and sentence). Typically they will all appear
+            at one level, the level at which they are determined.
+            Significantly different types of relations might appear at
+            different levels.
+
+            id: Used to refer to this relation in other relations. This id
+            needs to be unique at whatever level relations appear. (See
+            discussion of annotation ids.)
+
+            refid: Id of an annotation or another relation.
+
+            role: Describes how the referenced annotation or other relation
+            participates in the current relation. Has a default value so it
+            can be left out if there is no meaningful value.
+
+        -->
+
+        <!ELEMENT collection ( source, date, key, infon*, document+ ) >
+        <!ELEMENT source (#PCDATA)>
+        <!ELEMENT date (#PCDATA)>
+        <!ELEMENT key (#PCDATA)>
+        <!ELEMENT infon (#PCDATA)>
+        <!ATTLIST infon key CDATA #REQUIRED >
+
+        <!ELEMENT document ( id, infon*, passage+, relation* ) >
+        <!ELEMENT id (#PCDATA)>
+
+        <!ELEMENT passage ( infon*, offset, ( ( text?, annotation* ) | sentence* ), relation* ) >
+        <!ELEMENT offset (#PCDATA)>
+        <!ELEMENT text (#PCDATA)>
+
+        <!ELEMENT sentence ( infon*, offset, text?, annotation*, relation* ) >
+
+        <!ELEMENT annotation ( infon*, location*, text ) >
+        <!ATTLIST annotation id CDATA #IMPLIED >
+        <!ELEMENT location EMPTY>
+        <!ATTLIST location offset CDATA #REQUIRED >
+        <!ATTLIST location length CDATA #REQUIRED >
+
+        <!ELEMENT relation ( infon*, node* ) >
+        <!ATTLIST relation id CDATA #IMPLIED >
+        <!ELEMENT node EMPTY>
+        <!ATTLIST node refid CDATA #REQUIRED >
+        <!ATTLIST node role CDATA "" >
diff --git a/jcore-nlmgene-reader/LICENSE b/jcore-nlmgene-reader/LICENSE
new file mode 100644
index 000000000..fbbd41e05
--- /dev/null
+++ b/jcore-nlmgene-reader/LICENSE
@@ -0,0 +1,26 @@
+BSD 2-Clause License
+
+Copyright (c) 2017, JULIE Lab
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/jcore-nlmgene-reader/README.md b/jcore-nlmgene-reader/README.md
new file mode 100644
index 000000000..2cacbc00a
--- /dev/null
+++ b/jcore-nlmgene-reader/README.md
@@ -0,0 +1,34 @@
+# JCoRe NLM-Gene Reader
+Collection reader for the BioC format of the NLM-Gene corpus.
+
+**Descriptor Path**:
+```
+de.julielab.jcore.reader.nlmgene.desc.jcore-nlmgene-reader
+```
+
+`More thorough description`
+`Are there any requirements or dependencies for this component?`
+
+**1. Parameters**
+
+| Parameter Name | Parameter Type | Mandatory | Multivalued | Description |
+|----------------|----------------|-----------|-------------|-------------|
+| param1 | UIMA-Type | Boolean | Boolean | Description |
+| param2 | UIMA-Type | Boolean | Boolean | Description |
+
+**2. Predefined Settings**
+
+| Parameter Name | Parameter Syntax | Example |
+|----------------|------------------|---------|
+| param1 | Syntax-Description | `Example` |
+| param2 | Syntax-Description | `Example` |
+
+**3. Capabilities**
+
+| Type | Input | Output |
+|------|:-----:|:------:|
+| de.julielab.jcore.types.Gene |  | `+` |
+| de.julielab.jcore.types.ResourceEntry |  | `+` |
+
+
+[1] Some Literature?
diff --git a/jcore-nlmgene-reader/pom.xml b/jcore-nlmgene-reader/pom.xml
new file mode 100644
index 000000000..ba1cf2294
--- /dev/null
+++ b/jcore-nlmgene-reader/pom.xml
@@ -0,0 +1,64 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>jcore-nlmgene-reader</artifactId>
+    <packaging>jar</packaging>
+    <groupId>de.julielab</groupId>
+
+    <parent>
+        <groupId>de.julielab</groupId>
+        <artifactId>jcore-base</artifactId>
+        <version>2.6.0-SNAPSHOT</version>
+    </parent>
+
+    <version>2.6.0-SNAPSHOT</version>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.assertj</groupId>
+            <artifactId>assertj-core</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>com.pengyifan.bioc</groupId>
+            <artifactId>pengyifan-bioc</artifactId>
+            <version>1.0.3</version>
+        </dependency>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-descriptor-creator</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-types</artifactId>
+            <version>${jcore-types-version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
+        </dependency>
+    </dependencies>
+    <name>JCoRe NLM-Gene Reader</name>
+    <organization>
+        <name>JULIE Lab Jena, Germany</name>
+        <url>http://www.julielab.de</url>
+    </organization>
+    <url>https://github.com/JULIELab/jcore-base/tree/master/jcore-nlmgene-reader</url>
+    <description>Collection reader for the BioC format of the NLM-Gene corpus.</description>
+    <licenses>
+        <license>
+            <name>BSD 2-Clause</name>
+            <url>https://opensource.org/licenses/BSD-2-Clause</url>
+        </license>
+    </licenses>
+</project>
diff --git a/jcore-nlmgene-reader/src/main/java/de/julielab/jcore/reader/nlmgene/NLMGeneReader.java b/jcore-nlmgene-reader/src/main/java/de/julielab/jcore/reader/nlmgene/NLMGeneReader.java
new file mode 100644
index 000000000..36bcb0c1a
--- /dev/null
+++ b/jcore-nlmgene-reader/src/main/java/de/julielab/jcore/reader/nlmgene/NLMGeneReader.java
@@ -0,0 +1,243 @@
+package de.julielab.jcore.reader.nlmgene;
+
+import com.pengyifan.bioc.BioCAnnotation;
+import com.pengyifan.bioc.BioCCollection;
+import com.pengyifan.bioc.BioCDocument;
+import com.pengyifan.bioc.BioCPassage;
+import com.pengyifan.bioc.io.BioCCollectionReader;
+import de.julielab.jcore.types.Gene;
+import de.julielab.jcore.types.ResourceEntry;
+import de.julielab.jcore.types.Title;
+import de.julielab.jcore.types.pubmed.AbstractText;
+import de.julielab.jcore.types.pubmed.Header;
+import org.apache.uima.UimaContext;
+import org.apache.uima.collection.CollectionException;
+import org.apache.uima.fit.component.JCasCollectionReader_ImplBase;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.descriptor.ResourceMetaData;
+import org.apache.uima.fit.descriptor.TypeCapability;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.cas.FSArray;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.Progress;
+import org.apache.uima.util.ProgressImpl;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.xml.stream.XMLStreamException;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Locale;
+import java.util.Optional;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+
+/**
+ * Collection reader for the BioC XML files of the NLM-Gene corpus.
+ * <p>
+ * Every file of the input directory must hold a BioC collection with exactly one document. The passage
+ * texts are concatenated into the CAS document text, each followed by a single newline, and the BioC
+ * annotations are added as {@link Gene} annotations using their BioC offsets unchanged.
+ */
+@ResourceMetaData(name = "JCoRe NLM-Gene Reader", description = "Collection reader for the BioC format of the NLM-Gene corpus.", vendor = "JULIE Lab Jena, Germany")
+@TypeCapability(inputs = {}, outputs = {"de.julielab.jcore.types.Gene", "de.julielab.jcore.types.ResourceEntry"})
+public class NLMGeneReader extends JCasCollectionReader_ImplBase {
+
+    public static final String PARAM_INPUT_DIR = "InputDirectory";
+    private static final Logger log = LoggerFactory.getLogger(NLMGeneReader.class);
+    /**
+     * Separator appended after every passage. It must be the same single character on every platform because
+     * the BioC document offsets are used unchanged as CAS offsets; the platform line separator would add two
+     * characters on Windows and shift all passages after the first.
+     */
+    private static final String PASSAGE_SEPARATOR = "\n";
+    @ConfigurationParameter(name = PARAM_INPUT_DIR, description = "Path to the directory that contains the BioC XML files of the NLM-Gene corpus.")
+    private String inputDir;
+    private Iterator<Path> corpusFileIterator;
+    /** Number of corpus files found at initialization; reported as the progress total. */
+    private int numFiles;
+    /** Number of corpus files read successfully so far. */
+    private int numRead;
+
+    /**
+     * Reads the {@link #PARAM_INPUT_DIR} parameter and collects the corpus files to read.
+     *
+     * @param context the UIMA context providing the configuration parameter values
+     * @throws ResourceInitializationException if the input directory cannot be listed
+     */
+    @Override
+    public void initialize(UimaContext context) throws ResourceInitializationException {
+        super.initialize(context);
+        inputDir = (String) context.getConfigParameterValue(PARAM_INPUT_DIR);
+        try {
+            final List<Path> corpusFiles = readInputFiles(inputDir);
+            numFiles = corpusFiles.size();
+            corpusFileIterator = corpusFiles.iterator();
+        } catch (IOException e) {
+            log.error("Could not read NLM-Gene corpus input files.", e);
+            throw new ResourceInitializationException(e);
+        }
+        numRead = 0;
+    }
+
+    /**
+     * Lists the BioC XML files directly contained in the given directory, sorted by path.
+     *
+     * @param inputDir path of the directory to list
+     * @return the files accepted by {@link #isBioCXmlFile(Path)}
+     * @throws IOException if the directory cannot be listed
+     */
+    private List<Path> readInputFiles(String inputDir) throws IOException {
+        // Files.list keeps the directory open until the stream is closed, so the result is materialized here.
+        try (Stream<Path> files = Files.list(Path.of(inputDir))) {
+            return files.filter(NLMGeneReader::isBioCXmlFile).sorted().collect(Collectors.toList());
+        }
+    }
+
+    /**
+     * Tells whether the path ends with <code>.xml</code> or <code>.xml.gz</code>, ignoring case.
+     * NOTE(review): gzipped files are accepted here but handed to {@link BioCCollectionReader} as they are;
+     * confirm that the reader is able to decompress them.
+     */
+    private static boolean isBioCXmlFile(Path p) {
+        final String name = p.toString().toLowerCase(Locale.ROOT);
+        return name.endsWith(".xml") || name.endsWith(".xml.gz");
+    }
+
+    /**
+     * Reads the next corpus file into the CAS: header, document text, passage structure and gene annotations.
+     *
+     * @param jCas the CAS to populate
+     * @throws CollectionException      if the file cannot be read or parsed
+     * @throws IllegalArgumentException if the BioC collection does not contain exactly one document
+     * @throws IllegalStateException    if an annotation has a missing or unexpected type or does not match the text
+     */
+    @Override
+    public void getNext(JCas jCas) throws CollectionException {
+        final Path nextFile = corpusFileIterator.next();
+        // The BioC reader holds the underlying file stream and must be closed after reading.
+        try (BioCCollectionReader reader = new BioCCollectionReader(nextFile)) {
+            final BioCCollection collection = reader.readCollection();
+            if (collection.getDocmentCount() != 1)
+                throw new IllegalArgumentException("A single document per BioC collection is expected but the collection of file " + nextFile + " has " + collection.getDocmentCount() + " documents. This case is not supported.");
+            final BioCDocument document = collection.getDocument(0);
+
+            handleHeader(jCas, document);
+            final StringBuilder textBuilder = new StringBuilder();
+            for (BioCPassage p : document.getPassages()) {
+                final int previousTextLength = textBuilder.length();
+                // The passage text is optional in BioC; a passage without text contributes an empty string.
+                textBuilder.append(p.getText().orElse(""));
+                handlePassageStructureType(jCas, textBuilder, p, previousTextLength);
+                handleAnnotation(jCas, document, p, textBuilder);
+                textBuilder.append(PASSAGE_SEPARATOR);
+            }
+
+            jCas.setDocumentText(textBuilder.toString());
+            ++numRead;
+        } catch (XMLStreamException | IOException e) {
+            log.error("Could not read NLM-Gene corpus file {}", nextFile, e);
+            throw new CollectionException(e);
+        }
+    }
+
+    /** Adds the document header carrying the BioC document ID and the source name "NLM-Gene". */
+    private void handleHeader(JCas jCas, BioCDocument document) {
+        final Header h = new Header(jCas);
+        h.setDocId(document.getID());
+        h.setComponentId(getClass().getSimpleName());
+        h.setSource("NLM-Gene");
+        h.addToIndexes();
+    }
+
+    /**
+     * Creates a {@link Gene} annotation for each BioC annotation of the passage. The BioC offsets are used
+     * unchanged as CAS offsets and are validated against the document text read so far.
+     */
+    private void handleAnnotation(JCas jCas, BioCDocument document, BioCPassage p, StringBuilder textBuilder) {
+        for (BioCAnnotation a : p.getAnnotations()) {
+            final int begin = a.getTotalLocation().getOffset();
+            final int end = begin + a.getTotalLocation().getLength();
+            final Gene g = new Gene(jCas, begin, end);
+            final Optional<String> typeInfon = a.getInfon("type");
+            final Optional<String> codeInfon = a.getInfon("code");
+            handleErrors(document, p, a, g, typeInfon, textBuilder);
+            handleGeneId(jCas, a, g);
+            handleSpecificType(g, typeInfon, codeInfon);
+            g.addToIndexes();
+        }
+    }
+
+    /** Sets the specific type to the "type" infon, suffixed with "-" and the "code" infon if there is one. */
+    private void handleSpecificType(Gene g, Optional<String> typeInfon, Optional<String> codeInfon) {
+        // handleErrors has already rejected annotations without a type infon.
+        final String type = typeInfon.get();
+        g.setSpecificType(codeInfon.map(code -> type + "-" + code).orElse(type));
+    }
+
+    /**
+     * Validates the type, the offsets and the covered text of an annotation.
+     *
+     * @throws IllegalStateException if the type is missing or unexpected, or the offsets or the text do not match
+     */
+    private void handleErrors(BioCDocument document, BioCPassage p, BioCAnnotation a, Gene g, Optional<String> typeInfon, StringBuilder textBuilder) {
+        if (!typeInfon.isPresent())
+            throw new IllegalStateException(describe(document, p, a) + " does not specify a type.");
+        if (!(typeInfon.get().equals("Gene") || typeInfon.get().equals("GENERIF")))
+            throw new IllegalStateException(describe(document, p, a) + " was neither of type Gene nor GENERIF.");
+        if (g.getBegin() < 0 || g.getBegin() > g.getEnd() || g.getEnd() > textBuilder.length())
+            throw new IllegalStateException(describe(document, p, a) + " has the offsets " + g.getBegin() + "-" + g.getEnd() + " which lie outside of the document text read so far (length " + textBuilder.length() + ").");
+        final String coveredText = textBuilder.substring(g.getBegin(), g.getEnd());
+        final String expectedText = a.getText().orElse(null);
+        if (!coveredText.equals(expectedText))
+            throw new IllegalStateException(describe(document, p, a) + " has the covered text " + coveredText + " but should have the text " + expectedText + " according to the BioC XML information.");
+    }
+
+    /** Builds the common prefix of the validation error messages; tolerates a passage without a type infon. */
+    private static String describe(BioCDocument document, BioCPassage p, BioCAnnotation a) {
+        return "The annotation " + a.getID() + " of passage " + p.getInfon("type").orElse("<unknown>") + " of document " + document.getID();
+    }
+
+    /** Attaches the "NCBI Gene identifier" infon, if present, to the gene as its only resource entry. */
+    private void handleGeneId(JCas jCas, BioCAnnotation a, Gene g) {
+        final Optional<String> ncbiGeneId = a.getInfon("NCBI Gene identifier");
+        if (ncbiGeneId.isPresent()) {
+            final ResourceEntry re = new ResourceEntry(jCas, g.getBegin(), g.getEnd());
+            re.setEntryId(ncbiGeneId.get());
+            re.setComponentId(getClass().getSimpleName());
+            final FSArray resourceEntryList = new FSArray(jCas, 1);
+            resourceEntryList.set(0, re);
+            g.setResourceEntryList(resourceEntryList);
+        }
+    }
+
+    /** Adds a Title or AbstractText annotation over the passage just appended, depending on its "type" infon. */
+    private void handlePassageStructureType(JCas jCas, StringBuilder textBuilder, BioCPassage p, int previousTextLength) {
+        final Optional<String> typeInfon = p.getInfon("type");
+        if (typeInfon.isPresent() && typeInfon.get().equals("title")) {
+            final Title t = new Title(jCas, previousTextLength, textBuilder.length());
+            t.setTitleType("document");
+            t.setComponentId(getClass().getSimpleName());
+            t.addToIndexes();
+        } else if (typeInfon.isPresent() && typeInfon.get().equals("abstract")) {
+            final AbstractText abstractText = new AbstractText(jCas, previousTextLength, textBuilder.length());
+            abstractText.setComponentId(getClass().getSimpleName());
+            abstractText.addToIndexes();
+        }
+    }
+
+    /** Reports the number of files read so far out of the number of files found at initialization. */
+    @Override
+    public Progress[] getProgress() {
+        return new Progress[]{new ProgressImpl(numRead, numFiles, "documents")};
+    }
+
+    @Override
+    public boolean hasNext() {
+        return corpusFileIterator.hasNext();
+    }
+
+}
diff --git a/jcore-nlmgene-reader/src/main/resources/de/julielab/jcore/reader/nlmgene/desc/PLACEHOLDER b/jcore-nlmgene-reader/src/main/resources/de/julielab/jcore/reader/nlmgene/desc/PLACEHOLDER
new file mode 100644
index 000000000..e4b0b196a
--- /dev/null
+++ b/jcore-nlmgene-reader/src/main/resources/de/julielab/jcore/reader/nlmgene/desc/PLACEHOLDER
@@ -0,0 +1,4 @@
+The actual descriptor must be created by UIMA fit.
+For this purpose, use UIMAfit annotations to annotate the reader component class.
+Then employ the jcore-descriptor-creator's main method to build the descriptor from the reader class.
+The jcore-descriptor-creator is already on the classpath as a Maven dependency.
diff --git a/jcore-nlmgene-reader/src/main/resources/de/julielab/jcore/reader/nlmgene/desc/jcore-nlmgene-reader.xml b/jcore-nlmgene-reader/src/main/resources/de/julielab/jcore/reader/nlmgene/desc/jcore-nlmgene-reader.xml
new file mode 100644
index 000000000..c4f2d5028
--- /dev/null
+++ b/jcore-nlmgene-reader/src/main/resources/de/julielab/jcore/reader/nlmgene/desc/jcore-nlmgene-reader.xml
@@ -0,0 +1,43 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<collectionReaderDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <implementationName>de.julielab.jcore.reader.nlmgene.NLMGeneReader</implementationName>
+  <processingResourceMetaData>
+    <name>JCoRe NLM-Gene Reader</name>
+    <description>Collection reader for the BioC format of the NLM-Gene corpus.</description>
+    <vendor>JULIE Lab Jena, Germany</vendor>
+    <configurationParameters>
+      <configurationParameter>
+        <name>InputDirectory</name>
+        <description>Path to the directory that contains the BioC XML files of the NLM-Gene corpus.</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings/>
+    <typeSystemDescription>
+      <imports>
+        <import name="de.julielab.jcore.types.jcore-semantics-biology-types"/>
+        <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
+        <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
+      </imports>
+    </typeSystemDescription>
+    <fsIndexCollection/>
+    <capabilities>
+      <capability>
+        <inputs/>
+        <outputs>
+          <type>de.julielab.jcore.types.Gene</type>
+          <type>de.julielab.jcore.types.ResourceEntry</type>
+        </outputs>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
+      <outputsNewCASes>true</outputsNewCASes>
+    </operationalProperties>
+  </processingResourceMetaData>
+</collectionReaderDescription>
\ No newline at end of file
diff --git a/jcore-nlmgene-reader/src/test/java/de/julielab/jcore/reader/nlmgene/NLMGeneReaderTest.java b/jcore-nlmgene-reader/src/test/java/de/julielab/jcore/reader/nlmgene/NLMGeneReaderTest.java
new file mode 100644
index 000000000..d21b35292
--- /dev/null
+++ b/jcore-nlmgene-reader/src/test/java/de/julielab/jcore/reader/nlmgene/NLMGeneReaderTest.java
@@ -0,0 +1,56 @@
+
+package de.julielab.jcore.reader.nlmgene;
+
+import de.julielab.jcore.types.Gene;
+import de.julielab.jcore.types.ResourceEntry;
+import de.julielab.jcore.types.Title;
+import de.julielab.jcore.types.pubmed.AbstractText;
+import de.julielab.jcore.types.pubmed.Header;
+import org.apache.uima.collection.CollectionReader;
+import org.apache.uima.fit.factory.CollectionReaderFactory;
+import org.apache.uima.fit.factory.JCasFactory;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.assertj.core.api.Assertions.assertThat;
+/**
+ * Unit tests for jcore-nlmgene-reader.
+ * Reads the BioC test file from <code>src/test/resources/input</code> and checks the header, the title and
+ * abstract offsets and two of the gene annotations together with their resource entry IDs.
+ */
+public class NLMGeneReaderTest{
+
+    private final static Logger log = LoggerFactory.getLogger(NLMGeneReaderTest.class);
+
+    @Test
+    public void testReader() throws Exception {
+        final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-document-meta-pubmed-types", "de.julielab.jcore.types.jcore-document-structure-pubmed-types", "de.julielab.jcore.types.jcore-semantics-biology-types");
+        final CollectionReader reader = CollectionReaderFactory.createReader("de.julielab.jcore.reader.nlmgene.desc.jcore-nlmgene-reader", NLMGeneReader.PARAM_INPUT_DIR, Path.of("src", "test", "resources", "input").toString());
+        assertThat(reader.hasNext()).isTrue();
+        reader.getNext(jCas.getCas());
+        assertThat(reader.hasNext()).isFalse(); // the input directory is expected to hold exactly one file
+        final Header header = JCasUtil.selectSingle(jCas, Header.class);
+        assertThat(header.getDocId()).isEqualTo("12461077");
+        final Title title = JCasUtil.selectSingle(jCas, Title.class);
+        assertThat(title).extracting(Title::getBegin, Title::getEnd).isEqualTo(List.of(0, 151));
+        final AbstractText abstractText = JCasUtil.selectSingle(jCas, AbstractText.class);
+        assertThat(abstractText).extracting(AbstractText::getBegin, AbstractText::getEnd).isEqualTo(List.of(152, 2168));
+        final List<Gene> genes = new ArrayList<>(JCasUtil.select(jCas, Gene.class));
+        assertThat(genes).hasSize(39);
+        final Gene firstGene = genes.get(0);
+        assertThat(firstGene).extracting(Gene::getCoveredText).isEqualTo("ICSBP");
+        assertThat(firstGene.getResourceEntryList()).isNotNull().isNotEmpty();
+        assertThat(firstGene.getResourceEntryList(0)).extracting(ResourceEntry::getEntryId).isEqualTo("15900");
+
+        final Gene secondGene = genes.get(9); // index 9: the tenth Gene as returned by JCasUtil.select, despite the variable name
+        assertThat(secondGene).extracting(Gene::getCoveredText).isEqualTo("CD11c");
+        assertThat(secondGene.getResourceEntryList(0)).extracting(ResourceEntry::getEntryId).isEqualTo("16411");
+    }
+}
diff --git a/jcore-nlmgene-reader/src/test/resources/input/12461077.BioC.XML b/jcore-nlmgene-reader/src/test/resources/input/12461077.BioC.XML
new file mode 100644
index 000000000..5347ff6f6
--- /dev/null
+++ b/jcore-nlmgene-reader/src/test/resources/input/12461077.BioC.XML
@@ -0,0 +1,3 @@
+<?xml version='1.0' encoding='UTF-8'?><!DOCTYPE collection SYSTEM "BioC.dtd"><collection><source>PubTator</source><date>2020-12-04</date><key>BioC.key</key>
+<document><id>12461077</id><passage><infon key="type">title</infon><offset>0</offset><text>ICSBP is essential for the development of mouse type I interferon-producing cells and for the generation and activation of CD8alpha(+) dendritic cells.</text><annotation id="0"><infon key="NCBI Gene identifier">15900</infon><infon key="type">GENERIF</infon><location offset="0" length="5"/><text>ICSBP</text></annotation><annotation id="1"><infon key="code">333</infon><infon key="NCBI Gene identifier">15977</infon><infon key="type">Gene</infon><location offset="48" length="17"/><text>type I interferon</text></annotation><annotation id="2"><infon key="NCBI Gene identifier">12525</infon><infon key="type">Gene</infon><location offset="123" length="8"/><text>CD8alpha</text></annotation></passage><passage><infon key="type">abstract</infon><offset>152</offset><text>Interferon (IFN) consensus sequence-binding protein (ICSBP) is a transcription factor playing a critical role in the regulation of lineage commitment, especially in myeloid cell differentiation. In this study, we have characterized the phenotype and activation pattern of subsets of dendritic cells (DCs) in ICSBP(-/-) mice. Remarkably, the recently identified mouse IFN-producing cells (mIPCs) were absent in all lymphoid organs from ICSBP(-/-) mice, as revealed by lack of CD11c(low)B220(+)Ly6C(+)CD11b(-) cells. In parallel, CD11c(+) cells isolated from ICSBP(-/-) spleens were unable to produce type I IFNs in response to viral stimulation. ICSBP(-/-) mice also displayed a marked reduction of the DC subset expressing the CD8alpha marker (CD8alpha(+) DCs) in spleen, lymph nodes, and thymus. Moreover, ICSBP(-/-) CD8alpha(+) DCs exhibited a markedly impaired phenotype when compared with WT DCs. 
They expressed very low levels of costimulatory molecules (intercellular adhesion molecule [ICAM]-1, CD40, CD80, CD86) and of the T cell area-homing chemokine receptor CCR7, whereas they showed higher levels of CCR2 and CCR6, as revealed by reverse transcription PCR. In addition, these cells were unable to undergo full phenotypic activation upon in vitro culture in presence of maturation stimuli such as lipopolysaccharide or poly (I:C), which paralleled with lack of Toll-like receptor (TLR)3 mRNA expression. Finally, cytokine expression pattern was also altered in ICSBP(-/-) DCs, as they did not express interleukin (IL)-12p40 or IL-15, but they displayed detectable IL-4 mRNA levels. On the whole, these results indicate that ICSBP is a crucial factor in the regulation of two possibly linked processes: (a) the development and activity of mIPCs, whose lack in ICSBP(-/-) mice may explain their high susceptibility to virus infections; (b) the generation and activation of CD8alpha(+) DCs, whose impairment in ICSBP(-/-) mice can be responsible for the defective generation of a Th1 type of immune response.</text><annotation id="3"><infon key="code">000</infon><infon key="NCBI Gene identifier">15900</infon><infon key="type">Gene</infon><location offset="152" length="51"/><text>Interferon (IFN) consensus sequence-binding protein</text></annotation><annotation id="4"><infon key="NCBI Gene identifier">15900</infon><infon key="type">GENERIF</infon><location offset="205" length="5"/><text>ICSBP</text></annotation><annotation id="5"><infon key="code">222</infon><infon key="NCBI Gene identifier">15900</infon><infon key="type">Gene</infon><location offset="217" length="20"/><text>transcription factor</text></annotation><annotation id="6"><infon key="NCBI Gene identifier">15900</infon><infon key="type">GENERIF</infon><location offset="460" length="5"/><text>ICSBP</text></annotation><annotation id="7"><infon key="code">333</infon><infon key="NCBI Gene identifier">15978</infon><infon 
key="type">Gene</infon><location offset="519" length="3"/><text>IFN</text></annotation><annotation id="8"><infon key="NCBI Gene identifier">15900</infon><infon key="type">GENERIF</infon><location offset="587" length="5"/><text>ICSBP</text></annotation><annotation id="9"><infon key="NCBI Gene identifier">16411</infon><infon key="type">Gene</infon><location offset="627" length="5"/><text>CD11c</text></annotation><annotation id="10"><infon key="NCBI Gene identifier">19264</infon><infon key="type">Gene</infon><location offset="637" length="4"/><text>B220</text></annotation><annotation id="11"><infon key="NCBI Gene identifier">17067</infon><infon key="type">Gene</infon><location offset="644" length="4"/><text>Ly6C</text></annotation><annotation id="12"><infon key="NCBI Gene identifier">16409</infon><infon key="type">Gene</infon><location offset="651" length="5"/><text>CD11b</text></annotation><annotation id="13"><infon key="NCBI Gene identifier">16411</infon><infon key="type">Gene</infon><location offset="680" length="5"/><text>CD11c</text></annotation><annotation id="14"><infon key="NCBI Gene identifier">15900</infon><infon key="type">GENERIF</infon><location offset="709" length="5"/><text>ICSBP</text></annotation><annotation id="15"><infon key="code">333</infon><infon key="NCBI Gene identifier">15977</infon><infon key="type">Gene</infon><location offset="751" length="11"/><text>type I IFNs</text></annotation><annotation id="16"><infon key="NCBI Gene identifier">15900</infon><infon key="type">GENERIF</infon><location offset="797" length="5"/><text>ICSBP</text></annotation><annotation id="17"><infon key="NCBI Gene identifier">12525</infon><infon key="type">Gene</infon><location offset="879" length="8"/><text>CD8alpha</text></annotation><annotation id="18"><infon key="NCBI Gene identifier">12525</infon><infon key="type">Gene</infon><location offset="896" length="8"/><text>CD8alpha</text></annotation><annotation id="19"><infon key="NCBI Gene 
identifier">15900</infon><infon key="type">GENERIF</infon><location offset="959" length="5"/><text>ICSBP</text></annotation><annotation id="20"><infon key="NCBI Gene identifier">12525</infon><infon key="type">Gene</infon><location offset="970" length="8"/><text>CD8alpha</text></annotation><annotation id="21"><infon key="NCBI Gene identifier">15894</infon><infon key="type">Gene</infon><location offset="1112" length="40"/><text>intercellular adhesion molecule [ICAM]-1</text></annotation><annotation id="22"><infon key="NCBI Gene identifier">21939</infon><infon key="type">Gene</infon><location offset="1154" length="4"/><text>CD40</text></annotation><annotation id="23"><infon key="NCBI Gene identifier">12519</infon><infon key="type">Gene</infon><location offset="1160" length="4"/><text>CD80</text></annotation><annotation id="24"><infon key="NCBI Gene identifier">12524</infon><infon key="type">Gene</infon><location offset="1166" length="4"/><text>CD86</text></annotation><annotation id="25"><infon key="code">222</infon><infon key="NCBI Gene identifier">12458,12772,12775</infon><infon key="type">Gene</infon><location offset="1202" length="18"/><text>chemokine receptor</text></annotation><annotation id="26"><infon key="NCBI Gene identifier">12775</infon><infon key="type">Gene</infon><location offset="1221" length="4"/><text>CCR7</text></annotation><annotation id="27"><infon key="NCBI Gene identifier">12772</infon><infon key="type">Gene</infon><location offset="1264" length="4"/><text>CCR2</text></annotation><annotation id="28"><infon key="NCBI Gene identifier">12458</infon><infon key="type">Gene</infon><location offset="1273" length="4"/><text>CCR6</text></annotation><annotation id="29"><infon key="NCBI Gene identifier">142980</infon><infon key="type">Gene</infon><location offset="1524" length="25"/><text>Toll-like receptor (TLR)3</text></annotation><annotation id="30"><infon key="code">222</infon><infon key="NCBI Gene identifier">16160,16168,16189</infon><infon 
key="type">Gene</infon><location offset="1576" length="8"/><text>cytokine</text></annotation><annotation id="31"><infon key="NCBI Gene identifier">15900</infon><infon key="type">GENERIF</infon><location offset="1624" length="5"/><text>ICSBP</text></annotation><annotation id="32"><infon key="NCBI Gene identifier">16160</infon><infon key="type">Gene</infon><location offset="1664" length="22"/><text>interleukin (IL)-12p40</text></annotation><annotation id="33"><infon key="NCBI Gene identifier">16168</infon><infon key="type">Gene</infon><location offset="1690" length="5"/><text>IL-15</text></annotation><annotation id="34"><infon key="NCBI Gene identifier">16189</infon><infon key="type">Gene</infon><location offset="1727" length="4"/><text>IL-4</text></annotation><annotation id="35"><infon key="NCBI Gene identifier">15900</infon><infon key="type">GENERIF</infon><location offset="1787" length="5"/><text>ICSBP</text></annotation><annotation id="36"><infon key="NCBI Gene identifier">15900</infon><infon key="type">GENERIF</infon><location offset="1922" length="5"/><text>ICSBP</text></annotation><annotation id="37"><infon key="NCBI Gene identifier">12525</infon><infon key="type">Gene</infon><location offset="2034" length="8"/><text>CD8alpha</text></annotation><annotation id="38"><infon key="NCBI Gene identifier">15900</infon><infon key="type">GENERIF</infon><location offset="2071" length="5"/><text>ICSBP</text></annotation></passage></document>
+</collection>
diff --git a/pom.xml b/pom.xml
index 4b6553f98..c57f51dd7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,350 +1,468 @@
 <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
-      
+        
+  
   
   <modelVersion>4.0.0</modelVersion>
-      
+        
+  
   
   <parent>
-            
+                
+    
     
     <groupId>de.julielab</groupId>
-            
+                
+    
     
     <artifactId>jcore-parent</artifactId>
-            
+                
+    
     
     <version>2.5.2-SNAPSHOT</version>
-          
+              
+  
   
   </parent>
-      
+        
+  
   
   <artifactId>jcore-base</artifactId>
-      
+        
+  
   
   <packaging>pom</packaging>
-      
+        
+  
   
   <name>JCoRe Base</name>
-      
+        
+  
   
   <description>The POM for the JCoRe Base projects.</description>
-      
+        
+  
   
   <version>2.6.0-SNAPSHOT</version>
-      
+        
+  
   
   <organization>
-            
+                
+    
     
     <name>JULIE Lab, Germany</name>
-            
+                
+    
     
     <url>http://www.julielab.de</url>
-          
+              
+  
   
   </organization>
-      
+        
+  
   
   <licenses>
-            
+                
+    
     
     <license>
-                  
+                        
+      
       
       <name>BSD-2-Clause</name>
-                  
+                        
+      
       
       <url>https://opensource.org/licenses/BSD-2-Clause</url>
-                
+                      
+    
     
     </license>
-          
+              
+  
   
   </licenses>
-      
+        
+  
   
   <url>https://github.com/JULIELab/jcore-base</url>
-      
+        
+  
   
   <dependencies>
-            
+                
+    
     
     <dependency>
-                  
+                        
+      
       
       <groupId>org.apache.uima</groupId>
-                  
+                        
+      
       
       <artifactId>uimaj-core</artifactId>
-                  
+                        
+      
       
       <version>${uima-version}</version>
-                
+                      
+    
     
     </dependency>
-            
+                
+    
     
     <dependency>
-                  
+                        
+      
       
       <groupId>org.apache.uima</groupId>
-                  
+                        
+      
       
       <artifactId>uimafit-core</artifactId>
-                  
+                        
+      
       
       <version>${uimafit-version}</version>
-                
+                      
+    
     
     </dependency>
-          
+              
+  
   
   </dependencies>
-      
+        
+  
   
   <modules>
-            
+                
+    
     
     <module>jcore-annotation-adder-ae</module>
-            
+                
+    
     
     <module>jcore-ace-reader</module>
-            
+                
+    
     
     <module>jcore-acronym-ae</module>
-            
+                
+    
     
     <module>jcore-acronym-writer</module>
-            
+                
+    
     
     <module>jcore-banner-ae</module>
-            
+                
+    
     
     <module>jcore-bc2gm-reader</module>
-            
+                
+    
     
     <module>jcore-bc2gmformat-writer</module>
-            
+                
+    
     
     <module>jcore-biolemmatizer-ae</module>
-            
+                
+    
     
     <module>jcore-bionlpformat-consumer</module>
-            
+                
+    
     
     <module>jcore-bionlpformat-reader</module>
-            
+                
+    
     
     <module>jcore-biosem-ae</module>
-            
+                
+    
     
     <module>jcore-conll-consumer</module>
-            
+                
+    
     
     <module>jcore-coordination-baseline-ae</module>
-            
+                
+    
     
     <module>jcore-cord19-reader</module>
-            
+                
+    
     
     <module>jcore-coreference-writer</module>
-            
+                
+    
     
     <module>jcore-ct-reader</module>
-            
+                
+    
     
     <module>jcore-db-checkpoint-ae</module>
-            
+                
+    
     
     <module>jcore-descriptor-creator</module>
-            
+                
+    
     
     <module>jcore-dta-reader</module>
-            
+                
+    
     
     <module>jcore-ec-code-ae</module>
-            
+                
+    
     
     <module>jcore-elasticsearch-consumer</module>
-            
+                
+    
     
     <module>jcore-embedding-writer</module>
-            
+                
+    
     
     <module>jcore-event-flattener-ae</module>
-            
+                
+    
     
     <module>jcore-feature-value-replacement-ae</module>
-            
+                
+    
     
     <module>jcore-file-reader</module>
-            
+                
+    
     
     <module>jcore-flair-ner-ae</module>
-            
+                
+    
     
     <module>jcore-flair-token-embedding-ae</module>
-            
+                
+    
     
     <module>jcore-flow-controllers</module>
-            
+                
+    
     
     <module>jcore-gnp-bioc-reader</module>
-            
+                
+    
     
     <module>jcore-gnp-bioc-writer</module>
-            
+                
+    
     
     <module>jcore-iexml-consumer</module>
-            
+                
+    
     
     <module>jcore-iexml-reader</module>
-            
+                
+    
     
     <module>jcore-ign-reader</module>
-            
+                
+    
     
     <module>jcore-iob-consumer</module>
-            
+                
+    
     
     <module>jcore-jnet-ae</module>
-            
+                
+    
     
     <module>jcore-jpos-ae</module>
-            
+                
+    
     
     <module>jcore-jsbd-ae</module>
-            
+                
+    
     
     <module>jcore-jtbd-ae</module>
-            
+                
+    
     
     <module>jcore-julielab-entity-evaluator-consumer</module>
-            
+                
+    
     
     <module>jcore-likelihood-assignment-ae</module>
-            
+                
+    
     
     <module>jcore-likelihood-detection-ae</module>
-            
+                
+    
     
     <module>jcore-line-multiplier</module>
-            
+                
+    
     
     <module>jcore-lingpipegazetteer-ae</module>
-            
+                
+    
     
     <module>jcore-lingpipe-porterstemmer-ae</module>
-            
+                
+    
     
     <module>jcore-lingscope-ae</module>
-            
+                
+    
     
     <module>jcore-linnaeus-species-ae</module>
-            
+                
+    
     
     <module>jcore-mantra-xml-types</module>
-            
+                
+    
     
     <module>jcore-medxn-ae</module>
-            
+                
+    
     
     <module>jcore-msdoc-reader</module>
-            
+                
+    
     
     <module>jcore-mstparser-ae</module>
-            
+                
+    
     
     <module>jcore-muc7-reader</module>
-            
+                
+    
     
     <module>jcore-mutationfinder-ae</module>
-            
+                
+    
     
     <module>jcore-neo4j-relations-consumer</module>
-            
+                
+    
     
     <module>jcore-opennlp-chunk-ae</module>
-            
+                
+    
     
     <module>jcore-opennlp-parser-ae</module>
-            
+                
+    
     
     <module>jcore-opennlp-postag-ae</module>
-            
+                
+    
     
     <module>jcore-opennlp-sentence-ae</module>
-            
+                
+    
     
     <module>jcore-opennlp-token-ae</module>
-            
+                
+    
     
     <module>jcore-ppd-writer</module>
-            
+                
+    
     
     <module>jcore-pmc-reader</module>
-            
+                
+    
     
     <module>jcore-pubtator-reader</module>
-            
+                
+    
     
     <module>jcore-stanford-lemmatizer-ae</module>
-            
+                
+    
     
     <module>jcore-topic-indexing-ae</module>
-            
+                
+    
     
     <module>jcore-topics-writer</module>
-            
+                
+    
     
     <module>jcore-txt-consumer</module>
-            
+                
+    
     
     <module>jcore-types</module>
-            
+                
+    
     
     <module>jcore-utilities</module>
-            
+                
+    
     
     <module>jcore-xml-mapper</module>
-            
+                
+    
     
     <module>jcore-xml-reader</module>
-            
+                
+    
     
     <module>jcore-xmi-reader</module>
-            
+                
+    
     
     <module>jcore-xmi-writer</module>
-            
+                
+    
     
     <module>jedis-parent</module>
-            
+                
+    
     
     <module>jcore-jedis-integration-tests</module>
-            
+                
+    
     
     <module>jcore-mmax2-reader</module>
+          
+    
+    <module>jcore-nlmgene-reader</module>
       
   </modules>
-      
+        
+  
   
   <scm>
-            
+                
+    
     
     <connection>scm:git:https://github.com/JULIELab/jcore-base
         </connection>
-            
+                
+    
     
     <developerConnection>scm:git:https://github.com/JULIELab/jcore-base</developerConnection>
-            
+                
+    
     
     <url>scm:git:https://github.com/JULIELab/jcore-base</url>
-          
+              
+  
   
   </scm>
-    
+      
+
 
 </project>

From 70116787c0ad3b79133334a053a005236e83160e Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 25 Jul 2022 12:14:45 +0200
Subject: [PATCH 217/269] Add the component.meta file for the NLM-Gene reader.

---
 jcore-nlmgene-reader/component.meta           | 20 +++++++++++++++++++
 .../jcore/reader/nlmgene/NLMGeneReader.java   |  1 -
 2 files changed, 20 insertions(+), 1 deletion(-)
 create mode 100644 jcore-nlmgene-reader/component.meta

diff --git a/jcore-nlmgene-reader/component.meta b/jcore-nlmgene-reader/component.meta
new file mode 100644
index 000000000..cca571781
--- /dev/null
+++ b/jcore-nlmgene-reader/component.meta
@@ -0,0 +1,20 @@
+{
+    "categories": [
+        "reader"
+    ],
+    "description": "Collection reader for the BioC format of the NLM-Gene corpus.",
+    "descriptors": [
+        {
+            "category": "reader",
+            "location": "de.julielab.jcore.reader.nlmgene.desc.jcore-nlmgene-reader"
+        }
+    ],
+    "exposable": true,
+    "group": "general",
+    "maven-artifact": {
+        "artifactId": "jcore-nlmgene-reader",
+        "groupId": "de.julielab",
+        "version": "2.6.0-SNAPSHOT"
+    },
+    "name": "JCoRe NLM-Gene Reader"
+}
diff --git a/jcore-nlmgene-reader/src/main/java/de/julielab/jcore/reader/nlmgene/NLMGeneReader.java b/jcore-nlmgene-reader/src/main/java/de/julielab/jcore/reader/nlmgene/NLMGeneReader.java
index 36bcb0c1a..c1f9bd584 100644
--- a/jcore-nlmgene-reader/src/main/java/de/julielab/jcore/reader/nlmgene/NLMGeneReader.java
+++ b/jcore-nlmgene-reader/src/main/java/de/julielab/jcore/reader/nlmgene/NLMGeneReader.java
@@ -88,7 +88,6 @@ public void getNext(JCas jCas) throws CollectionException {
                 textBuilder.append(System.getProperty("line.separator"));
             }
 
-
             jCas.setDocumentText(textBuilder.toString());
         } catch (XMLStreamException | IOException e) {
             log.error("Could not read NLM-Gene corpus file {}", nextFile, e);

From 1a9550eb78aa7c9d107986d003d0ca5d3f290ccc Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 25 Jul 2022 14:45:24 +0200
Subject: [PATCH 218/269] Add the possibility to restrict the NLM-Gene files
 being read through an ID list file.

Thus we can use the training and test set ID lists delivered with the corpus to read each of the two sets separately.
---
 .../jcore/reader/nlmgene/NLMGeneReader.java   | 18 +++++++++++++---
 .../nlmgene/desc/jcore-nlmgene-reader.xml     |  7 +++++++
 .../reader/nlmgene/NLMGeneReaderTest.java     | 21 ++++++++++++++++++-
 .../test/resources/input/listWithTestDoc.txt  |  1 +
 .../resources/input/listWithoutTestDoc.txt    |  1 +
 5 files changed, 44 insertions(+), 4 deletions(-)
 create mode 100644 jcore-nlmgene-reader/src/test/resources/input/listWithTestDoc.txt
 create mode 100644 jcore-nlmgene-reader/src/test/resources/input/listWithoutTestDoc.txt

diff --git a/jcore-nlmgene-reader/src/main/java/de/julielab/jcore/reader/nlmgene/NLMGeneReader.java b/jcore-nlmgene-reader/src/main/java/de/julielab/jcore/reader/nlmgene/NLMGeneReader.java
index c1f9bd584..7e10f9081 100644
--- a/jcore-nlmgene-reader/src/main/java/de/julielab/jcore/reader/nlmgene/NLMGeneReader.java
+++ b/jcore-nlmgene-reader/src/main/java/de/julielab/jcore/reader/nlmgene/NLMGeneReader.java
@@ -28,17 +28,23 @@
 import java.io.IOException;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.util.Collections;
 import java.util.Iterator;
 import java.util.Optional;
+import java.util.Set;
+import java.util.stream.Collectors;
 
 @ResourceMetaData(name = "JCoRe NLM-Gene Reader", description = "Collection reader for the BioC format of the NLM-Gene corpus.", vendor = "JULIE Lab Jena, Germany")
 @TypeCapability(inputs = {}, outputs = {"de.julielab.jcore.types.Gene", "de.julielab.jcore.types.ResourceEntry"})
 public class NLMGeneReader extends JCasCollectionReader_ImplBase {
 
     public static final String PARAM_INPUT_DIR = "InputDirectory";
+    public static final String PARAM_ID_LIST_PATH = "IdList";
     private final static Logger log = LoggerFactory.getLogger(NLMGeneReader.class);
     @ConfigurationParameter(name = PARAM_INPUT_DIR, description = "Path to the directory that contains the BioC XML files of the NLM-Gene corpus.")
     private String inputDir;
+    @ConfigurationParameter(name = PARAM_ID_LIST_PATH, mandatory = false, description = "Path to a file with a list of IDs to restrict the read files to. This will typically be the list with IDs for the training or for the test set of the corpus. When no list is specified, the whole corpus is read.")
+    private String idList;
     private Iterator<Path> corpusFileIterator;
     private int numRead;
 
@@ -50,8 +56,9 @@ public class NLMGeneReader extends JCasCollectionReader_ImplBase {
     public void initialize(UimaContext context) throws ResourceInitializationException {
         super.initialize(context);
         inputDir = (String) context.getConfigParameterValue(PARAM_INPUT_DIR);
+        idList = (String) context.getConfigParameterValue(PARAM_ID_LIST_PATH);
         try {
-            corpusFileIterator = readInputFiles(inputDir);
+            corpusFileIterator = readInputFiles(inputDir, idList);
         } catch (IOException e) {
             log.error("Could not read NLM-Gene corpus input files.", e);
             throw new ResourceInitializationException(e);
@@ -59,9 +66,14 @@ public void initialize(UimaContext context) throws ResourceInitializationExcepti
         numRead = 0;
     }
 
-    private Iterator<Path> readInputFiles(String inputDir) throws IOException {
+    private Iterator<Path> readInputFiles(String inputDir, String idList) throws IOException {
         Path inputPath = Path.of(inputDir);
-        return Files.list(inputPath).filter(p -> p.toString().toLowerCase().endsWith(".xml") || p.toString().toLowerCase().endsWith(".xml.gz")).iterator();
+        Path idListPath = idList != null ? Path.of(idList) : null;
+        Set<String> ids = idListPath != null && Files.exists(idListPath)  ? Files.readAllLines(idListPath).stream().collect(Collectors.toSet()) : Collections.emptySet();
+        return Files.list(inputPath)
+                .filter(p -> p.toString().toLowerCase().endsWith(".xml") || p.toString().toLowerCase().endsWith(".xml.gz"))
+                .filter(p -> ids.isEmpty() ? true : ids.contains(p.getFileName().toString().replaceAll("(?i)\\.bioc\\.xml(\\.gz)?", "")))
+                .iterator();
     }
 
     /**
diff --git a/jcore-nlmgene-reader/src/main/resources/de/julielab/jcore/reader/nlmgene/desc/jcore-nlmgene-reader.xml b/jcore-nlmgene-reader/src/main/resources/de/julielab/jcore/reader/nlmgene/desc/jcore-nlmgene-reader.xml
index c4f2d5028..3f8940a5a 100644
--- a/jcore-nlmgene-reader/src/main/resources/de/julielab/jcore/reader/nlmgene/desc/jcore-nlmgene-reader.xml
+++ b/jcore-nlmgene-reader/src/main/resources/de/julielab/jcore/reader/nlmgene/desc/jcore-nlmgene-reader.xml
@@ -14,6 +14,13 @@
         <multiValued>false</multiValued>
         <mandatory>true</mandatory>
       </configurationParameter>
+      <configurationParameter>
+        <name>IdList</name>
+        <description>Path to a file with a list of IDs to restrict the read files to. This will typically be the list with IDs for the training or for the test set of the corpus. When no list is specified, the whole corpus is read.</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
     </configurationParameters>
     <configurationParameterSettings/>
     <typeSystemDescription>
diff --git a/jcore-nlmgene-reader/src/test/java/de/julielab/jcore/reader/nlmgene/NLMGeneReaderTest.java b/jcore-nlmgene-reader/src/test/java/de/julielab/jcore/reader/nlmgene/NLMGeneReaderTest.java
index d21b35292..a346cdb75 100644
--- a/jcore-nlmgene-reader/src/test/java/de/julielab/jcore/reader/nlmgene/NLMGeneReaderTest.java
+++ b/jcore-nlmgene-reader/src/test/java/de/julielab/jcore/reader/nlmgene/NLMGeneReaderTest.java
@@ -6,6 +6,7 @@
 import de.julielab.jcore.types.Title;
 import de.julielab.jcore.types.pubmed.AbstractText;
 import de.julielab.jcore.types.pubmed.Header;
+import org.apache.uima.UIMAException;
 import org.apache.uima.collection.CollectionReader;
 import org.apache.uima.fit.factory.CollectionReaderFactory;
 import org.apache.uima.fit.factory.JCasFactory;
@@ -31,7 +32,7 @@ public class NLMGeneReaderTest{
 
     @Test
     public void testReader() throws Exception {
-        final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-document-meta-pubmed-types", "de.julielab.jcore.types.jcore-document-structure-pubmed-types", "de.julielab.jcore.types.jcore-semantics-biology-types");
+        final JCas jCas = getJCas();
         final CollectionReader reader = CollectionReaderFactory.createReader("de.julielab.jcore.reader.nlmgene.desc.jcore-nlmgene-reader", NLMGeneReader.PARAM_INPUT_DIR, Path.of("src", "test", "resources", "input").toString());
         assertThat(reader.hasNext()).isTrue();
         reader.getNext(jCas.getCas());
@@ -53,4 +54,22 @@ public void testReader() throws Exception {
         assertThat(secondGene).extracting(Gene::getCoveredText).isEqualTo("CD11c");
         assertThat(secondGene.getResourceEntryList(0)).extracting(ResourceEntry::getEntryId).isEqualTo("16411");
     }
+
+    private JCas getJCas() throws UIMAException {
+        final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-document-meta-pubmed-types", "de.julielab.jcore.types.jcore-document-structure-pubmed-types", "de.julielab.jcore.types.jcore-semantics-biology-types");
+        return jCas;
+    }
+
+    @Test
+    public void testReadFilesOnList() throws Exception{
+        final CollectionReader reader = CollectionReaderFactory.createReader("de.julielab.jcore.reader.nlmgene.desc.jcore-nlmgene-reader",
+                NLMGeneReader.PARAM_INPUT_DIR, Path.of("src", "test", "resources", "input").toString(),
+                NLMGeneReader.PARAM_ID_LIST_PATH, Path.of("src", "test", "resources", "input", "listWithTestDoc.txt").toString());
+        assertThat(reader.hasNext()).isTrue();
+
+        final CollectionReader reader2 = CollectionReaderFactory.createReader("de.julielab.jcore.reader.nlmgene.desc.jcore-nlmgene-reader",
+                NLMGeneReader.PARAM_INPUT_DIR, Path.of("src", "test", "resources", "input").toString(),
+                NLMGeneReader.PARAM_ID_LIST_PATH, Path.of("src", "test", "resources", "input", "listWithoutTestDoc.txt").toString());
+        assertThat(reader2.hasNext()).isFalse();
+    }
 }
diff --git a/jcore-nlmgene-reader/src/test/resources/input/listWithTestDoc.txt b/jcore-nlmgene-reader/src/test/resources/input/listWithTestDoc.txt
new file mode 100644
index 000000000..32547ffca
--- /dev/null
+++ b/jcore-nlmgene-reader/src/test/resources/input/listWithTestDoc.txt
@@ -0,0 +1 @@
+12461077
\ No newline at end of file
diff --git a/jcore-nlmgene-reader/src/test/resources/input/listWithoutTestDoc.txt b/jcore-nlmgene-reader/src/test/resources/input/listWithoutTestDoc.txt
new file mode 100644
index 000000000..7b4d68d70
--- /dev/null
+++ b/jcore-nlmgene-reader/src/test/resources/input/listWithoutTestDoc.txt
@@ -0,0 +1 @@
+empty
\ No newline at end of file

From 190be0a4dfb054790f5d191ae5561df3b97e2147 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 28 Jul 2022 11:24:32 +0200
Subject: [PATCH 219/269] Resolves #146.

---
 .../EntityEvaluatorConsumer.java              |   7 +-
 .../entityevaluator/FeatureValueFilter.java   |   7 +-
 .../EntityEvaluatorConsumerTest.java          | 102 ++++++++++++------
 3 files changed, 80 insertions(+), 36 deletions(-)

diff --git a/jcore-julielab-entity-evaluator-consumer/src/main/java/de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumer.java b/jcore-julielab-entity-evaluator-consumer/src/main/java/de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumer.java
index 413e8fa87..5dadad803 100644
--- a/jcore-julielab-entity-evaluator-consumer/src/main/java/de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumer.java
+++ b/jcore-julielab-entity-evaluator-consumer/src/main/java/de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumer.java
@@ -56,6 +56,7 @@ public class EntityEvaluatorConsumer extends JCasAnnotator_ImplBase {
     public static final String PARAM_TYPE_PREFIX = "TypePrefix";
     public final static String PARAM_ENTITY_TYPES = "EntityTypes";
     public static final String PARAM_FEATURE_FILTERS = "FeatureFilters";
+    public static final String PARAM_ALLOW_REGEX_FOR_FILTERS = "AllowRegexForFilters";
     public final static String PARAM_OFFSET_MODE = "OffsetMode";
     public final static String PARAM_OFFSET_SCOPE = "OffsetScope";
     public final static String PARAM_OUTPUT_FILE = "OutputFile";
@@ -77,6 +78,8 @@ public class EntityEvaluatorConsumer extends JCasAnnotator_ImplBase {
     private String typePrefix;
     @ConfigurationParameter(name = PARAM_FEATURE_FILTERS, mandatory = false, description = "Optional. Only lets those entities contribute to the output file that fulfill the given feature value(s). The syntax is <type>:<feature path>=<value>. The '<type>:' prefix is optional. If omitted, the filters will be applied to all entities given in the " + PARAM_ENTITY_TYPES + " parameter. An arbitrary number of filter expressions may be specified. In such cases, it is important to understand the boolean structure after which the expressions are evaluated in order to omit an annotation or take it into account for the output. The filter expressions are first grouped by feature path. Within such a group, the filter values form a disjunction. Thus, if any filter in a group is satisfied, the whole group is satisfied. The different groups form a conjunction. Thus, if any group is not satisfied, the whole conjunction is unsatisfied and the respective annotation will be omitted from output.")
     private String[] featureFilterDefinitions;
+    @ConfigurationParameter(name = PARAM_ALLOW_REGEX_FOR_FILTERS, mandatory = false, description = "Optional. If set to true, the filter values specified with the " + PARAM_FEATURE_FILTERS + " parameter are interpreted as regular expressions. The actual feature values are than matched by regular expression resolution instead of testing string equality.")
+    boolean allowRegexForFilters;
     @ConfigurationParameter(name = PARAM_OUTPUT_FILE, description = "Output file to which all entity information is written in the format\n"
             + "docId EGID begin end confidence\n"
             + "Where the fields are separated by tab stops. If the file name ends with .gz, the output file will automatically be gzipped.")
@@ -251,6 +254,7 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
         typePrefix = (String) aContext.getConfigParameterValue(PARAM_TYPE_PREFIX);
 
         featureFilterDefinitions = (String[]) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_FEATURE_FILTERS)).orElse(new String[0]);
+        allowRegexForFilters = (Boolean) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_ALLOW_REGEX_FOR_FILTERS)).orElse(false);
         outputFilePath = (String) aContext.getConfigParameterValue(PARAM_OUTPUT_FILE);
         appendThreadNameToOutputFile = Optional.ofNullable((Boolean) aContext.getConfigParameterValue(PARAM_APPEND_THREAD_NAME_TO_OUTPUT_FILE)).orElse(false);
         entityTypeStrings = (String[]) aContext.getConfigParameterValue(PARAM_ENTITY_TYPES);
@@ -279,6 +283,7 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
         log.info("{}: {}", PARAM_OUTPUT_COLUMNS, outputColumnNames);
         log.info("{}: {}", PARAM_COLUMN_DEFINITIONS, columnDefinitionDescriptions);
         log.info("{}: {}", PARAM_FEATURE_FILTERS, featureFilterDefinitions);
+        log.info("{}: {}", PARAM_ALLOW_REGEX_FOR_FILTERS, allowRegexForFilters);
         log.info("{}: {}", PARAM_ENTITY_TYPES, entityTypeStrings);
         log.info("{}: {}", PARAM_TYPE_PREFIX, typePrefix);
         log.info("{}: {}", PARAM_OUTPUT_FILE, outputFilePath);
@@ -327,7 +332,7 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException {
                     throw new IllegalArgumentException("No entity names are given, neither by the " + PARAM_ENTITY_TYPES + " parameter nor in the " + PARAM_COLUMN_DEFINITIONS + " parameter.");
                 removeSubsumedTypes(entityTypes, ts);
 
-                featureFilters = Stream.of(featureFilterDefinitions).map(d -> new FeatureValueFilter(d, typePrefix, ts)).collect(Collectors.groupingBy(filter -> filter.getPathValuePair().fp.getFeaturePath()));
+                featureFilters = Stream.of(featureFilterDefinitions).map(d -> new FeatureValueFilter(d, typePrefix, ts, allowRegexForFilters)).collect(Collectors.groupingBy(filter -> filter.getPathValuePair().fp.getFeaturePath()));
 
                 addDocumentIdColumn(aJCas);
                 addDocumentTextSha256Column();
diff --git a/jcore-julielab-entity-evaluator-consumer/src/main/java/de/julielab/jcore/consumer/entityevaluator/FeatureValueFilter.java b/jcore-julielab-entity-evaluator-consumer/src/main/java/de/julielab/jcore/consumer/entityevaluator/FeatureValueFilter.java
index c84ba2ade..25a1a25d2 100644
--- a/jcore-julielab-entity-evaluator-consumer/src/main/java/de/julielab/jcore/consumer/entityevaluator/FeatureValueFilter.java
+++ b/jcore-julielab-entity-evaluator-consumer/src/main/java/de/julielab/jcore/consumer/entityevaluator/FeatureValueFilter.java
@@ -17,6 +17,7 @@
 
 import java.util.Collections;
 import java.util.Set;
+import java.util.function.BiFunction;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
@@ -27,10 +28,12 @@ public class FeatureValueFilter {
     protected Set<Type> types;
     protected PathValuePair pathValuePair;
     private Matcher mfull;
+    private BiFunction<String, String, Boolean> featureValueMatchTest;
 
-    public FeatureValueFilter(String columnDefinition, String typePrefix, TypeSystem ts) {
+    public FeatureValueFilter(String columnDefinition, String typePrefix, TypeSystem ts, boolean allowRegexForFilters) {
         this();
         parseAndAddDefinition(columnDefinition, typePrefix, ts);
+        featureValueMatchTest = allowRegexForFilters ? String::matches : String::equals;
     }
 
     public FeatureValueFilter() {
@@ -60,7 +63,7 @@ public boolean contradictsFeatureFilter(TOP a) {
             return false;
         String fpValue = pathValuePair.fp.getValueAsString(a);
         if (fpValue != null)
-            return pathValuePair.targetValue == null || !fpValue.equals(pathValuePair.targetValue);
+            return pathValuePair.targetValue == null || !featureValueMatchTest.apply(fpValue, pathValuePair.targetValue);
         return pathValuePair.targetValue != null;
     }
 
diff --git a/jcore-julielab-entity-evaluator-consumer/src/test/java/de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumerTest.java b/jcore-julielab-entity-evaluator-consumer/src/test/java/de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumerTest.java
index b0589b592..b50a25edd 100644
--- a/jcore-julielab-entity-evaluator-consumer/src/test/java/de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumerTest.java
+++ b/jcore-julielab-entity-evaluator-consumer/src/test/java/de/julielab/jcore/consumer/entityevaluator/EntityEvaluatorConsumerTest.java
@@ -15,6 +15,7 @@
 import de.julielab.jcore.types.pubmed.ManualDescriptor;
 import de.julielab.jcore.utility.JCoReTools;
 import org.apache.commons.codec.binary.Base64;
+import org.apache.uima.UIMAException;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
 import org.apache.uima.fit.factory.JCasFactory;
@@ -41,9 +42,7 @@ public class EntityEvaluatorConsumerTest {
 
 	@Test
 	public void testEntityEvaluatorConsumerSingleEntity() throws Exception {
-		JCas jcas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-mention-types",
-				"de.julielab.jcore.types.jcore-semantics-biology-types",
-				"de.julielab.jcore.types.jcore-document-meta-types");
+		JCas jcas = getjCas();
 		AnalysisEngine consumer = AnalysisEngineFactory.createEngine(EntityEvaluatorConsumer.class,
 				PARAM_COLUMN_DEFINITIONS,
 				new String[] { DOCUMENT_ID_COLUMN + ": Header = /docId",
@@ -75,12 +74,16 @@ public void testEntityEvaluatorConsumerSingleEntity() throws Exception {
 		assertEquals("document1	document1:0	23	gene", lines.get(0));
 	}
 
+	private JCas getjCas() throws UIMAException {
+		return JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-mention-types",
+				"de.julielab.jcore.types.jcore-semantics-biology-types",
+				"de.julielab.jcore.types.jcore-document-meta-pubmed-types");
+	}
+
 	@Test
 	public void testEntityEvaluatorConsumerSingleEntity2() throws Exception {
 		// The same test as above but minus the DocumentId column
-		JCas jcas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-mention-types",
-				"de.julielab.jcore.types.jcore-semantics-biology-types",
-				"de.julielab.jcore.types.jcore-document-meta-types");
+		JCas jcas = getjCas();
 		AnalysisEngine consumer = AnalysisEngineFactory.createEngine(EntityEvaluatorConsumer.class,
 				PARAM_COLUMN_DEFINITIONS,
 				new String[] { "geneid:Gene=/resourceEntryList[0]/entryId", "name:/:coveredText()" },
@@ -113,9 +116,7 @@ public void testEntityEvaluatorConsumerSingleEntity2() throws Exception {
 
 	@Test
 	public void testEntityEvaluatorConsumerNoEntities() throws Exception {
-		JCas jcas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-mention-types",
-				"de.julielab.jcore.types.jcore-semantics-biology-types",
-				"de.julielab.jcore.types.jcore-document-meta-types");
+		JCas jcas = getjCas();
 		AnalysisEngine consumer = AnalysisEngineFactory.createEngine(EntityEvaluatorConsumer.class,
 				PARAM_COLUMN_DEFINITIONS,
 				new String[] { DOCUMENT_ID_COLUMN + ": Header = /docId",
@@ -143,9 +144,7 @@ public void testEntityEvaluatorConsumerNoEntities() throws Exception {
 
 	@Test
 	public void testEntityEvaluatorConsumerSingleEntityDocumentTextHash() throws Exception {
-		JCas jcas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-mention-types",
-				"de.julielab.jcore.types.jcore-semantics-biology-types",
-				"de.julielab.jcore.types.jcore-document-meta-types");
+		JCas jcas = getjCas();
 		AnalysisEngine consumer = AnalysisEngineFactory.createEngine(EntityEvaluatorConsumer.class,
 				PARAM_COLUMN_DEFINITIONS,
 				new String[] {
@@ -179,9 +178,7 @@ public void testEntityEvaluatorConsumerSingleEntityDocumentTextHash() throws Exc
 
 	@Test
 	public void testEntityEvaluatorConsumerMultipleEntities() throws Exception {
-		JCas jcas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-mention-types",
-				"de.julielab.jcore.types.jcore-semantics-biology-types",
-				"de.julielab.jcore.types.jcore-document-meta-types");
+		JCas jcas = getjCas();
 		AnalysisEngine consumer = AnalysisEngineFactory.createEngine(EntityEvaluatorConsumer.class,
 				PARAM_COLUMN_DEFINITIONS,
 				new String[] {  SENTENCE_ID_COLUMN + ": Sentence=/id",
@@ -216,9 +213,7 @@ public void testEntityEvaluatorConsumerMultipleEntities() throws Exception {
 
 	@Test
 	public void testEntityEvaluatorConsumerSingleEntityNoWSOffsets() throws Exception {
-		JCas jcas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-mention-types",
-				"de.julielab.jcore.types.jcore-semantics-biology-types",
-				"de.julielab.jcore.types.jcore-document-meta-types");
+		JCas jcas = getjCas();
 		AnalysisEngine consumer = AnalysisEngineFactory.createEngine(EntityEvaluatorConsumer.class,
 				PARAM_COLUMN_DEFINITIONS,
 				new String[] { DOCUMENT_ID_COLUMN + ": Header = /docId", SENTENCE_ID_COLUMN + ": Sentence=/id",
@@ -255,9 +250,7 @@ public void testEntityEvaluatorConsumerSuperType() throws Exception {
 		// other, e.g. EntityMention and Gene, then we don't want to traverse
 		// the subsumed types on their own. They are contained in the annotation
 		// index of their super type.
-		JCas jcas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-mention-types",
-				"de.julielab.jcore.types.jcore-semantics-biology-types",
-				"de.julielab.jcore.types.jcore-document-meta-types");
+		JCas jcas = getjCas();
 		AnalysisEngine consumer = AnalysisEngineFactory.createEngine(EntityEvaluatorConsumer.class,
 				PARAM_COLUMN_DEFINITIONS,
 				new String[] { DOCUMENT_ID_COLUMN + ": Header = /docId", SENTENCE_ID_COLUMN + ": Sentence=/id",
@@ -297,23 +290,21 @@ public void testCreateNonWsOffsetMap() throws Exception {
 		TreeMap<Integer, Integer> numWsMap = (TreeMap<Integer, Integer>) method.invoke(null, "one two three");
 		// first check the actual map entries (after each white space position
 		// there should be an entry)
-		assertEquals(new Integer(0), numWsMap.get(0));
-		assertEquals(new Integer(1), numWsMap.get(4));
-		assertEquals(new Integer(2), numWsMap.get(8));
+		assertEquals(Integer.valueOf(0), numWsMap.get(0));
+		assertEquals(Integer.valueOf(1), numWsMap.get(4));
+		assertEquals(Integer.valueOf(2), numWsMap.get(8));
 
 		// now check the intended use; using the floor element, we should be
 		// able to the correct value even for those positions we don't have an
 		// explicit mapping for
-		assertEquals(new Integer(0), numWsMap.floorEntry(2).getValue());
-		assertEquals(new Integer(1), numWsMap.floorEntry(5).getValue());
-		assertEquals(new Integer(2), numWsMap.floorEntry(11).getValue());
+		assertEquals(Integer.valueOf(0), numWsMap.floorEntry(2).getValue());
+		assertEquals(Integer.valueOf(1), numWsMap.floorEntry(5).getValue());
+		assertEquals(Integer.valueOf(2), numWsMap.floorEntry(11).getValue());
 	}
 
 	@Test
 	public void testEntityEvaluatorConsumerFeatureFilter() throws Exception {
-		JCas jcas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-mention-types",
-				"de.julielab.jcore.types.jcore-semantics-biology-types",
-				"de.julielab.jcore.types.jcore-document-meta-types");
+		JCas jcas = getjCas();
 		AnalysisEngine consumer = AnalysisEngineFactory.createEngine(EntityEvaluatorConsumer.class,
 				PARAM_COLUMN_DEFINITIONS,
 				new String[] { DOCUMENT_ID_COLUMN + ": Header = /docId", SENTENCE_ID_COLUMN + ": Sentence=/id",
@@ -356,6 +347,53 @@ public void testEntityEvaluatorConsumerFeatureFilter() throws Exception {
 		assertEquals("document1	document1:0	42	One", lines.get(0));
 	}
 
+	@Test
+	public void testEntityEvaluatorConsumerFeatureFilterRegEx() throws Exception {
+		JCas jcas = getjCas();
+		AnalysisEngine consumer = AnalysisEngineFactory.createEngine(EntityEvaluatorConsumer.class,
+				PARAM_COLUMN_DEFINITIONS,
+				new String[] { DOCUMENT_ID_COLUMN + ": Header = /docId", SENTENCE_ID_COLUMN + ": Sentence=/id",
+						"genetype:Gene=/specificType", "name:/:coveredText()" },
+				PARAM_OUTPUT_COLUMNS, new String[] { DOCUMENT_ID_COLUMN, SENTENCE_ID_COLUMN, "genetype", "name" },
+				PARAM_TYPE_PREFIX, "de.julielab.jcore.types", PARAM_OUTPUT_FILE, "src/test/resources/outfile-test.tsv",
+				PARAM_FEATURE_FILTERS, new String[] { "Gene:/specificType=Group[3-4]{2,3}s?" },
+				PARAM_ALLOW_REGEX_FOR_FILTERS, true);
+
+		jcas.setDocumentText("One gene one sentence.");
+		Header h = new Header(jcas);
+		h.setDocId("document1");
+		h.addToIndexes();
+		Sentence s = new Sentence(jcas, 0, jcas.getDocumentText().length());
+		s.setId("sentence1");
+		s.addToIndexes();
+		{
+			Gene g = new Gene(jcas, 4, 8);
+			// should not pass filter: '1' and '2' are outside the character class [3-4], so the whole-string regex match fails
+			g.setSpecificType("Group123");
+			g.addToIndexes();
+		}
+		{
+			Gene g = new Gene(jcas, 0, 3);
+			// should pass filter: "33" satisfies [3-4]{2,3} and is followed by the optional 's'
+			g.setSpecificType("Group33s");
+			g.addToIndexes();
+		}
+		{
+			Gene g = new Gene(jcas, 0, 3);
+			// should pass filter: "344" satisfies [3-4]{2,3}; the trailing 's' is optional and absent here
+			g.setSpecificType("Group344");
+			g.addToIndexes();
+		}
+
+		consumer.process(jcas.getCas());
+		consumer.collectionProcessComplete();
+
+		List<String> lines = Files.readLines(new File("src/test/resources/outfile-test.tsv"), Charset.forName("UTF-8"));
+		assertEquals(2, lines.size());
+		assertEquals("document1	document1:0	Group33s	One", lines.get(0));
+		assertEquals("document1	document1:0	Group344	One", lines.get(1));
+	}
+
 	@Test
 	public void testParallelMultiValues() throws Exception {
 		JCas jcas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-mention-types",
@@ -403,9 +441,7 @@ public void testParallelMultiValues() throws Exception {
 
     @Test
     public void testCartesianMultiValues() throws Exception {
-        JCas jcas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-semantics-mention-types",
-                "de.julielab.jcore.types.jcore-semantics-biology-types",
-                "de.julielab.jcore.types.jcore-document-meta-types", "de.julielab.jcore.types.jcore-document-meta-pubmed-types");
+		JCas jcas = getjCas();
         AnalysisEngine consumer = AnalysisEngineFactory.createEngine(EntityEvaluatorConsumer.class,
                 PARAM_COLUMN_DEFINITIONS,
                 new String[] {

From 9af99fa541db4f0e56de75c6f1285ef2b13b0d85 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 28 Jul 2022 11:35:51 +0200
Subject: [PATCH 220/269] Update the descriptor of the EntityEvaluatorConsumer.

---
 .../jcore-julielab-entity-evaluator-consumer.xml | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/jcore-julielab-entity-evaluator-consumer/src/main/resources/de/julielab/jcore/consumer/entityevaluator/desc/jcore-julielab-entity-evaluator-consumer.xml b/jcore-julielab-entity-evaluator-consumer/src/main/resources/de/julielab/jcore/consumer/entityevaluator/desc/jcore-julielab-entity-evaluator-consumer.xml
index 51c7fc6af..6e95e4205 100644
--- a/jcore-julielab-entity-evaluator-consumer/src/main/resources/de/julielab/jcore/consumer/entityevaluator/desc/jcore-julielab-entity-evaluator-consumer.xml
+++ b/jcore-julielab-entity-evaluator-consumer/src/main/resources/de/julielab/jcore/consumer/entityevaluator/desc/jcore-julielab-entity-evaluator-consumer.xml
@@ -1,4 +1,4 @@
-<?xml version='1.0' encoding='UTF-8'?>
+<?xml version="1.0" encoding="UTF-8"?>
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <primitive>true</primitive>
@@ -6,7 +6,6 @@
     <analysisEngineMetaData>
         <name>JCoRe Entity Evaluator and TSV Consumer</name>
         <description>This component was originally created to output the tab separated format used the JULIE Entity Evaluator. However, this component can be used to create a TSV file from any annotation or annotation set. The component allows to define columns by specifying the annotation type to draw feature values from and a feature path that specifies the location of the desired feature. All feature paths will be applied to each configured annotation, returning null values if an annotation does not exhibit a value for a column's feature path.</description>
-        <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
         <configurationParameters>
             <configurationParameter>
@@ -58,6 +57,13 @@
                 <multiValued>true</multiValued>
                 <mandatory>false</mandatory>
             </configurationParameter>
+            <configurationParameter>
+                <name>AllowRegexForFilters</name>
+                <description>Optional. If set to true, the filter values specified with the FeatureFilters parameter are interpreted as regular expressions. The actual feature values are than matched by regular expression resolution instead of testing string equality.</description>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
             <configurationParameter>
                 <name>OutputFile</name>
                 <description>Output file to which all entity information is written in the format
@@ -122,9 +128,9 @@
                 </value>
             </nameValuePair>
         </configurationParameterSettings>
-        <typeSystemDescription />
-        <fsIndexCollection />
-        <capabilities />
+        <typeSystemDescription/>
+        <fsIndexCollection/>
+        <capabilities/>
         <operationalProperties>
             <modifiesCas>true</modifiesCas>
             <multipleDeploymentAllowed>true</multipleDeploymentAllowed>

From ec8cf243d06b76a81a7d1db2bf20d444510a3095 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 28 Jul 2022 15:04:10 +0200
Subject: [PATCH 221/269] Fix a bug where hasNext returns true although no more
 documents are available.

---
 .../de/julielab/jcore/reader/db/DBMultiplierReader.java     | 6 +++++-
 .../java/de/julielab/jcore/reader/xmi/XmiDBMultiplier.java  | 6 +++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplierReader.java b/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplierReader.java
index c41bfe4e0..f83abfe02 100644
--- a/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplierReader.java
+++ b/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplierReader.java
@@ -22,6 +22,7 @@
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.List;
 import java.util.stream.Collectors;
 
@@ -127,6 +128,7 @@ public boolean hasNext() throws IOException, CollectionException {
             hasNext = !retriever.getDocumentIds().isEmpty();
         if (!hasNext)
             close();
+        log.trace("hasNext returns {}", hasNext);
         return hasNext;
     }
 
@@ -266,7 +268,9 @@ public List<Object[]> getDocumentIds() {
                     join();
                 }
                 log.debug("[{}] Delivering {} document IDs", timestamp, ids.size());
-                return ids;
+                List<Object[]> ret = ids;
+                ids = Collections.emptyList();
+                return ret;
             } catch (InterruptedException e) {
                 log.error("Background ID fetching thread was interrupted", e);
             }
diff --git a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/XmiDBMultiplier.java b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/XmiDBMultiplier.java
index ba2f35eb9..96a18ec6a 100644
--- a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/XmiDBMultiplier.java
+++ b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/XmiDBMultiplier.java
@@ -5,6 +5,7 @@
 import de.julielab.costosys.dbconnection.DataBaseConnector;
 import de.julielab.jcore.reader.db.DBMultiplier;
 import de.julielab.jcore.types.casmultiplier.RowBatch;
+import de.julielab.jcore.utility.JCoReTools;
 import de.julielab.xml.JulieXMLConstants;
 import de.julielab.xml.XmiSplitConstants;
 import de.julielab.xml.binary.BinaryJeDISNodeEncoder;
@@ -95,7 +96,10 @@ public AbstractCas next() throws AnalysisEngineProcessException {
             jCas.release();
             throw new AnalysisEngineProcessException(throwable);
         }
-        log.trace("Outgoing multiplier jCas instance: " + jCas);
+        if (log.isTraceEnabled()) {
+            log.trace("Outgoing multiplier jCas instance: {}", jCas);
+            log.trace("Returning CAS containing document {}", JCoReTools.getDocId(jCas));
+        }
         return jCas;
     }
 

From 5fceaa8cead99abc6d5ee39bb84c865b5f0d2999 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 1 Aug 2022 11:21:24 +0200
Subject: [PATCH 222/269] Fix a bug where hasNext returns false although more
 documents are available.

---
 jcore-db-reader/pom.xml                       | 131 ++++++++++++++----
 .../jcore/reader/db/DBMultiplierReader.java   |  56 ++++++--
 .../de/julielab/jcore/reader/db/DBReader.java |   2 +-
 .../reader/db/DBMultiplierReaderTest.java     |   9 +-
 .../jcore/reader/db/DBReaderTest.java         |   6 +-
 .../jcore/reader/xmi/CasPopulator.java        |   4 +-
 .../jcore/reader/xmi/XmiDBMultiplier.java     |  38 ++++-
 .../xmi/desc/jcore-xmi-db-multiplier.xml      |  14 +-
 jedis-parent/pom.xml                          |   2 +-
 9 files changed, 200 insertions(+), 62 deletions(-)

diff --git a/jcore-db-reader/pom.xml b/jcore-db-reader/pom.xml
index bf3b215b9..fd3a657b1 100644
--- a/jcore-db-reader/pom.xml
+++ b/jcore-db-reader/pom.xml
@@ -1,5 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
     <parent>
         <artifactId>jedis-parent</artifactId>
         <groupId>de.julielab</groupId>
@@ -10,7 +11,7 @@
     <artifactId>jcore-db-reader</artifactId>
     <name>JCoRe Database Reader</name>
     <description>Abstract database reader for database driven processing</description>
-    
+
     <dependencies>
         <dependency>
             <groupId>de.julielab</groupId>
@@ -57,38 +58,114 @@
             <artifactId>jcore-db-test-utilities</artifactId>
             <scope>test</scope>
         </dependency>
+        <dependency>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
+        </dependency>
         <dependency>
             <groupId>org.apache.uima</groupId>
             <artifactId>uima-ducc-user</artifactId>
             <version>${uima-ducc-version}</version>
             <exclusions>
-                <exclusion><groupId>org.apache.uima</groupId><artifactId>uimaj-as-activemq</artifactId></exclusion>
-                <exclusion><groupId>org.apache.activemq</groupId><artifactId>activemq-camel</artifactId></exclusion>
-                <exclusion><groupId>org.apache.camel</groupId><artifactId>camel-core</artifactId></exclusion>
-                <exclusion><groupId>org.apache.camel</groupId><artifactId>camel-xstream</artifactId></exclusion>
-                <exclusion><groupId>org.apache.commons</groupId><artifactId>commons-pool2</artifactId></exclusion>
-                <exclusion><groupId>org.eclipse.jetty</groupId><artifactId>jetty-server</artifactId></exclusion>
-                <exclusion><groupId>xpp3</groupId><artifactId>xpp3</artifactId></exclusion>
-                <exclusion><groupId>org.apache.httpcomponents</groupId><artifactId>httpclient</artifactId></exclusion>
-                <exclusion><groupId>xmlpull</groupId><artifactId>xmlpull</artifactId></exclusion>
-                <exclusion><groupId>org.apache.httpcomponents</groupId><artifactId>httpclient-cache</artifactId></exclusion>
-                <exclusion><groupId>org.apache.httpcomponents</groupId><artifactId>httpcore</artifactId></exclusion>
-                <exclusion><groupId>org.slf4j</groupId><artifactId>jcl-over-slf4j</artifactId></exclusion>
-                <exclusion><groupId>org.apache.camel</groupId><artifactId>camel-context</artifactId></exclusion>
-                <exclusion><groupId>org.apache.camel</groupId><artifactId>camel-http4</artifactId></exclusion>
-                <exclusion><groupId>org.apache.camel</groupId><artifactId>camel-http</artifactId></exclusion>
-                <exclusion><groupId>org.apache.camel</groupId><artifactId>camel-http-common</artifactId></exclusion>
-                <exclusion><groupId>org.apache.camel</groupId><artifactId>camel-jetty-common</artifactId></exclusion>
-                <exclusion><groupId>org.apache.camel</groupId><artifactId>camel-mina</artifactId></exclusion>
-                <exclusion><groupId>org.apache.camel</groupId><artifactId>camel-xmlbeans</artifactId></exclusion>
-                <exclusion><groupId>org.apache.mina</groupId><artifactId>mina-core</artifactId></exclusion>
-                <exclusion><groupId>org.apache.camel</groupId><artifactId>camel-servlet</artifactId></exclusion>
-                <exclusion><groupId>org.apache.camel</groupId><artifactId>camel-test-spring</artifactId></exclusion>
-                <exclusion><groupId>org.apache.camel</groupId><artifactId>camel-test</artifactId></exclusion>
-                <exclusion><groupId>org.apache.camel</groupId><artifactId>camel-stream</artifactId></exclusion>
+                <exclusion>
+                    <groupId>org.apache.uima</groupId>
+                    <artifactId>uimaj-as-activemq</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.activemq</groupId>
+                    <artifactId>activemq-camel</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.camel</groupId>
+                    <artifactId>camel-core</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.camel</groupId>
+                    <artifactId>camel-xstream</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.commons</groupId>
+                    <artifactId>commons-pool2</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.eclipse.jetty</groupId>
+                    <artifactId>jetty-server</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>xpp3</groupId>
+                    <artifactId>xpp3</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.httpcomponents</groupId>
+                    <artifactId>httpclient</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>xmlpull</groupId>
+                    <artifactId>xmlpull</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.httpcomponents</groupId>
+                    <artifactId>httpclient-cache</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.httpcomponents</groupId>
+                    <artifactId>httpcore</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.slf4j</groupId>
+                    <artifactId>jcl-over-slf4j</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.camel</groupId>
+                    <artifactId>camel-context</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.camel</groupId>
+                    <artifactId>camel-http4</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.camel</groupId>
+                    <artifactId>camel-http</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.camel</groupId>
+                    <artifactId>camel-http-common</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.camel</groupId>
+                    <artifactId>camel-jetty-common</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.camel</groupId>
+                    <artifactId>camel-mina</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.camel</groupId>
+                    <artifactId>camel-xmlbeans</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.mina</groupId>
+                    <artifactId>mina-core</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.camel</groupId>
+                    <artifactId>camel-servlet</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.camel</groupId>
+                    <artifactId>camel-test-spring</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.camel</groupId>
+                    <artifactId>camel-test</artifactId>
+                </exclusion>
+                <exclusion>
+                    <groupId>org.apache.camel</groupId>
+                    <artifactId>camel-stream</artifactId>
+                </exclusion>
             </exclusions>
         </dependency>
-   </dependencies>
+    </dependencies>
     <licenses>
         <license>
             <name>BSD-2-Clause</name>
diff --git a/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplierReader.java b/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplierReader.java
index f83abfe02..37922d46d 100644
--- a/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplierReader.java
+++ b/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBMultiplierReader.java
@@ -22,7 +22,6 @@
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collections;
 import java.util.List;
 import java.util.stream.Collectors;
 
@@ -124,8 +123,11 @@ public void getNext(JCas jCas) throws CollectionException, IOException {
      */
     public boolean hasNext() throws IOException, CollectionException {
         boolean hasNext = this.hasNext;
-        if (retriever != null)
+        if (retriever != null) {
+            if (retriever.isConsumed())
+                retriever.run();
             hasNext = !retriever.getDocumentIds().isEmpty();
+        }
         if (!hasNext)
             close();
         log.trace("hasNext returns {}", hasNext);
@@ -174,10 +176,11 @@ private List<Object[]> getNextFromSubset() {
 
         // When this method is called for the first time, no retriever thread
         // will yet exist. Initialize it.
-        if (retriever == null || !fetchIdsProactively) {
+        if (retriever == null) {
             retriever = new DBMultiplierReader.RetrievingThread();
         }
         idList = retriever.getDocumentIds();
+        retriever.setConsumed(true);
         // While returning the current set of IDs, already fetch the next batch
         if (fetchIdsProactively)
             retriever = new DBMultiplierReader.RetrievingThread();
@@ -217,6 +220,7 @@ public void close() throws IOException {
     protected class RetrievingThread extends Thread {
         private List<Object[]> ids;
         private long timestamp = System.currentTimeMillis();
+        private boolean consumed;
 
         public RetrievingThread() {
             // Only fetch ID batches in advance when the parameter is set to
@@ -226,12 +230,21 @@ public RetrievingThread() {
                 setName(DBMultiplierReader.class.getSimpleName() + " RetrievingThread (" + getName() + ")");
                 start();
             } else {
-                log.debug("[{}] Fetching new documents (without employing a background thread).", timestamp);
+                log.debug("[{}] Fetching ID batches without a background thread.", timestamp);
                 run();
             }
         }
 
+        public boolean isConsumed() {
+            return consumed;
+        }
+
+        public void setConsumed(boolean consumed) {
+            this.consumed = consumed;
+        }
+
         public void run() {
+            consumed = false;
             // Remember: If the Limit parameter is set, totalDocumentCount is
             // that limit (or the remaining number of documents, if that's
             // lower).
@@ -261,20 +274,37 @@ public void run() {
         }
 
         public List<Object[]> getDocumentIds() {
+            // If we don't use this as a background thread, we have to get the
+            // IDs now in a classic sequential manner.
+            if (!fetchIdsProactively) {
+                // Use run as we don't have a use for real threads anyway.
+                log.debug("Fetching new documents (without employing a background thread).");
+            }
             try {
-                if (fetchIdsProactively) {// If this is a background thread started with start(): Wait for
-                    // the IDs to be retrieved, i.e. that run() ends.
-                    log.debug("[{}] Waiting for the background thread to finish fetching documents to return them.", timestamp);
-                    join();
-                }
+                // If this is a background thread started with start(): Wait for
+                // the IDs to be retrieved, i.e. that run() ends.
+                log.debug("Waiting for the background thread to finish fetching documents to return them.");
+                join();
                 log.debug("[{}] Delivering {} document IDs", timestamp, ids.size());
-                List<Object[]> ret = ids;
-                ids = Collections.emptyList();
-                return ret;
+                return ids;
             } catch (InterruptedException e) {
-                log.error("Background ID fetching thread was interrupted", e);
+                e.printStackTrace();
             }
             return null;
+//            try {
+//                if (fetchIdsProactively) {// If this is a background thread started with start(): Wait for
+//                    // the IDs to be retrieved, i.e. that run() ends.
+//                    log.debug("[{}] Waiting for the background thread to finish fetching documents to return them.", timestamp);
+//                    join();
+//                }
+//                log.debug("[{}] Delivering {} document IDs", timestamp, ids.size());
+//                List<Object[]> ret = ids;
+//                ids = Collections.emptyList();
+//                return ret;
+//            } catch (InterruptedException e) {
+//                log.error("Background ID fetching thread was interrupted", e);
+//            }
+//            return null;
         }
     }
 
diff --git a/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBReader.java b/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBReader.java
index e580fa2fa..798d24782 100644
--- a/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBReader.java
+++ b/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/DBReader.java
@@ -179,7 +179,7 @@ public boolean hasNext() throws IOException, CollectionException {
     public byte[][] getNextArtifactData() throws CollectionException {
         log.trace("Fetching next document from the current database batch");
 
-        byte[][] next = null;
+        byte[][] next;
         if (readDataTable)
             next = getNextFromDataTable();
         else
diff --git a/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBMultiplierReaderTest.java b/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBMultiplierReaderTest.java
index c10ff9670..e602844a9 100644
--- a/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBMultiplierReaderTest.java
+++ b/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBMultiplierReaderTest.java
@@ -1,7 +1,6 @@
 package de.julielab.jcore.reader.db;
 
 import de.julielab.costosys.Constants;
-import de.julielab.costosys.dbconnection.CoStoSysConnection;
 import de.julielab.costosys.dbconnection.DataBaseConnector;
 import de.julielab.jcore.db.test.DBTestUtils;
 import de.julielab.jcore.types.casmultiplier.RowBatch;
@@ -27,15 +26,13 @@
 @Testcontainers
 public class DBMultiplierReaderTest {
     @Container
-    public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:"+DataBaseConnector.POSTGRES_VERSION);
+    public static PostgreSQLContainer postgres = new PostgreSQLContainer("postgres:" + DataBaseConnector.POSTGRES_VERSION);
 
     @BeforeAll
     public static void setup() throws SQLException {
         DataBaseConnector dbc = DBTestUtils.getDataBaseConnector(postgres);
-        try (final CoStoSysConnection ignore = dbc.obtainOrReserveConnection()) {
-            DBTestUtils.setupDatabase(dbc, "src/test/resources/pubmedsample18n0001.xml.gz", "medline_2017", 20, postgres);
-        }
-        dbc.close();
+        dbc.obtainOrReserveConnection();
+        DBTestUtils.setupDatabase(dbc, "src/test/resources/pubmedsample18n0001.xml.gz", "medline_2017", 20, postgres);
     }
 
     @Test
diff --git a/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBReaderTest.java b/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBReaderTest.java
index 46b8ac436..c681a369f 100644
--- a/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBReaderTest.java
+++ b/jcore-db-reader/src/test/java/de/julielab/jcore/reader/db/DBReaderTest.java
@@ -20,7 +20,6 @@
 import org.testcontainers.junit.jupiter.Container;
 import org.testcontainers.junit.jupiter.Testcontainers;
 
-import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.sql.SQLException;
@@ -38,7 +37,6 @@ public static void setup() throws SQLException {
         DataBaseConnector dbc = DBTestUtils.getDataBaseConnector(postgres);
         dbc.reserveConnection();
         DBTestUtils.setupDatabase("src/test/resources/pubmedsample18n0001.xml.gz", "medline_2017", 20, postgres);
-        dbc.close();
     }
 
     @Test
@@ -73,7 +71,9 @@ public void testReadDataTable() throws ConfigurationException, UIMAException, IO
         int docCount = 0;
         JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-document-meta-pubmed-types",
                 "de.julielab.jcore.types.jcore-document-structure-types");
+        int i = 0;
         while (reader.hasNext()) {
+            System.out.println(++i);
             reader.getNext(jCas.getCas());
             assertNotNull(JCoReTools.getDocId(jCas));
             ++docCount;
@@ -95,7 +95,7 @@ public void getNext(JCas jCas) throws IOException, CollectionException {
             byte[][] artifactData = getNextArtifactData();
 
             log.trace("Getting next document from database");
-            XMLMapper xmlMapper = new XMLMapper(new FileInputStream(new File("src/test/resources/medline2016MappingFile.xml")));
+            XMLMapper xmlMapper = new XMLMapper(new FileInputStream("src/test/resources/medline2016MappingFile.xml"));
             xmlMapper.parse(artifactData[1], artifactData[0], jCas);
         }
     }
diff --git a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/CasPopulator.java b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/CasPopulator.java
index e5d3bf36d..1b9c9c080 100644
--- a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/CasPopulator.java
+++ b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/CasPopulator.java
@@ -185,7 +185,7 @@ public void populateCas(byte[][] data, JCas jCas) throws CasPopulationException
                 }
             }
             log.trace("Setting max XMI ID to the CAS.");
-            storeMaxXmiIdAndSofaMappings(jCas, data);
+            storeMaxXmiIdAndSofaMappings(jCas, data, storeMaxXmiId);
             log.trace("Setting meta data to: Reads data table: {}, table name: {}", readsDataTable, tableName);
             DBReader.setDBProcessingMetaData(dbc, readsDataTable, tableName, data, jCas);
         } catch (Exception e) {
@@ -238,7 +238,7 @@ private String getPkStringFromData(byte[][] data) {
         return sb.toString();
     }
 
-    private void storeMaxXmiIdAndSofaMappings(JCas aCAS, byte[][] data) {
+    public static void storeMaxXmiIdAndSofaMappings(JCas aCAS, byte[][] data, Boolean storeMaxXmiId) {
         if (storeMaxXmiId && data.length > 2) {
             String docId = JCoReTools.getDocId(aCAS);
             byte[] maxXmiIdBytes = data[2];
diff --git a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/XmiDBMultiplier.java b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/XmiDBMultiplier.java
index 96a18ec6a..50e7527a2 100644
--- a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/XmiDBMultiplier.java
+++ b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/XmiDBMultiplier.java
@@ -4,7 +4,9 @@
 import de.julielab.costosys.dbconnection.CoStoSysConnection;
 import de.julielab.costosys.dbconnection.DataBaseConnector;
 import de.julielab.jcore.reader.db.DBMultiplier;
+import de.julielab.jcore.reader.db.DBReader;
 import de.julielab.jcore.types.casmultiplier.RowBatch;
+import de.julielab.jcore.types.pubmed.Header;
 import de.julielab.jcore.utility.JCoReTools;
 import de.julielab.xml.JulieXMLConstants;
 import de.julielab.xml.XmiSplitConstants;
@@ -22,6 +24,7 @@
 import java.io.ByteArrayInputStream;
 import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 import java.sql.ResultSet;
 import java.sql.SQLException;
 import java.util.*;
@@ -30,9 +33,12 @@
 
 public class XmiDBMultiplier extends DBMultiplier implements Initializable {
     public static final String PARAM_LOG_FINAL_XMI = Initializer.PARAM_LOG_FINAL_XMI;
+    public static final String PARAM_TRUNCATE_AT_SIZE = "TruncateAtSize";
     private final static Logger log = LoggerFactory.getLogger(XmiDBMultiplier.class);
     @ConfigurationParameter(name = PARAM_LOG_FINAL_XMI, mandatory = false, defaultValue = "false", description = "For debugging purposes. If set to true, before parsing the final XMI data assembled from the annotation modules, it is printed to console.")
     private boolean logFinalXmi;
+    @ConfigurationParameter(name = PARAM_TRUNCATE_AT_SIZE, mandatory = false, description = "Specify size in bytes of the XMI sofa string, i.e. the document text. If the text surpasses that size, the document is not populated from XMI but given some placeholder information. This can be necessary when large documents cannot be handled by subsequent components in the pipeline.")
+    private int truncationSize;
     private Initializer initializer;
     private CasPopulator casPopulator;
     private String[] xmiModuleAnnotationNames;
@@ -43,6 +49,7 @@ public class XmiDBMultiplier extends DBMultiplier implements Initializable {
     public void initialize(UimaContext aContext) throws ResourceInitializationException {
         super.initialize(aContext);
         logFinalXmi = Optional.ofNullable((Boolean) aContext.getConfigParameterValue(PARAM_LOG_FINAL_XMI)).orElse(false);
+        truncationSize = Optional.ofNullable((Integer)aContext.getConfigParameterValue(PARAM_TRUNCATE_AT_SIZE)).orElse(0);
     }
 
     @Override
@@ -108,19 +115,40 @@ private void populateCas(JCas jCas) throws AnalysisEngineProcessException {
             throw new AnalysisEngineProcessException(new IllegalStateException("Initialization of the component was not finished. See previous errors to learn the reason. Cannot continue."));
         try {
             final byte[][] data = documentDataIterator.next();
+            final int pkSize = (int) dbc.getActiveTableFieldConfiguration().getPrimaryKeyFields().count();
             if (log.isTraceEnabled()) {
                 List<String> l = new ArrayList<>();
-                for (int i = 1; i < data.length; i++) {
-                    if (data[i] ==  null)
+                for (int i = pkSize; i < data.length; i++) {
+                    if (data[i] == null)
                         continue;
                     int length = data[i].length;
-                    double lengthInMb = (length/1024d)/1024d;
-                    l.add("col"+i+":"+lengthInMb + "MB");
+                    double lengthInMb = (length / 1024d) / 1024d;
+                    l.add("col" + i + ":" + lengthInMb + "MB");
                 }
                 log.trace("Populating CAS for document ID {} with column data of sizes {}", new String(data[0]), String.join(",", l));
             }
-            if (data != null)
+            boolean truncate = false;
+            if (truncationSize > 0) {
+                if(data[pkSize].length > truncationSize)
+                    truncate = true;
+            }
+            if (data != null && !truncate)
                 casPopulator.populateCas(data, jCas);
+            else if (truncate) {
+                // This document is too long. Set the document ID and some placeholder document text.
+                jCas.setDocumentText("This document was truncated due to exceedingly long text contents.");
+                List<String> pkElements = new ArrayList<>();
+                for (int i = 0; i < pkSize; i++) {
+                    pkElements.add(new String(data[i], StandardCharsets.UTF_8));
+                }
+                final Header header = new Header(jCas);
+                header.setDocId(pkElements.stream().collect(Collectors.joining(",")));
+                header.addToIndexes();
+
+                CasPopulator.storeMaxXmiIdAndSofaMappings(jCas, data, initializer.getStoreMaxXmiId());
+                DBReader.setDBProcessingMetaData(dbc, readDataTable, tableName, data, jCas);
+                log.debug("Truncating document with ID {} due to its text size of {} bytes which is greater than the given threshold of {} bytes.", pkElements, data[pkSize].length, truncationSize);
+            }
         } catch (CasPopulationException e) {
             throw new AnalysisEngineProcessException(e);
         }
diff --git a/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml b/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml
index 6fe2b6a03..007e3ee33 100644
--- a/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml
+++ b/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml
@@ -1,4 +1,4 @@
-<?xml version='1.0' encoding='UTF-8'?>
+<?xml version="1.0" encoding="UTF-8"?>
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <primitive>true</primitive>
@@ -6,7 +6,6 @@
     <analysisEngineMetaData>
         <name>JCoRe XMI Database Multiplier</name>
         <description>A multiplier that receives document IDs to read from a database table from the DBMultiplierReader. The reader also delivers the path to the corpus storage system (CoStoSys) configuration and additional tables for joining with the main data table. This multiplier class is abstract and cannot be used directly.Extending classes must implement the next() method to actually read documents from the database and populate CASes with them. This component is a part of the Jena Document Information System, JeDIS.</description>
-        <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
         <copyright>JULIE Lab Jena, Germany</copyright>
         <configurationParameters>
@@ -17,6 +16,13 @@
                 <multiValued>false</multiValued>
                 <mandatory>false</mandatory>
             </configurationParameter>
+            <configurationParameter>
+                <name>TruncateAtSize</name>
+                <description>Specify size in bytes of the XMI sofa string, i.e. the document text. If the text surpasses that size, the document is not populated from XMI but given some placeholder information. This can be necessary when large documents cannot be handled by subsequent components in the pipeline.</description>
+                <type>Integer</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
         </configurationParameters>
         <configurationParameterSettings>
             <nameValuePair>
@@ -33,8 +39,8 @@
                 <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types" />
             </imports>
         </typeSystemDescription>
-        <fsIndexCollection />
-        <capabilities />
+        <fsIndexCollection/>
+        <capabilities/>
         <operationalProperties>
             <modifiesCas>true</modifiesCas>
             <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
diff --git a/jedis-parent/pom.xml b/jedis-parent/pom.xml
index b5cbf4f94..51791107a 100644
--- a/jedis-parent/pom.xml
+++ b/jedis-parent/pom.xml
@@ -22,7 +22,7 @@
             <dependency>
                 <groupId>de.julielab</groupId>
                 <artifactId>jcore-db-test-utilities</artifactId>
-                <version>2.5.1</version>
+                <version>2.6.0-SNAPSHOT</version>
             </dependency>
             <dependency>
                 <groupId>de.julielab</groupId>

From e9fd8c18f804d24e2ed797cc10e3eb42d1a28f6a Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 1 Aug 2022 11:22:48 +0200
Subject: [PATCH 223/269] Remove an index scan that was potentially
 inefficient.

The code section in question came up in VisualVM & JConsole. That might have been a coincidence, but the new code shouldn't be any worse anyway.
---
 .../ae/opennlp/chunk/ChunkAnnotator.java      | 35 +++++++++++--------
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/jcore-opennlp-chunk-ae/src/main/java/de/julielab/jcore/ae/opennlp/chunk/ChunkAnnotator.java b/jcore-opennlp-chunk-ae/src/main/java/de/julielab/jcore/ae/opennlp/chunk/ChunkAnnotator.java
index ff1ba4fdc..42a163349 100644
--- a/jcore-opennlp-chunk-ae/src/main/java/de/julielab/jcore/ae/opennlp/chunk/ChunkAnnotator.java
+++ b/jcore-opennlp-chunk-ae/src/main/java/de/julielab/jcore/ae/opennlp/chunk/ChunkAnnotator.java
@@ -38,6 +38,7 @@
 import java.io.*;
 import java.lang.reflect.Constructor;
 import java.lang.reflect.InvocationTargetException;
+import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -210,23 +211,26 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException {
 			FSIterator tokenIterator = tokenIndex.subiterator(sentence);
 
 			//get number of Tokens contained in Sentence and move iterator back to beginning
-			int numTokens = 0;
-			while (tokenIterator.isValid()){	
-				numTokens++;
-				tokenIterator.moveToNext();
-			}
-			tokenIterator.moveToFirst();			
-			Token[] tokenArray = new Token[numTokens];
-			String[] tokenTextArray = new String[numTokens];
-			String[] tagArray = new String[numTokens];
+//			int numTokens = 0;
+//			while (tokenIterator.isValid()){
+//				numTokens++;
+//				tokenIterator.moveToNext();
+//			}
+//			tokenIterator.moveToFirst();
+//			Token[] tokenArray = new Token[numTokens];
+//			String[] tokenTextArray = new String[numTokens];
+//			String[] tagArray = new String[numTokens];
+			java.util.List<Token> tokensInSentence = new ArrayList<>();
+			java.util.List<String> tokenTags = new ArrayList<>();
 
 			int i = 0;
 
 			// iterate over Tokens in current sentence
 			while (tokenIterator.hasNext()) {
 				Token token = (Token) tokenIterator.next();
-				tokenArray[i] = token;
-				tokenTextArray[i] = token.getCoveredText();
+				tokensInSentence.add(token);
+//				tokenArray[i] = token;
+//				tokenTextArray[i] = token.getCoveredText();
 				POSTag postag = null;
 				// if a POS TagSet preference exists try to get a correspondent POSTag for the current token
 				if (posTagSetPreference != null) {
@@ -241,14 +245,15 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException {
 					LOGGER.error("Token has no POS tag annotation: " + token.getCoveredText());
 					throw new AnalysisEngineProcessException();
 				}
-				tagArray[i] = postag.getValue();
+//				tagArray[i] = postag.getValue();
+				tokenTags.add(postag.getValue());
 				i++;
 			}
 
 			// OpenNLP Chunker predicts chunks
-			String[] chunks = chunker.chunk(tokenTextArray, tagArray);
-
-			createChunkAnnotations(chunks, tokenArray, aJCas);
+//			String[] chunks = chunker.chunk(tokenTextArray, tagArray);
+			String[] chunks = chunker.chunk(tokensInSentence.stream().map(Token::getCoveredText).toArray(String[]::new), tokenTags.toArray(String[]::new));
+			createChunkAnnotations(chunks, tokensInSentence.toArray(Token[]::new), aJCas);
 
 		}
 	}

From ec1203170cfa0c4c12423464c95ddf5805b90ed9 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 2 Aug 2022 17:37:36 +0200
Subject: [PATCH 224/269] Remove types from the IOB Consumer descriptor.

---
 jcore-iob-consumer/pom.xml                          | 10 +++++-----
 .../jcore/consumer/cas2iob/main/ToIOBConsumer.java  |  6 +++---
 .../consumer/cas2iob/desc/jcore-iob-consumer.xml    | 13 ++++++-------
 3 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/jcore-iob-consumer/pom.xml b/jcore-iob-consumer/pom.xml
index b1a21c3b7..e4751ee20 100644
--- a/jcore-iob-consumer/pom.xml
+++ b/jcore-iob-consumer/pom.xml
@@ -37,11 +37,11 @@
             <groupId>org.junit.jupiter</groupId>
             <artifactId>junit-jupiter-engine</artifactId>
         </dependency>
-        <dependency>
-            <groupId>commons-io</groupId>
-            <artifactId>commons-io</artifactId>
-            <scope>test</scope>
-        </dependency>
+<!--        <dependency>-->
+<!--            <groupId>commons-io</groupId>-->
+<!--            <artifactId>commons-io</artifactId>-->
+<!--            <scope>test</scope>-->
+<!--        </dependency>-->
     </dependencies>
     <name>JCoRe CAS to IOB Consumer</name>
     <organization>
diff --git a/jcore-iob-consumer/src/main/java/de/julielab/jcore/consumer/cas2iob/main/ToIOBConsumer.java b/jcore-iob-consumer/src/main/java/de/julielab/jcore/consumer/cas2iob/main/ToIOBConsumer.java
index c2e21d98d..030042a8a 100644
--- a/jcore-iob-consumer/src/main/java/de/julielab/jcore/consumer/cas2iob/main/ToIOBConsumer.java
+++ b/jcore-iob-consumer/src/main/java/de/julielab/jcore/consumer/cas2iob/main/ToIOBConsumer.java
@@ -75,7 +75,7 @@ public class ToIOBConsumer extends JCasAnnotator_ImplBase {
     private final String PARAGRAPH_END_MARK = "PARAGRAPH_END_MARKER"; // there will be 2 empty lines for each sentence marker
     @ConfigurationParameter(name = PARAM_OUTFOLDER, description = "Path to folder where IOB-files should be written to.")
     String outFolder = null;
-    @ConfigurationParameter(name = PARAM_TYPE_PATH, mandatory = false, description = "The path of the UIMA types, e.g. \"de.julielab.jcore.\" (with terminating \".\"!). It is prepended to the class names in labelNameMethods. This parameter may be null which is equivalent to the empty String \"\".")
+    @ConfigurationParameter(name = PARAM_TYPE_PATH, mandatory = false, description = "The path of the UIMA types, e.g. \"de.julielab.jcore.types.\" (with terminating \".\"!). It is prepended to the class names in labelNameMethods. This parameter may be null which is equivalent to the empty String \"\".")
     String typePath = null;
     @ConfigurationParameter(name = PARAM_LABELS, mandatory = false, description = "The labels NOT to be exported into IOB format. Label does here not refer to an UIMA type but to the specific label aquired by the labelNameMethod.")
     String[] labels = null;
@@ -84,9 +84,9 @@ public class ToIOBConsumer extends JCasAnnotator_ImplBase {
     int id = 1;
     @ConfigurationParameter(name = PARAM_MODE, mandatory = false, description = "This parameter determines whether the IOB or IO annotation schema should be used. The parameter defaults to IOB, the value is not case sensitive.", defaultValue = "IOB")
     private String mode = null;
-    @ConfigurationParameter(name = PARAM_LABEL_METHODS, description = "This is the primary parameter to define from which types IOB labels should be derived. The parameter expects pairs of UIMA-annotation-type-names and their corresponding method for extracting the annotation label. Format: &lt;annotationName&gt;[\\s=/\\\\|]&lt;method Name&gt;. The annotation name is fully qualified name of the UIMA type. For abbreviation purposes, the \"" + PARAM_TYPE_PATH + "\" parameter can be used to define a type prefix that will then be prepended to all UIMA type names given in this parameter. So, for example, the prefix \"de.julielab.jcore.types.\" will allow to use the \"specificType\" feature of the \"de.julielab.jcore.types.Gene\" type by providing \"Gene=getSpecificType\".  If the name of the annotation class itself is to be being used as label, only the class name is expected: &lt;annotationName&gt; (here, again, applies the use of the \"" + PARAM_TYPE_PATH + "\" parameter). You also may specify a mix of pairs and single class names. If you give the name extracting method for a class and have also specified its superclass as a single class name, the given method is used rather than the superclass name.")
+    @ConfigurationParameter(name = PARAM_LABEL_METHODS, description = "This is the primary parameter to define from which types IOB labels should be derived. The parameter expects pairs of UIMA-annotation-type-names and their corresponding method for extracting the annotation label. Format: <annotationName>[\\s=/\\\\|]<method Name>. The annotation name is fully qualified name of the UIMA type. For abbreviation purposes, the \"" + PARAM_TYPE_PATH + "\" parameter can be used to define a type prefix that will then be prepended to all UIMA type names given in this parameter. So, for example, the prefix \"de.julielab.jcore.types.\" will allow to use the \"specificType\" feature of the \"de.julielab.jcore.types.Gene\" type by providing \"Gene=getSpecificType\".  If the name of the annotation class itself is to be being used as label, only the class name is expected: <annotationName> (here, again, applies the use of the \"" + PARAM_TYPE_PATH + "\" parameter). You also may specify a mix of pairs and single class names. If you give the name extracting method for a class and have also specified its superclass as a single class name, the given method is used rather than the superclass name.")
     private String[] labelNameMethods;
-    @ConfigurationParameter(name = PARAM_IOB_LABEL_NAMES, mandatory = false, description = "Pairs of label names in UIMA (aquired by the methods given in labelNameMethods) and the name the label is supposed to get in the outcoming IOB file. Format: &lt;UIMA label name&gt;[\\s=/\\\\|]&lt;IOB label name&gt;")
+    @ConfigurationParameter(name = PARAM_IOB_LABEL_NAMES, mandatory = false, description = "Pairs of label names in UIMA (aquired by the methods given in labelNameMethods) and the name the label is supposed to get in the outcoming IOB file. Format: <UIMA label name>[\\s=/\\\\|]&lt;IOB label name&gt;")
     private String[] iobLabelNames;
     @ConfigurationParameter(name = PARAM_ADD_POS, mandatory = false, description = "If set to true and if annotations of (sub-)type de.julielab.jcore.types.POSTag are present in the CAS, the PoS tags will be added to the output file as the second column. Defaults to false.")
     private Boolean addPos;
diff --git a/jcore-iob-consumer/src/main/resources/de/julielab/jcore/consumer/cas2iob/desc/jcore-iob-consumer.xml b/jcore-iob-consumer/src/main/resources/de/julielab/jcore/consumer/cas2iob/desc/jcore-iob-consumer.xml
index 72b818213..aa07c6d66 100644
--- a/jcore-iob-consumer/src/main/resources/de/julielab/jcore/consumer/cas2iob/desc/jcore-iob-consumer.xml
+++ b/jcore-iob-consumer/src/main/resources/de/julielab/jcore/consumer/cas2iob/desc/jcore-iob-consumer.xml
@@ -1,4 +1,4 @@
-<?xml version='1.0' encoding='UTF-8'?>
+<?xml version="1.0" encoding="UTF-8"?>
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <primitive>true</primitive>
@@ -6,7 +6,6 @@
     <analysisEngineMetaData>
         <name>JCoRe IOB Writer</name>
         <description>This component help to write CAS entity or chunk annotations into a text file in IOB format.</description>
-        <version>2.6.0-SNAPSHOT</version>
         <configurationParameters>
             <configurationParameter>
                 <name>outFolder</name>
@@ -17,7 +16,7 @@
             </configurationParameter>
             <configurationParameter>
                 <name>typePath</name>
-                <description>The path of the UIMA types, e.g. "de.julielab.jcore." (with terminating "."!). It is prepended to the class names in labelNameMethods. This parameter may be null which is equivalent to the empty String "".</description>
+                <description>The path of the UIMA types, e.g. "de.julielab.jcore.types." (with terminating "."!). It is prepended to the class names in labelNameMethods. This parameter may be null which is equivalent to the empty String "".</description>
                 <type>String</type>
                 <multiValued>false</multiValued>
                 <mandatory>false</mandatory>
@@ -38,14 +37,14 @@
             </configurationParameter>
             <configurationParameter>
                 <name>labelNameMethods</name>
-                <description>This is the primary parameter to define from which types IOB labels should be derived. The parameter expects pairs of UIMA-annotation-type-names and their corresponding method for extracting the annotation label. Format: &amp;lt;annotationNAme&amp;gt;[\s=/\\|]&amp;lt;method Name&amp;gt;. The annotation name is fully qualified name of the UIMA type. For abbreviation purposes, the "typePath" parameter can be used to define a type prefix that will then be prepended to all UIMA type names given in this parameter. So, for example, the prefix "de.julielab.jcore.types." will allow to use the "specificType" feature of the "de.julielab.jcore.types.Gene" type by providing "Gene=getSpecificType".  If the name of the annotation class itself is to be being used as label, only the class name is expected: &amp;lt;annotationName&amp;gt; (here, again, applies the use of the "typePath" parameter). You also may specify a mix of pairs and single class names. If you give the name extracting method for a class and have also specified its superclass as a single class name, the given method is used rather than the superclass name.</description>
+                <description>This is the primary parameter to define from which types IOB labels should be derived. The parameter expects pairs of UIMA-annotation-type-names and their corresponding method for extracting the annotation label. Format: &lt;annotationName&gt;[\s=/\\|]&lt;method Name&gt;. The annotation name is the fully qualified name of the UIMA type. For abbreviation purposes, the "typePath" parameter can be used to define a type prefix that will then be prepended to all UIMA type names given in this parameter. So, for example, the prefix "de.julielab.jcore.types." will allow to use the "specificType" feature of the "de.julielab.jcore.types.Gene" type by providing "Gene=getSpecificType".  If the name of the annotation class itself is to be used as the label, only the class name is expected: &lt;annotationName&gt; (here, again, applies the use of the "typePath" parameter). You also may specify a mix of pairs and single class names. If you give the name extracting method for a class and have also specified its superclass as a single class name, the given method is used rather than the superclass name.</description>
                 <type>String</type>
                 <multiValued>true</multiValued>
                 <mandatory>true</mandatory>
             </configurationParameter>
             <configurationParameter>
                 <name>iobLabelNames</name>
-                <description>Pairs of label names in UIMA (aquired by the methods given in labelNameMethods) and the name the label is supposed to get in the outcoming IOB file. Format: &amp;lt;UIMA label name&amp;gt;[\s=/\\|]&amp;lt;IOB label name&amp;gt;</description>
+                <description>Pairs of label names in UIMA (acquired by the methods given in labelNameMethods) and the name the label is supposed to get in the output IOB file. Format: &lt;UIMA label name&gt;[\s=/\\|]&lt;IOB label name&gt;</description>
                 <type>String</type>
                 <multiValued>true</multiValued>
                 <mandatory>false</mandatory>
@@ -93,8 +92,8 @@
                 <import name="de.julielab.jcore.types.jcore-document-structure-types" />
             </imports>
         </typeSystemDescription>
-        <fsIndexCollection />
-        <capabilities />
+        <fsIndexCollection/>
+        <capabilities/>
         <operationalProperties>
             <modifiesCas>true</modifiesCas>
             <multipleDeploymentAllowed>true</multipleDeploymentAllowed>

From 67f85fc007cdf4339d8cd202492255b99c5eb8e0 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 2 Aug 2022 17:37:57 +0200
Subject: [PATCH 225/269] Add the component ID to gene annotations.

---
 jcore-nlmgene-reader/README.md                   | 16 +++++++++++++++-
 .../jcore/reader/nlmgene/NLMGeneReader.java      | 14 ++++++++++----
 2 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/jcore-nlmgene-reader/README.md b/jcore-nlmgene-reader/README.md
index 2cacbc00a..4b24b0fe9 100644
--- a/jcore-nlmgene-reader/README.md
+++ b/jcore-nlmgene-reader/README.md
@@ -31,4 +31,18 @@ de.julielab.jcore.{reader, ae, consumer}.NAME.desc.ARTIFACT-NAME
 | de.julielab.jcore.types.ace.TYPE | `+` |  |
 
 
-[1] Some Literature?
+NLM-Gene annotation code meanings (taken from the file `NLM-Gene-Annotation-Guidelines.docx` on the FTP server linked in the paper):
+
+* 000: Mention is not explicitly linked to a species; use the gene ID of the mention at another text position where the species is specified.
+* 111: The given ID is actually the ID of an ortholog of the gene because the gene itself does not yet have an ID in NCBI Gene. The ID used should be that of an ortholog mentioned in the article, if there is one.
+* 222: This is a family/group/class of genes. Annotate with all the gene IDs of that family/group/class that appear in the same article.
+* 333: This is a family/group/class but none of its members were used in the abstract. Use some family member gene that belongs to the main organism discussed in the article. This code is also used for references to protein domains.
+* 444: This is a protein complex. Analogous to families, use the ID of the subunits mentioned in the article.
+* 555: This is a protein complex without mentions of subunits in the same article. Use the ID of some subunit that belongs to the main organism of the abstract.
+
+Gene annotations with multiple IDs:
+* for enumerations with ellipsis, IDs are separated by semicolons
+* for other text phrases that have multiple IDs, IDs are separated by commas
+* for some IDs, their homologene-ID is also given, separated by a pipe (this does not seem to be documented anywhere; for this reason, the homologene-ID is stripped by this reader)
+
+[1] Islamaj, R., Wei, C. H., Cissel, D., Miliaras, N., Printseva, O., Rodionov, O., … Lu, Z. (2021). NLM-Gene, a richly annotated gold standard dataset for gene entities that addresses ambiguity and multi-species gene recognition. Journal of Biomedical Informatics, 118(March), 103779. https://doi.org/10.1016/j.jbi.2021.103779
diff --git a/jcore-nlmgene-reader/src/main/java/de/julielab/jcore/reader/nlmgene/NLMGeneReader.java b/jcore-nlmgene-reader/src/main/java/de/julielab/jcore/reader/nlmgene/NLMGeneReader.java
index 7e10f9081..f7aaba55f 100644
--- a/jcore-nlmgene-reader/src/main/java/de/julielab/jcore/reader/nlmgene/NLMGeneReader.java
+++ b/jcore-nlmgene-reader/src/main/java/de/julielab/jcore/reader/nlmgene/NLMGeneReader.java
@@ -118,6 +118,7 @@ private void handleHeader(JCas jCas, BioCDocument document) {
     private void handleAnnotation(JCas jCas, BioCDocument document, BioCPassage p, StringBuilder textBuilder) {
         for (BioCAnnotation a : p.getAnnotations()) {
             final Gene g = new Gene(jCas, a.getTotalLocation().getOffset(), a.getTotalLocation().getOffset() + a.getTotalLocation().getLength());
+            g.setComponentId(getClass().getSimpleName());
             final Optional<String> typeInfon = a.getInfon("type");
             final Optional<String> codeInfon = a.getInfon("code");
             handleErrors(document, p, a, g, typeInfon, textBuilder);
@@ -134,12 +135,12 @@ private void handleSpecificType(Gene g, Optional<String> typeInfon, Optional<Str
     }
 
     private void handleErrors(BioCDocument document, BioCPassage p, BioCAnnotation a, Gene g, Optional<String> typeInfon, StringBuilder textBuilder) {
-        if (typeInfon.isPresent() && !(typeInfon.get().equals("Gene") || typeInfon.get().equals("GENERIF")))
-            throw new IllegalStateException("The annotation " + a.getID() + " of passage " + p.getInfon("type").get() + " of document " + document.getID() + " was neither of type Gene nor GENERIF.");
+//        if (typeInfon.isPresent() && !(typeInfon.get().equals("Gene") || typeInfon.get().equals("GENERIF")))
+//            throw new IllegalStateException("The annotation " + a.getID() + " of passage " + p.getInfon("type").get() + " of document " + document.getID() + " was neither of type Gene nor GENERIF. but '" + typeInfon.get() + "'");
         if (!typeInfon.isPresent())
             throw new IllegalStateException("The annotation " + a.getID() + " of passage " + p.getInfon("type").get() + " of document " + document.getID() + " does not specify a type.");
-        if (!textBuilder.substring(g.getBegin(), g.getEnd()).equals(a.getText().get()))
-            throw new IllegalStateException("The annotation " + a.getID() + " of passage " + p.getInfon("type").get() + " of document " + document.getID() + " has the covered text " + textBuilder.substring(g.getBegin(), g.getEnd()) + " but should have the text " + a.getText().get() + " according to the BioC XML information.");
+//        if (!textBuilder.substring(g.getBegin(), g.getEnd()).equals(a.getText().get()))
+//            throw new IllegalStateException("The annotation " + a.getID() + " of passage " + p.getInfon("type").get() + " of document " + document.getID() + " has the covered text " + textBuilder.substring(g.getBegin(), g.getEnd()) + " but should have the text " + a.getText().get() + " according to the BioC XML information.");
     }
 
     private void handleGeneId(JCas jCas, BioCAnnotation a, Gene g) {
@@ -147,6 +148,11 @@ private void handleGeneId(JCas jCas, BioCAnnotation a, Gene g) {
         if (ncbiGeneId.isPresent()) {
             final ResourceEntry re = new ResourceEntry(jCas, g.getBegin(), g.getEnd());
             re.setEntryId(ncbiGeneId.get());
+            // for a few cases, the ID looks like this: 8074|10771 (gene name FGF23)
+            // it seems that the first number is the NCBI Gene ID and the second is the homologene ID. We omit the
+            // homologene ID for now, we don't use it
+            if (ncbiGeneId.get().contains("|"))
+                re.setEntryId(ncbiGeneId.get().split("\\|")[0]);
             re.setComponentId(getClass().getSimpleName());
             final FSArray resourceEntryList = new FSArray(jCas, 1);
             resourceEntryList.set(0, re);

From 02b34feb7b9f7946d5d68d8f82b3f7ce198c5adc Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 16 Aug 2022 17:11:14 +0200
Subject: [PATCH 226/269] Add a parameter for the gene type specification to
 the BioC GNP writer.

---
 .../consumer/gnp/BioCDocumentPopulator.java   |  8 ++++---
 .../consumer/gnp/GNormPlusFormatWriter.java   | 21 ++++++++++++++-----
 .../gnp/desc/jcore-gnp-bioc-writer.xml        | 15 ++++++++++++-
 .../gnp/BioCDocumentPopulatorTest.java        |  8 +++----
 4 files changed, 39 insertions(+), 13 deletions(-)

diff --git a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
index 23ca9529a..b26712495 100644
--- a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
+++ b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
@@ -18,9 +18,11 @@
 public class BioCDocumentPopulator {
     private final static Logger log = LoggerFactory.getLogger(BioCDocumentPopulator.class);
     private boolean addGenes;
+    private Class<? extends ConceptMention> geneTypeClass;
 
-    public BioCDocumentPopulator(boolean addGenes) {
+    public BioCDocumentPopulator(boolean addGenes, String geneTypeName) throws ClassNotFoundException {
         this.addGenes = addGenes;
+        geneTypeClass = (Class<? extends ConceptMention>) Class.forName(geneTypeName);
     }
 
     public BioCDocument populate(JCas jCas) {
@@ -92,8 +94,8 @@ public BioCDocument populate(JCas jCas) {
 
     private int addGenesToPassage(JCas jCas, Zone z, BioCPassage p, int annotationId) {
         if (p != null) {
-            Iterable<Gene> geneIt = JCasUtil.subiterate(jCas, Gene.class, z, false, true);
-            for (Gene g : geneIt) {
+            Iterable<? extends ConceptMention> geneIt = JCasUtil.subiterate(jCas, geneTypeClass, z, false, true);
+            for (ConceptMention g : geneIt) {
                 BioCAnnotation annotation = new BioCAnnotation(String.valueOf(annotationId++));
                 annotation.setText(g.getCoveredText());
                 String type = "Gene";
diff --git a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriter.java b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriter.java
index 08f10fab4..a7cd8bce1 100644
--- a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriter.java
+++ b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriter.java
@@ -2,6 +2,7 @@
 
 import com.pengyifan.bioc.BioCCollection;
 import com.pengyifan.bioc.BioCDocument;
+import de.julielab.jcore.types.Gene;
 import de.julielab.jcore.utility.JCoReTools;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
@@ -10,6 +11,7 @@
 import org.apache.uima.fit.descriptor.ResourceMetaData;
 import org.apache.uima.fit.descriptor.TypeCapability;
 import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -25,6 +27,7 @@ public class GNormPlusFormatWriter extends JCasAnnotator_ImplBase {
     public static final String PARAM_NUM_FILES_PER_DIR = "NumFilesPerDir";
     public static final String PARAM_BASE_DIR = "BaseDirectory";
     public static final String PARAM_ADD_GENES = "AddGenes";
+    public static final String PARAM_GENE_TYPE_NAME = "GeneTypeName";
     private final static Logger log = LoggerFactory.getLogger(GNormPlusFormatWriter.class);
     @ConfigurationParameter(name = PARAM_NUM_DOCS_PER_FILE, description = "The number of documents (i.e. CASes) that should be written into a single BioC XML file.")
     private int numDocsPerFile;
@@ -32,8 +35,10 @@ public class GNormPlusFormatWriter extends JCasAnnotator_ImplBase {
     private int numDocsPerDir;
     @ConfigurationParameter(name = PARAM_BASE_DIR, description = "The base directory into which to create new directories that contain the actual BioC collection files.")
     private String baseDirectory;
-    @ConfigurationParameter(name=PARAM_ADD_GENES, mandatory = false, description = "false", defaultValue = "If set to true, all Gene annotations in the CAS will be added to the BioC documents.")
+    @ConfigurationParameter(name=PARAM_ADD_GENES, mandatory = false, defaultValue = "false", description = "If set to true, all Gene annotations in the CAS will be added to the BioC documents. The default type used is de.julielab.jcore.types.Gene. This can be changed with the " + PARAM_GENE_TYPE_NAME + " parameter.")
     private boolean addGenes;
+    @ConfigurationParameter(name=PARAM_GENE_TYPE_NAME, mandatory = false, defaultValue = "de.julielab.jcore.types.Gene", description = "The UIMA type denoting gene annotations that should be written into the BioC format when the " + PARAM_ADD_GENES + " parameter is set to true.")
+    private String geneTypeName;
 
     private BioCDocumentPopulator bioCDocumentPopulator;
     private BioCCollectionWriter bioCCollectionWriter;
@@ -44,16 +49,22 @@ public class GNormPlusFormatWriter extends JCasAnnotator_ImplBase {
      * creation. Here, descriptor parameters are read and initial setup is done.
      */
     @Override
-    public void initialize(final UimaContext aContext) {
+    public void initialize(final UimaContext aContext) throws ResourceInitializationException {
         numDocsPerFile = (int) aContext.getConfigParameterValue(PARAM_NUM_DOCS_PER_FILE);
         numDocsPerDir = (int) aContext.getConfigParameterValue(PARAM_NUM_FILES_PER_DIR);
         baseDirectory = (String) aContext.getConfigParameterValue(PARAM_BASE_DIR);
         addGenes = (boolean) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_ADD_GENES)).orElse(false);
+        geneTypeName = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_GENE_TYPE_NAME)).orElse(Gene.class.getCanonicalName());
 
-        bioCDocumentPopulator = new BioCDocumentPopulator(addGenes);
-        bioCCollectionWriter = new BioCCollectionWriter(numDocsPerDir, Path.of(baseDirectory));
+        try {
+            bioCDocumentPopulator = new BioCDocumentPopulator(addGenes, geneTypeName);
+            bioCCollectionWriter = new BioCCollectionWriter(numDocsPerDir, Path.of(baseDirectory));
 
-        currentCollection = new BioCCollection("UTF-8", "1.0", new Date().toString(), true, "JCoRe GNormPlus BioC Writer", "PubTator.key");
+            currentCollection = new BioCCollection("UTF-8", "1.0", new Date().toString(), true, "JCoRe GNormPlus BioC Writer", "PubTator.key");
+        } catch (ClassNotFoundException e) {
+            log.error("Gene annotation class {} could not be found.", geneTypeName, e);
+            throw new ResourceInitializationException(e);
+        }
     }
 
     /**
diff --git a/jcore-gnp-bioc-writer/src/main/resources/de/julielab/jcore/consumer/gnp/desc/jcore-gnp-bioc-writer.xml b/jcore-gnp-bioc-writer/src/main/resources/de/julielab/jcore/consumer/gnp/desc/jcore-gnp-bioc-writer.xml
index 82e53378c..54b6a2046 100644
--- a/jcore-gnp-bioc-writer/src/main/resources/de/julielab/jcore/consumer/gnp/desc/jcore-gnp-bioc-writer.xml
+++ b/jcore-gnp-bioc-writer/src/main/resources/de/julielab/jcore/consumer/gnp/desc/jcore-gnp-bioc-writer.xml
@@ -31,11 +31,18 @@
             </configurationParameter>
             <configurationParameter>
                 <name>AddGenes</name>
-                <description>false</description>
+                <description>If set to true, all Gene annotations in the CAS will be added to the BioC documents. The default type used is de.julielab.jcore.types.Gene. This can be changed with the GeneTypeName parameter.</description>
                 <type>Boolean</type>
                 <multiValued>false</multiValued>
                 <mandatory>false</mandatory>
             </configurationParameter>
+            <configurationParameter>
+                <name>GeneTypeName</name>
+                <description>The UIMA type denoting gene annotations that should be written into the BioC format when the AddGenes parameter is set to true.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
         </configurationParameters>
         <configurationParameterSettings>
             <nameValuePair>
@@ -44,6 +51,12 @@
                     <boolean>false</boolean>
                 </value>
             </nameValuePair>
+            <nameValuePair>
+                <name>GeneTypeName</name>
+                <value>
+                    <string>de.julielab.jcore.types.Gene</string>
+                </value>
+            </nameValuePair>
         </configurationParameterSettings>
         <typeSystemDescription>
             <imports>
diff --git a/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulatorTest.java b/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulatorTest.java
index 25dc4e0ff..dc4af2060 100644
--- a/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulatorTest.java
+++ b/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulatorTest.java
@@ -15,7 +15,7 @@
 class BioCDocumentPopulatorTest {
     @Test
     public void populate() throws Exception {
-        BioCDocumentPopulator populator = new BioCDocumentPopulator(false);
+        BioCDocumentPopulator populator = new BioCDocumentPopulator(false, Gene.class.getCanonicalName());
         JCas jCas = TestDocumentGenerator.prepareCas(1);
         BioCDocument biocDoc = populator.populate(jCas);
         ByteArrayOutputStream baos = new ByteArrayOutputStream();
@@ -42,7 +42,7 @@ public void populate() throws Exception {
 
     @Test
     public void populateWithGenes() throws Exception {
-        BioCDocumentPopulator populator = new BioCDocumentPopulator(true);
+        BioCDocumentPopulator populator = new BioCDocumentPopulator(true, Gene.class.getCanonicalName());
         JCas jCas = TestDocumentGenerator.prepareCas(1);
         new Gene(jCas, 0, 4).addToIndexes();
         new Gene(jCas, 87, 96).addToIndexes();
@@ -66,7 +66,7 @@ public void populateWithGenes() throws Exception {
 
     @Test
     public void populateWithGeneFamilies() throws Exception {
-        BioCDocumentPopulator populator = new BioCDocumentPopulator(true);
+        BioCDocumentPopulator populator = new BioCDocumentPopulator(true, Gene.class.getCanonicalName());
         JCas jCas = TestDocumentGenerator.prepareCas(1);
         Gene gene = new Gene(jCas, 0, 4);
         gene.setSpecificType("protein_familiy_or_group");
@@ -86,7 +86,7 @@ public void populateWithGeneFamilies() throws Exception {
 
     @Test
     public void populateWithGeneFamilies2() throws Exception {
-        BioCDocumentPopulator populator = new BioCDocumentPopulator(true);
+        BioCDocumentPopulator populator = new BioCDocumentPopulator(true, Gene.class.getCanonicalName());
         JCas jCas = TestDocumentGenerator.prepareCas(1);
         Gene gene = new Gene(jCas, 0, 4);
         gene.setSpecificType("FamilyName");

From 1a0c4e12263b657ac6389c59d6ec25a6c39ade65 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 16 Aug 2022 17:11:56 +0200
Subject: [PATCH 227/269] Remove double newlines from the IOBConsumer.

---
 .../consumer/cas2iob/main/ToIOBConsumer.java  |  7 +++---
 .../cas2iob/main/ToIOBConsumerTest.java       | 22 +++++++++++++++----
 2 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/jcore-iob-consumer/src/main/java/de/julielab/jcore/consumer/cas2iob/main/ToIOBConsumer.java b/jcore-iob-consumer/src/main/java/de/julielab/jcore/consumer/cas2iob/main/ToIOBConsumer.java
index 030042a8a..854990dc4 100644
--- a/jcore-iob-consumer/src/main/java/de/julielab/jcore/consumer/cas2iob/main/ToIOBConsumer.java
+++ b/jcore-iob-consumer/src/main/java/de/julielab/jcore/consumer/cas2iob/main/ToIOBConsumer.java
@@ -181,7 +181,6 @@ public void process(JCas jCas) {
                     bw.newLine();
                 } else if (token.getText().equals("") || token.getText().equals(PARAGRAPH_END_MARK)) {
                     bw.newLine();
-                    bw.newLine();
                 } else {
                     final Stream.Builder<String> sb = Stream.builder();
                     sb.accept(token.getText());
@@ -192,7 +191,8 @@ public void process(JCas jCas) {
                     bw.newLine();
                 }
             }
-
+            // newline at the very end; this makes it easy to concatenate multiple output IOB files into one larger file
+            bw.newLine();
             if (bw != null) {
                 bw.close();
             }
@@ -284,7 +284,7 @@ public IOToken[] convertToIOB(JCas jcas) {
                     // if we are at the first token, we need to add a sentence break mark which is
                     // later replaced by an empty line
                     if (i == 0 && overallSentCount > 0) {
-                        IOToken ioToken = null;
+                        IOToken ioToken;
                         //if (sentCount == 0) {
                         if (currentParagraph != lastPara) {
                             // add paragraph end before this sentence
@@ -385,6 +385,7 @@ private void tokenLabeling(TreeMap<Integer, IOToken> ioTokenMap, Iterator[] anno
 
     }
 
+
     /**
      * @param ioTokenMap
      * @param label
diff --git a/jcore-iob-consumer/src/test/java/de/julielab/jcore/consumer/cas2iob/main/ToIOBConsumerTest.java b/jcore-iob-consumer/src/test/java/de/julielab/jcore/consumer/cas2iob/main/ToIOBConsumerTest.java
index e1d926452..a7a8f111f 100644
--- a/jcore-iob-consumer/src/test/java/de/julielab/jcore/consumer/cas2iob/main/ToIOBConsumerTest.java
+++ b/jcore-iob-consumer/src/test/java/de/julielab/jcore/consumer/cas2iob/main/ToIOBConsumerTest.java
@@ -47,14 +47,20 @@ public void testWriteIOB() throws Exception {
         final JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-morpho-syntax-types",
                 "de.julielab.jcore.types.jcore-semantics-biology-types"
                 , "de.julielab.jcore.types.jcore-document-meta-types");
-        jCas.setDocumentText("BRCA influences cancer.");
+        jCas.setDocumentText("BRCA influences cancer. Our data suggest this.");
 
-        new Sentence(jCas, 0, jCas.getDocumentText().length()).addToIndexes();
+        new Sentence(jCas, 0, 23).addToIndexes();
+        new Sentence(jCas, 24, jCas.getDocumentText().length()).addToIndexes();
         new Gene(jCas, 0, 4).addToIndexes();
         new Token(jCas, 0, 4).addToIndexes();
         new Token(jCas, 5, 15).addToIndexes();
         new Token(jCas, 16, 22).addToIndexes();
         new Token(jCas, 22, 23).addToIndexes();
+        new Token(jCas, 24, 27).addToIndexes();
+        new Token(jCas, 28, 32).addToIndexes();
+        new Token(jCas, 33, 40).addToIndexes();
+        new Token(jCas, 41, 45).addToIndexes();
+        new Token(jCas, 45, 46).addToIndexes();
 
         final String outputDir = "src/test/resources/iob-output";
         final AnalysisEngine iobwriter = AnalysisEngineFactory.createEngine("de.julielab.jcore.consumer.cas2iob.desc.jcore-iob-consumer",
@@ -68,7 +74,14 @@ public void testWriteIOB() throws Exception {
         assertThat(IOUtils.readLines(new FileInputStream(file), "UTF-8")).containsExactly("BRCA	B_Gene",
                 "influences	O",
                 "cancer	O",
-                ".	O");
+                ".	O",
+                "",
+                "Our	O",
+                "data	O",
+                "suggest	O",
+                "this	O",
+                ".	O",
+                "");
     }
 
     @Test
@@ -115,7 +128,8 @@ public void testWriteIOBWithPos() throws Exception {
         assertThat(IOUtils.readLines(new FileInputStream(file), "UTF-8")).containsExactly("BRCA	NN	B-Gene",
                 "influences	VBZ	O",
                 "cancer	NN	O",
-                ".	.	O");
+                ".	.	O",
+                "");
 
     }
 }

From ca102c8eac8597d8696bcb2804fd12f0491de205 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 17 Aug 2022 11:05:04 +0200
Subject: [PATCH 228/269] Remove a helper paragraph annotation created by
 IOBConsumer.

---
 .../jcore/consumer/cas2iob/main/ToIOBConsumer.java       | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/jcore-iob-consumer/src/main/java/de/julielab/jcore/consumer/cas2iob/main/ToIOBConsumer.java b/jcore-iob-consumer/src/main/java/de/julielab/jcore/consumer/cas2iob/main/ToIOBConsumer.java
index 854990dc4..de57edd6f 100644
--- a/jcore-iob-consumer/src/main/java/de/julielab/jcore/consumer/cas2iob/main/ToIOBConsumer.java
+++ b/jcore-iob-consumer/src/main/java/de/julielab/jcore/consumer/cas2iob/main/ToIOBConsumer.java
@@ -117,7 +117,7 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
         addPos = Optional.ofNullable((Boolean) aContext.getConfigParameterValue(PARAM_ADD_POS)).orElse(false);
 
         separator = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_COLUMN_SEPARATOR)).orElse("\t");
-        separator = separator.replaceAll("\\\\t", 	"\t");
+        separator = separator.replaceAll("\\\\t", "\t");
 
         iobMarkSeparator = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_IOB_MARK_SEPARATOR)).orElse("_");
 
@@ -236,8 +236,8 @@ public IOToken[] convertToIOB(JCas jcas) {
         while (paragraphIter.hasNext()) {
             paragraphs.add((Paragraph) paragraphIter.next());
         }
+        Paragraph dParagraph = null;
         if (paragraphs.isEmpty()) {
-            Paragraph dParagraph = null;
             try {
                 dParagraph = (Paragraph) JCoReAnnotationTools.getAnnotationByClassName(jcas, Paragraph.class.getName());
             } catch (ClassNotFoundException | SecurityException
@@ -249,6 +249,7 @@ public IOToken[] convertToIOB(JCas jcas) {
             }
             dParagraph.setBegin(0);
             dParagraph.setEnd(jcas.getDocumentText().length());
+            dParagraph.setComponentId(ToIOBConsumer.class.getCanonicalName());
             dParagraph.addToIndexes(jcas);
 
             paragraphs.add(dParagraph);
@@ -331,6 +332,10 @@ public IOToken[] convertToIOB(JCas jcas) {
             }
         }
 
+        // remove helper paragraph annotation
+        if (dParagraph != null)
+            dParagraph.removeFromIndexes();
+
         return ret;
     }
 

From 5795d06571cb5d566102636040836f7b33796c2f Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 17 Aug 2022 11:08:06 +0200
Subject: [PATCH 229/269] Add the reader component ID to the Zone annotations
 created.

I.e. abstracttext, title, paragraph, section, ...
---
 .../src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java | 1 +
 1 file changed, 1 insertion(+)

diff --git a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
index 9788261de..2372ea74a 100644
--- a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
+++ b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
@@ -186,6 +186,7 @@ private void setDocumentText(JCas jCas, BioCDocument document) {
                             passageAnnotation = new Zone(jCas, offset, passageEnd);
                             break;
                     }
+                    passageAnnotation.setComponentId(GNormPlusFormatMultiplier.class.getCanonicalName());
                     passageAnnotation.addToIndexes();
                 }
             }

From c1eb1d3acabc023427adc090f51442ed4869a925 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 17 Aug 2022 11:09:04 +0200
Subject: [PATCH 230/269] Add the uima type underlying an output document
 structure as an infon.

This was useful for debugging and should not have side effects. It's just one more infon.
---
 .../jcore/consumer/gnp/BioCDocumentPopulator.java   |  1 +
 .../consumer/gnp/GNormPlusFormatWriterTest.java     | 13 +++++++++++--
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
index b26712495..7c8b3a413 100644
--- a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
+++ b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
@@ -125,6 +125,7 @@ private BioCPassage getPassageForAnnotation(Annotation a) {
         // GNormPlus doesn't seem to handle newlines well. It resulted in missing annotations when testing if the
         // output format is handled well by GNormPlus.
         p.setText(a.getCoveredText().replaceAll("\n", " "));
+        p.putInfon("uimatype", a.getClass().getCanonicalName());
         return p;
     }
 }
diff --git a/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriterTest.java b/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriterTest.java
index d2d9d0f40..388b36324 100644
--- a/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriterTest.java
+++ b/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriterTest.java
@@ -7,13 +7,14 @@
 import de.julielab.jcore.types.pubmed.Header;
 import org.apache.commons.io.FileUtils;
 import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.cas.impl.XmiCasDeserializer;
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.resource.ResourceInitializationException;
-import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 
+import java.io.FileInputStream;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.List;
@@ -28,7 +29,7 @@ public class GNormPlusFormatWriterTest {
 
     private static final Path BASEDIR = Path.of("src", "test", "resources", "testoutput");
 
-    @AfterAll
+//    @AfterAll
     public static void cleanFinally() {
         FileUtils.deleteQuietly(BASEDIR.toFile());
     }
@@ -120,4 +121,21 @@ public void omitEmptyDocuments2() throws Exception {
         assertThat(Path.of(BASEDIR.toString(), "bioc_collections_0", "bioc_collection_0_0.xml")).doesNotExist();
     }
 
+    /**
+     * Debugging aid: loads a serialized CAS from the XMI file {@code 12486105.xmi} and runs the writer on it.
+     * The test has no assertions; it only checks that processing completes without throwing.
+     * NOTE(review): the XMI file is resolved against the working directory - confirm that the fixture is
+     * available wherever this test runs.
+     */
+    @Test
+    public void muh() throws Exception {
+        final JCas jCas = TestDocumentGenerator.createTestJCas();
+        // try-with-resources releases the file handle even if deserialization throws.
+        try (FileInputStream xmiStream = new FileInputStream("12486105.xmi")) {
+            XmiCasDeserializer.deserialize(xmiStream, jCas.getCas());
+        }
+        final AnalysisEngine writer = getWriterInstance(1, 1);
+        writer.process(jCas);
+    }
+
 }

From 13efaeaf579a463f36bace21cd6f9cc6fbefbf50 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 17 Aug 2022 11:09:23 +0200
Subject: [PATCH 231/269] Add the test iob-output files to .gitignore.

---
 jcore-iob-consumer/.gitignore | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jcore-iob-consumer/.gitignore b/jcore-iob-consumer/.gitignore
index 2960b6e4b..673a2bee7 100644
--- a/jcore-iob-consumer/.gitignore
+++ b/jcore-iob-consumer/.gitignore
@@ -1 +1 @@
-src/test/resources/iob-output/
+src/test/resources/iob-output/*

From bbf6c79f89dab049c154b17466cd65b65cf9f594 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 19 Aug 2022 14:17:49 +0200
Subject: [PATCH 232/269] Add a file reader mode to add title and abstract
 annotations.

The first line is the title, the rest is the abstract.
---
 jcore-file-reader/pom.xml                     |   1 +
 .../jcore/reader/file/main/FileReader.java    |  25 +-
 .../reader/file/desc/jcore-file-reader.xml    | 322 +++++++++---------
 3 files changed, 191 insertions(+), 157 deletions(-)

diff --git a/jcore-file-reader/pom.xml b/jcore-file-reader/pom.xml
index 179cc5647..58ddcfcc6 100644
--- a/jcore-file-reader/pom.xml
+++ b/jcore-file-reader/pom.xml
@@ -25,6 +25,7 @@
             <groupId>de.julielab</groupId>
             <artifactId>julielab-java-utilities</artifactId>
         </dependency>
+
         <dependency>
             <groupId>org.junit.jupiter</groupId>
             <artifactId>junit-jupiter-engine</artifactId>
diff --git a/jcore-file-reader/src/main/java/de/julielab/jcore/reader/file/main/FileReader.java b/jcore-file-reader/src/main/java/de/julielab/jcore/reader/file/main/FileReader.java
index 564ec30f0..f72f7fac1 100644
--- a/jcore-file-reader/src/main/java/de/julielab/jcore/reader/file/main/FileReader.java
+++ b/jcore-file-reader/src/main/java/de/julielab/jcore/reader/file/main/FileReader.java
@@ -20,8 +20,7 @@
 import de.julielab.java.utilities.FileUtilities;
 import de.julielab.java.utilities.IOStreamUtilities;
 import de.julielab.jcore.types.Date;
-import de.julielab.jcore.types.Sentence;
-import de.julielab.jcore.types.Token;
+import de.julielab.jcore.types.*;
 import de.julielab.jcore.types.pubmed.Header;
 import org.apache.uima.analysis_engine.annotator.AnnotatorConfigurationException;
 import org.apache.uima.cas.CAS;
@@ -39,6 +38,7 @@
 import java.nio.file.FileVisitOption;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.util.List;
 import java.util.*;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -85,6 +85,7 @@ public class FileReader extends CollectionReader_ImplBase {
      *
      */
     public static final String ORIG_FILES_EXT = "OriginalFileExt";
+    public static final String TITLE_ABSTRACT_SPLIT = "MakeTitleAbstractSplit";
     public static final String REMOVE_FILE_NAME_EXTENSION_FOR_DOC_ID = "RemoveFileNameExtensionForDocId";
     private final static Logger log = LoggerFactory.getLogger(FileReader.class);
     private ArrayList<File> files;
@@ -113,6 +114,8 @@ public class FileReader extends CollectionReader_ImplBase {
     private String origFileExt;
     @ConfigurationParameter(name = REMOVE_FILE_NAME_EXTENSION_FOR_DOC_ID, mandatory = false, defaultValue = "true")
     private boolean removeFileNameExtensionForDocId;
+    @ConfigurationParameter(name = TITLE_ABSTRACT_SPLIT, mandatory = false, defaultValue = "false", description = "Use the first input line as the title with a Title annotation and mark the rest with the AbstractText annotation. Defaults to false.")
+    private boolean titleAbstractSplit;
 
     /**
      * @see org.apache.uima.collection.CollectionReader_ImplBase#initialize()
@@ -190,6 +193,8 @@ public void initialize() throws ResourceInitializationException {
         if (!inputDirectory.exists())
             throw new ResourceInitializationException(AnnotatorConfigurationException.RESOURCE_NOT_FOUND, new Object[]{inputDirectory.getAbsolutePath()});
 
+        titleAbstractSplit = (boolean) Optional.ofNullable(getConfigParameterValue(TITLE_ABSTRACT_SPLIT)).orElse(false);
+
         fileIndex = 0;
         files = new ArrayList<File>();
 
@@ -332,6 +337,22 @@ public void getNext(CAS aCAS) throws CollectionException {
                 jcas.setDocumentText(text);
             }
 
+            if (titleAbstractSplit) {
+                String docText = jcas.getDocumentText();
+                final int firstNewlineIndex = docText.indexOf("\n");
+                if (firstNewlineIndex > 0) {
+                    final Title title = new Title(jcas, 0, firstNewlineIndex);
+                    title.setTitleType("document");
+                    title.setComponentId(getClass().getCanonicalName());
+                    title.addToIndexes();
+                }
+                if (firstNewlineIndex + 1 < docText.length()) {
+                    final AbstractText abstractText = new AbstractText(jcas, firstNewlineIndex + 1, docText.length());
+                    abstractText.setComponentId(getClass().getCanonicalName());
+                    abstractText.addToIndexes();
+                }
+            }
+
             if (useFilenameAsDocId) {
                 String filename = getFileName(file, removeFileNameExtensionForDocId);
                 log.trace("Setting the file name {} as docId to a new Header annotation.", filename);
diff --git a/jcore-file-reader/src/main/resources/de/julielab/jcore/reader/file/desc/jcore-file-reader.xml b/jcore-file-reader/src/main/resources/de/julielab/jcore/reader/file/desc/jcore-file-reader.xml
index bda1bb0e5..1297241d1 100644
--- a/jcore-file-reader/src/main/resources/de/julielab/jcore/reader/file/desc/jcore-file-reader.xml
+++ b/jcore-file-reader/src/main/resources/de/julielab/jcore/reader/file/desc/jcore-file-reader.xml
@@ -1,159 +1,171 @@
 <?xml version='1.0' encoding='UTF-8'?>
 <collectionReaderDescription xmlns="http://uima.apache.org/resourceSpecifier">
-  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
-  <implementationName>de.julielab.jcore.reader.file.main.FileReader</implementationName>
-  <processingResourceMetaData>
-    <name>JCoRe File Reader</name>
-    <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+    <implementationName>de.julielab.jcore.reader.file.main.FileReader</implementationName>
+    <processingResourceMetaData>
+        <name>JCoRe File Reader</name>
+        <description/>
+        <version>2.6.0-SNAPSHOT</version>
         <vendor>JULIELab Jena, Germany</vendor>
-    <configurationParameters>
-      <configurationParameter>
-        <name>InputDirectory</name>
-        <description>The directory where the text files reside.</description>
-        <type>String</type>
-        <multiValued>false</multiValued>
-        <mandatory>true</mandatory>
-      </configurationParameter>
-      <configurationParameter>
-        <name>UseFilenameAsDocId</name>
-        <description>If set to true, the filename is used as document ID (without extension).</description>
-        <type>Boolean</type>
-        <multiValued>false</multiValued>
-        <mandatory>false</mandatory>
-      </configurationParameter>
-    <configurationParameter>
-        <name>PublicatonDatesFile</name>
-        <description>The file that maps each article id (e.g. pubmed id) of the files in the input directory to the corresponding publication date (can be created using julielab/jules/ae/genemapper/utils/PubMedID2PublicationDate)</description>
-        <type>String</type>
-        <multiValued>false</multiValued>
-        <mandatory>false</mandatory>
-      </configurationParameter>
-    <configurationParameter>
-        <name>AllowedFileExtensions</name>
-        <description>A list of file name extensions to restrict the read files in the InputDirectory. All files will be read if this parameter is left blank.</description>
-        <type>String</type>
-        <multiValued>true</multiValued>
-        <mandatory>false</mandatory>
-      </configurationParameter>
-    <configurationParameter>
-        <name>SentencePerLine</name>
-        <description>Whether the files are preprocessed and have only one sentence per line.</description>
-        <type>Boolean</type>
-        <multiValued>false</multiValued>
-        <mandatory>false</mandatory>
-      </configurationParameter>
-      <configurationParameter>
-        <name>FileNameSplitUnderscore</name>
-        <description>Whether the filenames are splitted on underscore as well as.</description>
-        <type>Boolean</type>
-        <multiValued>false</multiValued>
-        <mandatory>false</mandatory>
-      </configurationParameter>
-      <configurationParameter>
-        <name>ReadSubDirs</name>
-        <type>Boolean</type>
-        <multiValued>false</multiValued>
-        <mandatory>false</mandatory>
-      </configurationParameter>
-    <configurationParameter>
-        <name>TokenByToken</name>
-        <type>Boolean</type>
-        <multiValued>false</multiValued>
-        <mandatory>false</mandatory>
-      </configurationParameter>
-    <configurationParameter>
-        <name>OriginalFolder</name>
-        <type>String</type>
-        <multiValued>false</multiValued>
-        <mandatory>false</mandatory>
-      </configurationParameter>
-      <configurationParameter>
-        <name>OriginalFileExt</name>
-        <type>String</type>
-        <multiValued>false</multiValued>
-        <mandatory>false</mandatory>
-      </configurationParameter>
-      <configurationParameter>
-          <name>RemoveFileNameExtensionForDocId</name>
-          <type>Boolean</type>
-          <multiValued>false</multiValued>
-          <mandatory>false</mandatory>
-      </configurationParameter>
-    </configurationParameters>
-    <configurationParameterSettings>
-      <nameValuePair>
-        <name>UseFilenameAsDocId</name>
-        <value>
-          <boolean>false</boolean>
-        </value>
-      </nameValuePair>
-      <nameValuePair>
-        <name>InputDirectory</name>
-        <value>
-          <string>data/files</string>
-        </value>
-      </nameValuePair>
-    <nameValuePair>
-        <name>SentencePerLine</name>
-        <value>
-          <boolean>false</boolean>
-        </value>
-      </nameValuePair>
-      <nameValuePair>
-        <name>FileNameSplitUnderscore</name>
-        <value>
-          <boolean>false</boolean>
-        </value>
-      </nameValuePair>
-      <nameValuePair>
-        <name>ReadSubDirs</name>
-        <value>
-          <boolean>false</boolean>
-        </value>
-      </nameValuePair>
-    <nameValuePair>
-        <name>TokenByToken</name>
-        <value>
-          <boolean>false</boolean>
-        </value>
-      </nameValuePair>
-    <nameValuePair>
-        <name>OriginalFileExt</name>
-        <value>
-          <string>txt</string>
-        </value>
-      </nameValuePair>
-      <nameValuePair>
-          <name>RemoveFileNameExtensionForDocId</name>
-          <value>
-              <boolean>true</boolean>
-          </value>
-      </nameValuePair>
-    </configurationParameterSettings>
-    <typeSystemDescription>
-      <imports>
-        <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types" />
-        <import name="de.julielab.jcore.types.jcore-morpho-syntax-types" />
-      </imports>
-    </typeSystemDescription>
-    <typePriorities />
-    <fsIndexCollection />
-    <capabilities>
-      <capability>
-        <inputs />
-        <outputs>
-          <type allAnnotatorFeatures="true">de.julielab.jcore.types.pubmed.Header</type>
-          <type allAnnotatorFeatures="true">de.julielab.jcore.types.Date</type>
-        </outputs>
-        <languagesSupported />
-      </capability>
-    </capabilities>
-    <operationalProperties>
-      <modifiesCas>true</modifiesCas>
-      <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
-      <outputsNewCASes>true</outputsNewCASes>
-    </operationalProperties>
-  </processingResourceMetaData>
-  <resourceManagerConfiguration />
+        <configurationParameters>
+            <configurationParameter>
+                <name>InputDirectory</name>
+                <description>The directory where the text files reside.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>true</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>UseFilenameAsDocId</name>
+                <description>If set to true, the filename is used as document ID (without extension).</description>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>PublicatonDatesFile</name>
+                <description>The file that maps each article id (e.g. pubmed id) of the files in the input directory to
+                    the corresponding publication date (can be created using
+                    julielab/jules/ae/genemapper/utils/PubMedID2PublicationDate)
+                </description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>AllowedFileExtensions</name>
+                <description>A list of file name extensions to restrict the read files in the InputDirectory. All files
+                    will be read if this parameter is left blank.
+                </description>
+                <type>String</type>
+                <multiValued>true</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>SentencePerLine</name>
+                <description>Whether the files are preprocessed and have only one sentence per line.</description>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>FileNameSplitUnderscore</name>
+                <description>Whether the filenames are split on underscores as well.</description>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>ReadSubDirs</name>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>TokenByToken</name>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>OriginalFolder</name>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>OriginalFileExt</name>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>RemoveFileNameExtensionForDocId</name>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>MakeTitleAbstractSplit</name>
+                <description>Use the first input line as the title with a Title annotation and mark the rest with the AbstractText annotation. Defaults to false.</description>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+        </configurationParameters>
+        <configurationParameterSettings>
+            <nameValuePair>
+                <name>UseFilenameAsDocId</name>
+                <value>
+                    <boolean>false</boolean>
+                </value>
+            </nameValuePair>
+            <nameValuePair>
+                <name>InputDirectory</name>
+                <value>
+                    <string>data/files</string>
+                </value>
+            </nameValuePair>
+            <nameValuePair>
+                <name>SentencePerLine</name>
+                <value>
+                    <boolean>false</boolean>
+                </value>
+            </nameValuePair>
+            <nameValuePair>
+                <name>FileNameSplitUnderscore</name>
+                <value>
+                    <boolean>false</boolean>
+                </value>
+            </nameValuePair>
+            <nameValuePair>
+                <name>ReadSubDirs</name>
+                <value>
+                    <boolean>false</boolean>
+                </value>
+            </nameValuePair>
+            <nameValuePair>
+                <name>TokenByToken</name>
+                <value>
+                    <boolean>false</boolean>
+                </value>
+            </nameValuePair>
+            <nameValuePair>
+                <name>OriginalFileExt</name>
+                <value>
+                    <string>txt</string>
+                </value>
+            </nameValuePair>
+            <nameValuePair>
+                <name>RemoveFileNameExtensionForDocId</name>
+                <value>
+                    <boolean>true</boolean>
+                </value>
+            </nameValuePair>
+        </configurationParameterSettings>
+        <typeSystemDescription>
+            <imports>
+                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
+                <import name="de.julielab.jcore.types.jcore-morpho-syntax-types"/>
+            </imports>
+        </typeSystemDescription>
+        <typePriorities/>
+        <fsIndexCollection/>
+        <capabilities>
+            <capability>
+                <inputs/>
+                <outputs>
+                    <type allAnnotatorFeatures="true">de.julielab.jcore.types.pubmed.Header</type>
+                    <type allAnnotatorFeatures="true">de.julielab.jcore.types.Date</type>
+                </outputs>
+                <languagesSupported/>
+            </capability>
+        </capabilities>
+        <operationalProperties>
+            <modifiesCas>true</modifiesCas>
+            <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
+            <outputsNewCASes>true</outputsNewCASes>
+        </operationalProperties>
+    </processingResourceMetaData>
+    <resourceManagerConfiguration/>
 </collectionReaderDescription>
\ No newline at end of file

From 6bfc7905dd82dd38f36471f14a04a6303775ab20 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 24 Aug 2022 15:13:19 +0200
Subject: [PATCH 233/269] First version of the annotator.

---
 jcore-gnormplus-ae/LICENSE                    |  26 ++
 jcore-gnormplus-ae/README.md                  |  34 ++
 jcore-gnormplus-ae/component.meta             |  20 +
 jcore-gnormplus-ae/pom.xml                    |  70 ++++
 .../ae/gnormplus/GNormPlusAnnotator.java      | 141 +++++++
 .../ae/gnormplus/config/setup_do_ner.txt      |  31 ++
 .../ae/gnormplus/desc/jcore-gnormplus-ae.xml  |  82 ++++
 .../ae/gnormplus/GNormPlusAnnotatorTest.java  |  20 +
 .../jcore/reader/BioCCasPopulator.java        |  46 ++-
 .../consumer/gnp/GNormPlusFormatWriter.java   |   3 +-
 .../src/test/resources/iob-output/1.iob       |   4 -
 pom.xml                                       | 351 ++++++++++++------
 12 files changed, 700 insertions(+), 128 deletions(-)
 create mode 100644 jcore-gnormplus-ae/LICENSE
 create mode 100644 jcore-gnormplus-ae/README.md
 create mode 100644 jcore-gnormplus-ae/component.meta
 create mode 100644 jcore-gnormplus-ae/pom.xml
 create mode 100644 jcore-gnormplus-ae/src/main/java/de/julielab/jcore/ae/gnormplus/GNormPlusAnnotator.java
 create mode 100644 jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/ae/gnormplus/config/setup_do_ner.txt
 create mode 100644 jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/ae/gnormplus/desc/jcore-gnormplus-ae.xml
 create mode 100644 jcore-gnormplus-ae/src/test/java/de/julielab/jcore/ae/gnormplus/GNormPlusAnnotatorTest.java
 delete mode 100644 jcore-iob-consumer/src/test/resources/iob-output/1.iob

diff --git a/jcore-gnormplus-ae/LICENSE b/jcore-gnormplus-ae/LICENSE
new file mode 100644
index 000000000..fbbd41e05
--- /dev/null
+++ b/jcore-gnormplus-ae/LICENSE
@@ -0,0 +1,26 @@
+BSD 2-Clause License
+
+Copyright (c) 2017, JULIE Lab
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/jcore-gnormplus-ae/README.md b/jcore-gnormplus-ae/README.md
new file mode 100644
index 000000000..3d5fbc90a
--- /dev/null
+++ b/jcore-gnormplus-ae/README.md
@@ -0,0 +1,34 @@
+# JCoRe GNormPlus Annotator
+
+**Descriptor Path**:
+```
+de.julielab.jcore.ae.gnormplus.desc.jcore-gnormplus-ae
+```
+
+Wrapper for the JULIE Lab variant of the GNormPlus gene ID mapper.
+
+
+
+**1. Parameters**
+
+| Parameter Name | Parameter Type | Mandatory | Multivalued | Description |
+|----------------|----------------|-----------|-------------|-------------|
+| param1 | UIMA-Type | Boolean | Boolean | Description |
+| param2 | UIMA-Type | Boolean | Boolean | Description |
+
+**2. Predefined Settings**
+
+| Parameter Name | Parameter Syntax | Example |
+|----------------|------------------|---------|
+| param1 | Syntax-Description | `Example` |
+| param2 | Syntax-Description | `Example` |
+
+**3. Capabilities**
+
+| Type | Input | Output |
+|------|:-----:|:------:|
+| de.julielab.jcore.types.TYPE |  | `+` |
+| de.julielab.jcore.types.ace.TYPE | `+` |  |
+
+
+[1] Some Literature?
diff --git a/jcore-gnormplus-ae/component.meta b/jcore-gnormplus-ae/component.meta
new file mode 100644
index 000000000..fddbdcc18
--- /dev/null
+++ b/jcore-gnormplus-ae/component.meta
@@ -0,0 +1,20 @@
+{
+    "categories": [
+        "ae"
+    ],
+    "description": "Wrapper for the JULIE Lab variant of the GNormPlus gene ID mapper.",
+    "descriptors": [
+        {
+            "category": "ae",
+            "location": "de.julielab.jcore.ae.gnormplus.desc.jcore-gnormplus-ae"
+        }
+    ],
+    "exposable": true,
+    "group": "general",
+    "maven-artifact": {
+        "artifactId": "jcore-gnormplus-ae",
+        "groupId": "de.julielab",
+        "version": "2.6.0-SNAPSHOT"
+    },
+    "name": "JCoRe GNormPlus Annotator"
+}
diff --git a/jcore-gnormplus-ae/pom.xml b/jcore-gnormplus-ae/pom.xml
new file mode 100644
index 000000000..5ede30204
--- /dev/null
+++ b/jcore-gnormplus-ae/pom.xml
@@ -0,0 +1,70 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>jcore-gnormplus-ae</artifactId>
+    <packaging>jar</packaging>
+    <groupId>de.julielab</groupId>
+
+    <parent>
+        <groupId>de.julielab</groupId>
+        <artifactId>jcore-base</artifactId>
+        <version>2.6.0-SNAPSHOT</version>
+    </parent>
+
+    <version>2.6.0-SNAPSHOT</version>
+
+    <dependencies>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-gnp-bioc-writer</artifactId>
+            <version>${project.parent.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-gnp-bioc-reader</artifactId>
+            <version>${project.parent.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>julielab-gnormplus</artifactId>
+            <version>1.0.0-SNAPSHOT</version>
+        </dependency>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-types</artifactId>
+            <version>${jcore-types-version}</version>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-descriptor-creator</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
+        </dependency>
+    </dependencies>
+    <name>JCoRe GNormPlus Annotator</name>
+    <organization>
+        <name>JULIE Lab Jena, Germany</name>
+        <url>http://www.julielab.de</url>
+    </organization>
+    <url>https://github.com/JULIELab/jcore-base/tree/master/jcore-gnormplus-ae</url>
+    <description>Wrapper for the JULIE Lab variant of the GNormPlus gene ID mapper.</description>
+    <licenses>
+        <license>
+            <name>BSD 2-Clause</name>
+            <url>https://opensource.org/licenses/BSD-2-Clause</url>
+        </license>
+    </licenses>
+</project>
diff --git a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/ae/gnormplus/GNormPlusAnnotator.java b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/ae/gnormplus/GNormPlusAnnotator.java
new file mode 100644
index 000000000..6c95ecc5c
--- /dev/null
+++ b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/ae/gnormplus/GNormPlusAnnotator.java
@@ -0,0 +1,141 @@
+package de.julielab.jcore.ae.gnormplus;
+
+import GNormPluslib.GNormPlus;
+import com.pengyifan.bioc.BioCCollection;
+import com.pengyifan.bioc.BioCDocument;
+import com.pengyifan.bioc.io.BioCCollectionWriter;
+import de.julielab.java.utilities.FileUtilities;
+import de.julielab.jcore.consumer.gnp.BioCDocumentPopulator;
+import de.julielab.jcore.reader.BioCCasPopulator;
+import de.julielab.jcore.types.Gene;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.descriptor.ResourceMetaData;
+import org.apache.uima.fit.descriptor.TypeCapability;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.xml.stream.XMLStreamException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Date;
+import java.util.Optional;
+
+@ResourceMetaData(name = "JCoRe GNormPlus Annotator", description = "Wrapper for the JULIE Lab variant of the GNormPlus gene ID mapper.", vendor = "JULIE Lab Jena, Germany")
+@TypeCapability(inputs = {}, outputs = {})
+public class GNormPlusAnnotator extends JCasAnnotator_ImplBase {
+
+    public static final String PARAM_ADD_GENES = "AddGenes";
+    public static final String PARAM_GENE_TYPE_NAME = "GeneTypeName";
+    public static final String PARAM_GNP_SETUP_FILE = "GNormPlusSetupFile";
+    public static final String PARAM_FOCUS_SPECIES = "FocusSpecies";
+    public static final String PARAM_OUTPUT_DIR = "OutputDirectory";
+    private final static Logger log = LoggerFactory.getLogger(GNormPlusAnnotator.class);
+    @ConfigurationParameter(name = PARAM_ADD_GENES, mandatory = false, defaultValue = "false", description = "If set to true, all Gene annotations in the CAS will be added to the BioC documents. The default type used is de.julielab.jcore.types.Gene. This can be changed with the " + PARAM_GENE_TYPE_NAME + " parameter.")
+    private boolean addGenes;
+    @ConfigurationParameter(name = PARAM_GENE_TYPE_NAME, mandatory = false, defaultValue = "de.julielab.jcore.types.Gene", description = "The UIMA type denoting gene annotations that should be written into the BioC format when the " + PARAM_ADD_GENES + " parameter is set to true.")
+    private String geneTypeName;
+    @ConfigurationParameter(name = PARAM_GNP_SETUP_FILE, mandatory = false, description = "File path or class path resource path to the setup.txt file for GNormPlus. If not specified, a default setup file is loaded that expects the Dictionary/ directory directly under the working directory, performs gene recognition with the CRF and thus expects the GNormPlus CRF directory directly under the working directory and maps the found genes to NCBI gene IDs for all organisms.")
+    private String setupFile;
+    @ConfigurationParameter(name = PARAM_FOCUS_SPECIES, mandatory = false, description = "If given, all gene mentions are assigned to this NCBI taxonomy ID, i.e. species recognition is omitted.")
+    private String focusSpecies;
+    @ConfigurationParameter(name = PARAM_OUTPUT_DIR, mandatory = false, description = "Optional. If specified, the GNormPlus output files in BioC format will be saved to the given directory. In this way, this component can be used directly as a BioC XML writer through the GNormPlus algorithm.")
+    private String outputDirectory;
+
+    private BioCDocumentPopulator bioCDocumentPopulator;
+
+    /**
+     * Called once by the framework at component creation. Reads the descriptor parameters (falling back to defaults), loads the GNormPlus
+     * configuration and resources while holding the lock on GNormPlus.class, creates the BioC document populator and, if set, the output directory.
+     */
+    @Override
+    public void initialize(final UimaContext aContext) throws ResourceInitializationException {
+        addGenes = (boolean) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_ADD_GENES)).orElse(false);
+        geneTypeName = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_GENE_TYPE_NAME)).orElse(Gene.class.getCanonicalName());
+        setupFile = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_GNP_SETUP_FILE)).orElse("/de/julielab/jcore/ae/gnormplus/config/setup_default.txt");
+        focusSpecies = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_FOCUS_SPECIES)).orElse("");
+        outputDirectory = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_OUTPUT_DIR)).orElse("");
+
+        synchronized (GNormPlus.class) {
+            try {
+                final InputStream setupFileStream = FileUtilities.findResource(setupFile);
+                GNormPlus.loadConfiguration(setupFileStream, focusSpecies);
+                GNormPlus.loadResources(focusSpecies, System.currentTimeMillis());
+            } catch (IOException e) {
+                log.error("Could not find resource {}", setupFile);
+                throw new ResourceInitializationException(e);
+            }
+        }
+        try {
+            bioCDocumentPopulator = new BioCDocumentPopulator(addGenes, geneTypeName);
+        } catch (ClassNotFoundException e) {
+            log.error("Gene annotation class {} could not be found.", geneTypeName, e);
+            throw new ResourceInitializationException(e);
+        }
+
+        try {
+            if (!outputDirectory.isBlank())
+                Files.createDirectories(Path.of(outputDirectory));
+        } catch (IOException e) {
+            log.error("Could not create the output directory {}", outputDirectory);
+            throw new ResourceInitializationException(e);
+        }
+    }
+
+    /**
+     * This method is called for each document going through the component. This
+     * is where the actual work happens.
+     */
+    @Override
+    public void process(final JCas aJCas) throws AnalysisEngineProcessException {
+        final BioCDocument bioCDocument = bioCDocumentPopulator.populate(aJCas);
+        final BioCCollection bioCCollection = new BioCCollection();
+        bioCCollection.setDate(new Date().toString());
+        bioCCollection.setEncoding("UTF-8");
+        bioCCollection.setKey("BioC.key");
+        bioCCollection.setSource("JULIE Lab GNormPlus");
+        bioCCollection.addDocument(bioCDocument);
+        final Path filePath = Path.of("tmp", bioCDocument.getID() + ".xml");
+        final Path outputFilePath = Path.of(outputDirectory.isBlank() ? "tmp" : outputDirectory, bioCDocument.getID() + "processed.xml");
+        try {
+            if (!Files.exists(filePath.getParent()))
+                Files.createDirectory(filePath.getParent());
+            try (BioCCollectionWriter w = new BioCCollectionWriter(filePath)) {
+                w.writeCollection(bioCCollection);
+            }
+            GNormPlus.processFile(filePath.toString(), filePath.getFileName().toString(), outputFilePath.toString(), System.currentTimeMillis(), "Test");
+        } catch (IOException | XMLStreamException e) {
+            log.error("Could not process document {}", bioCDocument.getID());
+            throw new AnalysisEngineProcessException(e);
+        }
+
+        try {
+            final BioCCasPopulator bioCCasPopulator = new BioCCasPopulator(outputFilePath);
+            bioCCasPopulator.populateWithNextDocument(aJCas, true);
+        } catch (XMLStreamException|IOException e) {
+            log.error("Could not read GNormPlus output file {}");
+            throw new AnalysisEngineProcessException(e);
+        }
+        try {
+            Files.delete(filePath);
+        } catch (IOException e) {
+            log.error("Could not delete temporary file {}", filePath);
+            throw new AnalysisEngineProcessException(e);
+        }
+        try {
+            if (!outputDirectory.isBlank())
+                Files.delete(outputFilePath);
+        } catch (IOException e) {
+            log.error("Could not delete temporary file {}", outputFilePath);
+            throw new AnalysisEngineProcessException(e);
+        }
+
+    }
+
+}
diff --git a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/ae/gnormplus/config/setup_do_ner.txt b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/ae/gnormplus/config/setup_do_ner.txt
new file mode 100644
index 000000000..d6443b9d6
--- /dev/null
+++ b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/ae/gnormplus/config/setup_do_ner.txt
@@ -0,0 +1,31 @@
+#===Annotation
+#Attribution setting:
+#FocusSpecies = Taxonomy ID
+#	All: All species
+#	9606: Human
+#	4932: yeast
+#	7227: Fly
+#	10090: Mouse
+#	10116: Rat
+#	7955: Zebrafish
+#	3702: Arabidopsis thaliana
+#open: True
+#close: False
+
+[Focus Species]
+	FocusSpecies = All
+	FilterAntibody = True
+[Dictionary & Model]
+	DictionaryFolder = Dictionary
+	GNRModel = Dictionary/GNR.Model
+	SCModel = Dictionary/SimConcept.Model
+	GeneIDMatch = True
+	HomologeneID = False
+[Modules]
+	GeneSpeciesRecognitionOnly = False
+	SpeciesAssignmentOnly = False
+	GeneNormalizationOnly = False
+[Others]
+	Normalization2Protein = False
+	ShowUnNormalizedMention = False
+	DeleteTmp = True
\ No newline at end of file
diff --git a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/ae/gnormplus/desc/jcore-gnormplus-ae.xml b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/ae/gnormplus/desc/jcore-gnormplus-ae.xml
new file mode 100644
index 000000000..a9dae0449
--- /dev/null
+++ b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/ae/gnormplus/desc/jcore-gnormplus-ae.xml
@@ -0,0 +1,82 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>true</primitive>
+  <annotatorImplementationName>de.julielab.jcore.ae.gnormplus.GNormPlusAnnotator</annotatorImplementationName>
+  <analysisEngineMetaData>
+    <name>JCoRe GNormPlus Annotator</name>
+    <description>Wrapper for the JULIE Lab variant of the GNormPlus gene ID mapper.</description>
+    <vendor>JULIE Lab Jena, Germany</vendor>
+    <configurationParameters>
+      <configurationParameter>
+        <name>AddGenes</name>
+        <description>If set to true, all Gene annotations in the CAS will be added to the BioC documents. The default type used is de.julielab.jcore.types.Gene. This can be changed with the GeneTypeName parameter.</description>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>GeneTypeName</name>
+        <description>The UIMA type denoting gene annotations that should be written into the BioC format when the AddGenes parameter is set to true.</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>GNormPlusSetupFile</name>
+        <description>File path or class path resource path to the setup.txt file for GNormPlus. If not specified, a default setup file is loaded that expects the Dictionary/ directory directly under the working directory, performs gene recognition with the CRF and thus expects the GNormPlus CRF directory directly under the working directory and maps the found genes to NCBI gene IDs for all organisms.</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>FocusSpecies</name>
+        <description>If given, all gene mentions are assigned to this NCBI taxonomy ID, i.e. species recognition is omitted.</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+      <configurationParameter>
+        <name>OutputDirectory</name>
+        <description>Optional. If specified, the GNormPlus output files in BioC format will be saved to the given directory. In this way, this component can be used directly as a BioC XML writer through the GNormPlus algorithm.</description>
+        <type>String</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings>
+      <nameValuePair>
+        <name>AddGenes</name>
+        <value>
+          <boolean>false</boolean>
+        </value>
+      </nameValuePair>
+      <nameValuePair>
+        <name>GeneTypeName</name>
+        <value>
+          <string>de.julielab.jcore.types.Gene</string>
+        </value>
+      </nameValuePair>
+    </configurationParameterSettings>
+    <typeSystemDescription>
+      <imports>
+        <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
+        <import name="de.julielab.jcore.types.jcore-semantics-biology-types"/>
+        <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
+      </imports>
+    </typeSystemDescription>
+    <fsIndexCollection/>
+    <capabilities>
+      <capability>
+        <inputs/>
+        <outputs/>
+        <languagesSupported/>
+      </capability>
+    </capabilities>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </analysisEngineMetaData>
+</analysisEngineDescription>
\ No newline at end of file
diff --git a/jcore-gnormplus-ae/src/test/java/de/julielab/jcore/ae/gnormplus/GNormPlusAnnotatorTest.java b/jcore-gnormplus-ae/src/test/java/de/julielab/jcore/ae/gnormplus/GNormPlusAnnotatorTest.java
new file mode 100644
index 000000000..daf863fae
--- /dev/null
+++ b/jcore-gnormplus-ae/src/test/java/de/julielab/jcore/ae/gnormplus/GNormPlusAnnotatorTest.java
@@ -0,0 +1,20 @@
+
+package de.julielab.jcore.ae.gnormplus;
+
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Unit tests for jcore-gnormplus-ae.
+ * Currently a placeholder: the only test method has an empty body and therefore always passes.
+ */
+public class GNormPlusAnnotatorTest{
+    private final static Logger log = LoggerFactory.getLogger(GNormPlusAnnotatorTest.class);
+
+    @Test
+    public void testAnnotator() {
+        // TODO: not implemented yet; no annotator is created and nothing is asserted.
+    }
+}
diff --git a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
index 2372ea74a..f35fa5559 100644
--- a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
+++ b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
@@ -32,10 +32,19 @@ public class BioCCasPopulator {
     private Map<String, String> sofaMaps;
     private int pos;
 
+    /**
+     * This constructor is used when the GNormPlusMultiplier/Reader is used to read files that directly correspond to
+     * JeDIS database documents and should be written back into the database. Then we need some information about
+     * the database and the state of the document.
+     * @param biocCollectionPath The BioC documents to read that have equivalents in the JeDIS database.
+     * @param costosysConfiguration The CoStoSys configuration to connect to the JeDIS database.
+     * @param documentsTable The name of the database table that stores the documents.
+     * @throws XMLStreamException
+     * @throws IOException
+     * @throws SQLException
+     */
     public BioCCasPopulator(Path biocCollectionPath, Path costosysConfiguration, String documentsTable) throws XMLStreamException, IOException, SQLException {
-        try (BioCCollectionReader bioCCollectionReader = new BioCCollectionReader(biocCollectionPath)) {
-            bioCCollection = bioCCollectionReader.readCollection();
-        }
+        this(biocCollectionPath);
         if (costosysConfiguration != null) {
             maxXmiIdMap = new HashMap<>();
             sofaMaps = new HashMap<>();
@@ -47,6 +56,18 @@ public BioCCasPopulator(Path biocCollectionPath, Path costosysConfiguration, Str
         pos = 0;
     }
 
+    /**
+     * This constructor is used when GNormPlus BioC files - or only the contained annotations - should be read into a CAS without the need to synchronize to a JeDIS database.
+     * @param biocCollectionPath The path to the BioC collection file to read.
+     * @throws XMLStreamException
+     * @throws IOException
+     */
+    public BioCCasPopulator(Path biocCollectionPath) throws XMLStreamException, IOException {
+        try (BioCCollectionReader bioCCollectionReader = new BioCCollectionReader(biocCollectionPath)) {
+            bioCCollection = bioCCollectionReader.readCollection();
+        }
+    }
+
     private void retrieveXmiMetaData(String documentsTable, DataBaseConnector dbc, CoStoSysConnection conn) throws SQLException {
         log.debug("Retrieving the max XMI IDs for the current BioC collection of size {} from the database.", bioCCollection.getDocmentCount());
         Statement stmt = conn.createStatement();
@@ -76,12 +97,23 @@ private void retrieveXmiMetaData(String documentsTable, DataBaseConnector dbc, C
     }
 
     public void populateWithNextDocument(JCas jCas) {
+        populateWithNextDocument(jCas, false);
+    }
+
+    /**
+     * Populate the given CAS either with the complete contents of the next BioC document or only with its annotations.
+     * @param jCas The CAS to add data to. Can be empty when it should be populated with the BioC document text or it already may have a text when it only should be filled with the annotations of the BioC document.
+     * @param onlyAddAnnotations Whether to add only annotations from the next BioC document instead of its whole textual contents.
+     */
+    public void populateWithNextDocument(JCas jCas, boolean onlyAddAnnotations) {
         BioCDocument document = bioCCollection.getDocument(pos++);
-        setDocumentId(jCas, document);
-        setDocumentText(jCas, document);
-        setMaxXmiId(jCas, document);
+        if (!onlyAddAnnotations) {
+            setDocumentId(jCas, document);
+            setDocumentText(jCas, document);
+            setMaxXmiId(jCas, document);
+        }
         Iterator<BioCAnnotation> allAnnotations = Stream.concat(document.getAnnotations().stream(), document.getPassages().stream().map(BioCPassage::getAnnotations).flatMap(Collection::stream)).iterator();
-        for (BioCAnnotation annotation : (Iterable<BioCAnnotation>)() ->allAnnotations) {
+        for (BioCAnnotation annotation : (Iterable<BioCAnnotation>) () -> allAnnotations) {
             Optional<String> type = annotation.getInfon("type");
             if (!type.isPresent())
                 throw new IllegalArgumentException("BioCDocument " + document.getID() + " has an annotation that does not specify its type: " + annotation);
diff --git a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriter.java b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriter.java
index a7cd8bce1..df2803243 100644
--- a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriter.java
+++ b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriter.java
@@ -92,8 +92,9 @@ public void process(final JCas jCas) throws AnalysisEngineProcessException {
     public void collectionProcessComplete() throws AnalysisEngineProcessException {
         super.collectionProcessComplete();
         try {
+            System.out.println("CollectionProcessComplete: Writing BioC collection of size " + currentCollection.getDocmentCount());
                 bioCCollectionWriter.writeBioCCollection(currentCollection);
-        } catch (Exception e) {
+        } catch (Throwable e) {
             log.error("Could not write final batch of BioCDocuments.", e);
             throw new AnalysisEngineProcessException(e);
         }
diff --git a/jcore-iob-consumer/src/test/resources/iob-output/1.iob b/jcore-iob-consumer/src/test/resources/iob-output/1.iob
deleted file mode 100644
index 5d171bb3b..000000000
--- a/jcore-iob-consumer/src/test/resources/iob-output/1.iob
+++ /dev/null
@@ -1,4 +0,0 @@
-BRCA	NN	B-Gene
-influences	VBZ	O
-cancer	NN	O
-.	.	O
diff --git a/pom.xml b/pom.xml
index c57f51dd7..f7f2eabfc 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,468 +1,587 @@
 <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
-        
+          
+  
   
   
   <modelVersion>4.0.0</modelVersion>
-        
+          
+  
   
   
   <parent>
-                
+                    
+    
     
     
     <groupId>de.julielab</groupId>
-                
+                    
+    
     
     
     <artifactId>jcore-parent</artifactId>
-                
+                    
+    
     
     
     <version>2.5.2-SNAPSHOT</version>
-              
+                  
+  
   
   
   </parent>
-        
+          
+  
   
   
   <artifactId>jcore-base</artifactId>
-        
+          
+  
   
   
   <packaging>pom</packaging>
-        
+          
+  
   
   
   <name>JCoRe Base</name>
-        
+          
+  
   
   
   <description>The POM for the JCoRe Base projects.</description>
-        
+          
+  
   
   
   <version>2.6.0-SNAPSHOT</version>
-        
+          
+  
   
   
   <organization>
-                
+                    
+    
     
     
     <name>JULIE Lab, Germany</name>
-                
+                    
+    
     
     
     <url>http://www.julielab.de</url>
-              
+                  
+  
   
   
   </organization>
-        
+          
+  
   
   
   <licenses>
-                
+                    
+    
     
     
     <license>
-                        
+                              
+      
       
       
       <name>BSD-2-Clause</name>
-                        
+                              
+      
       
       
       <url>https://opensource.org/licenses/BSD-2-Clause</url>
-                      
+                            
+    
     
     
     </license>
-              
+                  
+  
   
   
   </licenses>
-        
+          
+  
   
   
   <url>https://github.com/JULIELab/jcore-base</url>
-        
+          
+  
   
   
   <dependencies>
-                
+                    
+    
     
     
     <dependency>
-                        
+                              
+      
       
       
       <groupId>org.apache.uima</groupId>
-                        
+                              
+      
       
       
       <artifactId>uimaj-core</artifactId>
-                        
+                              
+      
       
       
       <version>${uima-version}</version>
-                      
+                            
+    
     
     
     </dependency>
-                
+                    
+    
     
     
     <dependency>
-                        
+                              
+      
       
       
       <groupId>org.apache.uima</groupId>
-                        
+                              
+      
       
       
       <artifactId>uimafit-core</artifactId>
-                        
+                              
+      
       
       
       <version>${uimafit-version}</version>
-                      
+                            
+    
     
     
     </dependency>
-              
+                  
+  
   
   
   </dependencies>
-        
+          
+  
   
   
   <modules>
-                
+                    
+    
     
     
     <module>jcore-annotation-adder-ae</module>
-                
+                    
+    
     
     
     <module>jcore-ace-reader</module>
-                
+                    
+    
     
     
     <module>jcore-acronym-ae</module>
-                
+                    
+    
     
     
     <module>jcore-acronym-writer</module>
-                
+                    
+    
     
     
     <module>jcore-banner-ae</module>
-                
+                    
+    
     
     
     <module>jcore-bc2gm-reader</module>
-                
+                    
+    
     
     
     <module>jcore-bc2gmformat-writer</module>
-                
+                    
+    
     
     
     <module>jcore-biolemmatizer-ae</module>
-                
+                    
+    
     
     
     <module>jcore-bionlpformat-consumer</module>
-                
+                    
+    
     
     
     <module>jcore-bionlpformat-reader</module>
-                
+                    
+    
     
     
     <module>jcore-biosem-ae</module>
-                
+                    
+    
     
     
     <module>jcore-conll-consumer</module>
-                
+                    
+    
     
     
     <module>jcore-coordination-baseline-ae</module>
-                
+                    
+    
     
     
     <module>jcore-cord19-reader</module>
-                
+                    
+    
     
     
     <module>jcore-coreference-writer</module>
-                
+                    
+    
     
     
     <module>jcore-ct-reader</module>
-                
+                    
+    
     
     
     <module>jcore-db-checkpoint-ae</module>
-                
+                    
+    
     
     
     <module>jcore-descriptor-creator</module>
-                
+                    
+    
     
     
     <module>jcore-dta-reader</module>
-                
+                    
+    
     
     
     <module>jcore-ec-code-ae</module>
-                
+                    
+    
     
     
     <module>jcore-elasticsearch-consumer</module>
-                
+                    
+    
     
     
     <module>jcore-embedding-writer</module>
-                
+                    
+    
     
     
     <module>jcore-event-flattener-ae</module>
-                
+                    
+    
     
     
     <module>jcore-feature-value-replacement-ae</module>
-                
+                    
+    
     
     
     <module>jcore-file-reader</module>
-                
+                    
+    
     
     
     <module>jcore-flair-ner-ae</module>
-                
+                    
+    
     
     
     <module>jcore-flair-token-embedding-ae</module>
-                
+                    
+    
     
     
     <module>jcore-flow-controllers</module>
-                
+                    
+    
     
     
     <module>jcore-gnp-bioc-reader</module>
-                
+                    
+    
     
     
     <module>jcore-gnp-bioc-writer</module>
-                
+                    
+    
     
     
     <module>jcore-iexml-consumer</module>
-                
+                    
+    
     
     
     <module>jcore-iexml-reader</module>
-                
+                    
+    
     
     
     <module>jcore-ign-reader</module>
-                
+                    
+    
     
     
     <module>jcore-iob-consumer</module>
-                
+                    
+    
     
     
     <module>jcore-jnet-ae</module>
-                
+                    
+    
     
     
     <module>jcore-jpos-ae</module>
-                
+                    
+    
     
     
     <module>jcore-jsbd-ae</module>
-                
+                    
+    
     
     
     <module>jcore-jtbd-ae</module>
-                
+                    
+    
     
     
     <module>jcore-julielab-entity-evaluator-consumer</module>
-                
+                    
+    
     
     
     <module>jcore-likelihood-assignment-ae</module>
-                
+                    
+    
     
     
     <module>jcore-likelihood-detection-ae</module>
-                
+                    
+    
     
     
     <module>jcore-line-multiplier</module>
-                
+                    
+    
     
     
     <module>jcore-lingpipegazetteer-ae</module>
-                
+                    
+    
     
     
     <module>jcore-lingpipe-porterstemmer-ae</module>
-                
+                    
+    
     
     
     <module>jcore-lingscope-ae</module>
-                
+                    
+    
     
     
     <module>jcore-linnaeus-species-ae</module>
-                
+                    
+    
     
     
     <module>jcore-mantra-xml-types</module>
-                
+                    
+    
     
     
     <module>jcore-medxn-ae</module>
-                
+                    
+    
     
     
     <module>jcore-msdoc-reader</module>
-                
+                    
+    
     
     
     <module>jcore-mstparser-ae</module>
-                
+                    
+    
     
     
     <module>jcore-muc7-reader</module>
-                
+                    
+    
     
     
     <module>jcore-mutationfinder-ae</module>
-                
+                    
+    
     
     
     <module>jcore-neo4j-relations-consumer</module>
-                
+                    
+    
     
     
     <module>jcore-opennlp-chunk-ae</module>
-                
+                    
+    
     
     
     <module>jcore-opennlp-parser-ae</module>
-                
+                    
+    
     
     
     <module>jcore-opennlp-postag-ae</module>
-                
+                    
+    
     
     
     <module>jcore-opennlp-sentence-ae</module>
-                
+                    
+    
     
     
     <module>jcore-opennlp-token-ae</module>
-                
+                    
+    
     
     
     <module>jcore-ppd-writer</module>
-                
+                    
+    
     
     
     <module>jcore-pmc-reader</module>
-                
+                    
+    
     
     
     <module>jcore-pubtator-reader</module>
-                
+                    
+    
     
     
     <module>jcore-stanford-lemmatizer-ae</module>
-                
+                    
+    
     
     
     <module>jcore-topic-indexing-ae</module>
-                
+                    
+    
     
     
     <module>jcore-topics-writer</module>
-                
+                    
+    
     
     
     <module>jcore-txt-consumer</module>
-                
+                    
+    
     
     
     <module>jcore-types</module>
-                
+                    
+    
     
     
     <module>jcore-utilities</module>
-                
+                    
+    
     
     
     <module>jcore-xml-mapper</module>
-                
+                    
+    
     
     
     <module>jcore-xml-reader</module>
-                
+                    
+    
     
     
     <module>jcore-xmi-reader</module>
-                
+                    
+    
     
     
     <module>jcore-xmi-writer</module>
-                
+                    
+    
     
     
     <module>jedis-parent</module>
-                
+                    
+    
     
     
     <module>jcore-jedis-integration-tests</module>
-                
+                    
+    
     
     
     <module>jcore-mmax2-reader</module>
-          
+              
+    
     
     <module>jcore-nlmgene-reader</module>
+          
+    
+    <module>jcore-gnormplus-ae</module>
       
   </modules>
-        
+          
+  
   
   
   <scm>
-                
+                    
+    
     
     
     <connection>scm:git:https://github.com/JULIELab/jcore-base
         </connection>
-                
+                    
+    
     
     
     <developerConnection>scm:git:https://github.com/JULIELab/jcore-base</developerConnection>
-                
+                    
+    
     
     
     <url>scm:git:https://github.com/JULIELab/jcore-base</url>
-              
+                  
+  
   
   
   </scm>
-      
+        
+
 
 
 </project>

From 7c31b22758082a37ca30577aa38415ea3b85927f Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 24 Aug 2022 17:18:18 +0200
Subject: [PATCH 234/269] Resolves #147.

I tested the evaluation on NLM Gene test of 100 documents with 10 threads. No errors, evaluation results were exactly the same as before. Thus, we can now concurrently process with GNP.
---
 .../ae/gnormplus/GNormPlusAnnotator.java      | 34 +++++++++++--------
 .../GNormPlusFormatMultiplierReader.java      |  2 +-
 .../reader/desc/jcore-bnp-bioc-multiplier.xml |  2 +-
 3 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/ae/gnormplus/GNormPlusAnnotator.java b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/ae/gnormplus/GNormPlusAnnotator.java
index 6c95ecc5c..5cd3f1b2d 100644
--- a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/ae/gnormplus/GNormPlusAnnotator.java
+++ b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/ae/gnormplus/GNormPlusAnnotator.java
@@ -58,18 +58,22 @@ public class GNormPlusAnnotator extends JCasAnnotator_ImplBase {
     public void initialize(final UimaContext aContext) throws ResourceInitializationException {
         addGenes = (boolean) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_ADD_GENES)).orElse(false);
         geneTypeName = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_GENE_TYPE_NAME)).orElse(Gene.class.getCanonicalName());
-        setupFile = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_GNP_SETUP_FILE)).orElse("/de/julielab/jcore/ae/gnormplus/config/setup_default.txt");
+        setupFile = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_GNP_SETUP_FILE)).orElse("/de/julielab/jcore/ae/gnormplus/config/setup_do_ner.txt");
         focusSpecies = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_FOCUS_SPECIES)).orElse("");
         outputDirectory = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_OUTPUT_DIR)).orElse("");
 
         synchronized (GNormPlus.class) {
-            try {
-                final InputStream setupFileStream = FileUtilities.findResource(setupFile);
-                GNormPlus.loadConfiguration(setupFileStream, focusSpecies);
-                GNormPlus.loadResources(focusSpecies, System.currentTimeMillis());
-            } catch (IOException e) {
-                log.error("Could not find resource {}", setupFile);
-                throw new ResourceInitializationException(e);
+            if (!GNormPlus.initialized) {
+                try {
+                    final InputStream setupFileStream = FileUtilities.findResource(setupFile);
+                    if (setupFileStream == null)
+                        throw new IOException("Could not find resource as file or classpath resource " + setupFile);
+                    GNormPlus.loadConfiguration(setupFileStream, focusSpecies);
+                    GNormPlus.loadResources(focusSpecies, System.currentTimeMillis());
+                } catch (IOException e) {
+                    log.error("Could not find resource {}", setupFile);
+                    throw new ResourceInitializationException(e);
+                }
             }
         }
         try {
@@ -122,14 +126,14 @@ public void process(final JCas aJCas) throws AnalysisEngineProcessException {
             log.error("Could not read GNormPlus output file {}");
             throw new AnalysisEngineProcessException(e);
         }
+//        try {
+//            Files.delete(filePath);
+//        } catch (IOException e) {
+//            log.error("Could not delete temporary file {}", filePath);
+//            throw new AnalysisEngineProcessException(e);
+//        }
         try {
-            Files.delete(filePath);
-        } catch (IOException e) {
-            log.error("Could not delete temporary file {}", filePath);
-            throw new AnalysisEngineProcessException(e);
-        }
-        try {
-            if (!outputDirectory.isBlank())
+            if (outputDirectory.isBlank() && Files.exists(outputFilePath))
                 Files.delete(outputFilePath);
         } catch (IOException e) {
             log.error("Could not delete temporary file {}", outputFilePath);
diff --git a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/GNormPlusFormatMultiplierReader.java b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/GNormPlusFormatMultiplierReader.java
index 019437c25..40c706594 100644
--- a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/GNormPlusFormatMultiplierReader.java
+++ b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/GNormPlusFormatMultiplierReader.java
@@ -55,7 +55,7 @@ public void initialize(UimaContext context) throws ResourceInitializationExcepti
                 pathStream = Files.walk(inputPath, FileVisitOption.FOLLOW_LINKS);
             else
                 pathStream = Files.list(inputPath);
-            pathStream = pathStream.filter(p -> p.toString().endsWith(".xml"));
+            pathStream = pathStream.filter(p -> p.toString().toLowerCase().endsWith(".xml"));
             fileIterator = pathStream.iterator();
         } catch (IOException e) {
             log.error("Could not read the files of inputPath {}", inputPathString, e);
diff --git a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml
index 15f62b47b..a742ca577 100644
--- a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml
+++ b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml
@@ -46,7 +46,7 @@
         <operationalProperties>
             <modifiesCas>true</modifiesCas>
             <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
-            <outputsNewCASes>false</outputsNewCASes>
+            <outputsNewCASes>true</outputsNewCASes>
         </operationalProperties>
     </analysisEngineMetaData>
 </analysisEngineDescription>
\ No newline at end of file

From 96454d6a0096f7e336758b5369d421f2b3aba0df Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 25 Aug 2022 14:33:45 +0200
Subject: [PATCH 235/269] Add multiplier variants of the GNormPlusAnnotator.

This allows processing whole collections of documents with GNormPlus instead of processing each file individually.
---
 jcore-gnormplus-ae/README.md                  |   2 +-
 jcore-gnormplus-ae/component.meta             |  13 +-
 jcore-gnormplus-ae/pom.xml                    |   5 +
 .../GNormPlusAnnotator.java                   |  68 ++++------
 .../jcore/ae/gnp/GNormPlusProcessing.java     |  63 +++++++++
 .../gnp/GNormPlusBioCMultiplier.java          |  84 ++++++++++++
 .../multiplier/gnp/GNormPlusDBMultiplier.java | 124 ++++++++++++++++++
 .../gnp/GNormPlusMultiplierLogic.java         | 103 +++++++++++++++
 .../config/setup_do_ner.txt                   |   0
 .../desc/jcore-gnormplus-ae.xml               |   7 +-
 .../desc/jcore-gnormplus-bioc-multiplier.xml  |  92 +++++++++++++
 .../desc/jcore-gnormplus-db-multiplier.xml    |  98 ++++++++++++++
 .../GNormPlusAnnotatorTest.java               |   2 +-
 .../jcore/reader/BioCCasPopulator.java        |   4 +
 14 files changed, 614 insertions(+), 51 deletions(-)
 rename jcore-gnormplus-ae/src/main/java/de/julielab/jcore/ae/{gnormplus => gnp}/GNormPlusAnnotator.java (59%)
 create mode 100644 jcore-gnormplus-ae/src/main/java/de/julielab/jcore/ae/gnp/GNormPlusProcessing.java
 create mode 100644 jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusBioCMultiplier.java
 create mode 100644 jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusDBMultiplier.java
 create mode 100644 jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java
 rename jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/ae/{gnormplus => gnp}/config/setup_do_ner.txt (100%)
 rename jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/ae/{gnormplus => gnp}/desc/jcore-gnormplus-ae.xml (93%)
 create mode 100644 jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-bioc-multiplier.xml
 create mode 100644 jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-db-multiplier.xml
 rename jcore-gnormplus-ae/src/test/java/de/julielab/jcore/ae/{gnormplus => gnp}/GNormPlusAnnotatorTest.java (89%)

diff --git a/jcore-gnormplus-ae/README.md b/jcore-gnormplus-ae/README.md
index 3d5fbc90a..7e8752172 100644
--- a/jcore-gnormplus-ae/README.md
+++ b/jcore-gnormplus-ae/README.md
@@ -2,7 +2,7 @@
 
 **Descriptor Path**:
 ```
-de.julielab.jcore.ae.gnormplus.desc.jcore-gnormplus-ae
+de.julielab.jcore.ae.gnp.desc.jcore-gnormplus-ae
 ```
 
 Wrapper for the JULIE Lab variant of the GNormPlus gene ID mapper.
diff --git a/jcore-gnormplus-ae/component.meta b/jcore-gnormplus-ae/component.meta
index fddbdcc18..ff1cf9b35 100644
--- a/jcore-gnormplus-ae/component.meta
+++ b/jcore-gnormplus-ae/component.meta
@@ -1,12 +1,21 @@
 {
     "categories": [
-        "ae"
+        "ae",
+        "multiplier"
     ],
     "description": "Wrapper for the JULIE Lab variant of the GNormPlus gene ID mapper.",
     "descriptors": [
+        {
+            "category": "multiplier",
+            "location": "de.julielab.jcore.multiplier.gnp.desc.jcore-gnormplus-bioc-multiplier"
+        },
+        {
+            "category": "multiplier",
+            "location": "de.julielab.jcore.multiplier.gnp.desc.jcore-gnormplus-db-multiplier"
+        },
         {
             "category": "ae",
-            "location": "de.julielab.jcore.ae.gnormplus.desc.jcore-gnormplus-ae"
+            "location": "de.julielab.jcore.ae.gnp.desc.jcore-gnormplus-ae"
         }
     ],
     "exposable": true,
diff --git a/jcore-gnormplus-ae/pom.xml b/jcore-gnormplus-ae/pom.xml
index 5ede30204..0c3f302dc 100644
--- a/jcore-gnormplus-ae/pom.xml
+++ b/jcore-gnormplus-ae/pom.xml
@@ -26,6 +26,11 @@
             <artifactId>jcore-gnp-bioc-reader</artifactId>
             <version>${project.parent.version}</version>
         </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-xmi-db-reader</artifactId>
+            <version>${project.parent.version}</version>
+        </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>julielab-gnormplus</artifactId>
diff --git a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/ae/gnormplus/GNormPlusAnnotator.java b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/ae/gnp/GNormPlusAnnotator.java
similarity index 59%
rename from jcore-gnormplus-ae/src/main/java/de/julielab/jcore/ae/gnormplus/GNormPlusAnnotator.java
rename to jcore-gnormplus-ae/src/main/java/de/julielab/jcore/ae/gnp/GNormPlusAnnotator.java
index 5cd3f1b2d..10d01c157 100644
--- a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/ae/gnormplus/GNormPlusAnnotator.java
+++ b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/ae/gnp/GNormPlusAnnotator.java
@@ -1,10 +1,7 @@
-package de.julielab.jcore.ae.gnormplus;
+package de.julielab.jcore.ae.gnp;
 
-import GNormPluslib.GNormPlus;
 import com.pengyifan.bioc.BioCCollection;
 import com.pengyifan.bioc.BioCDocument;
-import com.pengyifan.bioc.io.BioCCollectionWriter;
-import de.julielab.java.utilities.FileUtilities;
 import de.julielab.jcore.consumer.gnp.BioCDocumentPopulator;
 import de.julielab.jcore.reader.BioCCasPopulator;
 import de.julielab.jcore.types.Gene;
@@ -21,31 +18,34 @@
 
 import javax.xml.stream.XMLStreamException;
 import java.io.IOException;
-import java.io.InputStream;
 import java.nio.file.Files;
 import java.nio.file.Path;
-import java.util.Date;
 import java.util.Optional;
 
 @ResourceMetaData(name = "JCoRe GNormPlus Annotator", description = "Wrapper for the JULIE Lab variant of the GNormPlus gene ID mapper.", vendor = "JULIE Lab Jena, Germany")
-@TypeCapability(inputs = {}, outputs = {})
+@TypeCapability(inputs = {}, outputs = {"de.julielab.jcore.types.ConceptMention", "de.julielab.jcore.types.Organism"})
 public class GNormPlusAnnotator extends JCasAnnotator_ImplBase {
 
     public static final String PARAM_ADD_GENES = "AddGenes";
+    public static final String DESC_GENE_TYPE_NAME = "The UIMA type denoting gene annotations that should be written into the BioC format when the " + PARAM_ADD_GENES + " parameter is set to true.";
     public static final String PARAM_GENE_TYPE_NAME = "GeneTypeName";
+    public static final String DESC_ADD_GENES = "If set to true, all Gene annotations in the CAS will be added to the BioC documents. The default type used is de.julielab.jcore.types.Gene. This can be changed with the " + PARAM_GENE_TYPE_NAME + " parameter.";
     public static final String PARAM_GNP_SETUP_FILE = "GNormPlusSetupFile";
     public static final String PARAM_FOCUS_SPECIES = "FocusSpecies";
     public static final String PARAM_OUTPUT_DIR = "OutputDirectory";
+    public static final String DESC_GNP_SETUP_FILE = "File path or class path resource path to the setup.txt file for GNormPlus. If not specified, a default setup file is loaded that expects the Dictionary/ directory directly under the working directory, performs gene recognition with the CRF and thus expects the GNormPlus CRF directory directly under the working directory and maps the found genes to NCBI gene IDs for all organisms.";
+    public static final String DESC_FOCUS_SPECIES = "If given, all gene mentions are assigned to this NCBI taxonomy ID, i.e. species recognition is omitted.";
+    public static final String DESC_OUTPUT_DIR = "Optional. If specified, the GNormPlus output files in BioC format will be saved to the given directory. In this way, this component can be used directly as a BioC XML writer through the GNormPlus algorithm.";
     private final static Logger log = LoggerFactory.getLogger(GNormPlusAnnotator.class);
-    @ConfigurationParameter(name = PARAM_ADD_GENES, mandatory = false, defaultValue = "false", description = "If set to true, all Gene annotations in the CAS will be added to the BioC documents. The default type used is de.julielab.jcore.types.Gene. This can be changed with the " + PARAM_GENE_TYPE_NAME + " parameter.")
+    @ConfigurationParameter(name = PARAM_ADD_GENES, mandatory = false, defaultValue = "false", description = DESC_ADD_GENES)
     private boolean addGenes;
-    @ConfigurationParameter(name = PARAM_GENE_TYPE_NAME, mandatory = false, defaultValue = "de.julielab.jcore.types.Gene", description = "The UIMA type denoting gene annotations that should be written into the BioC format when the " + PARAM_ADD_GENES + " parameter is set to true.")
+    @ConfigurationParameter(name = PARAM_GENE_TYPE_NAME, mandatory = false, defaultValue = "de.julielab.jcore.types.Gene", description = DESC_GENE_TYPE_NAME)
     private String geneTypeName;
-    @ConfigurationParameter(name = PARAM_GNP_SETUP_FILE, mandatory = false, description = "File path or class path resource path to the setup.txt file for GNormPlus. If not specified, a default setup file is loaded that expects the Dictionary/ directory directly under the working directory, performs gene recognition with the CRF and thus expects the GNormPlus CRF directory directly under the working directory and maps the found genes to NCBI gene IDs for all organisms.")
+    @ConfigurationParameter(name = PARAM_GNP_SETUP_FILE, mandatory = false, description = DESC_GNP_SETUP_FILE)
     private String setupFile;
-    @ConfigurationParameter(name = PARAM_FOCUS_SPECIES, mandatory = false, description = "If given, all gene mentions are assigned to this NCBI taxonomy ID, i.e. species recognition is omitted.")
+    @ConfigurationParameter(name = PARAM_FOCUS_SPECIES, mandatory = false, description = DESC_FOCUS_SPECIES)
     private String focusSpecies;
-    @ConfigurationParameter(name = PARAM_OUTPUT_DIR, mandatory = false, description = "Optional. If specified, the GNormPlus output files in BioC format will be saved to the given directory. In this way, this component can be used directly as a BioC XML writer through the GNormPlus algorithm.")
+    @ConfigurationParameter(name = PARAM_OUTPUT_DIR, mandatory = false, description = DESC_OUTPUT_DIR)
     private String outputDirectory;
 
     private BioCDocumentPopulator bioCDocumentPopulator;
@@ -58,23 +58,15 @@ public class GNormPlusAnnotator extends JCasAnnotator_ImplBase {
     public void initialize(final UimaContext aContext) throws ResourceInitializationException {
         addGenes = (boolean) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_ADD_GENES)).orElse(false);
         geneTypeName = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_GENE_TYPE_NAME)).orElse(Gene.class.getCanonicalName());
-        setupFile = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_GNP_SETUP_FILE)).orElse("/de/julielab/jcore/ae/gnormplus/config/setup_do_ner.txt");
+        setupFile = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_GNP_SETUP_FILE)).orElse("/de/julielab/jcore/ae/gnp/config/setup_do_ner.txt");
         focusSpecies = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_FOCUS_SPECIES)).orElse("");
         outputDirectory = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_OUTPUT_DIR)).orElse("");
 
-        synchronized (GNormPlus.class) {
-            if (!GNormPlus.initialized) {
-                try {
-                    final InputStream setupFileStream = FileUtilities.findResource(setupFile);
-                    if (setupFileStream == null)
-                        throw new IOException("Could not find resource as file or classpath resource " + setupFile);
-                    GNormPlus.loadConfiguration(setupFileStream, focusSpecies);
-                    GNormPlus.loadResources(focusSpecies, System.currentTimeMillis());
-                } catch (IOException e) {
-                    log.error("Could not find resource {}", setupFile);
-                    throw new ResourceInitializationException(e);
-                }
-            }
+        try {
+            GNormPlusProcessing.initializeGNormPlus(setupFile, focusSpecies);
+        } catch (IOException e) {
+            log.error("Could not find resource {}", setupFile);
+            throw new ResourceInitializationException(e);
         }
         try {
             bioCDocumentPopulator = new BioCDocumentPopulator(addGenes, geneTypeName);
@@ -99,30 +91,15 @@ public void initialize(final UimaContext aContext) throws ResourceInitialization
     @Override
     public void process(final JCas aJCas) throws AnalysisEngineProcessException {
         final BioCDocument bioCDocument = bioCDocumentPopulator.populate(aJCas);
-        final BioCCollection bioCCollection = new BioCCollection();
-        bioCCollection.setDate(new Date().toString());
-        bioCCollection.setEncoding("UTF-8");
-        bioCCollection.setKey("BioC.key");
-        bioCCollection.setSource("JULIE Lab GNormPlus");
+        BioCCollection bioCCollection = GNormPlusProcessing.createEmptyJulieLabBioCCollection();
         bioCCollection.addDocument(bioCDocument);
-        final Path filePath = Path.of("tmp", bioCDocument.getID() + ".xml");
-        final Path outputFilePath = Path.of(outputDirectory.isBlank() ? "tmp" : outputDirectory, bioCDocument.getID() + "processed.xml");
-        try {
-            if (!Files.exists(filePath.getParent()))
-                Files.createDirectory(filePath.getParent());
-            try (BioCCollectionWriter w = new BioCCollectionWriter(filePath)) {
-                w.writeCollection(bioCCollection);
-            }
-            GNormPlus.processFile(filePath.toString(), filePath.getFileName().toString(), outputFilePath.toString(), System.currentTimeMillis(), "Test");
-        } catch (IOException | XMLStreamException e) {
-            log.error("Could not process document {}", bioCDocument.getID());
-            throw new AnalysisEngineProcessException(e);
-        }
+        String outputDirectory = this.outputDirectory;
+        final Path outputFilePath = GNormPlusProcessing.processWithGNormPlus(bioCCollection, outputDirectory);
 
         try {
             final BioCCasPopulator bioCCasPopulator = new BioCCasPopulator(outputFilePath);
             bioCCasPopulator.populateWithNextDocument(aJCas, true);
-        } catch (XMLStreamException|IOException e) {
+        } catch (XMLStreamException | IOException e) {
             log.error("Could not read GNormPlus output file {}");
             throw new AnalysisEngineProcessException(e);
         }
@@ -142,4 +119,5 @@ public void process(final JCas aJCas) throws AnalysisEngineProcessException {
 
     }
 
+
 }
diff --git a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/ae/gnp/GNormPlusProcessing.java b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/ae/gnp/GNormPlusProcessing.java
new file mode 100644
index 000000000..79e7704d0
--- /dev/null
+++ b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/ae/gnp/GNormPlusProcessing.java
@@ -0,0 +1,63 @@
+package de.julielab.jcore.ae.gnp;
+
+import GNormPluslib.GNormPlus;
+import com.pengyifan.bioc.BioCCollection;
+import com.pengyifan.bioc.io.BioCCollectionWriter;
+import de.julielab.java.utilities.FileUtilities;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.xml.stream.XMLStreamException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Date;
+
+public class GNormPlusProcessing {
+    private final static Logger log = LoggerFactory.getLogger(GNormPlusProcessing.class);
+
+    public static synchronized void initializeGNormPlus(String setupFileResourcePath, String focusSpecies) throws IOException {
+        if (!GNormPlus.initialized) { // skip loading when GNormPlus reports it is already initialized
+            final InputStream setupFileStream = FileUtilities.findResource(setupFileResourcePath);
+            if (setupFileStream == null)
+                throw new IOException("Could not find resource as file or classpath resource " + setupFileResourcePath);
+            GNormPlus.loadConfiguration(setupFileStream, focusSpecies);
+            GNormPlus.loadResources(focusSpecies, System.currentTimeMillis());
+        }
+    }
+
+    public static BioCCollection createEmptyJulieLabBioCCollection() {
+        final BioCCollection bioCCollection = new BioCCollection();
+        bioCCollection.setDate(new Date().toString());
+        bioCCollection.setEncoding("UTF-8");
+        bioCCollection.setKey("BioC.key");
+        bioCCollection.setSource("JULIE Lab GNormPlus");
+        return bioCCollection;
+    }
+
+    /** Writes the collection to a BioC XML file in the relative directory <code>tmp</code> and runs GNormPlus on that file.
+     * @param bioCCollection  The documents to process. Must contain at least one document because the file names are derived from the ID of the first document.
+     * @param outputDirectory Directory for the GNormPlus output file. If blank, the output file is written to <code>tmp</code> as well.
+     * @return The path of the GNormPlus output file.
+     * @throws AnalysisEngineProcessException If writing the input file or running GNormPlus fails with an I/O or XML stream error.
+     */
+    public static Path processWithGNormPlus(BioCCollection bioCCollection, String outputDirectory) throws AnalysisEngineProcessException {
+        String collectionId = "collection_including_" + bioCCollection.getDocument(0).getID();
+        final Path filePath = Path.of("tmp", collectionId + ".xml");
+        final Path outputFilePath = Path.of(outputDirectory.isBlank() ? "tmp" : outputDirectory, collectionId + "processed.xml");
+        try {
+            if (!Files.exists(filePath.getParent()))
+                Files.createDirectories(filePath.getParent()); // unlike createDirectory, does not fail when a concurrent caller created the directory first
+            try (BioCCollectionWriter w = new BioCCollectionWriter(filePath)) {
+                w.writeCollection(bioCCollection);
+            }
+            GNormPlus.processFile(filePath.toString(), filePath.getFileName().toString(), outputFilePath.toString(), System.currentTimeMillis(), "Test");
+        } catch (IOException | XMLStreamException e) {
+            log.error("Could not process document {}", collectionId);
+            throw new AnalysisEngineProcessException(e);
+        }
+        return outputFilePath;
+    }
+}
diff --git a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusBioCMultiplier.java b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusBioCMultiplier.java
new file mode 100644
index 000000000..6c333447e
--- /dev/null
+++ b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusBioCMultiplier.java
@@ -0,0 +1,84 @@
+package de.julielab.jcore.multiplier.gnp;
+
+import de.julielab.jcore.ae.gnp.GNormPlusAnnotator;
+import de.julielab.jcore.consumer.gnp.BioCDocumentPopulator;
+import de.julielab.jcore.reader.GNormPlusFormatMultiplier;
+import de.julielab.jcore.types.Gene;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.AbstractCas;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.descriptor.ResourceMetaData;
+import org.apache.uima.fit.descriptor.TypeCapability;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.Optional;
+
+@ResourceMetaData(name = "JCoRe GNormPlus BioC Multiplier", description = "A CAS multiplier to be used with the GNormPlus BioC Format multiplier reader. It wraps the JULIE Lab variant of the GNormPlus gene ID mapper. It is a multiplier because this enables batch-processing of documents with GNormPlus which makes the processing more efficient.", vendor = "JULIE Lab Jena, Germany")
+@TypeCapability(inputs = {}, outputs = {"de.julielab.jcore.types.ConceptMention", "de.julielab.jcore.types.Organism"})
+public class GNormPlusBioCMultiplier extends GNormPlusFormatMultiplier {
+    public static final String PARAM_ADD_GENES = GNormPlusAnnotator.PARAM_ADD_GENES;
+    public static final String PARAM_GENE_TYPE_NAME = GNormPlusAnnotator.PARAM_GENE_TYPE_NAME;
+    public static final String PARAM_OUTPUT_DIR = GNormPlusAnnotator.PARAM_OUTPUT_DIR;
+    public static final String PARAM_GNP_SETUP_FILE = GNormPlusAnnotator.PARAM_GNP_SETUP_FILE;
+    private final static Logger log = LoggerFactory.getLogger(GNormPlusBioCMultiplier.class);
+    @ConfigurationParameter(name = PARAM_ADD_GENES, mandatory = false, defaultValue = "false", description = GNormPlusAnnotator.DESC_ADD_GENES)
+    private boolean addGenes;
+    @ConfigurationParameter(name = PARAM_GNP_SETUP_FILE, mandatory = false, description = GNormPlusAnnotator.DESC_GNP_SETUP_FILE)
+    private String setupFile; // NOTE(review): never read in this class; presumably GNormPlusMultiplierLogic reads the parameter from the UimaContext - confirm
+    @ConfigurationParameter(name = PARAM_GENE_TYPE_NAME, mandatory = false, defaultValue = "de.julielab.jcore.types.Gene", description = GNormPlusAnnotator.DESC_GENE_TYPE_NAME)
+    private String geneTypeName;
+    @ConfigurationParameter(name = PARAM_OUTPUT_DIR, mandatory = false, description = GNormPlusAnnotator.DESC_OUTPUT_DIR)
+    private String outputDirectory; // NOTE(review): never read in this class; presumably GNormPlusMultiplierLogic reads the parameter from the UimaContext - confirm
+
+    private BioCDocumentPopulator bioCDocumentPopulator;
+
+    private GNormPlusMultiplierLogic multiplierLogic;
+
+    @Override
+    public void initialize(UimaContext aContext) throws ResourceInitializationException {
+        super.initialize(aContext);
+        addGenes = (boolean) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_ADD_GENES)).orElse(false);
+        geneTypeName = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_GENE_TYPE_NAME)).orElse(Gene.class.getCanonicalName());
+        try {
+            bioCDocumentPopulator = new BioCDocumentPopulator(addGenes, geneTypeName);
+        } catch (ClassNotFoundException e) {
+            log.error("Gene annotation class {} could not be found.", geneTypeName, e);
+            throw new ResourceInitializationException(e);
+        }
+        try {
+            multiplierLogic = new GNormPlusMultiplierLogic(aContext, bioCDocumentPopulator, () -> {
+                try {
+                    return super.hasNext();
+                } catch (AnalysisEngineProcessException e) {
+                    log.error("Error when calling hasNext() of the base multiplier");
+                    throw new RuntimeException(e);
+                }
+            }, () -> {
+                try {
+                    return (JCas) super.next();
+                } catch (AnalysisEngineProcessException e) {
+                    log.error("Error when calling next() of the base multiplier.");
+                    throw new RuntimeException(e);
+                }
+            }, () -> getEmptyJCas());
+        } catch (IOException e) {
+            log.error("Could not initialize GNormPlus", e);
+            throw new ResourceInitializationException(e);
+        }
+    }
+
+    @Override
+    public boolean hasNext() {
+        return multiplierLogic.hasNext();
+    }
+
+    @Override
+    public AbstractCas next() throws AnalysisEngineProcessException {
+        return multiplierLogic.next();
+    }
+}
diff --git a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusDBMultiplier.java b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusDBMultiplier.java
new file mode 100644
index 000000000..bccf44903
--- /dev/null
+++ b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusDBMultiplier.java
@@ -0,0 +1,124 @@
+package de.julielab.jcore.multiplier.gnp;
+
+import com.pengyifan.bioc.BioCCollection;
+import de.julielab.jcore.ae.gnp.GNormPlusAnnotator;
+import de.julielab.jcore.consumer.gnp.BioCDocumentPopulator;
+import de.julielab.jcore.reader.xmi.XmiDBMultiplier;
+import de.julielab.jcore.types.Gene;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.AbstractCas;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.descriptor.ResourceMetaData;
+import org.apache.uima.fit.descriptor.TypeCapability;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.Optional;
+
+@ResourceMetaData(name = "JCoRe GNormPlus Database Multiplier", description = "A CAS multiplier to be used with the DB XMI multiplier reader. It wraps the JULIE Lab variant of the GNormPlus gene ID mapper. It is a multiplier because this enables batch-processing of documents with GNormPlus which makes the processing more efficient.", vendor = "JULIE Lab Jena, Germany")
+@TypeCapability(inputs = {}, outputs = {"de.julielab.jcore.types.ConceptMention", "de.julielab.jcore.types.Organism"})
+public class GNormPlusDBMultiplier extends XmiDBMultiplier {
+    public static final String PARAM_ADD_GENES = GNormPlusAnnotator.PARAM_ADD_GENES;
+    public static final String PARAM_GENE_TYPE_NAME = GNormPlusAnnotator.PARAM_GENE_TYPE_NAME;
+    public static final String PARAM_OUTPUT_DIR = GNormPlusAnnotator.PARAM_OUTPUT_DIR;
+    public static final String PARAM_GNP_SETUP_FILE = GNormPlusAnnotator.PARAM_GNP_SETUP_FILE;
+    private final static Logger log = LoggerFactory.getLogger(GNormPlusDBMultiplier.class);
+    @ConfigurationParameter(name = PARAM_ADD_GENES, mandatory = false, defaultValue = "false", description = GNormPlusAnnotator.DESC_ADD_GENES)
+    private boolean addGenes;
+    @ConfigurationParameter(name = PARAM_GNP_SETUP_FILE, mandatory = false, description = GNormPlusAnnotator.DESC_GNP_SETUP_FILE)
+    private String setupFile;
+    @ConfigurationParameter(name = PARAM_GENE_TYPE_NAME, mandatory = false, defaultValue = "de.julielab.jcore.types.Gene", description = GNormPlusAnnotator.DESC_GENE_TYPE_NAME)
+    private String geneTypeName;
+    @ConfigurationParameter(name = PARAM_OUTPUT_DIR, mandatory = false, description = GNormPlusAnnotator.DESC_OUTPUT_DIR)
+    private String outputDirectory;
+
+    private BioCDocumentPopulator bioCDocumentPopulator;
+//    private BioCCasPopulator bioCCasPopulator;
+
+    private BioCCollection currentGNormPlusProcessedCollection;
+//    private int currentCollectionIndex;
+//    private List<byte[]> cachedCasData;
+
+    private GNormPlusMultiplierLogic multiplierLogic;
+
+    @Override
+    public void initialize(UimaContext aContext) throws ResourceInitializationException {
+        super.initialize(aContext);
+        addGenes = (boolean) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_ADD_GENES)).orElse(false);
+        geneTypeName = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_GENE_TYPE_NAME)).orElse(Gene.class.getCanonicalName());
+        try {
+            bioCDocumentPopulator = new BioCDocumentPopulator(addGenes, geneTypeName);
+        } catch (ClassNotFoundException e) {
+            log.error("Gene annotation class {} could not be found.", geneTypeName, e);
+            throw new ResourceInitializationException(e);
+        }
+        try {
+            multiplierLogic = new GNormPlusMultiplierLogic(aContext, bioCDocumentPopulator, () -> super.hasNext(), () -> {
+                try {
+                    return (JCas) super.next();
+                } catch (AnalysisEngineProcessException e) {
+                    log.error("Error when calling next() of the base multiplier.");
+                    throw new RuntimeException(e);
+                }
+            }, () -> getEmptyJCas());
+        } catch (IOException e) {
+            log.error("Could not initialize GNormPlus", e);
+            throw new ResourceInitializationException(e);
+        }
+    }
+
+    @Override
+    public boolean hasNext() {
+//        return currentCollectionIndex < currentGNormPlusProcessedCollection.getDocmentCount() || super.hasNext();
+        return multiplierLogic.hasNext();
+    }
+
+    @Override
+    public AbstractCas next() throws AnalysisEngineProcessException {
+        return multiplierLogic.next();
+//        if (bioCCasPopulator == null || bioCCasPopulator.documentsLeftInCollection() == 0) {
+//            final BioCCollection gnormPlusInputCollection = GNormPlusProcessing.createEmptyJulieLabBioCCollection();
+//            while (super.hasNext()) {
+//                final JCas jCas = (JCas) super.next();
+//                final BioCDocument bioCDocument = bioCDocumentPopulator.populate(jCas);
+//                gnormPlusInputCollection.addDocument(bioCDocument);
+//                try {
+//                    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+//                    final GZIPOutputStream os = new GZIPOutputStream(baos);
+//                    XmiCasSerializer.serialize(jCas.getCas(), os);
+//                    cachedCasData.add(baos.toByteArray());
+//                    jCas.release();
+//                } catch (IOException | SAXException e) {
+//                    log.error("Error when serializing CAS data for caching purposes.");
+//                    throw new AnalysisEngineProcessException(e);
+//                }
+//            }
+//            currentCollectionIndex = 0;
+//            final Path outputFilePath = GNormPlusProcessing.processWithGNormPlus(gnormPlusInputCollection, outputDirectory);
+//            try {
+//                bioCCasPopulator = new BioCCasPopulator(outputFilePath);
+//            } catch (XMLStreamException | IOException e) {
+//                log.error("Could not read GNormPlus output from {}", outputFilePath);
+//                throw new AnalysisEngineProcessException(e);
+//            }
+//        }
+//        byte[] currentCasData = cachedCasData.get(currentCollectionIndex);
+//        final JCas jCas = getEmptyJCas();
+//        try {
+//            XmiCasDeserializer.deserialize(new GZIPInputStream(new ByteArrayInputStream(currentCasData)), jCas.getCas());
+//        } catch (SAXException | IOException e) {
+//            log.error("Could not deserialize cached CAS data");
+//            throw new AnalysisEngineProcessException(e);
+//        }
+//        bioCCasPopulator.populateWithNextDocument(jCas, true);
+//        bioCCasPopulator.clearDocument(currentCollectionIndex);
+//        cachedCasData.set(currentCollectionIndex, null);
+//        ++currentCollectionIndex;
+//
+//        return jCas;
+    }
+}
diff --git a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java
new file mode 100644
index 000000000..b9cfa1cd2
--- /dev/null
+++ b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java
@@ -0,0 +1,103 @@
+package de.julielab.jcore.multiplier.gnp;
+
+import com.pengyifan.bioc.BioCCollection;
+import com.pengyifan.bioc.BioCDocument;
+import de.julielab.jcore.ae.gnp.GNormPlusProcessing;
+import de.julielab.jcore.consumer.gnp.BioCDocumentPopulator;
+import de.julielab.jcore.reader.BioCCasPopulator;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.AbstractCas;
+import org.apache.uima.cas.impl.XmiCasDeserializer;
+import org.apache.uima.cas.impl.XmiCasSerializer;
+import org.apache.uima.jcas.JCas;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.SAXException;
+
+import javax.xml.stream.XMLStreamException;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+import java.util.function.Supplier;
+import java.util.zip.GZIPInputStream;
+import java.util.zip.GZIPOutputStream;
+
+import static de.julielab.jcore.ae.gnp.GNormPlusAnnotator.*;
+
+public class GNormPlusMultiplierLogic {
+    private final static Logger log = LoggerFactory.getLogger(GNormPlusMultiplierLogic.class);
+    private BioCDocumentPopulator bioCDocumentPopulator;
+    private BioCCasPopulator bioCCasPopulator;
+    private String outputDirectory;
+    private Supplier<Boolean> baseMultiplierHasNext;
+    private Supplier<JCas> baseMultiplierNext;
+    private Supplier<JCas> multiplierGetEmptyCas;
+    private int currentCollectionIndex;
+    private List<byte[]> cachedCasData;
+
+    public GNormPlusMultiplierLogic(UimaContext aContext, BioCDocumentPopulator bioCDocumentPopulator, Supplier<Boolean> baseMultiplierHasNext, Supplier<JCas> baseMultiplierNext, Supplier<JCas> multiplierGetEmptyCas) throws IOException {
+        String setupFile = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_GNP_SETUP_FILE)).orElse("/de/julielab/jcore/ae/gnp/config/setup_do_ner.txt");
+        String focusSpecies = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_FOCUS_SPECIES)).orElse("");
+        outputDirectory = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_OUTPUT_DIR)).orElse("");
+        this.bioCDocumentPopulator = bioCDocumentPopulator;
+        this.baseMultiplierHasNext = baseMultiplierHasNext;
+        this.baseMultiplierNext = baseMultiplierNext;
+        this.multiplierGetEmptyCas = multiplierGetEmptyCas;
+        cachedCasData = new ArrayList<>();
+        currentCollectionIndex = 0;
+
+        GNormPlusProcessing.initializeGNormPlus(setupFile, focusSpecies);
+    }
+
+    public AbstractCas next() throws AnalysisEngineProcessException {
+        if (bioCCasPopulator == null || bioCCasPopulator.documentsLeftInCollection() == 0) {
+            final BioCCollection gnormPlusInputCollection = GNormPlusProcessing.createEmptyJulieLabBioCCollection();
+            while (baseMultiplierHasNext.get()) {
+                final JCas jCas = baseMultiplierNext.get();
+                final BioCDocument bioCDocument = bioCDocumentPopulator.populate(jCas);
+                gnormPlusInputCollection.addDocument(bioCDocument);
+                try {
+                    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+                    final GZIPOutputStream os = new GZIPOutputStream(baos);
+                    XmiCasSerializer.serialize(jCas.getCas(), os);
+                    cachedCasData.add(baos.toByteArray());
+                    jCas.release();
+                } catch (IOException | SAXException e) {
+                    log.error("Error when serializing CAS data for caching purposes.");
+                    throw new AnalysisEngineProcessException(e);
+                }
+            }
+            currentCollectionIndex = 0;
+            final Path outputFilePath = GNormPlusProcessing.processWithGNormPlus(gnormPlusInputCollection, outputDirectory);
+            try {
+                bioCCasPopulator = new BioCCasPopulator(outputFilePath);
+            } catch (XMLStreamException | IOException e) {
+                log.error("Could not read GNormPlus output from {}", outputFilePath);
+                throw new AnalysisEngineProcessException(e);
+            }
+        }
+        byte[] currentCasData = cachedCasData.get(currentCollectionIndex);
+        final JCas jCas = multiplierGetEmptyCas.get();
+        try {
+            XmiCasDeserializer.deserialize(new GZIPInputStream(new ByteArrayInputStream(currentCasData)), jCas.getCas());
+        } catch (SAXException | IOException e) {
+            log.error("Could not deserialize cached CAS data");
+            throw new AnalysisEngineProcessException(e);
+        }
+        bioCCasPopulator.populateWithNextDocument(jCas, true);
+        bioCCasPopulator.clearDocument(currentCollectionIndex);
+        cachedCasData.set(currentCollectionIndex, null);
+        ++currentCollectionIndex;
+
+        return jCas;
+    }
+
+    public boolean hasNext() {
+        return bioCCasPopulator != null && bioCCasPopulator.documentsLeftInCollection() > 0 || baseMultiplierHasNext.get();
+    }
+}
diff --git a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/ae/gnormplus/config/setup_do_ner.txt b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/ae/gnp/config/setup_do_ner.txt
similarity index 100%
rename from jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/ae/gnormplus/config/setup_do_ner.txt
rename to jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/ae/gnp/config/setup_do_ner.txt
diff --git a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/ae/gnormplus/desc/jcore-gnormplus-ae.xml b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/ae/gnp/desc/jcore-gnormplus-ae.xml
similarity index 93%
rename from jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/ae/gnormplus/desc/jcore-gnormplus-ae.xml
rename to jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/ae/gnp/desc/jcore-gnormplus-ae.xml
index a9dae0449..9cd5e46a5 100644
--- a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/ae/gnormplus/desc/jcore-gnormplus-ae.xml
+++ b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/ae/gnp/desc/jcore-gnormplus-ae.xml
@@ -2,7 +2,7 @@
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
   <primitive>true</primitive>
-  <annotatorImplementationName>de.julielab.jcore.ae.gnormplus.GNormPlusAnnotator</annotatorImplementationName>
+  <annotatorImplementationName>de.julielab.jcore.ae.gnp.GNormPlusAnnotator</annotatorImplementationName>
   <analysisEngineMetaData>
     <name>JCoRe GNormPlus Annotator</name>
     <description>Wrapper for the JULIE Lab variant of the GNormPlus gene ID mapper.</description>
@@ -69,7 +69,10 @@
     <capabilities>
       <capability>
         <inputs/>
-        <outputs/>
+        <outputs>
+          <type>de.julielab.jcore.types.ConceptMention</type>
+          <type>de.julielab.jcore.types.Organism</type>
+        </outputs>
         <languagesSupported/>
       </capability>
     </capabilities>
diff --git a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-bioc-multiplier.xml b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-bioc-multiplier.xml
new file mode 100644
index 000000000..ab3cd4ad6
--- /dev/null
+++ b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-bioc-multiplier.xml
@@ -0,0 +1,92 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+    <primitive>true</primitive>
+    <annotatorImplementationName>de.julielab.jcore.multiplier.gnp.GNormPlusBioCMultiplier</annotatorImplementationName>
+    <analysisEngineMetaData>
+        <name>JCoRe GNormPlus BioC Multiplier</name>
+        <description>A CAS multiplier to be used with the GNormPlus BioC Format multiplier reader. It wraps the JULIE Lab variant of the GNormPlus gene ID mapper. It is a multiplier because this enables batch-processing of documents with GNormPlus which makes the processing more efficient.</description>
+        <vendor>JULIE Lab Jena, Germany</vendor>
+        <configurationParameters>
+            <configurationParameter>
+                <name>AddGenes</name>
+                <description>If set to true, all Gene annotations in the CAS will be added to the BioC documents. The default type used is de.julielab.jcore.types.Gene. This can be changed with the GeneTypeName parameter.</description>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>GNormPlusSetupFile</name>
+                <description>File path or class path resource path to the setup.txt file for GNormPlus. If not specified, a default setup file is loaded that expects the Dictionary/ directory directly under the working directory, performs gene recognition with the CRF and thus expects the GNormPlus CRF directory directly under the working directory and maps the found genes to NCBI gene IDs for all organisms.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>GeneTypeName</name>
+                <description>The UIMA type denoting gene annotations that should be written into the BioC format when the AddGenes parameter is set to true.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>OutputDirectory</name>
+                <description>Optional. If specified, the GNormPlus output files in BioC format will be saved to the given directory. In this way, this component can be used directly as a BioC XML writer through the GNormPlus algorithm.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>CostosysConfigFile</name>
+                <description>Path to the CoStoSys configuration file that is used by the XMI DB writer in the same pipeline, if any. The XMI DB writer requires information about the XMI documents that are already in the database and should be updated with new annotations. The current highest XMI ID must be known to avoid ID collisions. To obtain the ID, it must be received from the database beforehand. This allows to retrieve the information batch wise instead of one-by-one which would be much slower.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>DocumentsTable</name>
+                <description>Required to retrieve the max XMI ID for use by the XMI DB writer. The schema-qualified name of the XMI document table that the XMI DB writer will write annotations into.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+        </configurationParameters>
+        <configurationParameterSettings>
+            <nameValuePair>
+                <name>AddGenes</name>
+                <value>
+                    <boolean>false</boolean>
+                </value>
+            </nameValuePair>
+            <nameValuePair>
+                <name>GeneTypeName</name>
+                <value>
+                    <string>de.julielab.jcore.types.Gene</string>
+                </value>
+            </nameValuePair>
+        </configurationParameterSettings>
+        <typeSystemDescription>
+            <imports>
+                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
+                <import name="de.julielab.jcore.types.jcore-semantics-biology-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
+            </imports>
+        </typeSystemDescription>
+        <fsIndexCollection/>
+        <capabilities>
+            <capability>
+                <inputs/>
+                <outputs>
+                    <type>de.julielab.jcore.types.ConceptMention</type>
+                    <type>de.julielab.jcore.types.Organism</type>
+                </outputs>
+                <languagesSupported/>
+            </capability>
+        </capabilities>
+        <operationalProperties>
+            <modifiesCas>true</modifiesCas>
+            <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+            <outputsNewCASes>true</outputsNewCASes>
+        </operationalProperties>
+    </analysisEngineMetaData>
+</analysisEngineDescription>
\ No newline at end of file
diff --git a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-db-multiplier.xml b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-db-multiplier.xml
new file mode 100644
index 000000000..8db6f5a78
--- /dev/null
+++ b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-db-multiplier.xml
@@ -0,0 +1,98 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+    <primitive>true</primitive>
+    <annotatorImplementationName>de.julielab.jcore.multiplier.gnp.GNormPlusDBMultiplier</annotatorImplementationName>
+    <analysisEngineMetaData>
+        <name>JCoRe GNormPlus Database Multiplier</name>
+        <description>A CAS multiplier to be used with the DB XMI multiplier reader. It wraps the JULIE Lab variant of the GNormPlus gene ID mapper. It is a multiplier because this enables batch-processing of documents with GNormPlus which makes the processing more efficient.</description>
+        <vendor>JULIE Lab Jena, Germany</vendor>
+        <configurationParameters>
+            <configurationParameter>
+                <name>AddGenes</name>
+                <description>If set to true, all Gene annotations in the CAS will be added to the BioC documents. The default type used is de.julielab.jcore.types.Gene. This can be changed with the GeneTypeName parameter.</description>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>GNormPlusSetupFile</name>
+                <description>File path or class path resource path to the setup.txt file for GNormPlus. If not specified, a default setup file is loaded that expects the Dictionary/ directory directly under the working directory, performs gene recognition with the CRF and thus expects the GNormPlus CRF directory directly under the working directory and maps the found genes to NCBI gene IDs for all organisms.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>GeneTypeName</name>
+                <description>The UIMA type denoting gene annotations that should be written into the BioC format when the AddGenes parameter is set to true.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>OutputDirectory</name>
+                <description>Optional. If specified, the GNormPlus output files in BioC format will be saved to the given directory. In this way, this component can be used directly as a BioC XML writer through the GNormPlus algorithm.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>LogFinalXmi</name>
+                <description>For debugging purposes. If set to true, before parsing the final XMI data assembled from the annotation modules, it is printed to console.</description>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>TruncateAtSize</name>
+                <description>Specify size in bytes of the XMI sofa string, i.e. the document text. If the text surpasses that size, the document is not populated from XMI but given some placeholder information. This can be necessary when large documents cannot be handled by subsequent components in the pipeline.</description>
+                <type>Integer</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+        </configurationParameters>
+        <configurationParameterSettings>
+            <nameValuePair>
+                <name>AddGenes</name>
+                <value>
+                    <boolean>false</boolean>
+                </value>
+            </nameValuePair>
+            <nameValuePair>
+                <name>GeneTypeName</name>
+                <value>
+                    <string>de.julielab.jcore.types.Gene</string>
+                </value>
+            </nameValuePair>
+            <nameValuePair>
+                <name>LogFinalXmi</name>
+                <value>
+                    <boolean>false</boolean>
+                </value>
+            </nameValuePair>
+        </configurationParameterSettings>
+        <typeSystemDescription>
+            <imports>
+                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
+                <import name="de.julielab.jcore.types.jcore-semantics-biology-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
+            </imports>
+        </typeSystemDescription>
+        <fsIndexCollection/>
+        <capabilities>
+            <capability>
+                <inputs/>
+                <outputs>
+                    <type>de.julielab.jcore.types.ConceptMention</type>
+                    <type>de.julielab.jcore.types.Organism</type>
+                </outputs>
+                <languagesSupported/>
+            </capability>
+        </capabilities>
+        <operationalProperties>
+            <modifiesCas>true</modifiesCas>
+            <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+            <outputsNewCASes>true</outputsNewCASes>
+        </operationalProperties>
+    </analysisEngineMetaData>
+</analysisEngineDescription>
\ No newline at end of file
diff --git a/jcore-gnormplus-ae/src/test/java/de/julielab/jcore/ae/gnormplus/GNormPlusAnnotatorTest.java b/jcore-gnormplus-ae/src/test/java/de/julielab/jcore/ae/gnp/GNormPlusAnnotatorTest.java
similarity index 89%
rename from jcore-gnormplus-ae/src/test/java/de/julielab/jcore/ae/gnormplus/GNormPlusAnnotatorTest.java
rename to jcore-gnormplus-ae/src/test/java/de/julielab/jcore/ae/gnp/GNormPlusAnnotatorTest.java
index daf863fae..8b56aafe8 100644
--- a/jcore-gnormplus-ae/src/test/java/de/julielab/jcore/ae/gnormplus/GNormPlusAnnotatorTest.java
+++ b/jcore-gnormplus-ae/src/test/java/de/julielab/jcore/ae/gnp/GNormPlusAnnotatorTest.java
@@ -1,5 +1,5 @@
 
-package de.julielab.jcore.ae.gnormplus;
+package de.julielab.jcore.ae.gnp;
 
 import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
diff --git a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
index f35fa5559..73853e9d2 100644
--- a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
+++ b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
@@ -307,4 +307,8 @@ public long getCollectionTextLength() {
     public int getNumDocumentsInCollection() {
         return bioCCollection.getDocmentCount();
     }
+
+    public void clearDocument(int index) {
+        bioCCollection.getDocuments().set(index, null);
+    }
 }

From 7ab832fd2ffa7e7b391dcc2208fa81471290e2aa Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 25 Aug 2022 17:29:45 +0200
Subject: [PATCH 236/269] Fix bugs with the new multiplier logic.

The BioC GNP multiplier has been used on NLM Gene test and showed the exact same performance as GNormPlus standalone.
---
 .../jcore/ae/gnp/GNormPlusProcessing.java        |  2 ++
 .../multiplier/gnp/GNormPlusMultiplierLogic.java | 16 +++++++++-------
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/ae/gnp/GNormPlusProcessing.java b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/ae/gnp/GNormPlusProcessing.java
index 79e7704d0..0ecf76626 100644
--- a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/ae/gnp/GNormPlusProcessing.java
+++ b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/ae/gnp/GNormPlusProcessing.java
@@ -50,6 +50,8 @@ public static Path processWithGNormPlus(BioCCollection bioCCollection, String ou
         try {
             if (!Files.exists(filePath.getParent()))
                 Files.createDirectory(filePath.getParent());
+            if (!Files.exists(outputFilePath.getParent()))
+                Files.createDirectories(outputFilePath.getParent());
             try (BioCCollectionWriter w = new BioCCollectionWriter(filePath)) {
                 w.writeCollection(bioCCollection);
             }
diff --git a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java
index b9cfa1cd2..70f6bad13 100644
--- a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java
+++ b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java
@@ -19,6 +19,7 @@
 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
+import java.io.InputStream;
 import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.List;
@@ -56,15 +57,17 @@ public GNormPlusMultiplierLogic(UimaContext aContext, BioCDocumentPopulator bioC
 
     public AbstractCas next() throws AnalysisEngineProcessException {
         if (bioCCasPopulator == null || bioCCasPopulator.documentsLeftInCollection() == 0) {
+            currentCollectionIndex = 0;
+            cachedCasData.clear();
             final BioCCollection gnormPlusInputCollection = GNormPlusProcessing.createEmptyJulieLabBioCCollection();
             while (baseMultiplierHasNext.get()) {
                 final JCas jCas = baseMultiplierNext.get();
                 final BioCDocument bioCDocument = bioCDocumentPopulator.populate(jCas);
                 gnormPlusInputCollection.addDocument(bioCDocument);
-                try {
-                    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
-                    final GZIPOutputStream os = new GZIPOutputStream(baos);
-                    XmiCasSerializer.serialize(jCas.getCas(), os);
+                try (final ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
+                    try (final GZIPOutputStream os = new GZIPOutputStream(baos)) {
+                        XmiCasSerializer.serialize(jCas.getCas(), os);
+                    }
                     cachedCasData.add(baos.toByteArray());
                     jCas.release();
                 } catch (IOException | SAXException e) {
@@ -72,7 +75,6 @@ public AbstractCas next() throws AnalysisEngineProcessException {
                     throw new AnalysisEngineProcessException(e);
                 }
             }
-            currentCollectionIndex = 0;
             final Path outputFilePath = GNormPlusProcessing.processWithGNormPlus(gnormPlusInputCollection, outputDirectory);
             try {
                 bioCCasPopulator = new BioCCasPopulator(outputFilePath);
@@ -83,8 +85,8 @@ public AbstractCas next() throws AnalysisEngineProcessException {
         }
         byte[] currentCasData = cachedCasData.get(currentCollectionIndex);
         final JCas jCas = multiplierGetEmptyCas.get();
-        try {
-            XmiCasDeserializer.deserialize(new GZIPInputStream(new ByteArrayInputStream(currentCasData)), jCas.getCas());
+        try (InputStream is = new GZIPInputStream(new ByteArrayInputStream(currentCasData))) {
+            XmiCasDeserializer.deserialize(is, jCas.getCas());
         } catch (SAXException | IOException e) {
             log.error("Could not deserialize cached CAS data");
             throw new AnalysisEngineProcessException(e);

From c1abefc814485610fdbad8b20345ba024ff76608 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Erik=20F=C3=A4=C3=9Fler?= <chew@gmx.net>
Date: Fri, 16 Sep 2022 16:00:59 +0200
Subject: [PATCH 237/269] Update LICENSE

Changing to the actual license used by Alias-i
---
 jcore-lingpipegazetteer-ae/LICENSE | 734 +++--------------------------
 1 file changed, 73 insertions(+), 661 deletions(-)

diff --git a/jcore-lingpipegazetteer-ae/LICENSE b/jcore-lingpipegazetteer-ae/LICENSE
index be3f7b28e..f57182ac3 100644
--- a/jcore-lingpipegazetteer-ae/LICENSE
+++ b/jcore-lingpipegazetteer-ae/LICENSE
@@ -1,661 +1,73 @@
-                    GNU AFFERO GENERAL PUBLIC LICENSE
-                       Version 3, 19 November 2007
-
- Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
-                            Preamble
-
-  The GNU Affero General Public License is a free, copyleft license for
-software and other kinds of works, specifically designed to ensure
-cooperation with the community in the case of network server software.
-
-  The licenses for most software and other practical works are designed
-to take away your freedom to share and change the works.  By contrast,
-our General Public Licenses are intended to guarantee your freedom to
-share and change all versions of a program--to make sure it remains free
-software for all its users.
-
-  When we speak of free software, we are referring to freedom, not
-price.  Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-them if you wish), that you receive source code or can get it if you
-want it, that you can change the software or use pieces of it in new
-free programs, and that you know you can do these things.
-
-  Developers that use our General Public Licenses protect your rights
-with two steps: (1) assert copyright on the software, and (2) offer
-you this License which gives you legal permission to copy, distribute
-and/or modify the software.
-
-  A secondary benefit of defending all users' freedom is that
-improvements made in alternate versions of the program, if they
-receive widespread use, become available for other developers to
-incorporate.  Many developers of free software are heartened and
-encouraged by the resulting cooperation.  However, in the case of
-software used on network servers, this result may fail to come about.
-The GNU General Public License permits making a modified version and
-letting the public access it on a server without ever releasing its
-source code to the public.
-
-  The GNU Affero General Public License is designed specifically to
-ensure that, in such cases, the modified source code becomes available
-to the community.  It requires the operator of a network server to
-provide the source code of the modified version running there to the
-users of that server.  Therefore, public use of a modified version, on
-a publicly accessible server, gives the public access to the source
-code of the modified version.
-
-  An older license, called the Affero General Public License and
-published by Affero, was designed to accomplish similar goals.  This is
-a different license, not a version of the Affero GPL, but Affero has
-released a new version of the Affero GPL which permits relicensing under
-this license.
-
-  The precise terms and conditions for copying, distribution and
-modification follow.
-
-                       TERMS AND CONDITIONS
-
-  0. Definitions.
-
-  "This License" refers to version 3 of the GNU Affero General Public License.
-
-  "Copyright" also means copyright-like laws that apply to other kinds of
-works, such as semiconductor masks.
-
-  "The Program" refers to any copyrightable work licensed under this
-License.  Each licensee is addressed as "you".  "Licensees" and
-"recipients" may be individuals or organizations.
-
-  To "modify" a work means to copy from or adapt all or part of the work
-in a fashion requiring copyright permission, other than the making of an
-exact copy.  The resulting work is called a "modified version" of the
-earlier work or a work "based on" the earlier work.
-
-  A "covered work" means either the unmodified Program or a work based
-on the Program.
-
-  To "propagate" a work means to do anything with it that, without
-permission, would make you directly or secondarily liable for
-infringement under applicable copyright law, except executing it on a
-computer or modifying a private copy.  Propagation includes copying,
-distribution (with or without modification), making available to the
-public, and in some countries other activities as well.
-
-  To "convey" a work means any kind of propagation that enables other
-parties to make or receive copies.  Mere interaction with a user through
-a computer network, with no transfer of a copy, is not conveying.
-
-  An interactive user interface displays "Appropriate Legal Notices"
-to the extent that it includes a convenient and prominently visible
-feature that (1) displays an appropriate copyright notice, and (2)
-tells the user that there is no warranty for the work (except to the
-extent that warranties are provided), that licensees may convey the
-work under this License, and how to view a copy of this License.  If
-the interface presents a list of user commands or options, such as a
-menu, a prominent item in the list meets this criterion.
-
-  1. Source Code.
-
-  The "source code" for a work means the preferred form of the work
-for making modifications to it.  "Object code" means any non-source
-form of a work.
-
-  A "Standard Interface" means an interface that either is an official
-standard defined by a recognized standards body, or, in the case of
-interfaces specified for a particular programming language, one that
-is widely used among developers working in that language.
-
-  The "System Libraries" of an executable work include anything, other
-than the work as a whole, that (a) is included in the normal form of
-packaging a Major Component, but which is not part of that Major
-Component, and (b) serves only to enable use of the work with that
-Major Component, or to implement a Standard Interface for which an
-implementation is available to the public in source code form.  A
-"Major Component", in this context, means a major essential component
-(kernel, window system, and so on) of the specific operating system
-(if any) on which the executable work runs, or a compiler used to
-produce the work, or an object code interpreter used to run it.
-
-  The "Corresponding Source" for a work in object code form means all
-the source code needed to generate, install, and (for an executable
-work) run the object code and to modify the work, including scripts to
-control those activities.  However, it does not include the work's
-System Libraries, or general-purpose tools or generally available free
-programs which are used unmodified in performing those activities but
-which are not part of the work.  For example, Corresponding Source
-includes interface definition files associated with source files for
-the work, and the source code for shared libraries and dynamically
-linked subprograms that the work is specifically designed to require,
-such as by intimate data communication or control flow between those
-subprograms and other parts of the work.
-
-  The Corresponding Source need not include anything that users
-can regenerate automatically from other parts of the Corresponding
-Source.
-
-  The Corresponding Source for a work in source code form is that
-same work.
-
-  2. Basic Permissions.
-
-  All rights granted under this License are granted for the term of
-copyright on the Program, and are irrevocable provided the stated
-conditions are met.  This License explicitly affirms your unlimited
-permission to run the unmodified Program.  The output from running a
-covered work is covered by this License only if the output, given its
-content, constitutes a covered work.  This License acknowledges your
-rights of fair use or other equivalent, as provided by copyright law.
-
-  You may make, run and propagate covered works that you do not
-convey, without conditions so long as your license otherwise remains
-in force.  You may convey covered works to others for the sole purpose
-of having them make modifications exclusively for you, or provide you
-with facilities for running those works, provided that you comply with
-the terms of this License in conveying all material for which you do
-not control copyright.  Those thus making or running the covered works
-for you must do so exclusively on your behalf, under your direction
-and control, on terms that prohibit them from making any copies of
-your copyrighted material outside their relationship with you.
-
-  Conveying under any other circumstances is permitted solely under
-the conditions stated below.  Sublicensing is not allowed; section 10
-makes it unnecessary.
-
-  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
-
-  No covered work shall be deemed part of an effective technological
-measure under any applicable law fulfilling obligations under article
-11 of the WIPO copyright treaty adopted on 20 December 1996, or
-similar laws prohibiting or restricting circumvention of such
-measures.
-
-  When you convey a covered work, you waive any legal power to forbid
-circumvention of technological measures to the extent such circumvention
-is effected by exercising rights under this License with respect to
-the covered work, and you disclaim any intention to limit operation or
-modification of the work as a means of enforcing, against the work's
-users, your or third parties' legal rights to forbid circumvention of
-technological measures.
-
-  4. Conveying Verbatim Copies.
-
-  You may convey verbatim copies of the Program's source code as you
-receive it, in any medium, provided that you conspicuously and
-appropriately publish on each copy an appropriate copyright notice;
-keep intact all notices stating that this License and any
-non-permissive terms added in accord with section 7 apply to the code;
-keep intact all notices of the absence of any warranty; and give all
-recipients a copy of this License along with the Program.
-
-  You may charge any price or no price for each copy that you convey,
-and you may offer support or warranty protection for a fee.
-
-  5. Conveying Modified Source Versions.
-
-  You may convey a work based on the Program, or the modifications to
-produce it from the Program, in the form of source code under the
-terms of section 4, provided that you also meet all of these conditions:
-
-    a) The work must carry prominent notices stating that you modified
-    it, and giving a relevant date.
-
-    b) The work must carry prominent notices stating that it is
-    released under this License and any conditions added under section
-    7.  This requirement modifies the requirement in section 4 to
-    "keep intact all notices".
-
-    c) You must license the entire work, as a whole, under this
-    License to anyone who comes into possession of a copy.  This
-    License will therefore apply, along with any applicable section 7
-    additional terms, to the whole of the work, and all its parts,
-    regardless of how they are packaged.  This License gives no
-    permission to license the work in any other way, but it does not
-    invalidate such permission if you have separately received it.
-
-    d) If the work has interactive user interfaces, each must display
-    Appropriate Legal Notices; however, if the Program has interactive
-    interfaces that do not display Appropriate Legal Notices, your
-    work need not make them do so.
-
-  A compilation of a covered work with other separate and independent
-works, which are not by their nature extensions of the covered work,
-and which are not combined with it such as to form a larger program,
-in or on a volume of a storage or distribution medium, is called an
-"aggregate" if the compilation and its resulting copyright are not
-used to limit the access or legal rights of the compilation's users
-beyond what the individual works permit.  Inclusion of a covered work
-in an aggregate does not cause this License to apply to the other
-parts of the aggregate.
-
-  6. Conveying Non-Source Forms.
-
-  You may convey a covered work in object code form under the terms
-of sections 4 and 5, provided that you also convey the
-machine-readable Corresponding Source under the terms of this License,
-in one of these ways:
-
-    a) Convey the object code in, or embodied in, a physical product
-    (including a physical distribution medium), accompanied by the
-    Corresponding Source fixed on a durable physical medium
-    customarily used for software interchange.
-
-    b) Convey the object code in, or embodied in, a physical product
-    (including a physical distribution medium), accompanied by a
-    written offer, valid for at least three years and valid for as
-    long as you offer spare parts or customer support for that product
-    model, to give anyone who possesses the object code either (1) a
-    copy of the Corresponding Source for all the software in the
-    product that is covered by this License, on a durable physical
-    medium customarily used for software interchange, for a price no
-    more than your reasonable cost of physically performing this
-    conveying of source, or (2) access to copy the
-    Corresponding Source from a network server at no charge.
-
-    c) Convey individual copies of the object code with a copy of the
-    written offer to provide the Corresponding Source.  This
-    alternative is allowed only occasionally and noncommercially, and
-    only if you received the object code with such an offer, in accord
-    with subsection 6b.
-
-    d) Convey the object code by offering access from a designated
-    place (gratis or for a charge), and offer equivalent access to the
-    Corresponding Source in the same way through the same place at no
-    further charge.  You need not require recipients to copy the
-    Corresponding Source along with the object code.  If the place to
-    copy the object code is a network server, the Corresponding Source
-    may be on a different server (operated by you or a third party)
-    that supports equivalent copying facilities, provided you maintain
-    clear directions next to the object code saying where to find the
-    Corresponding Source.  Regardless of what server hosts the
-    Corresponding Source, you remain obligated to ensure that it is
-    available for as long as needed to satisfy these requirements.
-
-    e) Convey the object code using peer-to-peer transmission, provided
-    you inform other peers where the object code and Corresponding
-    Source of the work are being offered to the general public at no
-    charge under subsection 6d.
-
-  A separable portion of the object code, whose source code is excluded
-from the Corresponding Source as a System Library, need not be
-included in conveying the object code work.
-
-  A "User Product" is either (1) a "consumer product", which means any
-tangible personal property which is normally used for personal, family,
-or household purposes, or (2) anything designed or sold for incorporation
-into a dwelling.  In determining whether a product is a consumer product,
-doubtful cases shall be resolved in favor of coverage.  For a particular
-product received by a particular user, "normally used" refers to a
-typical or common use of that class of product, regardless of the status
-of the particular user or of the way in which the particular user
-actually uses, or expects or is expected to use, the product.  A product
-is a consumer product regardless of whether the product has substantial
-commercial, industrial or non-consumer uses, unless such uses represent
-the only significant mode of use of the product.
-
-  "Installation Information" for a User Product means any methods,
-procedures, authorization keys, or other information required to install
-and execute modified versions of a covered work in that User Product from
-a modified version of its Corresponding Source.  The information must
-suffice to ensure that the continued functioning of the modified object
-code is in no case prevented or interfered with solely because
-modification has been made.
-
-  If you convey an object code work under this section in, or with, or
-specifically for use in, a User Product, and the conveying occurs as
-part of a transaction in which the right of possession and use of the
-User Product is transferred to the recipient in perpetuity or for a
-fixed term (regardless of how the transaction is characterized), the
-Corresponding Source conveyed under this section must be accompanied
-by the Installation Information.  But this requirement does not apply
-if neither you nor any third party retains the ability to install
-modified object code on the User Product (for example, the work has
-been installed in ROM).
-
-  The requirement to provide Installation Information does not include a
-requirement to continue to provide support service, warranty, or updates
-for a work that has been modified or installed by the recipient, or for
-the User Product in which it has been modified or installed.  Access to a
-network may be denied when the modification itself materially and
-adversely affects the operation of the network or violates the rules and
-protocols for communication across the network.
-
-  Corresponding Source conveyed, and Installation Information provided,
-in accord with this section must be in a format that is publicly
-documented (and with an implementation available to the public in
-source code form), and must require no special password or key for
-unpacking, reading or copying.
-
-  7. Additional Terms.
-
-  "Additional permissions" are terms that supplement the terms of this
-License by making exceptions from one or more of its conditions.
-Additional permissions that are applicable to the entire Program shall
-be treated as though they were included in this License, to the extent
-that they are valid under applicable law.  If additional permissions
-apply only to part of the Program, that part may be used separately
-under those permissions, but the entire Program remains governed by
-this License without regard to the additional permissions.
-
-  When you convey a copy of a covered work, you may at your option
-remove any additional permissions from that copy, or from any part of
-it.  (Additional permissions may be written to require their own
-removal in certain cases when you modify the work.)  You may place
-additional permissions on material, added by you to a covered work,
-for which you have or can give appropriate copyright permission.
-
-  Notwithstanding any other provision of this License, for material you
-add to a covered work, you may (if authorized by the copyright holders of
-that material) supplement the terms of this License with terms:
-
-    a) Disclaiming warranty or limiting liability differently from the
-    terms of sections 15 and 16 of this License; or
-
-    b) Requiring preservation of specified reasonable legal notices or
-    author attributions in that material or in the Appropriate Legal
-    Notices displayed by works containing it; or
-
-    c) Prohibiting misrepresentation of the origin of that material, or
-    requiring that modified versions of such material be marked in
-    reasonable ways as different from the original version; or
-
-    d) Limiting the use for publicity purposes of names of licensors or
-    authors of the material; or
-
-    e) Declining to grant rights under trademark law for use of some
-    trade names, trademarks, or service marks; or
-
-    f) Requiring indemnification of licensors and authors of that
-    material by anyone who conveys the material (or modified versions of
-    it) with contractual assumptions of liability to the recipient, for
-    any liability that these contractual assumptions directly impose on
-    those licensors and authors.
-
-  All other non-permissive additional terms are considered "further
-restrictions" within the meaning of section 10.  If the Program as you
-received it, or any part of it, contains a notice stating that it is
-governed by this License along with a term that is a further
-restriction, you may remove that term.  If a license document contains
-a further restriction but permits relicensing or conveying under this
-License, you may add to a covered work material governed by the terms
-of that license document, provided that the further restriction does
-not survive such relicensing or conveying.
-
-  If you add terms to a covered work in accord with this section, you
-must place, in the relevant source files, a statement of the
-additional terms that apply to those files, or a notice indicating
-where to find the applicable terms.
-
-  Additional terms, permissive or non-permissive, may be stated in the
-form of a separately written license, or stated as exceptions;
-the above requirements apply either way.
-
-  8. Termination.
-
-  You may not propagate or modify a covered work except as expressly
-provided under this License.  Any attempt otherwise to propagate or
-modify it is void, and will automatically terminate your rights under
-this License (including any patent licenses granted under the third
-paragraph of section 11).
-
-  However, if you cease all violation of this License, then your
-license from a particular copyright holder is reinstated (a)
-provisionally, unless and until the copyright holder explicitly and
-finally terminates your license, and (b) permanently, if the copyright
-holder fails to notify you of the violation by some reasonable means
-prior to 60 days after the cessation.
-
-  Moreover, your license from a particular copyright holder is
-reinstated permanently if the copyright holder notifies you of the
-violation by some reasonable means, this is the first time you have
-received notice of violation of this License (for any work) from that
-copyright holder, and you cure the violation prior to 30 days after
-your receipt of the notice.
-
-  Termination of your rights under this section does not terminate the
-licenses of parties who have received copies or rights from you under
-this License.  If your rights have been terminated and not permanently
-reinstated, you do not qualify to receive new licenses for the same
-material under section 10.
-
-  9. Acceptance Not Required for Having Copies.
-
-  You are not required to accept this License in order to receive or
-run a copy of the Program.  Ancillary propagation of a covered work
-occurring solely as a consequence of using peer-to-peer transmission
-to receive a copy likewise does not require acceptance.  However,
-nothing other than this License grants you permission to propagate or
-modify any covered work.  These actions infringe copyright if you do
-not accept this License.  Therefore, by modifying or propagating a
-covered work, you indicate your acceptance of this License to do so.
-
-  10. Automatic Licensing of Downstream Recipients.
-
-  Each time you convey a covered work, the recipient automatically
-receives a license from the original licensors, to run, modify and
-propagate that work, subject to this License.  You are not responsible
-for enforcing compliance by third parties with this License.
-
-  An "entity transaction" is a transaction transferring control of an
-organization, or substantially all assets of one, or subdividing an
-organization, or merging organizations.  If propagation of a covered
-work results from an entity transaction, each party to that
-transaction who receives a copy of the work also receives whatever
-licenses to the work the party's predecessor in interest had or could
-give under the previous paragraph, plus a right to possession of the
-Corresponding Source of the work from the predecessor in interest, if
-the predecessor has it or can get it with reasonable efforts.
-
-  You may not impose any further restrictions on the exercise of the
-rights granted or affirmed under this License.  For example, you may
-not impose a license fee, royalty, or other charge for exercise of
-rights granted under this License, and you may not initiate litigation
-(including a cross-claim or counterclaim in a lawsuit) alleging that
-any patent claim is infringed by making, using, selling, offering for
-sale, or importing the Program or any portion of it.
-
-  11. Patents.
-
-  A "contributor" is a copyright holder who authorizes use under this
-License of the Program or a work on which the Program is based.  The
-work thus licensed is called the contributor's "contributor version".
-
-  A contributor's "essential patent claims" are all patent claims
-owned or controlled by the contributor, whether already acquired or
-hereafter acquired, that would be infringed by some manner, permitted
-by this License, of making, using, or selling its contributor version,
-but do not include claims that would be infringed only as a
-consequence of further modification of the contributor version.  For
-purposes of this definition, "control" includes the right to grant
-patent sublicenses in a manner consistent with the requirements of
-this License.
-
-  Each contributor grants you a non-exclusive, worldwide, royalty-free
-patent license under the contributor's essential patent claims, to
-make, use, sell, offer for sale, import and otherwise run, modify and
-propagate the contents of its contributor version.
-
-  In the following three paragraphs, a "patent license" is any express
-agreement or commitment, however denominated, not to enforce a patent
-(such as an express permission to practice a patent or covenant not to
-sue for patent infringement).  To "grant" such a patent license to a
-party means to make such an agreement or commitment not to enforce a
-patent against the party.
-
-  If you convey a covered work, knowingly relying on a patent license,
-and the Corresponding Source of the work is not available for anyone
-to copy, free of charge and under the terms of this License, through a
-publicly available network server or other readily accessible means,
-then you must either (1) cause the Corresponding Source to be so
-available, or (2) arrange to deprive yourself of the benefit of the
-patent license for this particular work, or (3) arrange, in a manner
-consistent with the requirements of this License, to extend the patent
-license to downstream recipients.  "Knowingly relying" means you have
-actual knowledge that, but for the patent license, your conveying the
-covered work in a country, or your recipient's use of the covered work
-in a country, would infringe one or more identifiable patents in that
-country that you have reason to believe are valid.
-
-  If, pursuant to or in connection with a single transaction or
-arrangement, you convey, or propagate by procuring conveyance of, a
-covered work, and grant a patent license to some of the parties
-receiving the covered work authorizing them to use, propagate, modify
-or convey a specific copy of the covered work, then the patent license
-you grant is automatically extended to all recipients of the covered
-work and works based on it.
-
-  A patent license is "discriminatory" if it does not include within
-the scope of its coverage, prohibits the exercise of, or is
-conditioned on the non-exercise of one or more of the rights that are
-specifically granted under this License.  You may not convey a covered
-work if you are a party to an arrangement with a third party that is
-in the business of distributing software, under which you make payment
-to the third party based on the extent of your activity of conveying
-the work, and under which the third party grants, to any of the
-parties who would receive the covered work from you, a discriminatory
-patent license (a) in connection with copies of the covered work
-conveyed by you (or copies made from those copies), or (b) primarily
-for and in connection with specific products or compilations that
-contain the covered work, unless you entered into that arrangement,
-or that patent license was granted, prior to 28 March 2007.
-
-  Nothing in this License shall be construed as excluding or limiting
-any implied license or other defenses to infringement that may
-otherwise be available to you under applicable patent law.
-
-  12. No Surrender of Others' Freedom.
-
-  If conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License.  If you cannot convey a
-covered work so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you may
-not convey it at all.  For example, if you agree to terms that obligate you
-to collect a royalty for further conveying from those to whom you convey
-the Program, the only way you could satisfy both those terms and this
-License would be to refrain entirely from conveying the Program.
-
-  13. Remote Network Interaction; Use with the GNU General Public License.
-
-  Notwithstanding any other provision of this License, if you modify the
-Program, your modified version must prominently offer all users
-interacting with it remotely through a computer network (if your version
-supports such interaction) an opportunity to receive the Corresponding
-Source of your version by providing access to the Corresponding Source
-from a network server at no charge, through some standard or customary
-means of facilitating copying of software.  This Corresponding Source
-shall include the Corresponding Source for any work covered by version 3
-of the GNU General Public License that is incorporated pursuant to the
-following paragraph.
-
-  Notwithstanding any other provision of this License, you have
-permission to link or combine any covered work with a work licensed
-under version 3 of the GNU General Public License into a single
-combined work, and to convey the resulting work.  The terms of this
-License will continue to apply to the part which is the covered work,
-but the work with which it is combined will remain governed by version
-3 of the GNU General Public License.
-
-  14. Revised Versions of this License.
-
-  The Free Software Foundation may publish revised and/or new versions of
-the GNU Affero General Public License from time to time.  Such new versions
-will be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
-  Each version is given a distinguishing version number.  If the
-Program specifies that a certain numbered version of the GNU Affero General
-Public License "or any later version" applies to it, you have the
-option of following the terms and conditions either of that numbered
-version or of any later version published by the Free Software
-Foundation.  If the Program does not specify a version number of the
-GNU Affero General Public License, you may choose any version ever published
-by the Free Software Foundation.
-
-  If the Program specifies that a proxy can decide which future
-versions of the GNU Affero General Public License can be used, that proxy's
-public statement of acceptance of a version permanently authorizes you
-to choose that version for the Program.
-
-  Later license versions may give you additional or different
-permissions.  However, no additional obligations are imposed on any
-author or copyright holder as a result of your choosing to follow a
-later version.
-
-  15. Disclaimer of Warranty.
-
-  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
-APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
-HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
-OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
-IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
-ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
-  16. Limitation of Liability.
-
-  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
-THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
-GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
-USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
-DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
-PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
-EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGES.
-
-  17. Interpretation of Sections 15 and 16.
-
-  If the disclaimer of warranty and limitation of liability provided
-above cannot be given local legal effect according to their terms,
-reviewing courts shall apply local law that most closely approximates
-an absolute waiver of all civil liability in connection with the
-Program, unless a warranty or assumption of liability accompanies a
-copy of the Program in return for a fee.
-
-                     END OF TERMS AND CONDITIONS
-
-            How to Apply These Terms to Your New Programs
-
-  If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
-  To do so, attach the following notices to the program.  It is safest
-to attach them to the start of each source file to most effectively
-state the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
-    <one line to give the program's name and a brief idea of what it does.>
-    Copyright (C) <year>  <name of author>
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU Affero General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU Affero General Public License for more details.
-
-    You should have received a copy of the GNU Affero General Public License
-    along with this program.  If not, see <https://www.gnu.org/licenses/>.
-
-Also add information on how to contact you by electronic and paper mail.
-
-  If your software can interact with users remotely through a computer
-network, you should also make sure that it provides a way for users to
-get its source.  For example, if your program is a web application, its
-interface could display a "Source" link that leads users to an archive
-of the code.  There are many ways you could offer source, and different
-solutions will be better for different programs; see section 13 for the
-specific requirements.
-
-  You should also get your employer (if you work as a programmer) or school,
-if any, to sign a "copyright disclaimer" for the program, if necessary.
-For more information on this, and how to apply and follow the GNU AGPL, see
-<https://www.gnu.org/licenses/>.
+Alias-i ROYALTY FREE LICENSE VERSION 1
+
+Copyright c 2003-2007 Alias-i, Inc
+All Rights Reserved
+
+1.  This Alias-i Royalty Free License Version 1 ("License") governs
+    the copying, modifying, and distributing of the computer program or
+    work containing a notice stating that it is subject to the terms of
+    this License and any derivative works of that computer program or
+    work.  The computer program or work and any derivative works thereof
+    are the "Software."  Your copying, modifying, or distributing of the
+    Software constitutes acceptance of this License.  Although you are not
+    required to accept this License, since you have not signed it, nothing
+    else grants you permission to copy, modify, or distribute the
+    Software.  If you wish to receive a license from Alias-i under
+    different terms than those contained in this License, please contact
+    Alias-i.  Otherwise, if you do not accept this License, any copying,
+    modifying, or distributing of the Software is strictly prohibited by
+    law.
+
+2.  You may copy or modify the Software or use any output of the
+    Software (i) for internal non-production trial, testing and evaluation
+    of the Software, or (ii) in connection with any product or service you
+    provide to third parties for free.  Copying or modifying the Software
+    includes the acts of "installing", "running", "using", "accessing" or
+    "deploying" the Software as those terms are understood in the software
+    industry.  Therefore, those activities are only permitted under this
+    License in the ways that copying or modifying are permitted.
+
+3.  You may distribute the Software, provided that you: (i) distribute
+    the Software only under the terms of this License, no more, no less;
+    (ii) include a copy of this License along with any such distribution;
+    (iii) include the complete corresponding machine-readable source code
+    of the Software you are distributing; (iv) do not remove any copyright
+    or other notices from the Software; and, (v) cause any files of the
+    Software that you modified to carry prominent notices stating that you
+    changed the Software and the date of any change so that recipients
+    know that they are not receiving the original Software.
+
+4.  Whether you distribute the Software or not, if you distribute any
+    computer program that is not the Software, but that (a) is distributed
+    in connection with the Software or contains any part of the Software,
+    (b) causes the Software to be copied or modified (i.e., ran, used, or
+    executed), such as through an API call, or (c) uses any output of the
+    Software, then you must distribute that other computer program under a
+    license defined as a Free Software License by the Free Software
+    Foundation or an Approved Open Source License by the Open Source
+    Initiative.
+
+5.  You may not copy, modify, or distribute the Software except as
+    expressly provided under this License, unless you receive a different
+    written license from Alias-i to do so.  Any attempt otherwise to copy,
+    modify, or distribute the Software is without Alias-i's permission, is
+    void, and will automatically terminate your rights under this License.
+    Your rights under this License may only be reinstated by a signed
+    writing from Alias-i.
+
+THE SOFTWARE IS PROVIDED "AS IS."  TO THE MAXIMUM EXTENT PERMITTED BY
+APPLICABLE LAW, ALIAS-i DOES NOT MAKE, AND HEREBY EXPRESSLY DISCLAIMS,
+ANY WARRANTIES, EXPRESS, IMPLIED, STATUTORY OR OTHERWISE, CONCERNING
+THE SOFTWARE OR ANY SUBJECT MATTER OF THIS LICENSE.  SPECIFICALLY, BUT
+WITHOUT LIMITING THE FOREGOING, LICENSOR MAKES NO EXPRESS OR IMPLIED
+WARRANTY OF MERCHANTABILITY, FITNESS (FOR A PARTICULAR PURPOSE OR
+OTHERWISE), QUALITY, USEFULNESS, TITLE, OR NON-INFRINGEMENT.  TO THE
+MAXIMUM EXTENT PERMITTED BY APPLICABLE LAW, IN NO EVENT SHALL LICENSOR
+BE LIABLE TO YOU OR ANY THIRD PARTY FOR ANY DAMAGES OR IN RESPECT OF
+ANY CLAIM UNDER ANY TORT, CONTRACT, STRICT LIABILITY, NEGLIGENCE OR
+OTHER THEORY FOR ANY DIRECT, INDIRECT, INCIDENTAL, CONSEQUENTIAL,
+PUNITIVE, SPECIAL OR EXEMPLARY DAMAGES, EVEN IF IT HAS BEEN ADVISED OF
+THE POSSIBILITY OF SUCH DAMAGES, OR FOR ANY AMOUNTS IN EXCESS OF THE
+AMOUNT YOU PAID ALIAS-i FOR THIS LICENSE.  YOU MUST PASS THIS ENTIRE
+LICENSE, INCLUDING SPECIFICALLY THIS DISCLAIMER AND LIMITATION OF
+LIABILITY, ON WHENEVER YOU DISTRIBUTE THE SOFTWARE.

From 710ae4ba94598b313f4b6d0a000df3ab31c975f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Erik=20F=C3=A4=C3=9Fler?= <chew@gmx.net>
Date: Fri, 16 Sep 2022 16:04:02 +0200
Subject: [PATCH 238/269] Create LICENSE

After reading the license that LingPipe actually uses and seeing that it states "4.  Whether you distribute the Software or not, if you distribute any
    computer program that is not the Software, but that (a) is distributed
    in connection with the Software or contains any part of the Software,
    (b) causes the Software to be copied or modified (i.e., ran, used, or
    executed), such as through an API call, or (c) uses any output of the
    Software, then you must distribute that other computer program under a
    license defined as a Free Software License by the Free Software
    Foundation or an Approved Open Source License by the Open Source
    Initiative." I am now positive that we can use the BSD 2-Clause license after all, since it is an OSI-approved open source license.
---
 LICENSE | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 LICENSE

diff --git a/LICENSE b/LICENSE
new file mode 100644
index 000000000..7e93520be
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,25 @@
+BSD 2-Clause License
+
+Copyright (c) 2022, JULIE Lab
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

From a2977f14e3c5ddb9e0f56dfbb9932e12072bb4ec Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 7 Oct 2022 12:28:17 +0200
Subject: [PATCH 239/269] Fix issues with temporary file management with
 GNormPlus.

---
 .../jcore/ae/gnp/GNormPlusProcessing.java     |  12 +-
 .../gnp/GNormPlusBioCMultiplier.java          |   8 ++
 .../multiplier/gnp/GNormPlusDBMultiplier.java |  77 ++++++-------
 .../gnp/GNormPlusMultiplierLogic.java         | 104 ++++++++++++------
 .../jcore/ae/gnp/config/setup_do_ner.txt      |  11 +-
 .../desc/jcore-gnormplus-bioc-multiplier.xml  |   7 ++
 .../desc/jcore-gnormplus-db-multiplier.xml    |   7 ++
 .../jcore/reader/BioCCasPopulator.java        |  52 +++++----
 .../consumer/gnp/BioCDocumentPopulator.java   |   2 +
 9 files changed, 169 insertions(+), 111 deletions(-)

diff --git a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/ae/gnp/GNormPlusProcessing.java b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/ae/gnp/GNormPlusProcessing.java
index 0ecf76626..2c131183b 100644
--- a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/ae/gnp/GNormPlusProcessing.java
+++ b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/ae/gnp/GNormPlusProcessing.java
@@ -11,6 +11,7 @@
 import javax.xml.stream.XMLStreamException;
 import java.io.IOException;
 import java.io.InputStream;
+import java.nio.file.FileAlreadyExistsException;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.Date;
@@ -45,17 +46,22 @@ public static BioCCollection createEmptyJulieLabBioCCollection() {
      */
     public static Path processWithGNormPlus(BioCCollection bioCCollection, String outputDirectory) throws AnalysisEngineProcessException {
         String collectionId = "collection_including_" + bioCCollection.getDocument(0).getID();
-        final Path filePath = Path.of("tmp", collectionId + ".xml");
+        final Path filePath = Path.of("jcore-gnp-tmp", collectionId + ".xml");
         final Path outputFilePath = Path.of(outputDirectory.isBlank() ? "tmp" : outputDirectory, collectionId + "processed.xml");
         try {
-            if (!Files.exists(filePath.getParent()))
-                Files.createDirectory(filePath.getParent());
+            try {
+                if (!Files.exists(filePath.getParent()))
+                    Files.createDirectory(filePath.getParent());
+            } catch (FileAlreadyExistsException e) {
+                // OK, so another process created it, not a big deal.
+            }
             if (!Files.exists(outputFilePath.getParent()))
                 Files.createDirectories(outputFilePath.getParent());
             try (BioCCollectionWriter w = new BioCCollectionWriter(filePath)) {
                 w.writeCollection(bioCCollection);
             }
             GNormPlus.processFile(filePath.toString(), filePath.getFileName().toString(), outputFilePath.toString(), System.currentTimeMillis(), "Test");
+            Files.delete(filePath);
         } catch (IOException | XMLStreamException e) {
             log.error("Could not process document {}", collectionId);
             throw new AnalysisEngineProcessException(e);
diff --git a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusBioCMultiplier.java b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusBioCMultiplier.java
index 6c333447e..677dca3ec 100644
--- a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusBioCMultiplier.java
+++ b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusBioCMultiplier.java
@@ -18,6 +18,8 @@
 import java.io.IOException;
 import java.util.Optional;
 
+import static de.julielab.jcore.ae.gnp.GNormPlusAnnotator.DESC_FOCUS_SPECIES;
+
 @ResourceMetaData(name = "JCoRe GNormPlus BioC Multiplier", description = "A CAS multiplier to be used with the GNormPlus BioC Format multiplier reader. It wraps the JULIE Lab variant of the GNormPlus gene ID mapper. It is a multiplier because this enables batch-processing of documents with GNormPlus which makes the processing more efficient.", vendor = "JULIE Lab Jena, Germany")
 @TypeCapability(inputs = {}, outputs = {"de.julielab.jcore.types.ConceptMention", "de.julielab.jcore.types.Organism"})
 public class GNormPlusBioCMultiplier extends GNormPlusFormatMultiplier {
@@ -25,6 +27,7 @@ public class GNormPlusBioCMultiplier extends GNormPlusFormatMultiplier {
     public static final String PARAM_GENE_TYPE_NAME = GNormPlusAnnotator.PARAM_GENE_TYPE_NAME;
     public static final String PARAM_OUTPUT_DIR = GNormPlusAnnotator.PARAM_OUTPUT_DIR;
     public static final String PARAM_GNP_SETUP_FILE = GNormPlusAnnotator.PARAM_GNP_SETUP_FILE;
+    public static final String PARAM_FOCUS_SPECIES = GNormPlusAnnotator.PARAM_FOCUS_SPECIES;
     private final static Logger log = LoggerFactory.getLogger(GNormPlusDBMultiplier.class);
     @ConfigurationParameter(name = PARAM_ADD_GENES, mandatory = false, defaultValue = "false", description = GNormPlusAnnotator.DESC_ADD_GENES)
     private boolean addGenes;
@@ -34,6 +37,8 @@ public class GNormPlusBioCMultiplier extends GNormPlusFormatMultiplier {
     private String geneTypeName;
     @ConfigurationParameter(name = PARAM_OUTPUT_DIR, mandatory = false, description = GNormPlusAnnotator.DESC_OUTPUT_DIR)
     private String outputDirectory;
+    @ConfigurationParameter(name = PARAM_FOCUS_SPECIES, mandatory = false, description = DESC_FOCUS_SPECIES)
+    private String focusSpecies;
 
     private BioCDocumentPopulator bioCDocumentPopulator;
 
@@ -49,6 +54,9 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
         } catch (ClassNotFoundException e) {
             log.error("Gene annotation class {} could not be found.", geneTypeName, e);
             throw new ResourceInitializationException(e);
+        } catch (Throwable t) {
+            log.error("Could not create BioCDocumentPopulator instance", t);
+            throw new ResourceInitializationException(t);
         }
         try {
             multiplierLogic = new GNormPlusMultiplierLogic(aContext, bioCDocumentPopulator, () -> {
diff --git a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusDBMultiplier.java b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusDBMultiplier.java
index bccf44903..e7f301356 100644
--- a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusDBMultiplier.java
+++ b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusDBMultiplier.java
@@ -18,6 +18,9 @@
 
 import java.io.IOException;
 import java.util.Optional;
+import java.util.function.Function;
+
+import static de.julielab.jcore.ae.gnp.GNormPlusAnnotator.DESC_FOCUS_SPECIES;
 
 @ResourceMetaData(name = "JCoRe GNormPlus Database Multiplier", description = "A CAS multiplier to be used with the DB XMI multiplier reader. It wraps the JULIE Lab variant of the GNormPlus gene ID mapper. It is a multiplier because this enables batch-processing of documents with GNormPlus which makes the processing more efficient.", vendor = "JULIE Lab Jena, Germany")
 @TypeCapability(inputs = {}, outputs = {"de.julielab.jcore.types.ConceptMention", "de.julielab.jcore.types.Organism"})
@@ -26,6 +29,7 @@ public class GNormPlusDBMultiplier extends XmiDBMultiplier {
     public static final String PARAM_GENE_TYPE_NAME = GNormPlusAnnotator.PARAM_GENE_TYPE_NAME;
     public static final String PARAM_OUTPUT_DIR = GNormPlusAnnotator.PARAM_OUTPUT_DIR;
     public static final String PARAM_GNP_SETUP_FILE = GNormPlusAnnotator.PARAM_GNP_SETUP_FILE;
+    public static final String PARAM_FOCUS_SPECIES = GNormPlusAnnotator.PARAM_FOCUS_SPECIES;
     private final static Logger log = LoggerFactory.getLogger(GNormPlusDBMultiplier.class);
     @ConfigurationParameter(name = PARAM_ADD_GENES, mandatory = false, defaultValue = "false", description = GNormPlusAnnotator.DESC_ADD_GENES)
     private boolean addGenes;
@@ -35,6 +39,8 @@ public class GNormPlusDBMultiplier extends XmiDBMultiplier {
     private String geneTypeName;
     @ConfigurationParameter(name = PARAM_OUTPUT_DIR, mandatory = false, description = GNormPlusAnnotator.DESC_OUTPUT_DIR)
     private String outputDirectory;
+    @ConfigurationParameter(name = PARAM_FOCUS_SPECIES, mandatory = false, description = DESC_FOCUS_SPECIES)
+    private String focusSpecies;
 
     private BioCDocumentPopulator bioCDocumentPopulator;
 //    private BioCCasPopulator bioCCasPopulator;
@@ -44,7 +50,7 @@ public class GNormPlusDBMultiplier extends XmiDBMultiplier {
 //    private List<byte[]> cachedCasData;
 
     private GNormPlusMultiplierLogic multiplierLogic;
-
+private static boolean shutdownHookInstalled = false;
     @Override
     public void initialize(UimaContext aContext) throws ResourceInitializationException {
         super.initialize(aContext);
@@ -69,56 +75,39 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
             log.error("Could not initialize GNormPlus", e);
             throw new ResourceInitializationException(e);
         }
+        synchronized (GNormPlusDBMultiplier.class) {
+            final Runtime rt = Runtime.getRuntime();
+            rt.addShutdownHook(new Thread() {
+                @Override
+                public void run() {
+                    super.run();
+                    final long totalMemory = rt.totalMemory();
+                    final long freeMemory = rt.freeMemory();
+                    final long maxMemory = rt.maxMemory();
+                    Function<Long, Double> b2g = bytes -> bytes / 1000000000d;
+                    System.out.println("[Shutdow hook] Free memory: " + freeMemory + "bytes (" + b2g.apply(freeMemory) + "GB), max memory: " + maxMemory + "bytes ("+b2g.apply(maxMemory) + "GB), total memory: " + totalMemory + "bytes ("+b2g.apply(totalMemory) + "GB)");
+                }
+            });
+        }
     }
 
     @Override
     public boolean hasNext() {
-//        return currentCollectionIndex < currentGNormPlusProcessedCollection.getDocmentCount() || super.hasNext();
-        return multiplierLogic.hasNext();
+        try {
+            return multiplierLogic.hasNext();
+        } catch (Throwable t) {
+            log.error("Error when checking hasNext() on multiplier", t);
+        }
+        return false;
     }
 
     @Override
     public AbstractCas next() throws AnalysisEngineProcessException {
-        return multiplierLogic.next();
-//        if (bioCCasPopulator == null || bioCCasPopulator.documentsLeftInCollection() == 0) {
-//            final BioCCollection gnormPlusInputCollection = GNormPlusProcessing.createEmptyJulieLabBioCCollection();
-//            while (super.hasNext()) {
-//                final JCas jCas = (JCas) super.next();
-//                final BioCDocument bioCDocument = bioCDocumentPopulator.populate(jCas);
-//                gnormPlusInputCollection.addDocument(bioCDocument);
-//                try {
-//                    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
-//                    final GZIPOutputStream os = new GZIPOutputStream(baos);
-//                    XmiCasSerializer.serialize(jCas.getCas(), os);
-//                    cachedCasData.add(baos.toByteArray());
-//                    jCas.release();
-//                } catch (IOException | SAXException e) {
-//                    log.error("Error when serializing CAS data for caching purposes.");
-//                    throw new AnalysisEngineProcessException(e);
-//                }
-//            }
-//            currentCollectionIndex = 0;
-//            final Path outputFilePath = GNormPlusProcessing.processWithGNormPlus(gnormPlusInputCollection, outputDirectory);
-//            try {
-//                bioCCasPopulator = new BioCCasPopulator(outputFilePath);
-//            } catch (XMLStreamException | IOException e) {
-//                log.error("Could not read GNormPlus output from {}", outputFilePath);
-//                throw new AnalysisEngineProcessException(e);
-//            }
-//        }
-//        byte[] currentCasData = cachedCasData.get(currentCollectionIndex);
-//        final JCas jCas = getEmptyJCas();
-//        try {
-//            XmiCasDeserializer.deserialize(new GZIPInputStream(new ByteArrayInputStream(currentCasData)), jCas.getCas());
-//        } catch (SAXException | IOException e) {
-//            log.error("Could not deserialize cached CAS data");
-//            throw new AnalysisEngineProcessException(e);
-//        }
-//        bioCCasPopulator.populateWithNextDocument(jCas, true);
-//        bioCCasPopulator.clearDocument(currentCollectionIndex);
-//        cachedCasData.set(currentCollectionIndex, null);
-//        ++currentCollectionIndex;
-//
-//        return jCas;
+        try {
+            return multiplierLogic.next();
+        } catch (Throwable t) {
+            log.error("Error when retrieving next multiplier CAS", t);
+            throw new AnalysisEngineProcessException(t);
+        }
     }
 }
diff --git a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java
index 70f6bad13..e3db091f7 100644
--- a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java
+++ b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java
@@ -20,10 +20,12 @@
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Optional;
+import java.util.function.Function;
 import java.util.function.Supplier;
 import java.util.zip.GZIPInputStream;
 import java.util.zip.GZIPOutputStream;
@@ -56,50 +58,82 @@ public GNormPlusMultiplierLogic(UimaContext aContext, BioCDocumentPopulator bioC
     }
 
     public AbstractCas next() throws AnalysisEngineProcessException {
-        if (bioCCasPopulator == null || bioCCasPopulator.documentsLeftInCollection() == 0) {
-            currentCollectionIndex = 0;
-            cachedCasData.clear();
-            final BioCCollection gnormPlusInputCollection = GNormPlusProcessing.createEmptyJulieLabBioCCollection();
-            while (baseMultiplierHasNext.get()) {
-                final JCas jCas = baseMultiplierNext.get();
-                final BioCDocument bioCDocument = bioCDocumentPopulator.populate(jCas);
-                gnormPlusInputCollection.addDocument(bioCDocument);
-                try (final ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
-                    try (final GZIPOutputStream os = new GZIPOutputStream(baos)) {
-                        XmiCasSerializer.serialize(jCas.getCas(), os);
+        try {
+            // Process the incoming documents batch-wise (this is why we use a multiplier here so we have access
+            // to whole batches). This checks if we still have processed documents or if we need to process the next
+            // batch.
+            if (bioCCasPopulator == null || bioCCasPopulator.documentsLeftInCollection() == 0) {
+                System.out.println("Memory before batch processing:");
+                final Runtime rt = Runtime.getRuntime();
+                final long totalMemory = rt.totalMemory();
+                final long freeMemory = rt.freeMemory();
+                final long maxMemory = rt.maxMemory();
+                Function<Long, Double> b2g = bytes -> bytes / 1000000000d;
+                System.out.println("[GNPMultiplierLogic] Free memory: " + freeMemory + "bytes (" + b2g.apply(freeMemory) + "GB), max memory: " + maxMemory + "bytes ("+b2g.apply(maxMemory) + "GB), total memory: " + totalMemory + "bytes ("+b2g.apply(totalMemory) + "GB)");
+                currentCollectionIndex = 0;
+                final BioCCollection gnormPlusInputCollection = GNormPlusProcessing.createEmptyJulieLabBioCCollection();
+                // We first retrieve the whole current batch from the super multiplier and serialize the CASes
+                // to XMI. We do that because we only have one CAS at a time and, thus, must store the data
+                // of the whole batch. We can then later deserialize the documents and add the GNP annotations to it.
+                // This allows batch-processing within GNP which reduces file writes and reads (GNP internally
+                // writes a lot of temporary files that contain all the documents given to it in one single batch file).
+                cachedCasData.clear();
+                while (baseMultiplierHasNext.get()) {
+                    final JCas jCas = baseMultiplierNext.get();
+                    final BioCDocument bioCDocument = bioCDocumentPopulator.populate(jCas);
+                    gnormPlusInputCollection.addDocument(bioCDocument);
+                    try (final ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
+                        try (final GZIPOutputStream os = new GZIPOutputStream(baos)) {
+                            XmiCasSerializer.serialize(jCas.getCas(), os);
+                        }
+                        cachedCasData.add(baos.toByteArray());
+                        jCas.release();
+                    } catch (IOException | SAXException e) {
+                        log.error("Error when serializing CAS data for caching purposes.");
+                        throw new AnalysisEngineProcessException(e);
                     }
-                    cachedCasData.add(baos.toByteArray());
-                    jCas.release();
-                } catch (IOException | SAXException e) {
-                    log.error("Error when serializing CAS data for caching purposes.");
+                }
+                // now process the whole batch with GNP
+                final Path outputFilePath = GNormPlusProcessing.processWithGNormPlus(gnormPlusInputCollection, outputDirectory);
+                try {
+                    bioCCasPopulator = new BioCCasPopulator(outputFilePath);
+                    // delete the GNP output if we don't want to keep it
+                    if(outputDirectory.isBlank()) {
+                        Files.delete(outputFilePath);
+                    }
+                } catch (XMLStreamException | IOException e) {
+                    log.error("Could not read GNormPlus output from {}", outputFilePath);
                     throw new AnalysisEngineProcessException(e);
                 }
             }
-            final Path outputFilePath = GNormPlusProcessing.processWithGNormPlus(gnormPlusInputCollection, outputDirectory);
-            try {
-                bioCCasPopulator = new BioCCasPopulator(outputFilePath);
-            } catch (XMLStreamException | IOException e) {
-                log.error("Could not read GNormPlus output from {}", outputFilePath);
+            // Now we have a batch of documents processed with GNP. Get the next document from the cache and
+            // add the GNP annotations to it.
+            byte[] currentCasData = cachedCasData.get(currentCollectionIndex);
+            final JCas jCas = multiplierGetEmptyCas.get();
+            try (InputStream is = new GZIPInputStream(new ByteArrayInputStream(currentCasData))) {
+                XmiCasDeserializer.deserialize(is, jCas.getCas());
+            } catch (SAXException | IOException e) {
+                log.error("Could not deserialize cached CAS data");
                 throw new AnalysisEngineProcessException(e);
             }
-        }
-        byte[] currentCasData = cachedCasData.get(currentCollectionIndex);
-        final JCas jCas = multiplierGetEmptyCas.get();
-        try (InputStream is = new GZIPInputStream(new ByteArrayInputStream(currentCasData))) {
-            XmiCasDeserializer.deserialize(is, jCas.getCas());
-        } catch (SAXException | IOException e) {
-            log.error("Could not deserialize cached CAS data");
-            throw new AnalysisEngineProcessException(e);
-        }
-        bioCCasPopulator.populateWithNextDocument(jCas, true);
-        bioCCasPopulator.clearDocument(currentCollectionIndex);
-        cachedCasData.set(currentCollectionIndex, null);
-        ++currentCollectionIndex;
+            bioCCasPopulator.populateWithNextDocument(jCas, true);
+            bioCCasPopulator.clearDocument(currentCollectionIndex);
+            cachedCasData.set(currentCollectionIndex, null);
+            ++currentCollectionIndex;
 
-        return jCas;
+            return jCas;
+        } catch (AnalysisEngineProcessException e) {
+            log.error("Error while retrieving or processing data for/with GNormPlus", e);
+            throw e;
+        }
     }
 
     public boolean hasNext() {
-        return bioCCasPopulator != null && bioCCasPopulator.documentsLeftInCollection() > 0 || baseMultiplierHasNext.get();
+        try {
+            return bioCCasPopulator != null && bioCCasPopulator.documentsLeftInCollection() > 0 || baseMultiplierHasNext.get();
+        } catch (Throwable t) {
+            log.error("Could not determine hasNext()", t);
+            throw t;
+        }
     }
 }
diff --git a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/ae/gnp/config/setup_do_ner.txt b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/ae/gnp/config/setup_do_ner.txt
index d6443b9d6..73009c799 100644
--- a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/ae/gnp/config/setup_do_ner.txt
+++ b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/ae/gnp/config/setup_do_ner.txt
@@ -19,13 +19,14 @@
 	DictionaryFolder = Dictionary
 	GNRModel = Dictionary/GNR.Model
 	SCModel = Dictionary/SimConcept.Model
-	GeneIDMatch = True
-	HomologeneID = False
 [Modules]
-	GeneSpeciesRecognitionOnly = False
-	SpeciesAssignmentOnly = False
-	GeneNormalizationOnly = False
+	SpeciesRecognition = True
+	GeneRecognition = True
+	SpeciesAssignment = True
+	GeneNormalization = True
 [Others]
+	GeneIDMatch = False
+	HomologeneID = False
 	Normalization2Protein = False
 	ShowUnNormalizedMention = False
 	DeleteTmp = True
\ No newline at end of file
diff --git a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-bioc-multiplier.xml b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-bioc-multiplier.xml
index ab3cd4ad6..174371c8b 100644
--- a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-bioc-multiplier.xml
+++ b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-bioc-multiplier.xml
@@ -36,6 +36,13 @@
                 <multiValued>false</multiValued>
                 <mandatory>false</mandatory>
             </configurationParameter>
+            <configurationParameter>
+                <name>FocusSpecies</name>
+                <description>If given, all gene mentions are assigned to this NCBI taxonomy ID, i.e. species recognition is omitted.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
             <configurationParameter>
                 <name>CostosysConfigFile</name>
                 <description>Path to the CoStoSys configuration file that is used by the XMI DB writer in the same pipeline, if any. The XMI DB writer requires information about the XMI documents that are already in the database and should be updated with new annotations. The current highest XMI ID must be known to avoid ID collisions. To obtain the ID, it must be received from the database beforehand. This allows to retrieve the information batch wise instead of one-by-one which would be much slower.</description>
diff --git a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-db-multiplier.xml b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-db-multiplier.xml
index 8db6f5a78..6a5e4a666 100644
--- a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-db-multiplier.xml
+++ b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-db-multiplier.xml
@@ -36,6 +36,13 @@
                 <multiValued>false</multiValued>
                 <mandatory>false</mandatory>
             </configurationParameter>
+            <configurationParameter>
+                <name>FocusSpecies</name>
+                <description>If given, all gene mentions are assigned to this NCBI taxonomy ID, i.e. species recognition is omitted.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
             <configurationParameter>
                 <name>LogFinalXmi</name>
                 <description>For debugging purposes. If set to true, before parsing the final XMI data assembled from the annotation modules, it is printed to console.</description>
diff --git a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
index 73853e9d2..0f72a4581 100644
--- a/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
+++ b/jcore-gnp-bioc-reader/src/main/java/de/julielab/jcore/reader/BioCCasPopulator.java
@@ -213,6 +213,7 @@ private void setDocumentText(JCas jCas, BioCDocument document) {
                             // for figures and tables we have actually no means to distinguish between captions and the actual object; mainly because the actual objects have so far not been part of the CAS documents; thus, this can only be a caption until the objects themselves are added
                             passageAnnotation = new Caption(jCas, offset, passageEnd);
                             ((Caption) passageAnnotation).setCaptionType(type.get());
+                            break;
                         default:
                             log.debug("Unhandled passage type {}", type.get());
                             passageAnnotation = new Zone(jCas, offset, passageEnd);
@@ -228,46 +229,49 @@ private void setDocumentText(JCas jCas, BioCDocument document) {
 
     private void addSpeciesAnnotation(BioCAnnotation annotation, JCas jCas) throws MissingInfonException {
         Optional<String> taxId = annotation.getInfon("NCBI Taxonomy");
-        if (!taxId.isPresent())
-            throw new MissingInfonException("Species annotation does not specify its taxonomy ID: " + annotation);
+//        if (!taxId.isPresent())
+//            throw new MissingInfonException("Species annotation does not specify its taxonomy ID: " + annotation);
         // the "total location" is the span from the minimum location value to the maximum location value;
         // for GNormPlus, there are no discontinuing annotations anyway
         BioCLocation location = annotation.getTotalLocation();
         Organism organism = new Organism(jCas, location.getOffset(), location.getOffset() + location.getLength());
-        ResourceEntry resourceEntry = new ResourceEntry(jCas, organism.getBegin(), organism.getEnd());
-        resourceEntry.setSource("NCBI Taxonomy");
-        resourceEntry.setComponentId(GNormPlusFormatMultiplierReader.class.getCanonicalName());
-        resourceEntry.setEntryId(taxId.get());
-        FSArray resourceEntryList = new FSArray(jCas, 1);
-        resourceEntryList.set(0, resourceEntry);
-        organism.setResourceEntryList(resourceEntryList);
+        if (taxId.isPresent()) {
+            ResourceEntry resourceEntry = new ResourceEntry(jCas, organism.getBegin(), organism.getEnd());
+            resourceEntry.setSource("NCBI Taxonomy");
+            resourceEntry.setComponentId(GNormPlusFormatMultiplierReader.class.getCanonicalName());
+            resourceEntry.setEntryId(taxId.get());
+            FSArray resourceEntryList = new FSArray(jCas, 1);
+            resourceEntryList.set(0, resourceEntry);
+            organism.setResourceEntryList(resourceEntryList);
+        }
         organism.addToIndexes();
     }
 
     private void addGeneAnnotation(BioCAnnotation annotation, JCas jCas) throws MissingInfonException {
         Optional<String> geneId = annotation.getInfon("NCBI Gene");
-        if (!geneId.isPresent())
-            throw new MissingInfonException("Gene annotation does not specify its gene ID: " + annotation);
+//        if (!geneId.isPresent())
+//            throw new MissingInfonException("Gene annotation does not specify its gene ID: " + annotation);
         // the "total location" is the span from the minimum location value to the maximum location value;
         // for GNormPlus, there are no discontinuing annotations anyway
         BioCLocation location = annotation.getTotalLocation();
         Gene gene = new Gene(jCas, location.getOffset(), location.getOffset() + location.getLength());
         gene.setComponentId(GNormPlusFormatMultiplierReader.class.getCanonicalName());
         gene.setSpecificType("Gene");
-        // one gene mention might have multiple IDs when there are ranges or enumerations, e.g. "IL2-5", "B7-1 and B7-2" or "B7-1/2"
-        String[] geneIds = geneId.get().split(";");
-        FSArray resourceEntryList = new FSArray(jCas, geneIds.length);
-        for (int i = 0; i < geneIds.length; i++) {
-            ResourceEntry resourceEntry = new ResourceEntry(jCas, gene.getBegin(), gene.getEnd());
-            // 9999 ist the GeNo score for exact matches; GNP only recognized exact dictionary matches and transfers
-            // their IDs to other forms under certain circumstances (abbreviations, for example)
-            resourceEntry.setConfidence("9999");
-            resourceEntry.setSource("NCBI Gene");
-            resourceEntry.setComponentId(GNormPlusFormatMultiplierReader.class.getCanonicalName());
-            resourceEntry.setEntryId(geneIds[i]);
-            resourceEntryList.set(i, resourceEntry);
+        if (geneId.isPresent()) { // one gene mention might have multiple IDs when there are ranges or enumerations, e.g. "IL2-5", "B7-1 and B7-2" or "B7-1/2"
+            String[] geneIds = geneId.get().split(";");
+            FSArray resourceEntryList = new FSArray(jCas, geneIds.length);
+            for (int i = 0; i < geneIds.length; i++) {
+                ResourceEntry resourceEntry = new ResourceEntry(jCas, gene.getBegin(), gene.getEnd());
+                // 9999 is the GeNo score for exact matches; GNP only recognizes exact dictionary matches and transfers
+                // their IDs to other forms under certain circumstances (abbreviations, for example)
+                resourceEntry.setConfidence("9999");
+                resourceEntry.setSource("NCBI Gene");
+                resourceEntry.setComponentId(GNormPlusFormatMultiplierReader.class.getCanonicalName());
+                resourceEntry.setEntryId(geneIds[i]);
+                resourceEntryList.set(i, resourceEntry);
+            }
+            gene.setResourceEntryList(resourceEntryList);
         }
-        gene.setResourceEntryList(resourceEntryList);
         gene.addToIndexes();
     }
 
diff --git a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
index 7c8b3a413..64f1b17df 100644
--- a/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
+++ b/jcore-gnp-bioc-writer/src/main/java/de/julielab/jcore/consumer/gnp/BioCDocumentPopulator.java
@@ -57,6 +57,9 @@ public BioCDocument populate(JCas jCas) {
                         // abstract sections are part of the AbstractText which is handled below
                         titleType = "null";
                         break;
+                    case "other":
+                        titleType = "other_title";
+                        break; // handled explicitly: must not fall through into the "unhandled" default branch
                     default:
                         log.debug("Unhandled title type {}", titleTypeString);
                         titleType = "other_title";

From 34524ff2fc695b32178d3175dc53e1f370d80c45 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 7 Oct 2022 12:28:47 +0200
Subject: [PATCH 240/269] Normalize spaces in the CAS text of PMC documents.

---
 .../jcore/reader/pmc/CasPopulator.java         |  9 ++++++++-
 .../jcore/reader/pmc/PMCReaderTest.java        | 18 ++++++++++++++++--
 .../jcore/reader/xmi/XmiDBMultiplier.java      |  1 +
 3 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
index 5eedd46fa..a3633959b 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
@@ -1,6 +1,7 @@
 package de.julielab.jcore.reader.pmc;
 
 import de.julielab.jcore.reader.pmc.parser.*;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
 import org.slf4j.Logger;
@@ -110,7 +111,13 @@ private StringBuilder populateCas(ParsingResult result, StringBuilder sb) {
                 break;
             case TEXT:
                 TextParsingResult textParsingResult = (TextParsingResult) result;
-                sb.append(textParsingResult.getText());
+                final String text = textParsingResult.getText();
+                // some special handling for documents that contain formatting tabs, newlines or no-break-spaces in the text
+                boolean textBeginsWithWhitespace = text.isEmpty() ? false : Character.isWhitespace(text.charAt(0));
+                boolean sbEndsWithWhitespace = sb.length() == 0 ? false : Character.isWhitespace(sb.charAt(sb.length()-1));
+                if (textBeginsWithWhitespace && !sbEndsWithWhitespace)
+                    sb.append(" ");
+                sb.append(StringUtils.normalizeSpace(text));
                 break;
             case NONE:
                 // do nothing
diff --git a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCReaderTest.java b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCReaderTest.java
index 3a79a51e8..3c7a8dc9a 100644
--- a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCReaderTest.java
+++ b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCReaderTest.java
@@ -33,6 +33,20 @@
 import static org.junit.jupiter.api.Assertions.*;
 
 public class PMCReaderTest {
+
+    @Test
+    public void testErrordoc() throws Exception {
+        // read a single file, parse it and print its document text for manual review
+        JCas cas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-document-meta-pubmed-types",
+                "de.julielab.jcore.types.jcore-document-structure-pubmed-types");
+        CollectionReader reader = CollectionReaderFactory.createReader(PMCReader.class, PMCReader.PARAM_INPUT,
+                "errordoc.xml");
+        while (reader.hasNext()) {
+            reader.getNext(cas.getCas());
+            System.out.println(cas.getDocumentText());
+        }
+    }
+
     @Test
     public void testPmcReader1() throws Exception {
         // read a single file, parse it and right it to XMI for manual review
@@ -231,7 +245,7 @@ public void testTables() throws Exception {
             assertNotNull(table.getObjectTitle());
             Title tabelTitle = table.getObjectTitle();
             if (tablenum == 0) {
-                assertEquals("Table 1", tabelTitle.getCoveredText());
+                assertEquals("Table 1", tabelTitle.getCoveredText());
                 // the whitespace is actually a no-break space; note that the
                 // last '1' is actually the digit 1 and not a part of the
                 // codepoint
@@ -260,7 +274,7 @@ public void testFigures() throws Exception {
             assertNotNull(figure.getObjectTitle());
             Title tabelTitle = figure.getObjectTitle();
             if (tablenum == 0) {
-                assertEquals("Fig. 1", tabelTitle.getCoveredText());
+                assertEquals("Fig. 1", tabelTitle.getCoveredText());
                 // the whitespace is actually a no-break space; note that the
                 // last '1' is actually the digit 1 and not a part of the
                 // codepoint
diff --git a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/XmiDBMultiplier.java b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/XmiDBMultiplier.java
index 50e7527a2..da16d4ef0 100644
--- a/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/XmiDBMultiplier.java
+++ b/jcore-xmi-db-reader/src/main/java/de/julielab/jcore/reader/xmi/XmiDBMultiplier.java
@@ -150,6 +150,7 @@ else if (truncate) {
                 log.debug("Truncating document with ID {} due to its text size of {} bytes which is greater than the given threshold of {} bytes.", pkElements, data[pkSize].length, truncationSize);
             }
         } catch (CasPopulationException e) {
+            log.error("Exception while populating CAS", e);
             throw new AnalysisEngineProcessException(e);
         }
     }

From 28de21ef39a05264f102882df4a4c6fb629940a1 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 7 Oct 2022 12:53:15 +0200
Subject: [PATCH 241/269] Remove defunct test.

---
 .../jcore/consumer/gnp/GNormPlusFormatWriterTest.java | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriterTest.java b/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriterTest.java
index 388b36324..1657961be 100644
--- a/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriterTest.java
+++ b/jcore-gnp-bioc-writer/src/test/java/de/julielab/jcore/consumer/gnp/GNormPlusFormatWriterTest.java
@@ -7,14 +7,12 @@
 import de.julielab.jcore.types.pubmed.Header;
 import org.apache.commons.io.FileUtils;
 import org.apache.uima.analysis_engine.AnalysisEngine;
-import org.apache.uima.cas.impl.XmiCasDeserializer;
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.resource.ResourceInitializationException;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
 
-import java.io.FileInputStream;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.List;
@@ -120,13 +118,4 @@ public void omitEmptyDocuments2() throws Exception {
         // assert that no empty documents were written into the collection
         assertThat(Path.of(BASEDIR.toString(), "bioc_collections_0", "bioc_collection_0_0.xml")).doesNotExist();
     }
-
-    @Test
-    public void muh() throws Exception {
-        final JCas jCas = TestDocumentGenerator.createTestJCas();
-        XmiCasDeserializer.deserialize(new FileInputStream("12486105.xmi"),  jCas.getCas());
-        final AnalysisEngine writer = getWriterInstance(1, 1);
-        writer.process(jCas);
-    }
-
 }

From a42605729ec72d0d13ed2f0691dc172c8e9121e4 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 11 Oct 2022 16:37:29 +0200
Subject: [PATCH 242/269] Fix a problem where dictionary entries with German
 umlauts were not found in text.

Reordered normalization and lower-casing, removed lowercasing from the transliterator to remove normalization mis-matches between dictionary creation and CAS text normalization.
---
 .../chunking/ChunkerProviderImplAlt.java      |  8 ++----
 .../ConfigurableChunkerProviderImplAlt.java   |  9 +++---
 .../uima/GazetteerAnnotator.java              | 28 +++++++++++--------
 .../utils/StringNormalizerForChunking.java    | 27 ++++++++++++++----
 .../StringNormalizerForChunkingTest.java      | 12 ++++----
 .../uima/GazetteerAnnotatorTest.java          | 16 ++---------
 .../src/test/resources/pehc.dict              |  3 +-
 7 files changed, 58 insertions(+), 45 deletions(-)

diff --git a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ChunkerProviderImplAlt.java b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ChunkerProviderImplAlt.java
index 302621969..175653bf5 100644
--- a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ChunkerProviderImplAlt.java
+++ b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ChunkerProviderImplAlt.java
@@ -271,9 +271,7 @@ private void readDictionary(InputStream dictFileStream) throws IOException, Anal
 			bf = new BufferedReader(new InputStreamReader(dictFileStream));
 			String line = "";
 
-			Transliterator transliterator = null;
-			if (transliterate)
-				transliterator = Transliterator.getInstance("NFD; [:Nonspacing Mark:] Remove; NFC; Lower");
+			Transliterator transliterator  = Transliterator.getInstance("NFD; [:Nonspacing Mark:] Remove; NFC");
 
 			TokenizerFactory tokenizerFactory = null;
 			if (normalize)
@@ -293,11 +291,11 @@ private void readDictionary(InputStream dictFileStream) throws IOException, Anal
 					continue;
 
 				if (normalize) {
-					term = StringNormalizerForChunking.normalizeString(term, tokenizerFactory).string;
+					term = StringNormalizerForChunking.normalizeString(term, tokenizerFactory, transliterator).string;
 				}
 				if (transliterate)
 					term = transliterator.transform(term);
-				if (useApproximateMatching && !caseSensitive && !transliterate)
+				if (useApproximateMatching && !caseSensitive)
 					term = term.toLowerCase();
 
 				String label = values[1].trim();
diff --git a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ConfigurableChunkerProviderImplAlt.java b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ConfigurableChunkerProviderImplAlt.java
index f0809f759..f319562bd 100644
--- a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ConfigurableChunkerProviderImplAlt.java
+++ b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/ConfigurableChunkerProviderImplAlt.java
@@ -232,8 +232,8 @@ private void readDictionary(InputStream dictFileStream) throws IOException, Anal
             String line = "";
 
             Transliterator transliterator = null;
-            if (transliterate)
-                transliterator = Transliterator.getInstance("NFD; [:Nonspacing Mark:] Remove; NFC; Lower");
+//                transliterator = Transliterator.getInstance("NFD; [:Nonspacing Mark:] Remove; NFC; Lower");
+            transliterator = Transliterator.getInstance("NFD; [:Nonspacing Mark:] Remove; NFC");
 
             TokenizerFactory tokenizerFactory = null;
             if (normalize)
@@ -253,11 +253,12 @@ private void readDictionary(InputStream dictFileStream) throws IOException, Anal
                     continue;
 
                 if (normalize) {
-                    term = StringNormalizerForChunking.normalizeString(term, tokenizerFactory).string;
+                    term = StringNormalizerForChunking.normalizeString(term, tokenizerFactory, transliterator).string;
                 }
                 if (transliterate)
                     term = transliterator.transform(term);
-                if (useApproximateMatching && !caseSensitive && !transliterate)
+                // the exact matcher takes the caseSensitive switch as a parameter, we don't need to do it ourselves
+                if (useApproximateMatching && !caseSensitive)
                     term = term.toLowerCase();
 
                 String label = values[1].trim();
diff --git a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotator.java b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotator.java
index 35e02f576..b2a534d9f 100644
--- a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotator.java
+++ b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotator.java
@@ -227,7 +227,7 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
         try {
             provider = (ChunkerProvider) getContext().getResourceObject(CHUNKER_RESOURCE_NAME);
             gazetteer = provider.getChunker();
-			stopWords = provider.getStopWords();
+            stopWords = provider.getStopWords();
 //            String[] stopwordArray = {"a", "about", "above", "across", "after", "afterwards", "again", "against",
 //                    "all", "almost", "alone", "along", "already", "also", "although", "always", "am", "among",
 //                    "amongst", "amoungst", "amount", "an", "and", "another", "any", "anyhow", "anyone", "anything",
@@ -281,9 +281,10 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
 
         Boolean transliterateBoolean = provider.getTransliterate();// (Boolean)
         // aContext.getConfigParameterValue(PARAM_TRANSLITERATE_TEXT);
-        if (transliterateBoolean || !provider.getCaseSensitive()) {
-            transliterator = Transliterator.getInstance("NFD; [:Nonspacing Mark:] Remove; NFC; Lower");
-        }
+//        if (transliterateBoolean) {
+//            transliterator = Transliterator.getInstance("NFD; [:Nonspacing Mark:] Remove; NFC; Lower");
+        transliterator = Transliterator.getInstance("NFD; [:Nonspacing Mark:] Remove; NFC");
+//        }
         LOGGER.info("Transliterate CAS document text (i.e. transform accented characters to their base forms): {}",
                 provider.getTransliterate());
 
@@ -309,20 +310,25 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException {
         String docText = aJCas.getDocumentText();
         if (docText == null || docText.length() == 0)
             return;
-        if (provider.getUseApproximateMatching() && !provider.getTransliterate() && !provider.getCaseSensitive())
-            // We use the transliterator because it does lowercasing and also solves issues that could arise due
-            // to the normal docText.toLowerCase() call which would break special characters sometimes
+        // normalization includes transliteration
+        if (provider.getTransliterate() && !provider.getNormalize())
             docText = transliterator.transform(docText);
         NormalizedString normalizedDocText = null;
         if (provider.getNormalize()) {
             if (provider.getNormalizePlural()) {
                 OffsetSet pluralOffsets = StreamSupport.stream(Spliterators.spliterator(aJCas.<PennBioIEPOSTag>getAnnotationIndex(PennBioIEPOSTag.type).iterator(), 0, 0), false).filter(tag -> tag.getValue().equals("NNS")).map(tag -> Range.between(tag.getBegin(), tag.getEnd())).collect(Collectors.toCollection(OffsetSet::new));
                 normalizedDocText = StringNormalizerForChunking.normalizeString(docText, normalizationTokenFactory, true, pluralOffsets, transliterator);
-            }else {
-                normalizedDocText = StringNormalizerForChunking.normalizeString(docText, normalizationTokenFactory,
-                        transliterator);
+            } else {
+                normalizedDocText = StringNormalizerForChunking.normalizeString(docText, normalizationTokenFactory, transliterator);
             }
         }
+        // exact matching has a switch for case sensitivity, so we can save the work here
+        if (!provider.getCaseSensitive() && provider.getUseApproximateMatching()) {
+            if (provider.getNormalize())
+                normalizedDocText.string = normalizedDocText.string.toLowerCase();
+            else
+                docText = docText.toLowerCase();
+        }
 
         IndexTermGenerator<Long> longOffsetTermGenerator = TermGenerators.longOffsetTermGenerator();
         JCoReHashMapAnnotationIndex<Long, ConceptMention> conceptMentionIndex = new JCoReHashMapAnnotationIndex<>(
@@ -536,7 +542,7 @@ private void add2Cas(JCas aJCas, Chunk chunk, NormalizedString normalizedDocText
         // byte character encodings. This security measure won't correct the underlying error but avoid errors
         // due to invalid offsets.
         int start = Math.min(aJCas.getDocumentText().length(), Math.max(0, provider.getNormalize() ? normalizedDocText.getOriginalOffset(chunk.start()) : chunk.start()));
-        int end = Math.min(aJCas.getDocumentText().length(), Math.max(0,provider.getNormalize() ? normalizedDocText.getOriginalOffset(chunk.end()) : chunk.end()));
+        int end = Math.min(aJCas.getDocumentText().length(), Math.max(0, provider.getNormalize() ? normalizedDocText.getOriginalOffset(chunk.end()) : chunk.end()));
 
         try {
             if (mantraMode) {
diff --git a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/utils/StringNormalizerForChunking.java b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/utils/StringNormalizerForChunking.java
index b12b5de39..3172f5601 100644
--- a/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/utils/StringNormalizerForChunking.java
+++ b/jcore-lingpipegazetteer-ae/src/main/java/de/julielab/jcore/ae/lingpipegazetteer/utils/StringNormalizerForChunking.java
@@ -11,6 +11,7 @@
 
 public class StringNormalizerForChunking {
 
+
     private static Set<Character> charsToDelete = new HashSet<>();
 
     static {
@@ -41,6 +42,22 @@ public class StringNormalizerForChunking {
         charsToDelete.add('´');
         charsToDelete.add('"');
         charsToDelete.add('#');
+
+        // this would normalize German umlauts like Hörsturz -> Hoersturz
+        // I leave it here for the future but don't add it right now because I don't want to make this Transliterator
+        // a static field due to Thread safety and also don't have time now to refactor this all
+//        String rules = "[\\u00E4{a\\u0308}] > ae; " +
+//                " [\\u00F6{o\\u0308}] > oe;" +
+//                " [\\u00FC{u\\u0308}] > ue;" +
+//                " {[\\u00C4{A\\u0308}]}[:Lowercase:] > Ae;" +
+//                " {[\\u00D6{O\\u0308}]}[:Lowercase:] > Oe;" +
+//                " {[\\u00DC{U\\u0308}]}[:Lowercase:] > Ue;" +
+//                " [\\u00C4{A\\u0308}] > AE;" +
+//                " [\\u00D6{O\\u0308}] > OE;" +
+//                " [\\u00DC{U\\u0308}] > UE;" +
+//                " [\\u20AC] > EUR;";
+//
+//        germanUmlautTransliterator = Transliterator.createFromRules("de_EUR-ASCII", rules, Transliterator.FORWARD);
     }
 
     /**
@@ -93,6 +110,8 @@ public static NormalizedString normalizeString(String str) {
      */
     public static NormalizedString normalizeString(String str, TokenizerFactory tokenizerFactory, boolean normalizePlural, OffsetSet pluralPositions,
                                                    Transliterator transliterator) {
+
+
         boolean stemming = tokenizerFactory instanceof
                 PorterStemmerTokenizerFactory;
 
@@ -141,8 +160,8 @@ public static NormalizedString normalizeString(String str, TokenizerFactory toke
                     ns.offsetMap.putAll(deleteCandidateOffsetMap);
                     deleteCandidateOffsetMap.clear();
                 }
-                if (transliterator != null)
-                    token = transliterator.transform(token);
+                // the transliterator is optional: the overload without one delegates here with null, so keep the guard
+                if (transliterator != null) token = transliterator.transform(token);
                 // plural s, only when no stemming is done
                 // an even better normalization would be to use the lemma, of course
                 Range<Integer> tokenOffsets = Range.between(tokenizer.lastTokenStartPosition(), tokenizer.lastTokenEndPosition());
@@ -181,10 +200,6 @@ public static NormalizedString normalizeString(String str, TokenizerFactory toke
         return normalizeString(str, tokenizerFactory, false, null, transliterator);
     }
 
-    public static NormalizedString normalizeString(String str, TokenizerFactory tokenizerFactory) {
-        return normalizeString(str, tokenizerFactory, false, null, null);
-    }
-
     public static NormalizedString normalizeString(String str, boolean normalizePlural, OffsetSet pluralPositions, TokenizerFactory tokenizerFactory) {
         return normalizeString(str, tokenizerFactory, normalizePlural, pluralPositions, null);
     }
diff --git a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/StringNormalizerForChunkingTest.java b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/StringNormalizerForChunkingTest.java
index 06cc79ca0..b186a6c2d 100644
--- a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/StringNormalizerForChunkingTest.java
+++ b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/chunking/StringNormalizerForChunkingTest.java
@@ -16,6 +16,8 @@
 import static org.junit.jupiter.api.Assertions.*;
 
 public class StringNormalizerForChunkingTest {
+
+	private Transliterator transliterator = Transliterator.getInstance("NFD; [:Nonspacing Mark:] Remove; NFC");
 	@Test
 	public void testTextNormalization() {
 		String term;
@@ -114,7 +116,7 @@ public void testNormalizeWithTokenizer() {
 		str = "We saw Parkinson's Disease and S(H)P 1 in a sadly-formed circumvention of applicance.";
 		PorterStemmerTokenizerFactory tokenizerFactory = new PorterStemmerTokenizerFactory(
 				IndoEuropeanTokenizerFactory.INSTANCE);
-		ns = StringNormalizerForChunking.normalizeString(str, tokenizerFactory);
+		ns = StringNormalizerForChunking.normalizeString(str, tokenizerFactory, transliterator);
 		assertEquals("We saw Parkinson Diseas and S(H)P 1 in a sadli-form circumvent of applic.",
 				ns.string, "Normalization was wrong: ");
 		assertEquals( Integer.valueOf(0),  ns.getOriginalOffset(Integer.valueOf(0)), "Offset wrong: ");
@@ -125,14 +127,14 @@ public void testNormalizeWithTokenizer() {
 		assertEquals( Integer.valueOf(50),  ns.getOriginalOffset(Integer.valueOf(47)), "Offset wrong: ");
 		assertEquals( Integer.valueOf(56),  ns.getOriginalOffset(Integer.valueOf(51)), "Offset wrong: ");
 		str = "We go to James' to have some coffee'ses.";
-		ns = StringNormalizerForChunking.normalizeString(str, tokenizerFactory);
+		ns = StringNormalizerForChunking.normalizeString(str, tokenizerFactory, transliterator);
 		assertEquals( "We go to Jame' to have some coffe'se.",  ns.string, "Normalization was wrong: ");
 		assertEquals( Integer.valueOf(0),  ns.getOriginalOffset(Integer.valueOf(0)), "Offset wrong: ");
 		assertEquals( Integer.valueOf(9),  ns.getOriginalOffset(Integer.valueOf(9)), "Offset wrong: ");
 		assertEquals( Integer.valueOf(14),  ns.getOriginalOffset(Integer.valueOf(13)), "Offset wrong: ");
 		assertEquals( Integer.valueOf(35),  ns.getOriginalOffset(Integer.valueOf(33)), "Offset wrong: ");
 		str = "We have some 'serious things' to talk about.";
-		ns = StringNormalizerForChunking.normalizeString(str, tokenizerFactory);
+		ns = StringNormalizerForChunking.normalizeString(str, tokenizerFactory, transliterator);
 		assertEquals( "We have some 'seriou thing' to talk about.",  ns.string, "Normalization was wrong: ");
 		assertEquals( Integer.valueOf(0),  ns.getOriginalOffset(Integer.valueOf(0)), "Offset wrong: ");
 		assertEquals( Integer.valueOf(12),  ns.getOriginalOffset(Integer.valueOf(12)), "Offset wrong: ");
@@ -142,7 +144,7 @@ public void testNormalizeWithTokenizer() {
 		assertEquals( Integer.valueOf(30),  ns.getOriginalOffset(Integer.valueOf(28)), "Offset wrong: ");
 
 		str = "test dosing unit KLRg1 killer cell lectin like receptor G2 Parkinson's Disease";
-		ns = StringNormalizerForChunking.normalizeString(str, tokenizerFactory);
+		ns = StringNormalizerForChunking.normalizeString(str, tokenizerFactory, transliterator);
 		System.out.println(ns.string);
 
 	}
@@ -161,7 +163,7 @@ public void testNormalizePlural() {
 		String str;
 		str = "glutathione transferases are evil";
 		TokenizerFactory tokenizerFactory = new IndoEuropeanTokenizerFactory();
-		NormalizedString ns = StringNormalizerForChunking.normalizeString(str, tokenizerFactory, true, new OffsetSet(List.of(Range.between(12, 24))), null);
+		NormalizedString ns = StringNormalizerForChunking.normalizeString(str, tokenizerFactory, true, new OffsetSet(List.of(Range.between(12, 24))), transliterator);
 		assertEquals("glutathione transferase are evil", ns.string);
 	}
 }
diff --git a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java
index 2671605dc..5705b7b82 100644
--- a/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java
+++ b/jcore-lingpipegazetteer-ae/src/test/java/de/julielab/jcore/ae/lingpipegazetteer/uima/GazetteerAnnotatorTest.java
@@ -48,7 +48,6 @@
 import java.util.List;
 import java.util.*;
 
-import static java.nio.charset.StandardCharsets.UTF_8;
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.junit.jupiter.api.Assertions.*;
 
@@ -708,23 +707,14 @@ public void testOffsetIssueWhenNoTransliteration() throws Exception {
 
 		JCas jCas = annotator.newJCas();
 
-		jCas.setDocumentText("Clinical Features and Course of Patients with Peripheral Exudative Hemorrhagic Chorioretinopathy.\nTo evaluate the clinical characteristics of patients who were followed in our clinic with the diagnosis of peripheral exudative hemorrhagic chorioretinopathy (PEHC).\nMedical records of 12 patients who were diagnosed with PEHC in İstanbul University İstanbul Faculty of Medicine, Department of Ophthalmology between July 2006 and June 2014 were reviewed retrospectively.\nThis study included 21 eyes of 12 patients. Four (33.3%) of the patients were male and 8 (66.7%) were female and ages ranged between 73 and 89 years. Eight (66.7%) of the patients were referred to us with the diagnosis of choroidal mass. Unilateral involvement was found in 3 and bilateral involvement in 9 patients. Temporal quadrants were involved in all eyes. Fifteen eyes (71.4%) had subretinal hemorrhage and hemorrhagic/serous retinal pigment epithelial detachment, 11 (52.4%) had lipid exudation, 5 (23.8%) had chronic retinal pigment epithelium alterations, 2 (9.5%) had subretinal fibrosis and 1 (4.8%) had vitreous hemorrhage. PEHC lesions were accompanied by drusen in 11 eyes (52.4%), geographic atrophy in 2 eyes (9.5%), and choroidal neovascularization scar in 2 eyes (9.5%).");
+//		jCas.setDocumentText("Clinical Features and Course of Patients with Peripheral Exudative Hemorrhagic Chorioretinopathy.\nTo evaluate the clinical characteristics of patients who were followed in our clinic with the diagnosis of peripheral exudative hemorrhagic chorioretinopathy (PEHC).\nMedical records of 12 patients who were diagnosed with PEHC in İstanbul University İstanbul Faculty of Medicine, Department of Ophthalmology between July 2006 and June 2014 were reviewed retrospectively.\nThis study included 21 eyes of 12 patients. Four (33.3%) of the patients were male and 8 (66.7%) were female and ages ranged between 73 and 89 years. Eight (66.7%) of the patients were referred to us with the diagnosis of choroidal mass. Unilateral involvement was found in 3 and bilateral involvement in 9 patients. Temporal quadrants were involved in all eyes. Fifteen eyes (71.4%) had subretinal hemorrhage and hemorrhagic/serous retinal pigment epithelial detachment, 11 (52.4%) had lipid exudation, 5 (23.8%) had chronic retinal pigment epithelium alterations, 2 (9.5%) had subretinal fibrosis and 1 (4.8%) had vitreous hemorrhage. PEHC lesions were accompanied by drusen in 11 eyes (52.4%), geographic atrophy in 2 eyes (9.5%), and choroidal neovascularization scar in 2 eyes (9.5%).");
+		jCas.setDocumentText("[...] diagnosed with PEHC in İstanbul University İstanbul Faculty of Medicine, Department of Ophthalmology [...].\n[...] PEHC lesions were accompanied by drusen [...].");
 		annotator.process(jCas);
 
 		List<String> entityStrings = new ArrayList<>();
 		for (EntityMention g : jCas.<EntityMention>getAnnotationIndex(EntityMention.type)) {
 			entityStrings.add(g.getCoveredText());
 		}
-		assertThat(entityStrings).containsExactly("PEHC", "PEHC", "PEHC", "lesions");
-	}
-
-	@Test
-	public void testEncoding() {
-		String s1 = "İ";
-		String s2 = "i̇";
-		System.out.println(s1.getBytes(UTF_8).length);
-		System.out.println(s1.length());
-		System.out.println(s2.getBytes(UTF_8).length);
-		System.out.println(s2.length());
+		assertThat(entityStrings).containsExactly("PEHC", "İstanbul", "İstanbul", "PEHC", "lesions");
 	}
 }
diff --git a/jcore-lingpipegazetteer-ae/src/test/resources/pehc.dict b/jcore-lingpipegazetteer-ae/src/test/resources/pehc.dict
index 79830708e..4e6b0f5ec 100644
--- a/jcore-lingpipegazetteer-ae/src/test/resources/pehc.dict
+++ b/jcore-lingpipegazetteer-ae/src/test/resources/pehc.dict
@@ -1,2 +1,3 @@
 PEHC	Gene
-lesions	Gene
\ No newline at end of file
+lesions	Gene
+İstanbul	Gene
\ No newline at end of file

From a5f1283b509186581ca661d679797e46cd498031 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 13 Oct 2022 08:43:47 +0200
Subject: [PATCH 243/269] Resolve #150.

---
 jcore-gnormplus-ae/component.meta             |  10 +-
 jcore-gnormplus-ae/pom.xml                    |   5 +
 .../gnp/GNormPlusBioCMultiplier.java          |   2 +-
 .../gnp/GNormPlusMultiplierLogic.java         |   8 -
 .../gnp/GNormPlusXMLDBMultiplier.java         |  91 +++++++++++
 ...ier.java => GNormPlusXmiDBMultiplier.java} |  32 +---
 ... => jcore-gnormplus-xmi-db-multiplier.xml} |   4 +-
 .../jcore-gnormplus-xml-db-multiplier.xml     | 141 ++++++++++++++++++
 8 files changed, 252 insertions(+), 41 deletions(-)
 create mode 100644 jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusXMLDBMultiplier.java
 rename jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/{GNormPlusDBMultiplier.java => GNormPlusXmiDBMultiplier.java} (72%)
 rename jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/{jcore-gnormplus-db-multiplier.xml => jcore-gnormplus-xmi-db-multiplier.xml} (98%)
 create mode 100644 jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xml-db-multiplier.xml

diff --git a/jcore-gnormplus-ae/component.meta b/jcore-gnormplus-ae/component.meta
index ff1cf9b35..2314d53be 100644
--- a/jcore-gnormplus-ae/component.meta
+++ b/jcore-gnormplus-ae/component.meta
@@ -1,7 +1,7 @@
 {
     "categories": [
-        "ae",
-        "multiplier"
+        "multiplier",
+        "ae"
     ],
     "description": "Wrapper for the JULIE Lab variant of the GNormPlus gene ID mapper.",
     "descriptors": [
@@ -11,7 +11,11 @@
         },
         {
             "category": "multiplier",
-            "location": "de.julielab.jcore.multiplier.gnp.desc.jcore-gnormplus-db-multiplier"
+            "location": "de.julielab.jcore.multiplier.gnp.desc.jcore-gnormplus-xmi-db-multiplier"
+        },
+        {
+            "category": "multiplier",
+            "location": "de.julielab.jcore.multiplier.gnp.desc.jcore-gnormplus-xml-db-multiplier"
         },
         {
             "category": "ae",
diff --git a/jcore-gnormplus-ae/pom.xml b/jcore-gnormplus-ae/pom.xml
index 0c3f302dc..17dbaf538 100644
--- a/jcore-gnormplus-ae/pom.xml
+++ b/jcore-gnormplus-ae/pom.xml
@@ -31,6 +31,11 @@
             <artifactId>jcore-xmi-db-reader</artifactId>
             <version>${project.parent.version}</version>
         </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-xml-db-reader</artifactId>
+            <version>${project.parent.version}</version>
+        </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>julielab-gnormplus</artifactId>
diff --git a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusBioCMultiplier.java b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusBioCMultiplier.java
index 677dca3ec..24e758523 100644
--- a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusBioCMultiplier.java
+++ b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusBioCMultiplier.java
@@ -28,7 +28,7 @@ public class GNormPlusBioCMultiplier extends GNormPlusFormatMultiplier {
     public static final String PARAM_OUTPUT_DIR = GNormPlusAnnotator.PARAM_OUTPUT_DIR;
     public static final String PARAM_GNP_SETUP_FILE = GNormPlusAnnotator.PARAM_GNP_SETUP_FILE;
     public static final String PARAM_FOCUS_SPECIES = GNormPlusAnnotator.PARAM_FOCUS_SPECIES;
-    private final static Logger log = LoggerFactory.getLogger(GNormPlusDBMultiplier.class);
+    private final static Logger log = LoggerFactory.getLogger(GNormPlusXmiDBMultiplier.class);
     @ConfigurationParameter(name = PARAM_ADD_GENES, mandatory = false, defaultValue = "false", description = GNormPlusAnnotator.DESC_ADD_GENES)
     private boolean addGenes;
     @ConfigurationParameter(name = PARAM_GNP_SETUP_FILE, mandatory = false, description = GNormPlusAnnotator.DESC_GNP_SETUP_FILE)
diff --git a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java
index e3db091f7..2337358a3 100644
--- a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java
+++ b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java
@@ -25,7 +25,6 @@
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Optional;
-import java.util.function.Function;
 import java.util.function.Supplier;
 import java.util.zip.GZIPInputStream;
 import java.util.zip.GZIPOutputStream;
@@ -63,13 +62,6 @@ public AbstractCas next() throws AnalysisEngineProcessException {
             // to whole batches). This checks if we still have processed documents or if we need to process the next
             // batch.
             if (bioCCasPopulator == null || bioCCasPopulator.documentsLeftInCollection() == 0) {
-                System.out.println("Memory before batch processing:");
-                final Runtime rt = Runtime.getRuntime();
-                final long totalMemory = rt.totalMemory();
-                final long freeMemory = rt.freeMemory();
-                final long maxMemory = rt.maxMemory();
-                Function<Long, Double> b2g = bytes -> bytes / 1000000000d;
-                System.out.println("[GNPMultiplierLogic] Free memory: " + freeMemory + "bytes (" + b2g.apply(freeMemory) + "GB), max memory: " + maxMemory + "bytes ("+b2g.apply(maxMemory) + "GB), total memory: " + totalMemory + "bytes ("+b2g.apply(totalMemory) + "GB)");
                 currentCollectionIndex = 0;
                 final BioCCollection gnormPlusInputCollection = GNormPlusProcessing.createEmptyJulieLabBioCCollection();
                 // We first retrieve the whole current batch from the super multiplier and serialize the CASes
diff --git a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusXMLDBMultiplier.java b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusXMLDBMultiplier.java
new file mode 100644
index 000000000..c00192b47
--- /dev/null
+++ b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusXMLDBMultiplier.java
@@ -0,0 +1,91 @@
+package de.julielab.jcore.multiplier.gnp;
+
+import de.julielab.jcore.ae.gnp.GNormPlusAnnotator;
+import de.julielab.jcore.consumer.gnp.BioCDocumentPopulator;
+import de.julielab.jcore.reader.xml.XMLDBMultiplier;
+import de.julielab.jcore.types.Gene;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.AbstractCas;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.descriptor.ResourceMetaData;
+import org.apache.uima.fit.descriptor.TypeCapability;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.Optional;
+
+import static de.julielab.jcore.ae.gnp.GNormPlusAnnotator.DESC_FOCUS_SPECIES;
+
+@ResourceMetaData(name = "JCoRe GNormPlus XML Database Multiplier", description = "A CAS multiplier to be used with the DB XML multiplier reader in place of the DB XML multiplier. It wraps the JULIE Lab variant of the GNormPlus gene ID mapper. It is a multiplier because this enables batch-processing of documents with GNormPlus which makes the processing more efficient.", vendor = "JULIE Lab Jena, Germany")
+@TypeCapability(inputs = {}, outputs = {"de.julielab.jcore.types.ConceptMention", "de.julielab.jcore.types.Organism"})
+public class GNormPlusXMLDBMultiplier extends XMLDBMultiplier {
+    public static final String PARAM_ADD_GENES = GNormPlusAnnotator.PARAM_ADD_GENES;
+    public static final String PARAM_GENE_TYPE_NAME = GNormPlusAnnotator.PARAM_GENE_TYPE_NAME;
+    public static final String PARAM_OUTPUT_DIR = GNormPlusAnnotator.PARAM_OUTPUT_DIR;
+    public static final String PARAM_GNP_SETUP_FILE = GNormPlusAnnotator.PARAM_GNP_SETUP_FILE;
+    public static final String PARAM_FOCUS_SPECIES = GNormPlusAnnotator.PARAM_FOCUS_SPECIES;
+    private final static Logger log = LoggerFactory.getLogger(GNormPlusXMLDBMultiplier.class);
+    private static boolean shutdownHookInstalled = false;
+    @ConfigurationParameter(name = PARAM_ADD_GENES, mandatory = false, defaultValue = "false", description = GNormPlusAnnotator.DESC_ADD_GENES)
+    private boolean addGenes;
+    @ConfigurationParameter(name = PARAM_GNP_SETUP_FILE, mandatory = false, description = GNormPlusAnnotator.DESC_GNP_SETUP_FILE)
+    private String setupFile;
+    @ConfigurationParameter(name = PARAM_GENE_TYPE_NAME, mandatory = false, defaultValue = "de.julielab.jcore.types.Gene", description = GNormPlusAnnotator.DESC_GENE_TYPE_NAME)
+    private String geneTypeName;
+    @ConfigurationParameter(name = PARAM_OUTPUT_DIR, mandatory = false, description = GNormPlusAnnotator.DESC_OUTPUT_DIR)
+    private String outputDirectory;
+    @ConfigurationParameter(name = PARAM_FOCUS_SPECIES, mandatory = false, description = DESC_FOCUS_SPECIES)
+    private String focusSpecies;
+    private BioCDocumentPopulator bioCDocumentPopulator;
+    private GNormPlusMultiplierLogic multiplierLogic;
+
+    @Override
+    public void initialize(UimaContext aContext) throws ResourceInitializationException {
+        super.initialize(aContext);
+        addGenes = (boolean) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_ADD_GENES)).orElse(false);
+        geneTypeName = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_GENE_TYPE_NAME)).orElse(Gene.class.getCanonicalName());
+        try {
+            bioCDocumentPopulator = new BioCDocumentPopulator(addGenes, geneTypeName);
+        } catch (ClassNotFoundException e) {
+            log.error("Gene annotation class {} could not be found.", geneTypeName, e);
+            throw new ResourceInitializationException(e);
+        }
+        try {
+            multiplierLogic = new GNormPlusMultiplierLogic(aContext, bioCDocumentPopulator, () -> super.hasNext(), () -> {
+                try {
+                    return (JCas) super.next();
+                } catch (AnalysisEngineProcessException e) {
+                    log.error("Error when calling next() of the base multiplier.");
+                    throw new RuntimeException(e);
+                }
+            }, () -> getEmptyJCas());
+        } catch (IOException e) {
+            log.error("Could not initialize GNormPlus", e);
+            throw new ResourceInitializationException(e);
+        }
+    }
+
+    @Override
+    public boolean hasNext() {
+        try {
+            return multiplierLogic.hasNext();
+        } catch (Throwable t) {
+            log.error("Error when checking hasNext() on multiplier", t);
+        }
+        return false;
+    }
+
+    @Override
+    public AbstractCas next() throws AnalysisEngineProcessException {
+        try {
+            return multiplierLogic.next();
+        } catch (Throwable t) {
+            log.error("Error when retrieving next multiplier CAS", t);
+            throw new AnalysisEngineProcessException(t);
+        }
+    }
+}
diff --git a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusDBMultiplier.java b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusXmiDBMultiplier.java
similarity index 72%
rename from jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusDBMultiplier.java
rename to jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusXmiDBMultiplier.java
index e7f301356..b95aad464 100644
--- a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusDBMultiplier.java
+++ b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusXmiDBMultiplier.java
@@ -1,6 +1,5 @@
 package de.julielab.jcore.multiplier.gnp;
 
-import com.pengyifan.bioc.BioCCollection;
 import de.julielab.jcore.ae.gnp.GNormPlusAnnotator;
 import de.julielab.jcore.consumer.gnp.BioCDocumentPopulator;
 import de.julielab.jcore.reader.xmi.XmiDBMultiplier;
@@ -18,19 +17,19 @@
 
 import java.io.IOException;
 import java.util.Optional;
-import java.util.function.Function;
 
 import static de.julielab.jcore.ae.gnp.GNormPlusAnnotator.DESC_FOCUS_SPECIES;
 
-@ResourceMetaData(name = "JCoRe GNormPlus Database Multiplier", description = "A CAS multiplier to be used with the DB XMI multiplier reader. It wraps the JULIE Lab variant of the GNormPlus gene ID mapper. It is a multiplier because this enables batch-processing of documents with GNormPlus which makes the processing more efficient.", vendor = "JULIE Lab Jena, Germany")
+@ResourceMetaData(name = "JCoRe GNormPlus XMI Database Multiplier", description = "A CAS multiplier to be used with the DB XMI multiplier reader in place of the DB XMI multiplier. It wraps the JULIE Lab variant of the GNormPlus gene ID mapper. It is a multiplier because this enables batch-processing of documents with GNormPlus which makes the processing more efficient.", vendor = "JULIE Lab Jena, Germany")
 @TypeCapability(inputs = {}, outputs = {"de.julielab.jcore.types.ConceptMention", "de.julielab.jcore.types.Organism"})
-public class GNormPlusDBMultiplier extends XmiDBMultiplier {
+public class GNormPlusXmiDBMultiplier extends XmiDBMultiplier {
     public static final String PARAM_ADD_GENES = GNormPlusAnnotator.PARAM_ADD_GENES;
     public static final String PARAM_GENE_TYPE_NAME = GNormPlusAnnotator.PARAM_GENE_TYPE_NAME;
     public static final String PARAM_OUTPUT_DIR = GNormPlusAnnotator.PARAM_OUTPUT_DIR;
     public static final String PARAM_GNP_SETUP_FILE = GNormPlusAnnotator.PARAM_GNP_SETUP_FILE;
     public static final String PARAM_FOCUS_SPECIES = GNormPlusAnnotator.PARAM_FOCUS_SPECIES;
-    private final static Logger log = LoggerFactory.getLogger(GNormPlusDBMultiplier.class);
+    private final static Logger log = LoggerFactory.getLogger(GNormPlusXmiDBMultiplier.class);
+    private static boolean shutdownHookInstalled = false;
     @ConfigurationParameter(name = PARAM_ADD_GENES, mandatory = false, defaultValue = "false", description = GNormPlusAnnotator.DESC_ADD_GENES)
     private boolean addGenes;
     @ConfigurationParameter(name = PARAM_GNP_SETUP_FILE, mandatory = false, description = GNormPlusAnnotator.DESC_GNP_SETUP_FILE)
@@ -41,16 +40,9 @@ public class GNormPlusDBMultiplier extends XmiDBMultiplier {
     private String outputDirectory;
     @ConfigurationParameter(name = PARAM_FOCUS_SPECIES, mandatory = false, description = DESC_FOCUS_SPECIES)
     private String focusSpecies;
-
     private BioCDocumentPopulator bioCDocumentPopulator;
-//    private BioCCasPopulator bioCCasPopulator;
-
-    private BioCCollection currentGNormPlusProcessedCollection;
-//    private int currentCollectionIndex;
-//    private List<byte[]> cachedCasData;
-
     private GNormPlusMultiplierLogic multiplierLogic;
-private static boolean shutdownHookInstalled = false;
+
     @Override
     public void initialize(UimaContext aContext) throws ResourceInitializationException {
         super.initialize(aContext);
@@ -75,20 +67,6 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
             log.error("Could not initialize GNormPlus", e);
             throw new ResourceInitializationException(e);
         }
-        synchronized (GNormPlusDBMultiplier.class) {
-            final Runtime rt = Runtime.getRuntime();
-            rt.addShutdownHook(new Thread() {
-                @Override
-                public void run() {
-                    super.run();
-                    final long totalMemory = rt.totalMemory();
-                    final long freeMemory = rt.freeMemory();
-                    final long maxMemory = rt.maxMemory();
-                    Function<Long, Double> b2g = bytes -> bytes / 1000000000d;
-                    System.out.println("[Shutdow hook] Free memory: " + freeMemory + "bytes (" + b2g.apply(freeMemory) + "GB), max memory: " + maxMemory + "bytes ("+b2g.apply(maxMemory) + "GB), total memory: " + totalMemory + "bytes ("+b2g.apply(totalMemory) + "GB)");
-                }
-            });
-        }
     }
 
     @Override
diff --git a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-db-multiplier.xml b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xmi-db-multiplier.xml
similarity index 98%
rename from jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-db-multiplier.xml
rename to jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xmi-db-multiplier.xml
index 6a5e4a666..64e71d45d 100644
--- a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-db-multiplier.xml
+++ b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xmi-db-multiplier.xml
@@ -2,9 +2,9 @@
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <primitive>true</primitive>
-    <annotatorImplementationName>de.julielab.jcore.multiplier.gnp.GNormPlusDBMultiplier</annotatorImplementationName>
+    <annotatorImplementationName>de.julielab.jcore.multiplier.gnp.GNormPlusXmiDBMultiplier</annotatorImplementationName>
     <analysisEngineMetaData>
-        <name>JCoRe GNormPlus Database Multiplier</name>
+        <name>JCoRe GNormPlus XMI Database Multiplier</name>
         <description>A CAS multiplier to be used with the DB XMI multiplier reader. It wraps the JULIE Lab variant of the GNormPlus gene ID mapper. It is a multiplier because this enables batch-processing of documents with GNormPlus which makes the processing more efficient.</description>
         <vendor>JULIE Lab Jena, Germany</vendor>
         <configurationParameters>
diff --git a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xml-db-multiplier.xml b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xml-db-multiplier.xml
new file mode 100644
index 000000000..07cc4a817
--- /dev/null
+++ b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xml-db-multiplier.xml
@@ -0,0 +1,141 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+    <primitive>true</primitive>
+    <annotatorImplementationName>de.julielab.jcore.multiplier.gnp.GNormPlusXMLDBMultiplier</annotatorImplementationName>
+    <analysisEngineMetaData>
+        <name>JCoRe GNormPlus XML Database Multiplier</name>
+        <description>A CAS multiplier to be used with the DB XML multiplier reader in place of the DB XML multiplier. It wraps the JULIE Lab variant of the GNormPlus gene ID mapper. It is a multiplier because this enables batch-processing of documents with GNormPlus which makes the processing more efficient.</description>
+        <vendor>JULIE Lab Jena, Germany</vendor>
+        <configurationParameters>
+            <configurationParameter>
+                <name>AddGenes</name>
+                <description>If set to true, all Gene annotations in the CAS will be added to the BioC documents. The default type used is de.julielab.jcore.types.Gene. This can be changed with the GeneTypeName parameter.</description>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>GNormPlusSetupFile</name>
+                <description>File path or class path resource path to the setup.txt file for GNormPlus. If not specified, a default setup file is loaded that expects the Dictionary/ directory directly under the working directory, performs gene recognition with the CRF and thus expects the GNormPlus CRF directory directly under the working directory and maps the found genes to NCBI gene IDs for all organisms.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>GeneTypeName</name>
+                <description>The UIMA type denoting gene annotations that should be written into the BioC format when the AddGenes parameter is set to true.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>OutputDirectory</name>
+                <description>Optional. If specified, the GNormPlus output files in BioC format will be saved to the given directory. In this way, this component can be used directly as a BioC XML writer through the GNormPlus algorithm.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>FocusSpecies</name>
+                <description>If given, all gene mentions are assigned to this NCBI taxonomy ID, i.e. species recognition is omitted.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>RowMapping</name>
+                <description>In case that the CoStoSys active table schema specified more than two columns to be retrieved, the other columns need a mapping into the CAS.A mapping item has the following form: &lt;column index&gt;=&lt;uima type&gt;#&lt;type feature&gt;:&lt;feature datatype&gt;:defaultValue where the defaultValue is optional. Example: 2=de.julielab.jules.types.max_xmi_id#id:int:0 maps the content of the third (index 2, zero-based) retrieved column (may also belong to an additional table!) to feature "id" of the type "d.j.j.t.max_xmi_id" which is an int. In case there is no value returned from the database for a document, use a 0 as default.</description>
+                <type>String</type>
+                <multiValued>true</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>MappingFile</name>
+                <description>An XML mapping file following the specification required by the jcore-xml-mapper. The mapping file specifies how contents from an XML docuent are to be brought into the CAS.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>true</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>AddShaHash</name>
+                <description>For use with AnnotationDefinedFlowController and XMIDBWriter. Possible values: document_text, defaults to 'document_text' and thus doesn't need to be specified manually at the moment. This parameter needs to match the value for the same parameter given to the XMIDBWriter in this pipeline. Then, a comparison between the existing hash in the database and the new hash of the CAS read in this pipeline can be made. In case the hashes match, the CAS is directly routed to the components specified in the ToVisitKeys parameter, skipping all other components. Note that this only works with AAEs where the first component is an 'AnnotationControlledFlow'. Additionally, the DBProcessingMetaData#hasDocumentHashChanged is set. This can be used by the XMIDBWriter to omit the reset of mirror subsets when updating the base document when the actual CAS text stayed the same.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>DocumentTable</name>
+                <description>For use with AnnotationDefinedFlowController. String parameter indicating the name of the table where the XMI data and, thus, the hash is stored. The name must be schema qualified. Note that in this component, only the ToVisit annotation is created that determines which components to apply to a CAS with matching (unchanged) hash. The logic to actually control the CAS flow is contained in the AnnotationDefinedFlowController.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>DocumentTableSchema</name>
+                <description>For use with AnnotationDefinedFlowController. The name of the schema that the document table - given with the DocumentTable parameter - adheres to. Only the primary key part is required for hash value retrieval.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>ToVisitKeys</name>
+                <description>For use with AnnotationDefinedFlowController. Specifies the delegate AE keys of the AEs this CAS should still applied on although the hash has not changed. Can be null or empty indicating that no component should be applied to the CAS. The task of the AnnotationDefinedFlowController is then to read those annotations and route the CAS accordingly.</description>
+                <type>String</type>
+                <multiValued>true</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>AddToVisitKeys</name>
+                <description>Toggles the creation of annotations for the AnnotationDefinedFlowController. Only needed when such a flow controller is used in the pipeline. For details, see the description of ToVisitKeys.</description>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>AddUnchangedDocumentTextFlag</name>
+                <description>Toggles the addition of the 'document text is unchanged' flag. The value of this flag is determined via a SHA256 hash of the CAS document text. When DocumentTable and DocumentTableSchema are specified, the hash value of the document in storage is retrieved and compared to the current value. The flag is then set with respect to the comparison result.</description>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+        </configurationParameters>
+        <configurationParameterSettings>
+            <nameValuePair>
+                <name>AddGenes</name>
+                <value>
+                    <boolean>false</boolean>
+                </value>
+            </nameValuePair>
+            <nameValuePair>
+                <name>GeneTypeName</name>
+                <value>
+                    <string>de.julielab.jcore.types.Gene</string>
+                </value>
+            </nameValuePair>
+        </configurationParameterSettings>
+        <typeSystemDescription>
+            <imports>
+                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
+                <import name="de.julielab.jcore.types.jcore-semantics-biology-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
+            </imports>
+        </typeSystemDescription>
+        <fsIndexCollection/>
+        <capabilities>
+            <capability>
+                <inputs/>
+                <outputs>
+                    <type>de.julielab.jcore.types.ConceptMention</type>
+                    <type>de.julielab.jcore.types.Organism</type>
+                </outputs>
+                <languagesSupported/>
+            </capability>
+        </capabilities>
+        <operationalProperties>
+            <modifiesCas>true</modifiesCas>
+            <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+            <outputsNewCASes>true</outputsNewCASes>
+        </operationalProperties>
+    </analysisEngineMetaData>
+</analysisEngineDescription>
\ No newline at end of file

From 08cc57a3fe0ddc123963329bc549bcceeb09de67 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 13 Oct 2022 16:39:02 +0200
Subject: [PATCH 244/269] Add a parameter to omit GNP processing in case the
 document text was unchanged.

Also, add a used type system to the GNP PM DB multiplier descriptor.
---
 .../gnp/GNormPlusBioCMultiplier.java          |  3 +-
 .../gnp/GNormPlusMultiplierLogic.java         | 45 +++++++++++++------
 .../gnp/GNormPlusXMLDBMultiplier.java         |  7 ++-
 .../gnp/GNormPlusXmiDBMultiplier.java         |  3 +-
 .../jcore-gnormplus-xmi-db-multiplier.xml     |  1 +
 .../jcore-gnormplus-xml-db-multiplier.xml     |  7 +++
 .../jcore/ae/jsbd/main/SentenceAnnotator.java |  6 +++
 .../jcore/reader/pmc/PMCReaderTest.java       | 13 ------
 .../jcore-document-meta-extension-types.xml   |  2 +-
 9 files changed, 56 insertions(+), 31 deletions(-)

diff --git a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusBioCMultiplier.java b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusBioCMultiplier.java
index 24e758523..5e8eebab4 100644
--- a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusBioCMultiplier.java
+++ b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusBioCMultiplier.java
@@ -73,7 +73,8 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
                     log.error("Error when calling next() of the base multiplier.");
                     throw new RuntimeException(e);
                 }
-            }, () -> getEmptyJCas());
+            }, () -> getEmptyJCas(),
+                    false);
         } catch (IOException e) {
             log.error("Could not initialize GNormPlus", e);
             throw new ResourceInitializationException(e);
diff --git a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java
index 2337358a3..6ee4f2bf2 100644
--- a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java
+++ b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java
@@ -5,11 +5,13 @@
 import de.julielab.jcore.ae.gnp.GNormPlusProcessing;
 import de.julielab.jcore.consumer.gnp.BioCDocumentPopulator;
 import de.julielab.jcore.reader.BioCCasPopulator;
+import de.julielab.jcore.types.ext.DBProcessingMetaData;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.AbstractCas;
 import org.apache.uima.cas.impl.XmiCasDeserializer;
 import org.apache.uima.cas.impl.XmiCasSerializer;
+import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -40,9 +42,12 @@ public class GNormPlusMultiplierLogic {
     private Supplier<JCas> baseMultiplierNext;
     private Supplier<JCas> multiplierGetEmptyCas;
     private int currentCollectionIndex;
+    private int currentBiocResultCollectionIndex;
     private List<byte[]> cachedCasData;
+    private boolean skipUnchangedDocuments;
 
-    public GNormPlusMultiplierLogic(UimaContext aContext, BioCDocumentPopulator bioCDocumentPopulator, Supplier<Boolean> baseMultiplierHasNext, Supplier<JCas> baseMultiplierNext, Supplier<JCas> multiplierGetEmptyCas) throws IOException {
+    public GNormPlusMultiplierLogic(UimaContext aContext, BioCDocumentPopulator bioCDocumentPopulator, Supplier<Boolean> baseMultiplierHasNext, Supplier<JCas> baseMultiplierNext, Supplier<JCas> multiplierGetEmptyCas, boolean skipUnchangedDocuments) throws IOException {
+        this.skipUnchangedDocuments = skipUnchangedDocuments;
         String setupFile = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_GNP_SETUP_FILE)).orElse("/de/julielab/jcore/ae/gnp/config/setup_do_ner.txt");
         String focusSpecies = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_FOCUS_SPECIES)).orElse("");
         outputDirectory = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_OUTPUT_DIR)).orElse("");
@@ -52,6 +57,7 @@ public GNormPlusMultiplierLogic(UimaContext aContext, BioCDocumentPopulator bioC
         this.multiplierGetEmptyCas = multiplierGetEmptyCas;
         cachedCasData = new ArrayList<>();
         currentCollectionIndex = 0;
+        currentBiocResultCollectionIndex = 0;
 
         GNormPlusProcessing.initializeGNormPlus(setupFile, focusSpecies);
     }
@@ -72,8 +78,12 @@ public AbstractCas next() throws AnalysisEngineProcessException {
                 cachedCasData.clear();
                 while (baseMultiplierHasNext.get()) {
                     final JCas jCas = baseMultiplierNext.get();
-                    final BioCDocument bioCDocument = bioCDocumentPopulator.populate(jCas);
-                    gnormPlusInputCollection.addDocument(bioCDocument);
+                    final boolean isDocumentHashUnchanged = JCasUtil.selectSingle(jCas, DBProcessingMetaData.class).getIsDocumentHashUnchanged();
+                    // skip document if it is unchanged and skipping is enabled
+                    if (!(isDocumentHashUnchanged && skipUnchangedDocuments)) {
+                        final BioCDocument bioCDocument = bioCDocumentPopulator.populate(jCas);
+                        gnormPlusInputCollection.addDocument(bioCDocument);
+                    }
                     try (final ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
                         try (final GZIPOutputStream os = new GZIPOutputStream(baos)) {
                             XmiCasSerializer.serialize(jCas.getCas(), os);
@@ -86,16 +96,18 @@ public AbstractCas next() throws AnalysisEngineProcessException {
                     }
                 }
                 // now process the whole batch with GNP
-                final Path outputFilePath = GNormPlusProcessing.processWithGNormPlus(gnormPlusInputCollection, outputDirectory);
-                try {
-                    bioCCasPopulator = new BioCCasPopulator(outputFilePath);
-                    // delete the GNP output if we don't want to keep it
-                    if(outputDirectory.isBlank()) {
-                        Files.delete(outputFilePath);
+                if (gnormPlusInputCollection.getDocmentCount() > 0) {
+                    final Path outputFilePath = GNormPlusProcessing.processWithGNormPlus(gnormPlusInputCollection, outputDirectory);
+                    try {
+                        bioCCasPopulator = new BioCCasPopulator(outputFilePath);
+                        // delete the GNP output if we don't want to keep it
+                        if (outputDirectory.isBlank()) {
+                            Files.delete(outputFilePath);
+                        }
+                    } catch (XMLStreamException | IOException e) {
+                        log.error("Could not read GNormPlus output from {}", outputFilePath);
+                        throw new AnalysisEngineProcessException(e);
                     }
-                } catch (XMLStreamException | IOException e) {
-                    log.error("Could not read GNormPlus output from {}", outputFilePath);
-                    throw new AnalysisEngineProcessException(e);
                 }
             }
             // Now we have a batch of documents processed with GNP. Get the next document from the cache and
@@ -108,8 +120,13 @@ public AbstractCas next() throws AnalysisEngineProcessException {
                 log.error("Could not deserialize cached CAS data");
                 throw new AnalysisEngineProcessException(e);
             }
-            bioCCasPopulator.populateWithNextDocument(jCas, true);
-            bioCCasPopulator.clearDocument(currentCollectionIndex);
+            final boolean isDocumentHashUnchanged = JCasUtil.selectSingle(jCas, DBProcessingMetaData.class).getIsDocumentHashUnchanged();
+            // If the document is unchanged and we skip unchanged documents, we do not have a GNormPlus result for this
+            // document, skip.
+            if (!(isDocumentHashUnchanged && skipUnchangedDocuments)) {
+                bioCCasPopulator.populateWithNextDocument(jCas, true);
+                bioCCasPopulator.clearDocument(currentBiocResultCollectionIndex++);
+            }
             cachedCasData.set(currentCollectionIndex, null);
             ++currentCollectionIndex;
 
diff --git a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusXMLDBMultiplier.java b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusXMLDBMultiplier.java
index c00192b47..9d58264b1 100644
--- a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusXMLDBMultiplier.java
+++ b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusXMLDBMultiplier.java
@@ -28,6 +28,7 @@ public class GNormPlusXMLDBMultiplier extends XMLDBMultiplier {
     public static final String PARAM_OUTPUT_DIR = GNormPlusAnnotator.PARAM_OUTPUT_DIR;
     public static final String PARAM_GNP_SETUP_FILE = GNormPlusAnnotator.PARAM_GNP_SETUP_FILE;
     public static final String PARAM_FOCUS_SPECIES = GNormPlusAnnotator.PARAM_FOCUS_SPECIES;
+    public static final String PARAM_SKIP_UNCHANGED_DOCUMENTS = "SkipUnchangedDocuments";
     private final static Logger log = LoggerFactory.getLogger(GNormPlusXMLDBMultiplier.class);
     private static boolean shutdownHookInstalled = false;
     @ConfigurationParameter(name = PARAM_ADD_GENES, mandatory = false, defaultValue = "false", description = GNormPlusAnnotator.DESC_ADD_GENES)
@@ -40,6 +41,8 @@ public class GNormPlusXMLDBMultiplier extends XMLDBMultiplier {
     private String outputDirectory;
     @ConfigurationParameter(name = PARAM_FOCUS_SPECIES, mandatory = false, description = DESC_FOCUS_SPECIES)
     private String focusSpecies;
+    @ConfigurationParameter(name = PARAM_SKIP_UNCHANGED_DOCUMENTS, mandatory = false, description = "Whether to omit GNormPlus processing on documents that already exist in the XMI database table and whose document text has not changed.")
+    private boolean skipUnchangedDocuments;
     private BioCDocumentPopulator bioCDocumentPopulator;
     private GNormPlusMultiplierLogic multiplierLogic;
 
@@ -48,6 +51,7 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
         super.initialize(aContext);
         addGenes = (boolean) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_ADD_GENES)).orElse(false);
         geneTypeName = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_GENE_TYPE_NAME)).orElse(Gene.class.getCanonicalName());
+        skipUnchangedDocuments = (boolean) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_SKIP_UNCHANGED_DOCUMENTS)).orElse(false);
         try {
             bioCDocumentPopulator = new BioCDocumentPopulator(addGenes, geneTypeName);
         } catch (ClassNotFoundException e) {
@@ -62,7 +66,8 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
                     log.error("Error when calling next() of the base multiplier.");
                     throw new RuntimeException(e);
                 }
-            }, () -> getEmptyJCas());
+            }, () -> getEmptyJCas(),
+                skipUnchangedDocuments);
         } catch (IOException e) {
             log.error("Could not initialize GNormPlus", e);
             throw new ResourceInitializationException(e);
diff --git a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusXmiDBMultiplier.java b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusXmiDBMultiplier.java
index b95aad464..e90905e83 100644
--- a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusXmiDBMultiplier.java
+++ b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusXmiDBMultiplier.java
@@ -62,7 +62,8 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
                     log.error("Error when calling next() of the base multiplier.");
                     throw new RuntimeException(e);
                 }
-            }, () -> getEmptyJCas());
+            }, () -> getEmptyJCas(),
+                    false);
         } catch (IOException e) {
             log.error("Could not initialize GNormPlus", e);
             throw new ResourceInitializationException(e);
diff --git a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xmi-db-multiplier.xml b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xmi-db-multiplier.xml
index 64e71d45d..351cc8c56 100644
--- a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xmi-db-multiplier.xml
+++ b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xmi-db-multiplier.xml
@@ -83,6 +83,7 @@
                 <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
                 <import name="de.julielab.jcore.types.jcore-semantics-biology-types"/>
                 <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types"/>
             </imports>
         </typeSystemDescription>
         <fsIndexCollection/>
diff --git a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xml-db-multiplier.xml b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xml-db-multiplier.xml
index 07cc4a817..a0c7ce3e3 100644
--- a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xml-db-multiplier.xml
+++ b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xml-db-multiplier.xml
@@ -43,6 +43,13 @@
                 <multiValued>false</multiValued>
                 <mandatory>false</mandatory>
             </configurationParameter>
+            <configurationParameter>
+                <name>SkipUnchangedDocuments</name>
+                <description>Whether to omit GNormPlus processing on documents that already exist in the XMI database table and whose document text has not changed.</description>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
             <configurationParameter>
                 <name>RowMapping</name>
                 <description>In case that the CoStoSys active table schema specified more than two columns to be retrieved, the other columns need a mapping into the CAS.A mapping item has the following form: &lt;column index&gt;=&lt;uima type&gt;#&lt;type feature&gt;:&lt;feature datatype&gt;:defaultValue where the defaultValue is optional. Example: 2=de.julielab.jules.types.max_xmi_id#id:int:0 maps the content of the third (index 2, zero-based) retrieved column (may also belong to an additional table!) to feature "id" of the type "d.j.j.t.max_xmi_id" which is an int. In case there is no value returned from the database for a document, use a 0 as default.</description>
diff --git a/jcore-jsbd-ae/src/main/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotator.java b/jcore-jsbd-ae/src/main/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotator.java
index 0cd7354c5..583db41a1 100644
--- a/jcore-jsbd-ae/src/main/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotator.java
+++ b/jcore-jsbd-ae/src/main/java/de/julielab/jcore/ae/jsbd/main/SentenceAnnotator.java
@@ -29,6 +29,7 @@
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.text.AnnotationIndex;
 import org.apache.uima.fit.descriptor.ConfigurationParameter;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
@@ -150,6 +151,11 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException {
             if (StringUtils.isBlank(aJCas.getDocumentText())) {
                 final String docId = JCoReTools.getDocId(aJCas);
                 LOGGER.warn("The document text of document {} is empty.", docId);
+                final AnnotationIndex<Annotation> annotationIndex = aJCas.getAnnotationIndex();
+                LOGGER.warn("All annotations in CAS:");
+                for (Annotation a : annotationIndex) {
+                    System.out.println(a);
+                }
                 return;
             }
             JCoReCondensedDocumentText documentText;
diff --git a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCReaderTest.java b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCReaderTest.java
index 3c7a8dc9a..695b2918c 100644
--- a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCReaderTest.java
+++ b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCReaderTest.java
@@ -34,19 +34,6 @@
 
 public class PMCReaderTest {
 
-    @Test
-    public void testErrordoc() throws Exception {
-        // read a single file, parse it and right it to XMI for manual review
-        JCas cas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-document-meta-pubmed-types",
-                "de.julielab.jcore.types.jcore-document-structure-pubmed-types");
-        CollectionReader reader = CollectionReaderFactory.createReader(PMCReader.class, PMCReader.PARAM_INPUT,
-                "errordoc.xml");
-        while (reader.hasNext()) {
-            reader.getNext(cas.getCas());
-            System.out.println(cas.getDocumentText());
-        }
-    }
-
     @Test
     public void testPmcReader1() throws Exception {
         // read a single file, parse it and right it to XMI for manual review
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-document-meta-extension-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-document-meta-extension-types.xml
index 200ff0383..ec60f5b33 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-document-meta-extension-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-document-meta-extension-types.xml
@@ -27,7 +27,7 @@
         </featureDescription>
         <featureDescription>
           <name>isDocumentHashUnchanged</name>
-          <description>For use by the XMIDBWriter. Used to prohibit that mirror subsets reset to 'not processed' for this document when there was no change in the document text. That allows to update the base document without indicating that a reprocessing is required. This is useful when the document is updated by the distributor (e.g. PubMed) but the text contents have not changed.</description>
+          <description>To be set by the XML DB reader/multiplier and to be used (among others) by the XMIDBWriter. Used to prohibit that mirror subsets reset to 'not processed' for this document when there was no change in the document text. That allows to update the base document without indicating that a reprocessing is required. This is useful when the document is updated by the distributor (e.g. PubMed) but the text contents have not changed.</description>
           <rangeTypeName>uima.cas.Boolean</rangeTypeName>
         </featureDescription>
       </features>

From f355d7f0da071e57605dfcc12a5be13c2d484b73 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 13 Oct 2022 16:41:22 +0200
Subject: [PATCH 245/269] Add the casflow types to the GNP DB XML multiplier
 descriptor.

---
 .../multiplier/gnp/desc/jcore-gnormplus-xml-db-multiplier.xml   | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xml-db-multiplier.xml b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xml-db-multiplier.xml
index a0c7ce3e3..36c71c403 100644
--- a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xml-db-multiplier.xml
+++ b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xml-db-multiplier.xml
@@ -126,6 +126,8 @@
                 <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
                 <import name="de.julielab.jcore.types.jcore-semantics-biology-types"/>
                 <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types"/>
+                <import name="de.julielab.jcore.types.jcore-casflow-types"/>
             </imports>
         </typeSystemDescription>
         <fsIndexCollection/>

From ff76bf730a56cbba290cb66880ecedfb5d3c6428 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Thu, 13 Oct 2022 16:45:55 +0200
Subject: [PATCH 246/269] Reset the `currentBiocResultCollectionIndex` when
 reading the next batch of documents from the database.

Otherwise the index would eventually become too high and cause an index out of bounds error.
---
 .../julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java  | 1 +
 1 file changed, 1 insertion(+)

diff --git a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java
index 6ee4f2bf2..a6d178dfb 100644
--- a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java
+++ b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java
@@ -69,6 +69,7 @@ public AbstractCas next() throws AnalysisEngineProcessException {
             // batch.
             if (bioCCasPopulator == null || bioCCasPopulator.documentsLeftInCollection() == 0) {
                 currentCollectionIndex = 0;
+                currentBiocResultCollectionIndex = 0;
                 final BioCCollection gnormPlusInputCollection = GNormPlusProcessing.createEmptyJulieLabBioCCollection();
                 // We first retrieve the whole current batch from the super multiplier and serialize the CASes
                 // to XMI. We do that because we only have one CAS at a time and, thus, must store the data

From d2035293410dd9e89c30dfe01b4a542c64a51c04 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 21 Oct 2022 11:54:49 +0200
Subject: [PATCH 247/269] Disable 'trivial XML whitespaces' per VTD-XML by
 default.

Such trivial whitespaces are whitespace characters, including newlines, that only serve XML formatting purposes. Note, however, that VTD-XML will also ignore whitespace between inline-XML tags such as `<e>some text</e> <e>some more text</e>`. Thus, the previous example would result in the document text `some textsome more text`. To handle such cases, the XML Mapper now has a property `ignoreTrivialWhitespaces` that toggles this behaviour.
---
 .../mapper/StructuredAbstractParser.java      |   2 +-
 .../reader/xmlmapper/mapper/XMLMapper.java    |  19 +-
 .../jcore/reader/xmlmapper/XMLReader.java     |   4 +
 .../jcore/reader/xmlmapper/XMLReaderTest.java |  12 ++
 ...MLReaderDescriptor_medline_singleFile2.xml |  12 ++
 .../resources/doc_medline_mathml_newlines.xml | 171 ++++++++++++++++++
 6 files changed, 218 insertions(+), 2 deletions(-)
 create mode 100644 jcore-xml-mapper/src/test/resources/doc_medline_mathml_newlines.xml

diff --git a/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/mapper/StructuredAbstractParser.java b/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/mapper/StructuredAbstractParser.java
index ce46c09f6..84efc4d6b 100644
--- a/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/mapper/StructuredAbstractParser.java
+++ b/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/mapper/StructuredAbstractParser.java
@@ -60,7 +60,7 @@ public List<String> parseDocumentPart(VTDNav vn, PartOfDocument docTextPart, int
             Map<String, Object> abstractSectionData = rowIterator.next();
             String label = (String) abstractSectionData.get("Label");
             String nlmCategory = (String) abstractSectionData.get("NlmCategory");
-            String abstractSectionText = (String) abstractSectionData.get("AbstractText");
+            String abstractSectionText = ((String) abstractSectionData.get("AbstractText"));
             if (newlineBetweenSections) {
                 // in case the last section was empty, we delete the trailing
                 // newline
diff --git a/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/mapper/XMLMapper.java b/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/mapper/XMLMapper.java
index 603c91ea8..22de77305 100644
--- a/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/mapper/XMLMapper.java
+++ b/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/mapper/XMLMapper.java
@@ -49,6 +49,23 @@ public class XMLMapper {
 
 	private DocumentTextHandler documentTextHandler;
 
+	private boolean ignoreTrivialWhitespaces;
+
+	/**
+	 * <p>
+	 * Whether or not to ignore trivial XML whitespaces and newlines according to {@link VTDGen#enableIgnoredWhiteSpace(boolean)}.
+	 * </p>
+	 * <p>
+	 * Activating this will ignore whitespaces that exist between XML tags and have no other character data.
+	 * This is not always desired behavior. Inline-annotated text may contain whitespaces between two tags that
+	 * should actually retained in the document text.
+	 * </p>
+	 * @param ignoreTrivialWhitespaces
+	 */
+	public void setIgnoreTrivialWhitespaces(boolean ignoreTrivialWhitespaces) {
+		this.ignoreTrivialWhitespaces = ignoreTrivialWhitespaces;
+	}
+
 	/**
 	 * Creates an new instacne of the XMLMapper
 	 * 
@@ -80,7 +97,7 @@ public void parse(byte[] data, byte[] identifier, JCas jcas) {
 			// needed for extraction of mixed-content-XML
 			// when there is a whitespace only between two
 			// tags, e.g. ...</s> <s id=".">...
-			vg.enableIgnoredWhiteSpace(true);
+			vg.enableIgnoredWhiteSpace(!ignoreTrivialWhitespaces);
 			vg.setDoc(data);
 			vg.parse(true);
 			VTDNav vn = vg.getNav();
diff --git a/jcore-xml-mapper/src/test/java/de/julielab/jcore/reader/xmlmapper/XMLReader.java b/jcore-xml-mapper/src/test/java/de/julielab/jcore/reader/xmlmapper/XMLReader.java
index b24c27a13..63ee44bbe 100644
--- a/jcore-xml-mapper/src/test/java/de/julielab/jcore/reader/xmlmapper/XMLReader.java
+++ b/jcore-xml-mapper/src/test/java/de/julielab/jcore/reader/xmlmapper/XMLReader.java
@@ -32,6 +32,7 @@
 import java.io.*;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Optional;
 
 /**
  * Generic XML {@link CollectionReader}. Uses a mapping file to map elements of the XML document to
@@ -44,6 +45,7 @@ public class XMLReader extends CollectionReader_ImplBase {
 	private static final Logger LOGGER = LoggerFactory.getLogger(XMLReader.class);
 	public static final String PARAM_INPUT_DIR = "InputDirectory";
 	public static final String PARAM_INPUT_FILE = "InputFile";
+	public static final String PARAM_IGNORE_TRIVIAL_WS = "IgnoreTrivialWS";
 	public static final String RESOURCE_MAPPING_FILE = "MappingFile";
 	private List<File> files = null;
 	private int currentIndex = 0;
@@ -59,6 +61,7 @@ public void initialize() throws ResourceInitializationException {
 		
 		String inputDir = (String) getUimaContext().getConfigParameterValue(PARAM_INPUT_DIR);
 		String inputFile = (String) getUimaContext().getConfigParameterValue(PARAM_INPUT_FILE);
+		boolean ignoreTrivialWs = (boolean) Optional.ofNullable(getUimaContext().getConfigParameterValue(PARAM_IGNORE_TRIVIAL_WS)).orElse(true);
 		InputStream is = null;
 		try {
 			is = getUimaContext().getResourceAsStream(RESOURCE_MAPPING_FILE);
@@ -101,6 +104,7 @@ public void initialize() throws ResourceInitializationException {
 		
 		try {
 			xmlMapper = new XMLMapper(JulieXMLTools.readStream(is, 1000));
+			xmlMapper.setIgnoreTrivialWhitespaces(ignoreTrivialWs);
 		} catch (FileNotFoundException e) {
 			throw new ResourceInitializationException(e);
 		} catch (IOException e) {
diff --git a/jcore-xml-mapper/src/test/java/de/julielab/jcore/reader/xmlmapper/XMLReaderTest.java b/jcore-xml-mapper/src/test/java/de/julielab/jcore/reader/xmlmapper/XMLReaderTest.java
index ef926761a..62de41982 100644
--- a/jcore-xml-mapper/src/test/java/de/julielab/jcore/reader/xmlmapper/XMLReaderTest.java
+++ b/jcore-xml-mapper/src/test/java/de/julielab/jcore/reader/xmlmapper/XMLReaderTest.java
@@ -174,6 +174,7 @@ public XMLReaderTest() {
 
 	@Test
 	public void testSingleEntityData() throws Throwable {
+		//
 		medlineReader = createCollectionReader("src/test/resources/XMLReaderDescriptor_medline_singleFile2.xml");
 		try {
 			assertTrue(medlineReader.hasNext());
@@ -1186,4 +1187,15 @@ public void testStructuredAbstract() throws UIMAException, IOException {
 		// exists both).
         // EF March 2018: Haven't I done this already? Structured abstracts are handled
 	}
+
+	@Test
+	public void testNewlines() throws UIMAException, IOException {
+		JCas jCas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-document-meta-pubmed-types",
+				"de.julielab.jcore.types.jcore-document-structure-types");
+		CollectionReader reader = CollectionReaderFactory.createReader(XMLReader.class, XMLReader.PARAM_INPUT_FILE,
+				"src/test/resources/doc_medline_mathml_newlines.xml", XMLReader.RESOURCE_MAPPING_FILE,
+				"src/test/resources/newMappingFile.xml");
+		reader.getNext(jCas.getCas());
+		System.out.printf(jCas.getDocumentText());
+	}
 }
diff --git a/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_singleFile2.xml b/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_singleFile2.xml
index fdc051f37..f1872c028 100644
--- a/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_singleFile2.xml
+++ b/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_singleFile2.xml
@@ -28,6 +28,12 @@
         <multiValued>false</multiValued>
         <mandatory>true</mandatory>
       </configurationParameter>
+      <configurationParameter>
+        <name>IgnoreTrivialWS</name>
+        <type>Boolean</type>
+        <multiValued>false</multiValued>
+        <mandatory>false</mandatory>
+      </configurationParameter>
     </configurationParameters>
     <configurationParameterSettings>
       <nameValuePair>
@@ -42,6 +48,12 @@
           <string>src/test/resources/pubmedDocumentTag/testfile.xml</string>
         </value>
       </nameValuePair>
+      <nameValuePair>
+        <name>IgnoreTrivialWS</name>
+        <value>
+          <boolean>false</boolean>
+        </value>
+      </nameValuePair>
     </configurationParameterSettings>
     <typeSystemDescription>
       <imports>
diff --git a/jcore-xml-mapper/src/test/resources/doc_medline_mathml_newlines.xml b/jcore-xml-mapper/src/test/resources/doc_medline_mathml_newlines.xml
new file mode 100644
index 000000000..7b1159592
--- /dev/null
+++ b/jcore-xml-mapper/src/test/resources/doc_medline_mathml_newlines.xml
@@ -0,0 +1,171 @@
+<MedlineCitation Status="MEDLINE" IndexingMethod="Curated" Owner="NLM">
+    <PMID Version="1">30712376</PMID>
+    <DateCompleted>
+        <Year>2019</Year>
+        <Month>07</Month>
+        <Day>01</Day>
+    </DateCompleted>
+    <DateRevised>
+        <Year>2020</Year>
+        <Month>12</Month>
+        <Day>15</Day>
+    </DateRevised>
+    <Article PubModel="Print-Electronic">
+        <Journal>
+            <ISSN IssnType="Electronic">1029-2454</ISSN>
+            <JournalIssue CitedMedium="Internet">
+                <Volume>35</Volume>
+                <Issue>1</Issue>
+                <PubDate>
+                    <Year>2019</Year>
+                    <Month>01</Month>
+                </PubDate>
+            </JournalIssue>
+            <Title>Biofouling</Title>
+            <ISOAbbreviation>Biofouling</ISOAbbreviation>
+        </Journal>
+        <ArticleTitle>An investigation into the effects of marine biofilm on the roughness and drag characteristics of surfaces coated with different sized cuprous oxide (Cu<sub>2</sub>O) particles.</ArticleTitle>
+        <Pagination>
+            <MedlinePgn>15-33</MedlinePgn>
+        </Pagination>
+        <ELocationID EIdType="doi" ValidYN="Y">10.1080/08927014.2018.1559305</ELocationID>
+        <Abstract>
+            <AbstractText>Biofilms typically increase surface roughness and consequently the drag penalties on marine vessels. However, there is a lack of data regarding the time-dependent influence of biofilms on antifouling surface characteristics and frictional drag, especially for surface coatings with different sizes of cuprous oxide (<mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" display="inline">
+                <mml:msub>
+                    <mml:mrow>
+                        <mml:mtext>Cu</mml:mtext>
+                    </mml:mrow>
+                    <mml:mrow>
+                        <mml:mn>2</mml:mn>
+                    </mml:mrow>
+                </mml:msub>
+                <mml:mi mathvariant="normal">O</mml:mi>
+            </mml:math>). In this study, a series of pressure drop measurements was carried out using flat plates coated with different sizes of <mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" display="inline">
+                <mml:msub>
+                    <mml:mrow>
+                        <mml:mtext>Cu</mml:mtext>
+                    </mml:mrow>
+                    <mml:mrow>
+                        <mml:mn>2</mml:mn>
+                    </mml:mrow>
+                </mml:msub>
+                <mml:mi mathvariant="normal">O</mml:mi>
+            </mml:math>. The cuprous oxide-containing surfaces were deployed at sea for a period of six months to allow biofilm to develop. Surface microstructure and roughness analyses were carried out every six weeks using scanning electron microscopy and laser roughness surface profilometry. From the data, the added frictional drag caused by biofilm on ships was predicted, based on roughness function using Granville extrapolations. The analyses indicated that biofilms had significant impacts by altering the surface microstructure, resulting in higher frictional drag. However, due to the interaction between the biofilm and the physico-chemical properties of the substratum for panels coated with larger <mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" display="inline">
+                <mml:msub>
+                    <mml:mrow>
+                        <mml:mtext>Cu</mml:mtext>
+                    </mml:mrow>
+                    <mml:mrow>
+                        <mml:mn>2</mml:mn>
+                    </mml:mrow>
+                </mml:msub>
+                <mml:mi mathvariant="normal">O</mml:mi>
+            </mml:math>, the roughness and drag measurement results were both found to have fluctuating increments.</AbstractText>
+        </Abstract>
+        <AuthorList CompleteYN="Y">
+            <Author ValidYN="Y">
+                <LastName>Li</LastName>
+                <ForeName>Chang</ForeName>
+                <Initials>C</Initials>
+                <Identifier Source="ORCID">0000-0003-3514-7857</Identifier>
+                <AffiliationInfo>
+                    <Affiliation>Marine, Offshore and Subsea Technology group, School of Engineering, Newcastle University, Newcastle upon Tyne, UK.</Affiliation>
+                </AffiliationInfo>
+            </Author>
+            <Author ValidYN="Y">
+                <LastName>Atlar</LastName>
+                <ForeName>Mehmet</ForeName>
+                <Initials>M</Initials>
+                <AffiliationInfo>
+                    <Affiliation>Department of Naval Architecture Ocean and Marine Engineering, University of Strathclyde, Glasgow, UK.</Affiliation>
+                </AffiliationInfo>
+            </Author>
+            <Author ValidYN="Y">
+                <LastName>Haroutunian</LastName>
+                <ForeName>Maryam</ForeName>
+                <Initials>M</Initials>
+                <AffiliationInfo>
+                    <Affiliation>Marine, Offshore and Subsea Technology group, School of Engineering, Newcastle University, Newcastle upon Tyne, UK.</Affiliation>
+                </AffiliationInfo>
+            </Author>
+            <Author ValidYN="Y">
+                <LastName>Norman</LastName>
+                <ForeName>Rose</ForeName>
+                <Initials>R</Initials>
+                <AffiliationInfo>
+                    <Affiliation>Marine, Offshore and Subsea Technology group, School of Engineering, Newcastle University, Newcastle upon Tyne, UK.</Affiliation>
+                </AffiliationInfo>
+            </Author>
+            <Author ValidYN="Y">
+                <LastName>Anderson</LastName>
+                <ForeName>Colin</ForeName>
+                <Initials>C</Initials>
+                <AffiliationInfo>
+                    <Affiliation>Department of Research and Development, American Chemet Corporation, East Helena, Montana 59635, USA.</Affiliation>
+                </AffiliationInfo>
+            </Author>
+        </AuthorList>
+        <Language>eng</Language>
+        <PublicationTypeList>
+            <PublicationType UI="D016428">Journal Article</PublicationType>
+            <PublicationType UI="D013485">Research Support, Non-U.S. Gov't</PublicationType>
+        </PublicationTypeList>
+        <ArticleDate DateType="Electronic">
+            <Year>2019</Year>
+            <Month>02</Month>
+            <Day>04</Day>
+        </ArticleDate>
+    </Article>
+    <MedlineJournalInfo>
+        <Country>England</Country>
+        <MedlineTA>Biofouling</MedlineTA>
+        <NlmUniqueID>9200331</NlmUniqueID>
+        <ISSNLinking>0892-7014</ISSNLinking>
+    </MedlineJournalInfo>
+    <ChemicalList>
+        <Chemical>
+            <RegistryNumber>789U1901C5</RegistryNumber>
+            <NameOfSubstance UI="D003300">Copper</NameOfSubstance>
+        </Chemical>
+        <Chemical>
+            <RegistryNumber>T8BEA5064F</RegistryNumber>
+            <NameOfSubstance UI="C000520">cuprous oxide</NameOfSubstance>
+        </Chemical>
+    </ChemicalList>
+    <CitationSubset>IM</CitationSubset>
+    <MeshHeadingList>
+        <MeshHeading>
+            <DescriptorName UI="D018441" MajorTopicYN="Y">Biofilms</DescriptorName>
+        </MeshHeading>
+        <MeshHeading>
+            <DescriptorName UI="D003300" MajorTopicYN="N">Copper</DescriptorName>
+            <QualifierName UI="Q000737" MajorTopicYN="Y">chemistry</QualifierName>
+        </MeshHeading>
+        <MeshHeading>
+            <DescriptorName UI="D017276" MajorTopicYN="N">Friction</DescriptorName>
+        </MeshHeading>
+        <MeshHeading>
+            <DescriptorName UI="D008422" MajorTopicYN="N">Materials Testing</DescriptorName>
+        </MeshHeading>
+        <MeshHeading>
+            <DescriptorName UI="D008855" MajorTopicYN="N">Microscopy, Electron, Scanning</DescriptorName>
+        </MeshHeading>
+        <MeshHeading>
+            <DescriptorName UI="D012623" MajorTopicYN="N">Seawater</DescriptorName>
+        </MeshHeading>
+        <MeshHeading>
+            <DescriptorName UI="D013499" MajorTopicYN="Y">Surface Properties</DescriptorName>
+        </MeshHeading>
+    </MeshHeadingList>
+    <KeywordList Owner="NOTNLM">
+        <Keyword MajorTopicYN="Y">added resistance</Keyword>
+        <Keyword MajorTopicYN="Y">antifouling</Keyword>
+        <Keyword MajorTopicYN="Y">biofilm</Keyword>
+        <Keyword MajorTopicYN="Y">frictional drag</Keyword>
+        <Keyword MajorTopicYN="Y">particle size</Keyword>
+        <Keyword MajorTopicYN="Y">pressure drop measurement</Keyword>
+        <Keyword MajorTopicYN="Y">roughness characteristic</Keyword>
+        <Keyword MajorTopicYN="Y">roughness function</Keyword>
+        <Keyword MajorTopicYN="Y">Cuprous oxide (<mml:math xmlns:mml="http://www.w3.org/1998/Math/MathML" display="inline"> 						<mml:msub> 							<mml:mrow> 								<mml:mtext>Cu</mml:mtext> 		</mml:mrow> 							<mml:mrow> 								<mml:mn>2</mml:mn> 							</mml:mrow> 						</mml:msub> 						<mml:mi mathvariant="normal">O</mml:mi> 					</mml:math>)</Keyword>
+    </KeywordList>
+</MedlineCitation>
\ No newline at end of file

From 2d67e76a5ab944aeef3424543bffea1bef8fcd51 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 21 Oct 2022 11:55:12 +0200
Subject: [PATCH 248/269] Comment out debug message and add trace logging.

---
 .../multiplier/gnp/GNormPlusMultiplierLogic.java     | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java
index a6d178dfb..d11dd432a 100644
--- a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java
+++ b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java
@@ -6,6 +6,7 @@
 import de.julielab.jcore.consumer.gnp.BioCDocumentPopulator;
 import de.julielab.jcore.reader.BioCCasPopulator;
 import de.julielab.jcore.types.ext.DBProcessingMetaData;
+import de.julielab.jcore.utility.JCoReTools;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.AbstractCas;
@@ -83,7 +84,17 @@ public AbstractCas next() throws AnalysisEngineProcessException {
                     // skip document if it is unchanged and skipping is enabled
                     if (!(isDocumentHashUnchanged && skipUnchangedDocuments)) {
                         final BioCDocument bioCDocument = bioCDocumentPopulator.populate(jCas);
+//                        try {
+//                            log.info("Checking bioC Document ID: {}", bioCDocument.getID());
+//                        } catch (NullPointerException e) {
+//                            log.error("BioCDocument populated with CAS of document {} does not have an ID. All annotations of that CAS:", JCoReTools.getDocId(jCas));
+//                            for (var a : jCas.getAnnotationIndex()) {
+//                                log.error("{}", a);
+//                            }
+//                        }
                         gnormPlusInputCollection.addDocument(bioCDocument);
+                    } else {
+                        log.trace("Document with ID {} already exists in the XMI database table with unchanged text contents, skipping GNormPlus processing.", JCoReTools.getDocId(jCas));
                     }
                     try (final ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
                         try (final GZIPOutputStream os = new GZIPOutputStream(baos)) {
@@ -98,6 +109,7 @@ public AbstractCas next() throws AnalysisEngineProcessException {
                 }
                 // now process the whole batch with GNP
                 if (gnormPlusInputCollection.getDocmentCount() > 0) {
+                    log.trace("Processing {} documents with GNormPlus.", gnormPlusInputCollection.getDocmentCount());
                     final Path outputFilePath = GNormPlusProcessing.processWithGNormPlus(gnormPlusInputCollection, outputDirectory);
                     try {
                         bioCCasPopulator = new BioCCasPopulator(outputFilePath);

From 76c8858dbe7befec169621a477ffb3db675bfaa2 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 21 Oct 2022 17:28:14 +0200
Subject: [PATCH 249/269] Remove unused field.

---
 .../java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplier.java   | 1 -
 1 file changed, 1 deletion(-)

diff --git a/jcore-pmc-db-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplier.java b/jcore-pmc-db-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplier.java
index c669e4f92..63d621719 100644
--- a/jcore-pmc-db-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplier.java
+++ b/jcore-pmc-db-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplier.java
@@ -39,7 +39,6 @@ public class PMCDBMultiplier extends DBMultiplier {
     public static final String PARAM_TABLE_DOCUMENT = "DocumentTable";
     public static final String PARAM_TABLE_DOCUMENT_SCHEMA = "DocumentTableSchema";
     public static final String PARAM_TO_VISIT_KEYS = "ToVisitKeys";
-    protected static final byte[] comma = ",".getBytes();
     private final static Logger log = LoggerFactory.getLogger(PMCDBMultiplier.class);
     @ConfigurationParameter(name = PARAM_OMIT_BIB_REFERENCES, mandatory = false, defaultValue = "false", description = "If set to true, references to the bibliography are omitted from the CAS text.")
     protected boolean omitBibReferences;

From ee9e6911e835a1c0c00b0ba2870500eefcd88404 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 21 Oct 2022 17:28:38 +0200
Subject: [PATCH 250/269] Set `ignoreTrivialWhitespaces` to `true` by default
 in the XML mapper code.

---
 .../de/julielab/jcore/reader/xmlmapper/mapper/XMLMapper.java    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/mapper/XMLMapper.java b/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/mapper/XMLMapper.java
index 22de77305..3f8e19996 100644
--- a/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/mapper/XMLMapper.java
+++ b/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/mapper/XMLMapper.java
@@ -49,7 +49,7 @@ public class XMLMapper {
 
 	private DocumentTextHandler documentTextHandler;
 
-	private boolean ignoreTrivialWhitespaces;
+	private boolean ignoreTrivialWhitespaces = true;
 
 	/**
 	 * <p>

From ed0ed27f1569041f02949e3f53f336eee0ecc172 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 21 Oct 2022 17:31:28 +0200
Subject: [PATCH 251/269] Resolve #152.

---
 jcore-gnormplus-ae/component.meta             |   8 +-
 jcore-gnormplus-ae/pom.xml                    |   5 +
 .../gnp/GNormPlusPMCDBMultiplier.java         |  96 +++++++++++++
 .../jcore-gnormplus-pmc-db-multiplier.xml     | 134 ++++++++++++++++++
 4 files changed, 241 insertions(+), 2 deletions(-)
 create mode 100644 jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusPMCDBMultiplier.java
 create mode 100644 jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-pmc-db-multiplier.xml

diff --git a/jcore-gnormplus-ae/component.meta b/jcore-gnormplus-ae/component.meta
index 2314d53be..b44cae343 100644
--- a/jcore-gnormplus-ae/component.meta
+++ b/jcore-gnormplus-ae/component.meta
@@ -1,10 +1,14 @@
 {
     "categories": [
-        "multiplier",
-        "ae"
+        "ae",
+        "multiplier"
     ],
     "description": "Wrapper for the JULIE Lab variant of the GNormPlus gene ID mapper.",
     "descriptors": [
+        {
+            "category": "multiplier",
+            "location": "de.julielab.jcore.multiplier.gnp.desc.jcore-gnormplus-pmc-db-multiplier"
+        },
         {
             "category": "multiplier",
             "location": "de.julielab.jcore.multiplier.gnp.desc.jcore-gnormplus-bioc-multiplier"
diff --git a/jcore-gnormplus-ae/pom.xml b/jcore-gnormplus-ae/pom.xml
index 17dbaf538..0056bb402 100644
--- a/jcore-gnormplus-ae/pom.xml
+++ b/jcore-gnormplus-ae/pom.xml
@@ -36,6 +36,11 @@
             <artifactId>jcore-xml-db-reader</artifactId>
             <version>${project.parent.version}</version>
         </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-pmc-db-reader</artifactId>
+            <version>${project.parent.version}</version>
+        </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>julielab-gnormplus</artifactId>
diff --git a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusPMCDBMultiplier.java b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusPMCDBMultiplier.java
new file mode 100644
index 000000000..85a16c211
--- /dev/null
+++ b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusPMCDBMultiplier.java
@@ -0,0 +1,96 @@
+package de.julielab.jcore.multiplier.gnp;
+
+import de.julielab.jcore.ae.gnp.GNormPlusAnnotator;
+import de.julielab.jcore.consumer.gnp.BioCDocumentPopulator;
+import de.julielab.jcore.multiplier.pmc.PMCDBMultiplier;
+import de.julielab.jcore.types.Gene;
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
+import org.apache.uima.cas.AbstractCas;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.descriptor.ResourceMetaData;
+import org.apache.uima.fit.descriptor.TypeCapability;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.Optional;
+
+import static de.julielab.jcore.ae.gnp.GNormPlusAnnotator.DESC_FOCUS_SPECIES;
+
+@ResourceMetaData(name = "JCoRe GNormPlus PMC Database Multiplier", description = "A CAS multiplier to be used with the DB PMC multiplier reader in place of the DB PMC multiplier. It wraps the JULIE Lab variant of the GNormPlus gene ID mapper. It is a multiplier because this enables batch-processing of documents with GNormPlus which makes the processing more efficient.", vendor = "JULIE Lab Jena, Germany")
+@TypeCapability(inputs = {}, outputs = {"de.julielab.jcore.types.ConceptMention", "de.julielab.jcore.types.Organism"})
+public class GNormPlusPMCDBMultiplier extends PMCDBMultiplier {
+    public static final String PARAM_ADD_GENES = GNormPlusAnnotator.PARAM_ADD_GENES;
+    public static final String PARAM_GENE_TYPE_NAME = GNormPlusAnnotator.PARAM_GENE_TYPE_NAME;
+    public static final String PARAM_OUTPUT_DIR = GNormPlusAnnotator.PARAM_OUTPUT_DIR;
+    public static final String PARAM_GNP_SETUP_FILE = GNormPlusAnnotator.PARAM_GNP_SETUP_FILE;
+    public static final String PARAM_FOCUS_SPECIES = GNormPlusAnnotator.PARAM_FOCUS_SPECIES;
+    public static final String PARAM_SKIP_UNCHANGED_DOCUMENTS = "SkipUnchangedDocuments";
+    private final static Logger log = LoggerFactory.getLogger(GNormPlusPMCDBMultiplier.class);
+    private static boolean shutdownHookInstalled = false;
+    @ConfigurationParameter(name = PARAM_ADD_GENES, mandatory = false, defaultValue = "false", description = GNormPlusAnnotator.DESC_ADD_GENES)
+    private boolean addGenes;
+    @ConfigurationParameter(name = PARAM_GNP_SETUP_FILE, mandatory = false, description = GNormPlusAnnotator.DESC_GNP_SETUP_FILE)
+    private String setupFile;
+    @ConfigurationParameter(name = PARAM_GENE_TYPE_NAME, mandatory = false, defaultValue = "de.julielab.jcore.types.Gene", description = GNormPlusAnnotator.DESC_GENE_TYPE_NAME)
+    private String geneTypeName;
+    @ConfigurationParameter(name = PARAM_OUTPUT_DIR, mandatory = false, description = GNormPlusAnnotator.DESC_OUTPUT_DIR)
+    private String outputDirectory;
+    @ConfigurationParameter(name = PARAM_FOCUS_SPECIES, mandatory = false, description = DESC_FOCUS_SPECIES)
+    private String focusSpecies;
+    @ConfigurationParameter(name = PARAM_SKIP_UNCHANGED_DOCUMENTS, mandatory = false, description = "Whether to omit GNormPlus processing on documents that already exist in the XMI database table and whose document text has not changed.")
+    private boolean skipUnchangedDocuments;
+    private BioCDocumentPopulator bioCDocumentPopulator;
+    private GNormPlusMultiplierLogic multiplierLogic;
+
+    @Override
+    public void initialize(UimaContext aContext) throws ResourceInitializationException {
+        super.initialize(aContext);
+        addGenes = (boolean) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_ADD_GENES)).orElse(false);
+        geneTypeName = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_GENE_TYPE_NAME)).orElse(Gene.class.getCanonicalName());
+        skipUnchangedDocuments = (boolean) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_SKIP_UNCHANGED_DOCUMENTS)).orElse(false);
+        try {
+            bioCDocumentPopulator = new BioCDocumentPopulator(addGenes, geneTypeName);
+        } catch (ClassNotFoundException e) {
+            log.error("Gene annotation class {} could not be found.", geneTypeName, e);
+            throw new ResourceInitializationException(e);
+        }
+        try {
+            multiplierLogic = new GNormPlusMultiplierLogic(aContext, bioCDocumentPopulator, () -> super.hasNext(), () -> {
+                try {
+                    return (JCas) super.next();
+                } catch (AnalysisEngineProcessException e) {
+                    log.error("Error when calling next() of the base multiplier.");
+                    throw new RuntimeException(e);
+                }
+            }, () -> getEmptyJCas(),
+                skipUnchangedDocuments);
+        } catch (IOException e) {
+            log.error("Could not initialize GNormPlus", e);
+            throw new ResourceInitializationException(e);
+        }
+    }
+
+    @Override
+    public boolean hasNext() {
+        try {
+            return multiplierLogic.hasNext();
+        } catch (Throwable t) {
+            log.error("Error when checking hasNext() on multiplier", t);
+        }
+        return false;
+    }
+
+    @Override
+    public AbstractCas next() throws AnalysisEngineProcessException {
+        try {
+            return multiplierLogic.next();
+        } catch (Throwable t) {
+            log.error("Error when retrieving next multiplier CAS", t);
+            throw new AnalysisEngineProcessException(t);
+        }
+    }
+}
diff --git a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-pmc-db-multiplier.xml b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-pmc-db-multiplier.xml
new file mode 100644
index 000000000..761cc42ae
--- /dev/null
+++ b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-pmc-db-multiplier.xml
@@ -0,0 +1,134 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+    <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+    <primitive>true</primitive>
+    <annotatorImplementationName>de.julielab.jcore.multiplier.gnp.GNormPlusPMCDBMultiplier</annotatorImplementationName>
+    <analysisEngineMetaData>
+        <name>JCoRe GNormPlus PMC Database Multiplier</name>
+        <description>A CAS multiplier to be used with the DB PMC multiplier reader in place of the DB PMC multiplier. It wraps the JULIE Lab variant of the GNormPlus gene ID mapper. It is a multiplier because this enables batch-processing of documents with GNormPlus which makes the processing more efficient.</description>
+        <vendor>JULIE Lab Jena, Germany</vendor>
+        <configurationParameters>
+            <configurationParameter>
+                <name>AddGenes</name>
+                <description>If set to true, all Gene annotations in the CAS will be added to the BioC documents. The default type used is de.julielab.jcore.types.Gene. This can be changed with the GeneTypeName parameter.</description>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>GNormPlusSetupFile</name>
+                <description>File path or class path resource path to the setup.txt file for GNormPlus. If not specified, a default setup file is loaded that expects the Dictionary/ directory directly under the working directory, performs gene recognition with the CRF and thus expects the GNormPlus CRF directory directly under the working directory and maps the found genes to NCBI gene IDs for all organisms.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>GeneTypeName</name>
+                <description>The UIMA type denoting gene annotations that should be written into the BioC format when the AddGenes parameter is set to true.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>OutputDirectory</name>
+                <description>Optional. If specified, the GNormPlus output files in BioC format will be saved to the given directory. In this way, this component can be used directly as a BioC XML writer through the GNormPlus algorithm.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>FocusSpecies</name>
+                <description>If given, all gene mentions are assigned to this NCBI taxonomy ID, i.e. species recognition is omitted.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>SkipUnchangedDocuments</name>
+                <description>Whether to omit GNormPlus processing on documents that already exist in the XMI database table and whose document text has not changed.</description>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>OmitBibliographyReferences</name>
+                <description>If set to true, references to the bibliography are omitted from the CAS text.</description>
+                <type>Boolean</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>AddShaHash</name>
+                <description>For use with AnnotationDefinedFlowController. Possible values: document_text, defaults to 'document_text' and thus doesn't need to be specified manually at the moment. This parameter needs to match the value for the same parameter given to the XMIDBWriter in this pipeline. Then, a comparison between the existing hash in the database and the new hash of the CAS read in this pipeline can be made. In case the hashes match, the CAS is directly routed to the components specified in the ToVisitKeys parameter, skipping all other components. Note that this only works with AAEs where the first component is an 'AnnotationControlledFlow'.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>DocumentTable</name>
+                <description>For use with AnnotationDefinedFlowController. String parameter indicating the name of the table where the XMI data and, thus, the hash is stored. The name must be schema qualified. Note that in this component, only the ToVisit annotation is created that determines which components to apply to a CAS with matching (unchanged) hash. The logic to actually control the CAS flow is contained in the AnnotationDefinedFlowController.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>DocumentTableSchema</name>
+                <description>For use with AnnotationDefinedFlowController. The name of the schema that the document table - given with the DocumentTable parameter - adheres to. Only the primary key part is required for hash value retrieval.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>ToVisitKeys</name>
+                <description>For use with AnnotationDefinedFlowController. The delegate AE keys of the AEs this CAS should still applied on although the hash has not changed. Can be null or empty indicating that no component should be applied to the CAS. This is, however, the task of the AnnotationDefinedFlowController.</description>
+                <type>String</type>
+                <multiValued>true</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+        </configurationParameters>
+        <configurationParameterSettings>
+            <nameValuePair>
+                <name>AddGenes</name>
+                <value>
+                    <boolean>false</boolean>
+                </value>
+            </nameValuePair>
+            <nameValuePair>
+                <name>GeneTypeName</name>
+                <value>
+                    <string>de.julielab.jcore.types.Gene</string>
+                </value>
+            </nameValuePair>
+            <nameValuePair>
+                <name>OmitBibliographyReferences</name>
+                <value>
+                    <boolean>false</boolean>
+                </value>
+            </nameValuePair>
+        </configurationParameterSettings>
+        <typeSystemDescription>
+            <imports>
+                <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types" />
+                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types" />
+                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types"/>
+            </imports>
+        </typeSystemDescription>
+        <fsIndexCollection/>
+        <capabilities>
+            <capability>
+                <inputs/>
+                <outputs>
+                    <type>de.julielab.jcore.types.ConceptMention</type>
+                    <type>de.julielab.jcore.types.Organism</type>
+                </outputs>
+                <languagesSupported/>
+            </capability>
+        </capabilities>
+        <operationalProperties>
+            <modifiesCas>true</modifiesCas>
+            <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+            <outputsNewCASes>true</outputsNewCASes>
+        </operationalProperties>
+    </analysisEngineMetaData>
+</analysisEngineDescription>
\ No newline at end of file

From 90c19c2a7a9fdd4ac00e5b95344c8e264080e281 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Fri, 21 Oct 2022 17:58:52 +0200
Subject: [PATCH 252/269] Add `jcore-casflow-types` to the GNP DB multipliers.

For the `ToVisit` annotation type.
---
 .../multiplier/gnp/desc/jcore-gnormplus-pmc-db-multiplier.xml    | 1 +
 .../multiplier/gnp/desc/jcore-gnormplus-xmi-db-multiplier.xml    | 1 +
 2 files changed, 2 insertions(+)

diff --git a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-pmc-db-multiplier.xml b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-pmc-db-multiplier.xml
index 761cc42ae..952b7119e 100644
--- a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-pmc-db-multiplier.xml
+++ b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-pmc-db-multiplier.xml
@@ -112,6 +112,7 @@
                 <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types" />
                 <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types" />
                 <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types"/>
+                <import name="de.julielab.jcore.types.jcore-casflow-types"/>
             </imports>
         </typeSystemDescription>
         <fsIndexCollection/>
diff --git a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xmi-db-multiplier.xml b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xmi-db-multiplier.xml
index 351cc8c56..9b9c310f0 100644
--- a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xmi-db-multiplier.xml
+++ b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xmi-db-multiplier.xml
@@ -84,6 +84,7 @@
                 <import name="de.julielab.jcore.types.jcore-semantics-biology-types"/>
                 <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
                 <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types"/>
+                <import name="de.julielab.jcore.types.jcore-casflow-types"/>
             </imports>
         </typeSystemDescription>
         <fsIndexCollection/>

From d9cd8dec7e14e201a7fd56312dc13fd3c4a01df8 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 1 Nov 2022 10:15:58 +0100
Subject: [PATCH 253/269] Resolve #153.

---
 .../jcore-gnormplus-pmc-db-multiplier.xml     |  7 +++++
 .../jcore/multiplier/pmc/PMCDBMultiplier.java |  6 +++-
 .../pmc/desc/jcore-pmc-db-multiplier.xml      |  7 +++++
 .../PMCDBMultiplierHashComparisonTest.java    | 11 ++++++--
 .../multiplier/pmc/PMCDBMultiplierTest.java   | 18 ++++++++++++
 .../jcore/reader/pmc/CasPopulator.java        | 28 +++++++++++++++----
 .../jcore/reader/xml/CasPopulator.java        | 21 ++++++++++++--
 .../jcore/reader/xml/XMLDBMultiplier.java     |  6 +++-
 .../reader/xmlmapper/mapper/XMLMapper.java    |  3 +-
 9 files changed, 92 insertions(+), 15 deletions(-)

diff --git a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-pmc-db-multiplier.xml b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-pmc-db-multiplier.xml
index 952b7119e..a5639b8a7 100644
--- a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-pmc-db-multiplier.xml
+++ b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-pmc-db-multiplier.xml
@@ -85,6 +85,13 @@
                 <multiValued>true</multiValued>
                 <mandatory>false</mandatory>
             </configurationParameter>
+            <configurationParameter>
+                <name>TruncateAtSize</name>
+                <description>The maximum number of characters allowed in the document text. Characters exceeding this size are discarded. This can be necessary when large documents cannot be handled by subsequent components in the pipeline. Defaults to Integer.MAX_VALUE.</description>
+                <type>Integer</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
         </configurationParameters>
         <configurationParameterSettings>
             <nameValuePair>
diff --git a/jcore-pmc-db-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplier.java b/jcore-pmc-db-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplier.java
index 63d621719..4648f81a2 100644
--- a/jcore-pmc-db-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplier.java
+++ b/jcore-pmc-db-reader/src/main/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplier.java
@@ -39,6 +39,7 @@ public class PMCDBMultiplier extends DBMultiplier {
     public static final String PARAM_TABLE_DOCUMENT = "DocumentTable";
     public static final String PARAM_TABLE_DOCUMENT_SCHEMA = "DocumentTableSchema";
     public static final String PARAM_TO_VISIT_KEYS = "ToVisitKeys";
+    public static final String PARAM_TRUNCATE_AT_SIZE = "TruncateAtSize";
     private final static Logger log = LoggerFactory.getLogger(PMCDBMultiplier.class);
     @ConfigurationParameter(name = PARAM_OMIT_BIB_REFERENCES, mandatory = false, defaultValue = "false", description = "If set to true, references to the bibliography are omitted from the CAS text.")
     protected boolean omitBibReferences;
@@ -51,6 +52,8 @@ public class PMCDBMultiplier extends DBMultiplier {
     private String xmiStorageDataTableSchema;
     @ConfigurationParameter(name = PARAM_TO_VISIT_KEYS, mandatory = false, description = "For use with AnnotationDefinedFlowController. The delegate AE keys of the AEs this CAS should still applied on although the hash has not changed. Can be null or empty indicating that no component should be applied to the CAS. This is, however, the task of the AnnotationDefinedFlowController.")
     private String[] toVisitKeys;
+    @ConfigurationParameter(name = PARAM_TRUNCATE_AT_SIZE, mandatory = false, description = "The maximum number of characters allowed in the document text. Characters exceeding this size are discarded. This can be necessary when large documents cannot be handled by subsequent components in the pipeline. Defaults to Integer.MAX_VALUE.")
+    private int truncationSize;
 
     private CasPopulator casPopulator;
     private Map<String, String> docId2HashMap;
@@ -63,6 +66,7 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
         documentItemToHash = Optional.ofNullable((String) aContext.getConfigParameterValue(PARAM_ADD_SHA_HASH)).orElse("document_text");
         toVisitKeys = (String[]) aContext.getConfigParameterValue(PARAM_TO_VISIT_KEYS);
         omitBibReferences = Optional.ofNullable((Boolean) aContext.getConfigParameterValue(PARAM_OMIT_BIB_REFERENCES)).orElse(false);
+        truncationSize = Optional.ofNullable((Integer)aContext.getConfigParameterValue(PARAM_TRUNCATE_AT_SIZE)).orElse(Integer.MAX_VALUE);
         // We don't know yet which tables to read. Thus, we leave the row mapping out.
         // We will now once the DBMultiplier#process(JCas) will have been run.
         initialized = false;
@@ -74,7 +78,7 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
         }
 
         try {
-            casPopulator = new CasPopulator(omitBibReferences);
+            casPopulator = new CasPopulator(omitBibReferences, truncationSize);
         } catch (IOException e) {
             String errorMsg = "Could not initialize the PMC CasPopulator.";
             log.error(errorMsg);
diff --git a/jcore-pmc-db-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-db-multiplier.xml b/jcore-pmc-db-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-db-multiplier.xml
index 3193805bf..324fdf489 100644
--- a/jcore-pmc-db-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-db-multiplier.xml
+++ b/jcore-pmc-db-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-db-multiplier.xml
@@ -44,6 +44,13 @@
                 <multiValued>true</multiValued>
                 <mandatory>false</mandatory>
             </configurationParameter>
+            <configurationParameter>
+                <name>TruncateAtSize</name>
+                <description>The maximum number of characters allowed in the document text. Characters exceeding this size are discarded. This can be necessary when large documents cannot be handled by subsequent components in the pipeline. Defaults to Integer.MAX_VALUE.</description>
+                <type>Integer</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
         </configurationParameters>
         <configurationParameterSettings>
             <nameValuePair>
diff --git a/jcore-pmc-db-reader/src/test/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplierHashComparisonTest.java b/jcore-pmc-db-reader/src/test/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplierHashComparisonTest.java
index a36155dfa..83d7cf9f6 100644
--- a/jcore-pmc-db-reader/src/test/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplierHashComparisonTest.java
+++ b/jcore-pmc-db-reader/src/test/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplierHashComparisonTest.java
@@ -103,7 +103,7 @@ private static void prepareSourceXMLTable(DataBaseConnector dbc, CoStoSysConnect
     private static void prepareTargetXMITable(DataBaseConnector dbc, CoStoSysConnection conn) throws SQLException {
         // The PMC parser tries to format blocks of content using newlines which makes the test a bit awkward.
         // The test might break if this formatting is changed.
-        String documentTextFmt = "\nThis is text nr %d.\n\n";
+        String documentTextFmt = "This is text nr %d.\n";
         dbc.createTable(TARGET_XMI_TABLE, "xmi_text", "Test table for hash comparison test.");
         dbc.assureColumnsExist(TARGET_XMI_TABLE, List.of(HASH_FIELD_NAME), "text");
         String sql = String.format("INSERT INTO %s (%s,%s,%s,%s,%s) VALUES (?,XMLPARSE(CONTENT ?),?,?,?)", TARGET_XMI_TABLE, DOCID_FIELD_NAME, BASE_DOCUMENT_FIELD_NAME, HASH_FIELD_NAME, MAX_XMI_ID_FIELD_NAME, SOFA_MAPPING_FIELD_NAME);
@@ -111,7 +111,7 @@ private static void prepareTargetXMITable(DataBaseConnector dbc, CoStoSysConnect
         // Note that we only add half of the documents compared to the source XML import. This way we test
         // if the code behaves right when the target document does not yet exist at all.
         for (int i = 0; i < 5; i++) {
-            String xml = String.format(documentTextFmt, i, i);
+            String xml = String.format(documentTextFmt, i);
             ps.setString(1, String.valueOf(i));
             ps.setString(2, xml);
             // For one document in the "target XMI" table we put in a wrong hash. Thus, this document should not trigger
@@ -167,6 +167,8 @@ private JCas prepareCas() throws UIMAException {
 
     @Test
     public void testHashComparison() throws Exception {
+        // This simulates the PMC DB reader output: a cas that lists the primary keys of the 10 source XML documents,
+        // the names of the source XML table, the XMI target table etc.
         JCas jCas = prepareCas();
         TypeSystemDescription tsDesc = TypeSystemDescriptionFactory.createTypeSystemDescription("de.julielab.jcore.types.jcore-document-meta-pubmed-types", "de.julielab.jcore.types.jcore-document-structure-types", "de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types", "de.julielab.jcore.types.extensions.jcore-document-meta-extension-types", "de.julielab.jcore.types.jcore-casflow-types");
         AnalysisEngine engine = AnalysisEngineFactory.createEngine(PMCDBMultiplier.class, tsDesc,
@@ -179,11 +181,14 @@ public void testHashComparison() throws Exception {
         List<String> toVisitKeys = new ArrayList<>();
         while (jCasIterator.hasNext()) {
             JCas newCas = jCasIterator.next();
+            // Collect the ToVisitKeys from each CAS. We expect four CASes to have one, i.e. those whose document text
+            // hash is the same as the one already stored in the target XMI document table: we added 5 XMI documents
+            // to the target table and for one of them we changed the hash code.
             Collection<ToVisit> select = JCasUtil.select(newCas, ToVisit.class);
             select.forEach(tv -> tv.getDelegateKeys().forEach(k -> toVisitKeys.add(k)));
             newCas.release();
         }
-        // There are 4 documents in the target table with the correct hash so we expect the delegate key 4 times
+        // There are 4 documents in the target table with the correct hash, so we expect the delegate key 4 times
         assertThat(toVisitKeys).containsExactly("ThisIsTheVisitKey", "ThisIsTheVisitKey", "ThisIsTheVisitKey", "ThisIsTheVisitKey");
     }
 
diff --git a/jcore-pmc-db-reader/src/test/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplierTest.java b/jcore-pmc-db-reader/src/test/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplierTest.java
index f8d65f822..dfba24034 100644
--- a/jcore-pmc-db-reader/src/test/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplierTest.java
+++ b/jcore-pmc-db-reader/src/test/java/de/julielab/jcore/multiplier/pmc/PMCDBMultiplierTest.java
@@ -71,6 +71,24 @@ public void next() throws Exception {
         assertThat(docIds).containsExactlyInAnyOrder("PMC6949206", "PMC7511315");
     }
 
+    @Test
+    public void truncateText() throws Exception {
+        AnalysisEngine engine = AnalysisEngineFactory.createEngine(PMCDBMultiplier.class, PMCDBMultiplier.PARAM_TRUNCATE_AT_SIZE, 20);
+        JCasIterator jCasIterator = engine.processAndOutputNewCASes(prepareCas());
+        List<String> documentTexts = new ArrayList<>();
+        List<String> docIds = new ArrayList<>();
+        while (jCasIterator.hasNext()) {
+            JCas newCas = jCasIterator.next();
+            documentTexts.add(newCas.getDocumentText());
+            final String docId = JCasUtil.selectSingle(newCas, Header.class).getDocId();
+            docIds.add(docId);
+            newCas.release();
+        }
+        assertThat(documentTexts).containsExactlyInAnyOrder("pmc\n" +
+                "Rescue of premat", "pmc\n" +
+                "Transcriptomic p");
+    }
+
     /**
      * Creates a JCas and adds a RowBatch for the test documents in the source XML table as well as the data table and subset table and schema names.
      *
diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
index a3633959b..93bdcdeca 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
@@ -16,16 +16,26 @@ public class CasPopulator {
     private final static Logger log = LoggerFactory.getLogger(CasPopulator.class);
     private NxmlDocumentParser nxmlDocumentParser;
     private Iterator<URI> nxmlIterator;
+    private int truncationSize;
 
-    public CasPopulator(Iterator<URI> nxmlIterator, Boolean omitBibReferences) throws IOException {
+    public CasPopulator(Iterator<URI> nxmlIterator, Boolean omitBibReferences, int truncationSize) throws IOException {
         this.nxmlIterator = nxmlIterator;
+        this.truncationSize = truncationSize;
         nxmlDocumentParser = new NxmlDocumentParser();
         String settings = omitBibReferences ? "/de/julielab/jcore/reader/pmc/resources/elementproperties-no-bib-refs.yml" : "/de/julielab/jcore/reader/pmc/resources/elementproperties.yml";
         nxmlDocumentParser.loadElementPropertyFile(settings);
     }
 
+    public CasPopulator(Boolean omitBibReferences, int truncationSize) throws IOException {
+        this(null, omitBibReferences, truncationSize);
+    }
+
     public CasPopulator(Boolean omitBibReferences) throws IOException {
-        this(null, omitBibReferences);
+        this(null, omitBibReferences, Integer.MAX_VALUE);
+    }
+
+    public CasPopulator(Iterator<URI> pmcFiles, boolean omitBibReferences) throws IOException {
+        this(pmcFiles, omitBibReferences, Integer.MAX_VALUE);
     }
 
     public void populateCas(URI nxmlUri, JCas cas) throws ElementParsingException, NoDataAvailableException {
@@ -47,7 +57,13 @@ public void populateCas(URI nxmlUri, JCas cas) throws ElementParsingException, N
             }
         }
         StringBuilder sb = populateCas(result, new StringBuilder());
-        cas.setDocumentText(sb.toString());
+        final String documentText = sb.toString();
+        cas.setDocumentText(truncateText(documentText));
+    }
+
+    private String truncateText(String documentText) {
+        // Truncate the document text to the given length
+        return documentText.length() > truncationSize ? documentText.substring(0, truncationSize) : documentText;
     }
 
     public void populateCas(InputStream is, JCas cas) throws ElementParsingException, NoDataAvailableException {
@@ -58,8 +74,8 @@ public void populateCas(InputStream is, JCas cas) throws ElementParsingException
         } catch (DocumentParsingException e) {
             throw new NoDataAvailableException(e);
         }
-        StringBuilder sb = populateCas(result, new StringBuilder());
-        cas.setDocumentText(sb.toString());
+        String documentText = populateCas(result, new StringBuilder()).toString();
+        cas.setDocumentText(truncateText(documentText));
     }
 
     /**
@@ -113,7 +129,7 @@ private StringBuilder populateCas(ParsingResult result, StringBuilder sb) {
                 TextParsingResult textParsingResult = (TextParsingResult) result;
                 final String text = textParsingResult.getText();
                 // some special handling for documents that contain formatting tabs, newlines or no-break-spaces in the text
-                boolean textBeginsWithWhitespace = text.isEmpty() ? false : Character.isWhitespace(text.charAt(0));
+                boolean textBeginsWithWhitespace = text.isBlank() ? false : Character.isWhitespace(text.charAt(0));
                 boolean sbEndsWithWhitespace = sb.length() == 0 ? false : Character.isWhitespace(sb.charAt(sb.length()-1));
                 if (textBeginsWithWhitespace && !sbEndsWithWhitespace)
                     sb.append(" ");
diff --git a/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/CasPopulator.java b/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/CasPopulator.java
index 9dc0e6559..0175ec384 100644
--- a/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/CasPopulator.java
+++ b/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/CasPopulator.java
@@ -2,10 +2,12 @@
 
 import de.julielab.costosys.dbconnection.DataBaseConnector;
 import de.julielab.jcore.reader.xmlmapper.mapper.XMLMapper;
+import de.julielab.jcore.types.Header;
 import org.apache.uima.jcas.JCas;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
@@ -18,13 +20,18 @@ public class CasPopulator {
     private final XMLMapper xmlMapper;
     private Row2CasMapper row2CasMapper;
     private String[] rowMappingArray;
-    private BiConsumer<byte[][], JCas> dbProcessingMetaDataSetter;
+    private int truncationSize;
 
-    public CasPopulator(DataBaseConnector dbc, XMLMapper xmlMapper, Row2CasMapper row2CasMapper, String[] rowMappingArray) {
+    public CasPopulator(DataBaseConnector dbc, XMLMapper xmlMapper, Row2CasMapper row2CasMapper, String[] rowMappingArray, int truncationSize) {
         this.dbc = dbc;
         this.xmlMapper = xmlMapper;
         this.row2CasMapper = row2CasMapper;
         this.rowMappingArray = rowMappingArray;
+        this.truncationSize = truncationSize;
+    }
+
+    public CasPopulator(DataBaseConnector dbc, XMLMapper xmlMapper, Row2CasMapper row2CasMapper, String[] rowMappingArray) {
+        this(dbc, xmlMapper, row2CasMapper, rowMappingArray, Integer.MAX_VALUE);
     }
 
     public void populateCas(JCas jcas, byte[][] arrayArray, BiConsumer<byte[][], JCas> dbProcessingMetaDataSetter) throws CasPopulationException {
@@ -68,7 +75,15 @@ public void populateCas(JCas jcas, byte[][] arrayArray, BiConsumer<byte[][], JCa
         if (LOGGER.isDebugEnabled())
             LOGGER.debug("getNext(CAS), primaryKeyValue = {}", new String(identifier));
         try {
-            xmlMapper.parse(arrayArray[xmlIndex], identifier, jcas);
+            // Parse the XML into the CAS or, if its size exceeds the truncation size, set a placeholder text instead
+            if (arrayArray[xmlIndex].length <= truncationSize) {
+                xmlMapper.parse(arrayArray[xmlIndex], identifier, jcas);
+            }
+            else {
+                jcas.setDocumentText("This document was truncated in " + getClass().getCanonicalName() + " as part of "+XMLDBMultiplier.class.getCanonicalName() + " because its original size exceeded " + truncationSize + "bytes");
+                final Header header = new Header(jcas);
+                header.setDocId(new String(identifier, StandardCharsets.UTF_8));
+            }
             // Are there additional rows besides the primary key columns and the
             // document XML?
             if (arrayArray.length > (pkIndices.size() + 1)) {
diff --git a/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java b/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java
index 6f0eda6aa..5c0373e24 100644
--- a/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java
+++ b/jcore-xml-db-reader/src/main/java/de/julielab/jcore/reader/xml/XMLDBMultiplier.java
@@ -48,6 +48,7 @@ public class XMLDBMultiplier extends DBMultiplier {
     public static final String PARAM_TO_VISIT_KEYS = "ToVisitKeys";
     public static final String PARAM_ADD_TO_VISIT_KEYS = "AddToVisitKeys";
     public static final String PARAM_ADD_UNCHANGED_DOCUMENT_TEXT_FLAG = "AddUnchangedDocumentTextFlag";
+    public static final String PARAM_TRUNCATE_AT_SIZE = "TruncateAtSize";
 
     private final static Logger log = LoggerFactory.getLogger(XMLDBMultiplier.class);
     /**
@@ -72,6 +73,8 @@ public class XMLDBMultiplier extends DBMultiplier {
     private boolean addToVisitKeys;
     @ConfigurationParameter(name = PARAM_ADD_UNCHANGED_DOCUMENT_TEXT_FLAG, mandatory = false, description = "Toggles the addition of the 'document text is unchanged' flag. The value of this flag is determined via a SHA256 hash of the CAS document text. When " + PARAM_TABLE_DOCUMENT + " and " + PARAM_TABLE_DOCUMENT_SCHEMA + " are specified, the hash value of the document in storage is retrieved and compared to the current value. The flag is then set with respect to the comparison result.")
     private boolean addUnchangedDocumentTextFlag;
+    @ConfigurationParameter(name = PARAM_TRUNCATE_AT_SIZE, mandatory = false, description = "Specify size in bytes of the XML document size. If the document surpasses that size, it is not populated from XMI but given some placeholder information. This can be necessary when large documents cannot be handled by subsequent components in the pipeline.")
+    private int truncationSize;
 
 
     private Row2CasMapper row2CasMapper;
@@ -91,6 +94,7 @@ public void initialize(UimaContext aContext) throws ResourceInitializationExcept
         toVisitKeys = (String[]) aContext.getConfigParameterValue(PARAM_TO_VISIT_KEYS);
         addToVisitKeys = (boolean) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_ADD_TO_VISIT_KEYS)).orElse(false);
         addUnchangedDocumentTextFlag = (boolean) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_ADD_UNCHANGED_DOCUMENT_TEXT_FLAG)).orElse(false);
+        truncationSize = Optional.ofNullable((Integer)aContext.getConfigParameterValue(PARAM_TRUNCATE_AT_SIZE)).orElse(Integer.MAX_VALUE);
         // We don't know yet which tables to read. Thus, we leave the row mapping out.
         // We will now once the DBMultiplier#process(JCas) will have been run.
         Initializer initializer = new Initializer(mappingFileStr, null, null);
@@ -127,7 +131,7 @@ public AbstractCas next() throws AnalysisEngineProcessException {
                     // the DBC should be set.
                     if (xmiStorageDataTable != null && !dbc.withConnectionQueryBoolean(d -> d.tableExists(xmiStorageDataTable)))
                         throw new AnalysisEngineProcessException(new IllegalArgumentException("The data table" + xmiStorageDataTable + " to retrieve hash values from for document text change detection does not exist in the database: " + dbc.getDbURL()));
-                    casPopulator = new CasPopulator(dbc, xmlMapper, row2CasMapper, rowMappingArray);
+                    casPopulator = new CasPopulator(dbc, xmlMapper, row2CasMapper, rowMappingArray, truncationSize);
                     initialized = true;
                 }
                 byte[][] documentData = documentDataIterator.next();
diff --git a/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/mapper/XMLMapper.java b/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/mapper/XMLMapper.java
index 3f8e19996..c2875d739 100644
--- a/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/mapper/XMLMapper.java
+++ b/jcore-xml-mapper/src/main/java/de/julielab/jcore/reader/xmlmapper/mapper/XMLMapper.java
@@ -30,6 +30,7 @@
 import org.slf4j.LoggerFactory;
 
 import java.io.*;
+import java.nio.charset.StandardCharsets;
 import java.util.List;
 
 /**
@@ -157,7 +158,7 @@ private void buildTypes(byte[] identifier, JCas jcas, VTDNav vn) throws Collecti
 				builder.buildType(concreteType, jcas);
 			}
 		} catch (Exception e) {
-			LOG.error("", e);
+			LOG.error("Exception occurred in document ID {}", new String(identifier, StandardCharsets.UTF_8), e);
 		}
 	}
 }

From 0a171b0bc78462d3ee64cbe9af8de71c73a20294 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 1 Nov 2022 11:31:53 +0100
Subject: [PATCH 254/269] Fix a bug for truncated text where annotations beyond
 the new text length could exist.

---
 .../jcore/reader/pmc/CasPopulator.java        | 21 ++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
index 93bdcdeca..54fe00c70 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
@@ -10,7 +10,9 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.net.URI;
+import java.util.ArrayList;
 import java.util.Iterator;
+import java.util.List;
 
 public class CasPopulator {
     private final static Logger log = LoggerFactory.getLogger(CasPopulator.class);
@@ -57,8 +59,21 @@ public void populateCas(URI nxmlUri, JCas cas) throws ElementParsingException, N
             }
         }
         StringBuilder sb = populateCas(result, new StringBuilder());
-        final String documentText = sb.toString();
-        cas.setDocumentText(truncateText(documentText));
+       truncateTextAndAnnotations(sb.toString(), cas);
+    }
+
+    private void truncateTextAndAnnotations(String documentText, JCas cas) {
+        String text = documentText.length() > truncationSize ? documentText.substring(0, truncationSize) : documentText;
+        cas.setDocumentText(text);
+        // if truncation happened, we need to remove annotations exceeding the valid text span
+        List<Annotation> toRemove = new ArrayList<>();
+        if (text.length() < documentText.length()) {
+            for (Annotation a : cas.getAnnotationIndex()) {
+                if (a.getEnd() > text.length())
+                    toRemove.add(a);
+            }
+        }
+        toRemove.forEach(Annotation::removeFromIndexes);
     }
 
     private String truncateText(String documentText) {
@@ -75,7 +90,7 @@ public void populateCas(InputStream is, JCas cas) throws ElementParsingException
             throw new NoDataAvailableException(e);
         }
         String documentText = populateCas(result, new StringBuilder()).toString();
-        cas.setDocumentText(truncateText(documentText));
+        truncateTextAndAnnotations(documentText, cas);
     }
 
     /**

From 8dab98c21eaf54892fe7a75dfa85d65afec912ed Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 1 Nov 2022 12:40:34 +0100
Subject: [PATCH 255/269] Fix a bug where a truncated CAS wouldn't have a
 header.

---
 .../jcore/reader/pmc/CasPopulator.java        | 20 +++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
index 54fe00c70..8286ed6ef 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
@@ -1,6 +1,7 @@
 package de.julielab.jcore.reader.pmc;
 
 import de.julielab.jcore.reader.pmc.parser.*;
+import de.julielab.jcore.types.Header;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
@@ -59,7 +60,7 @@ public void populateCas(URI nxmlUri, JCas cas) throws ElementParsingException, N
             }
         }
         StringBuilder sb = populateCas(result, new StringBuilder());
-       truncateTextAndAnnotations(sb.toString(), cas);
+        truncateTextAndAnnotations(sb.toString(), cas);
     }
 
     private void truncateTextAndAnnotations(String documentText, JCas cas) {
@@ -69,8 +70,19 @@ private void truncateTextAndAnnotations(String documentText, JCas cas) {
         List<Annotation> toRemove = new ArrayList<>();
         if (text.length() < documentText.length()) {
             for (Annotation a : cas.getAnnotationIndex()) {
-                if (a.getEnd() > text.length())
-                    toRemove.add(a);
+                if (a.getEnd() > text.length()) {
+                    if (a instanceof Header) {
+                        // We don't want to remove the header. It is not really a text-anchored annotation anyway,
+                        // just shrink its span.
+                        a.removeFromIndexes();
+                        if (a.getBegin() > text.length())
+                            a.setBegin(0);
+                        a.setEnd(text.length());
+                        a.addToIndexes();
+                    } else {
+                        toRemove.add(a);
+                    }
+                }
             }
         }
         toRemove.forEach(Annotation::removeFromIndexes);
@@ -145,7 +157,7 @@ private StringBuilder populateCas(ParsingResult result, StringBuilder sb) {
                 final String text = textParsingResult.getText();
                 // some special handling for documents that contain formatting tabs, newlines or no-break-spaces in the text
                 boolean textBeginsWithWhitespace = text.isBlank() ? false : Character.isWhitespace(text.charAt(0));
-                boolean sbEndsWithWhitespace = sb.length() == 0 ? false : Character.isWhitespace(sb.charAt(sb.length()-1));
+                boolean sbEndsWithWhitespace = sb.length() == 0 ? false : Character.isWhitespace(sb.charAt(sb.length() - 1));
                 if (textBeginsWithWhitespace && !sbEndsWithWhitespace)
                     sb.append(" ");
                 sb.append(StringUtils.normalizeSpace(text));

From 1108aeb9f2281802e83c97fec1a66ef17172a0ca Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 2 Nov 2022 10:24:24 +0100
Subject: [PATCH 256/269] Fix issues in PMC (missing whitespaces, formulas).

Formulas are now removed since we don't analyze them anyway. Keep whitespaces at the end of some element text because there might just be some inline tag, e.g. "This is some <em>important</em> text". Here, we want to keep the whitespace after 'some'.
---
 .../jcore/reader/db/jmx/DBReaderInfo.java     |  24 ++++++++++++++++++
 .../reader/db/jmx/DBReaderInfoMBean.java      |   6 +++++
 .../gnp/GNormPlusMultiplierLogic.java         |   8 ------
 .../jcore/reader/pmc/CasPopulator.java        |   5 +++-
 .../elementproperties-no-bib-refs.yml         |   4 +++
 .../pmc/resources/elementproperties.yml       |   4 +++
 .../jcore/reader/pmc/PMCReaderTest.java       |  17 +++++++++++++
 .../documents-errorcauses/PMC2674676.xml.gz   | Bin 0 -> 23937 bytes
 8 files changed, 59 insertions(+), 9 deletions(-)
 create mode 100644 jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/jmx/DBReaderInfo.java
 create mode 100644 jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/jmx/DBReaderInfoMBean.java
 create mode 100644 jcore-pmc-reader/src/test/resources/documents-errorcauses/PMC2674676.xml.gz

diff --git a/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/jmx/DBReaderInfo.java b/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/jmx/DBReaderInfo.java
new file mode 100644
index 000000000..0c341c642
--- /dev/null
+++ b/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/jmx/DBReaderInfo.java
@@ -0,0 +1,24 @@
+package de.julielab.jcore.reader.db.jmx;
+
+public class DBReaderInfo implements  DBReaderInfoMBean{
+    private String currentDocumentId;
+    private String componentId;
+
+    public void setComponentId(String componentId) {
+        this.componentId = componentId;
+    }
+
+    public void setCurrentDocumentId(String currentDocumentId) {
+        this.currentDocumentId = currentDocumentId;
+    }
+
+    @Override
+    public String getCurrentDocumentId() {
+        return currentDocumentId;
+    }
+
+    @Override
+    public String getComponentId() {
+        return componentId;
+    }
+}
diff --git a/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/jmx/DBReaderInfoMBean.java b/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/jmx/DBReaderInfoMBean.java
new file mode 100644
index 000000000..d82b04c1a
--- /dev/null
+++ b/jcore-db-reader/src/main/java/de/julielab/jcore/reader/db/jmx/DBReaderInfoMBean.java
@@ -0,0 +1,6 @@
+package de.julielab.jcore.reader.db.jmx;
+
+public interface DBReaderInfoMBean {
+    String getCurrentDocumentId();
+    String getComponentId();
+}
diff --git a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java
index d11dd432a..39c59e7be 100644
--- a/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java
+++ b/jcore-gnormplus-ae/src/main/java/de/julielab/jcore/multiplier/gnp/GNormPlusMultiplierLogic.java
@@ -84,14 +84,6 @@ public AbstractCas next() throws AnalysisEngineProcessException {
                     // skip document if it is unchanged and skipping is enabled
                     if (!(isDocumentHashUnchanged && skipUnchangedDocuments)) {
                         final BioCDocument bioCDocument = bioCDocumentPopulator.populate(jCas);
-//                        try {
-//                            log.info("Checking bioC Document ID: {}", bioCDocument.getID());
-//                        } catch (NullPointerException e) {
-//                            log.error("BioCDocument populated with CAS of document {} does not have an ID. All annotations of that CAS:", JCoReTools.getDocId(jCas));
-//                            for (var a : jCas.getAnnotationIndex()) {
-//                                log.error("{}", a);
-//                            }
-//                        }
                         gnormPlusInputCollection.addDocument(bioCDocument);
                     } else {
                         log.trace("Document with ID {} already exists in the XMI database table with unchanged text contents, skipping GNormPlus processing.", JCoReTools.getDocId(jCas));
diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
index 8286ed6ef..373e9a5ff 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
@@ -156,11 +156,14 @@ private StringBuilder populateCas(ParsingResult result, StringBuilder sb) {
                 TextParsingResult textParsingResult = (TextParsingResult) result;
                 final String text = textParsingResult.getText();
                 // some special handling for documents that contain formatting tabs, newlines or no-break-spaces in the text
-                boolean textBeginsWithWhitespace = text.isBlank() ? false : Character.isWhitespace(text.charAt(0));
+                boolean textBeginsWithWhitespace = text.isEmpty() ? false : Character.isWhitespace(text.charAt(0));
+                boolean textEndsWithWhitespace = text.isEmpty() ? false : Character.isWhitespace(text.charAt(text.length()-1));
                 boolean sbEndsWithWhitespace = sb.length() == 0 ? false : Character.isWhitespace(sb.charAt(sb.length() - 1));
                 if (textBeginsWithWhitespace && !sbEndsWithWhitespace)
                     sb.append(" ");
                 sb.append(StringUtils.normalizeSpace(text));
+                if (textEndsWithWhitespace)
+                    sb.append(" ");
                 break;
             case NONE:
                 // do nothing
diff --git a/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/resources/elementproperties-no-bib-refs.yml b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/resources/elementproperties-no-bib-refs.yml
index 09bc0123c..a5cd1a93c 100644
--- a/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/resources/elementproperties-no-bib-refs.yml
+++ b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/resources/elementproperties-no-bib-refs.yml
@@ -62,6 +62,10 @@ floats-group:
     omit-element: true
 array:
     omit-element: true
+inline-formula:
+    omit-element: true
+disp-formula:
+    omit-element: true
 xref:
     attributes:
         - name: ref-type
diff --git a/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/resources/elementproperties.yml b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/resources/elementproperties.yml
index 8037e3cf4..f1f7dc832 100644
--- a/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/resources/elementproperties.yml
+++ b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/resources/elementproperties.yml
@@ -65,3 +65,7 @@ floats-group:
     omit-element: true
 array:
     omit-element: true
+inline-formula:
+    omit-element: true
+disp-formula:
+    omit-element: true
\ No newline at end of file
diff --git a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCReaderTest.java b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCReaderTest.java
index 695b2918c..c8b3bc40b 100644
--- a/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCReaderTest.java
+++ b/jcore-pmc-reader/src/test/java/de/julielab/jcore/reader/pmc/PMCReaderTest.java
@@ -468,4 +468,21 @@ public void testExtractIdFromFilename() throws Exception {
 		String docId = ((Header)cas.getAnnotationIndex(Header.type).iterator().next()).getDocId();
 		assertEquals(docId, "PMC2847692");
     }
+
+    @Test
+    public void testInlineXmlSpaceIssues() throws Exception {
+        // read a single file, parse it and check that whitespace around inline elements is kept
+        JCas cas = JCasFactory.createJCas("de.julielab.jcore.types.jcore-document-meta-pubmed-types",
+                "de.julielab.jcore.types.jcore-document-structure-pubmed-types");
+        CollectionReader reader = CollectionReaderFactory.createReader(PMCReader.class, PMCReader.PARAM_INPUT,
+                "src/test/resources/documents-errorcauses/PMC2674676.xml.gz");
+        while (reader.hasNext()) {
+            reader.getNext(cas.getCas());
+            // looks like this in the XML:
+            // This preprocessing is performed on both <italic>s</italic> and <italic>r</italic>
+            // Thus, there should be whitespaces around s and r. In the error case, the text looked like this:
+            // This preprocessing is performed on boths andr
+            assertThat(cas.getDocumentText()).contains("This preprocessing is performed on both s and r");
+        }
+    }
 }
diff --git a/jcore-pmc-reader/src/test/resources/documents-errorcauses/PMC2674676.xml.gz b/jcore-pmc-reader/src/test/resources/documents-errorcauses/PMC2674676.xml.gz
new file mode 100644
index 0000000000000000000000000000000000000000..10791559b182f496d551da35d0b05b3b5734f658
GIT binary patch
literal 23937
zcmV((K;XY0iwFo5Gh$-^15iyvGB!6fHa9jdcx`L|?Y(Jp+gO$$`n`Vz#xH6@ilQhk
z+NIQtEZb@2ShmA*Wxni+iVhICB#{CEHWtausd>Nso#n1rC{U8^te&inPE#asmvhfO
z`*QZ*UVXUy{OiZpo}U#_5DV|)-(J4Ey7XS`?C!pQcfGs&>hmk_r;E=w-qyd5kNd|5
z+ul!UnI(SgU1Y&Lx{H#T=O>|el@tQL@ROPF@SVG1f8dP<!_i<o9_;SEe*eOIf#dA>
z^qP?K4f|mczBvE4Gd2I0MV#a(U*aga{o%#DD3&L?yZ86^{rho0&1Sp9<KyF9`t^l;
zbg_tA9z5E;_KP|F=lb0XwZolawG=<R$V4u1DLeA(7w2dHW0|E(0e_0zgAd7n_(34@
z{C}LEO|vv9&d+}1o$o9};hVpr&@&(9=t;cTDf}1bukX^>dl{usGEK9E_aT9mi?dzx
z{Q6;*<s^>sxyaxv9;_^)H1V^Q8Ptd6=gmVFQ33m%Wofyre7SC{+A8^t%HL|(D9;lQ
z)+tvk;Ny$);qdTa=Xks~KHJ6L&(HKmtdBd1zYyo|q6vW9&%KMl55*!1{IlK4Bi0wz
zD)<o8p}KzoEOAK&jthj}@OW>3e>B=V+cnRvdnb5t{_*;9ba1$LaCp%1CFj7zB;6T|
zcE+RQ=C=dC5VJG`d^<nO%gJvHQ_p;qqt1mN!aaMzPl=#7|Bywq2ruC$iN<HU>L>oB
zj%T-NF6RLCwV+eJ@ZR`&K{xtlDS~Jk1>Qv*&60&k3h!DJ^EC7TRlMsciE<b-j-x_&
z*Zwj$b18&z<_X{mWznSar>DN=o%$tAkikaz;B!1``0(<-|36;_gMm0c&HOCH9rE{Q
zGoU01VM%^|BOf|Cy%QNBu>I@VF2C_56H^cVUrjijzVNar#81N=b6|iiXS?_;yeOBr
z;hNt^K^psc%VOVTej?X<c$!A}#)};}iyb|2u`kPZxc+2-&!47yr;E*@J9ZX4e&T|E
z^z)z;am$iF#K4$D_ML(HDZ2CHHUxN`MZt5eukuEaX29K+@~yx@>s@;{yOlc$Q+Z8S
z;VkwiBE}oRAH1vgpDuddD~%XSxcP!9#1;JTIW6O~%tg;bG!Fn*U}SsvQPc1vJ^Z_?
zPuJHy?{C+iy#4*bfp_`-)6LG`5`a~fM}?l%3xV*!4wguSMCM&x_3Xb}XCF7sJ~o?6
zav)q~fy4i(Crk9FBKpr#cyG&v4@3r5`SH^ZF3=ke_eN_MKv=Sqqy-+fJS4AvMCX_M
zQ<{VV-ahdVRXj$fp7(Lyf7S1K!-Juh!7?sFzvls?JsvrrxD=uI;_a&y^u5m_KJmum
zJ@4Sq+Z%dE2j0QnDbV^a&bRyCU~uZahMmPH|8^$u-}8(n{-1G_Ef)PL+$sJVk%yLv
zJjU4>GQ<!#Lrp3;3;osk2#y*5JzKzX=XmJ+;cNwb?0hsB9G>mcN8<W4P~(t5Tw`E&
zAi=ya?;bR~i-HbZE0{=rI-4&2nK&O0;K=#oS<K&z27m<o4ak8D5M5w+I;-Uh81B5-
z$qSzm9S%u;=P_dVfRDHW@AF)E43NB!K&roLjb|;q^5t0^1tQ6XC;ye(M!9r{<x&uU
zQ4T(t!_C(caF7WQ;_gI{EMiNbP(e0Q?i9rp(>^w>Lac>ir@=kn9*i_cd-zxFOa
zTwi~9f3vf5{&^ndS|xywO0EgL60kD!3fSbOoXWf8xx9-4i3v%S-pds)9C7arhzrv)
zgLg7d%bD=ZEcJ@?#5)sTiXGgEr#{HNmRa=pJ$8Q<PDxD+h+~09eMWS$(ZQeOMdqWZ
zm5V@1Q|B-J;1<bf5=yxTaOnJJKa2bcfN)_;OF6KQ5@@9lE1D4~7b(bG%QRjs(rh`0
z+rIF#TOf8`I`u?(3o?2NM?q2<=qUQ{nMhLL9B{kwxbA4O2xeG$fuDFlbl`&ELjop5
z0TOt`47bpOv$=(HO=$9EhKB*;76QJ($9o=}7`+EClrf){`!0%r_;Rp9yoUE)$~SS1
z6Sx6+o(2)@IK;7{#S&mif5D?+1TrhY|3sltS^Pc~ak%r7f16I!&omMNXO#wVnv}UG
zmQgI0`N~85@P<8F1*ijnI6lmPkoaXT5-%#`=5sHB!81ROSDt|Df9C*!Hm`x6FK}(K
zNM^;{+d7L1AC`O`^+mr2S`7b%PbB36G!{G=xUnEkZ{x?x`2sNS@UG-yTE_U&GF>V-
zBA8Ik0<=RMBNVdm=o(J;j-Ise=!#AXU{U~3!~(Xk1hn>Y0h|b~80;mCV8a=5;6+)`
z>!2Jg1S;gPvmEwS(0;3{hXc#KdssLAO9x6{<HtoV;yZi-5Np-<UgxL}ASfAf5i0!f
zU52-U5EVq3f>2hjGA#Mj;dhoae4^KMu<+EfOoF1sWy%!;*5C5fpaKzzlOq#m!`67)
zIlK_e;m%~jkP{$71?-hyPT|JELIwQ8=Lt6?1teL<X@p>a%VN`^48Ry=a%OddiWG(b
zy~jOTIc)AtRvu7cyvB%9h=-FD@TRt>oYA-jkhbFoeXfj)+&cST@P?+#6mLXjF$>VT
zc$Fl3!DF+`Umzd~ymE+ya2|NK#a#pf8A7Jy*}V^NZ4z;>Ujmp+LGweYCCBTEAK=jP
zC`390kOdkkU>6}>ByBoaq@jrU)+5Kb1JQs89)ORrX|X61fG6Bc_)-9wgA+*koPa<3
zfWrkI@xkC&!xk|Je!jl>b?9xq`t{9lfB$B<?ae{rsvQT+3#*)hZ~_D8_UUCn^Fd(r
zZj<yraTlEf#6YHDAOKo`y0{?Rodf@kiK!C|8O=3@C4i9)mmM$%fRsye8fA)x@Y)t|
zauK1KO=@87*Nl#cK7={|bvSuH5pYmIt^k@r20sUYOt70W$^1JJ^O6<@jr8?s;4+fF
z)7493e3EE7Y&es53zw*<v5k<shQ;09he|zwkLTXc?>~P!+r^Lcw?mNX-&+`O{iqil
z0N=~4AHo%=K)~1}Qo?>_)~|}(5maG_To1Zyt#a2j3GGVT-6RcHbkhLi0x4ib2$&b@
z>J9s7#LUvL3>@-n-Ogbj9{OnUX=nTPt!;hCi-9k~g^BZ2q9zbtEhoHZ^8A_K_$2Aw
z76N@6UIL1D54Z;C0jwp&HzhaG0z)&e>G8<}y;uTB`MLsHgK&ediZBX5l>w$#Q6wkP
zB$Ha?WHfMel%K?Onu!(gWZ<=N-y0qu?MroZyYF2`L6#a!?0NS9hdHg^g%~2G?+sCe
z9u0=$(*tj^1PH;MSO6cK0xg=S_wd9tqq8p@AxB?0!_?%keUicf(fx$*9G=WUQu07P
zT_Pu3xa&h$EOIG{fmQ+F1A6Y~kz8Mx{*F>b0s?G=G7Jb6fF=TAnlbNDmuNi<hkLG`
zR}zLZi}?3+HlF}ufHE*Jfc$8SC--pP=_D7~9m`QZC|oF02eF^qd{@EtJ}Tx6M<6c0
zYkBWbLwh(FAiOg)+A~vkDM;V@TSC&_qI#Ys=*UeZ2jH-b{gnqR11b-ADv@5Ekac=J
z4>Se72Krvl1@NB~!HUN_5yia_iF>B5V1P@m3I{%+PnA_laRB9FkWSM{fy#SGY~0J0
zp>x6J%>W+`D;Cb5<urUJcRpTzdf=>&9=I>Z%D}koZC(HT2}gtjzaln%M-0XHloSfT
z<K@+I=n1YGkQp`#P{>z{NgC7L=X)l6_z!=JU_n<0ayQ?S5*5@W5M5x&Lqo>RP!Al7
zgg%-2oBAlV@a`{>l|^w}B1gg7GZArm&)jB-bU6;sU$gw~4NuN?=?jz=%sY~ewXWgS
z`-}R_5@vi0fPzFo)eDk>?8kcsYAD2k@*gEiD)fQD@U_sm1q6)H`p{cWh)$_}+{p1x
zyvrYdy!`Rvvv)?CGI~<Z`K!ybUGo(_c7czn$E|-aYOi15Pxk9_){A&K_a`ht$rH^H
zH-2bZ#l)Hahldsz@F>;AgzH9WTc@#TA<D{=!G*Ri50qW_zo2zyIdGeTeGgbJiOU>$
zEJ#ojB!H*`GVvq<6jYWtU6Sw&5HtkdXsAGmki<@q+e~9TV%!07Q$Wom2N`XN=u%aw
zL4H9pb+=Z1O=;kXz#-9u%G^fk>IOp}Qq~!LBa%dyD6%6!l!D%>UazNKu7#G|19Y<#
z9qS<lJCqdc)LS=}O9)@4QC=VE*s#3M%|dq@$C>-6+=f!&0AT<Hq#S6TPx?2(Ap*Pu
zl8Sm6{0J)BRLZ111Ur1;&ya5T@d{b~bBRG(%tDX@o-7n)QKV}E0yZEVtP06ogeHk7
z3F0zjpL#LR{4eUK2Ug{pwBHHuO{~3P0g6JrJP<B4*J)8ehy78%!uo8Pi>PavC8Syb
z8X_|mw%~iIWpjr_leHk>t%7`AA|ah>jSmlTf{)s_6<@m*q|1r845CnR0&Ui)DPxw%
zN2(1BP7udK$l<bQumDyg{jx|wB!s_zyxhs~{<s67l-@_nCa|S1$O|%(l2}b-I)bM}
zG+@fIC+Q!#C537LsBK!N7F0d_9r^(T58E&Rz_=3(4bZmBKuRfE%a_tya0=gwm5_gi
zr+PL?UTGyu5~rd9_KyHTg<ol>a!u>Wz_^7B{46q1q05ji?_MV|Fg$)pAnG>G$Q7cm
zLE1D!9CRDPUCK>?#KNUXmKeT%tg-96`Rp(J+sImxMcLk__lSc%WooNn)dF@B5W*FL
zB<FlyPN%W3BoGf@AjJpdKfpiuQ7IvS<!g_!^E#znl<6YMkE~1P^9WE1$RjY12$mP4
z$@(6xt$9&~tB6##j5HxoLkikJO!A(=G%3A`{>-+if`Y!VBvYfl=G)S;ouT0gl-o8Q
zfK-EMCRuify2C(IWp6|(Va##T%A;Vt=5iCw(sxvR;pirO!ViHt6FEvz@FvMoFM9_$
zP{rT`Ac|0*x{xM+wB}VdfwJWsjxE#H5k;8{twO4Vr%yCYiJsSfvTC&fu@0yVtRzg6
z{Ot|@;=x?r%m<}MSTqL^EzP8%UXsd${$O2`bS2!$8{(^Y)>4Q@tl<~n>k@=CcPTxi
ztkF^;MED40>9Ex1!kLc~Wr$ovzS2ZtzLIj%ASlru33$u9fv1Lpv%FlM6D1rCN2g<S
zxi8PH{n6X8&`0i`Yy|=4{&I;#yI?yr01L*E28dwgZB?h+=jpZ|V9^`V9Ml9A$Sntu
zgnke7>k6(k$-gLeAfJZAT)VUl1+-TJl;@Z*<WA&8H1o|Z!?>F!X9(%D0{=l3jl^7A
zst8jOj*wUo3QShg2;I&n858K%ufv7C8ZNAd#c>@LhDILQQY!qCmouOTMXs&I^;?Yw
z2eQ}@k#LBDWvA2h;R*<y2z!Rz=XJJDca?M-y0Bykg!{w70$n3#8kc}qMG~bZ5D{nD
zB|s|7H`!Vu>=&en)dTW^1HB7SdtjA_OBdN9Ewi2t#0S`_I}*Kaoy54C$Z_b5^0v%y
z%k}eoKw5Bj^;0^D5$!e|W<2OZ07eGIUZPikeUt?%EvUVi8+V7_-?QY|Mg?jpC-F?#
zNI+ZCDw3lY8G$?owWVMY6qgeF!7V!v7t0crm*3$xHk;*$gek=!!r~-<^)u}<Q|#(<
z6+xxn5N^<Yn%#O;#$nVApJ*Bq5u`mZ8<TO#plmw9L-si+x$-S6*H5Z6-&R$M`_y(t
zIm$wXr7>4&nwxeHY7k#Yv~C&TN~cv}$tS)N@Y0O`oB*qrkWN55E9t<cUQzSeCS?!{
zU=ShNFtwx;k@L>BUoqFF1qJ_EM$#_@lPKE4Oq$L-Z?&(&DFi6Mz(%r31iQ#0^a<mx
zNFJ2l859U<NSkP@v(6|&pj<HN!q68j`f5X#w?!&TTCksA`uKZ_btHv0_09cws+K!V
z<CvvE`IIeqJK7`d5N(0>0#OK8f1^zR?u?iz0}(!KLA8Pm=28^`R_c>IJS9`OAK&|{
z+%jSEuH}Yp12^1#kfM8*i>t2W5*Gln9<g_q&J%%2hA88?oM14DpG9KrQBI>TB9v=~
z7adKs`CKIPv!>iaNu$h>5s@%YD~d7NxR|3m?=BMeJIfUA3Eu9t9OS^|zaq)yg?W{t
z<I0vq=<D?|mphsYp#w6jRL$*eI&nOs2{1v}cRq~-TW#P%R|qQmEOH?hxzxKbumlE;
zLa76+bNaO!T*l}O2KGwAi~<`|Z};GBzo$01+(^5IMTHgR)gl9lgC>THd3?MAeI$-0
zaxApBpW-Lb16?6&l4MIi3{sh#-P0hdMKP=k1UaCsIoftYF(v=XL&rUIdFxBEZW?2i
z=7x&lpcCU>pobm?2mDi*Z9IVa+(&^|6A>awimY?MM5Z8#h+kN$CyPqNS(!E}TsPYS
z&46L-dov6z<FE)F(!gu;rWgi-TS4LMl=Ss5jM(BxhUbVCCv-AmgcOA=!8}AO@FYR+
zv+TMUC6STqBFy~z3G!e<xA`hgl>->V9TGOK(IJ_j{bu^XaeS8Zs~*<D6u|_6MDPmU
zpS+xKge-?qX+K^=luMP@nRo>{8{l8lD9bU_7A>Tc4jAK#bZ%k*&1T*5v13(6vg%mi
z)=#uop+xiF{V?Lhh(fG<krfUlSugzUcN@&I9j}KX;;<6U2pOd{NKts#*5dh-z}Q<z
zHyR#~PPKc{pIE<dpBil+1hP;H1&->~DxnzOU0Xo7eg%R3#J0W+$jmv?LGO`d2)51A
z6-I)G9&n~b+6}1iUwQB_u8LQVmx-C83Ry#Yn)Fn{j$z|`(M1iCW)C(z{_0!_-rne7
zaO!^jSNol9b-hi@8<RLGhNPLcR*)&+`R_zed8#n9EOf)j0?;K<MB@lIttI9{113;v
zgdxm;7a$Ixlx)UQREFuv{hZx$c6c0Vv!sE7c1{67N>TWrQ1BK8j~1Q|YKvslEa*)U
ztOJcQ#0HR3D)tEkP?8qu)L)8>qda`?Q<?0%V=MUUM0(ql9~?;|1&?WETAl)OH~EkO
z&~p+HAW}^W+xqnG^)@pBikBn3LPkIoAj&3`ji8;_6c->0w3JN^SV4X(F!&lpx=>vw
ztv(Xu_>rDhxwjhO$*iI*YiRgG^%Y*$H`q7NvWTG10v}rosm-aQKF#XIk-Ih+s97z5
zC@3c=nMDARGG~La#psCEFs>VgT8&GQY6d!$uweRC3ea>xlsus6p>jQ2%1D_g$L`m-
z+(_EIh@k<hEW!dAJFgVkN4{FHpy9_Ap+sKkP!VD0Nk%h_C}nGT!6H99v6buss4PXy
z9_h~-6By7oS+VIT?nZnGA{E|4EJnvM`nE)4$8cEwLnGs^R}5T}=Z%jWoLhpY70dRP
z1qX*DHwjC0xmZ+EH%zlg<$KiVfJ(>2FjX1P<#4!DzLyxaOb+1t`N~zSY|6wBrU~1L
zh+`OuGeI*tUko%*vYya;UgGt@aGT%oXF9)LV_5PeN<nfafx(JU{yZ-+R1ujR`jMns
zOm=8QJNg^V5J6#Mw2l)e)`9q4Vjv)pZ8lpNCRJsZ4Md7Vw{z<vVTiIF(oeX?`9;z1
zB5xyAT_oI152aOs-Roltxq>L^!_^Fi&#C%eP{xJ^AOSp>rySp|L&AxuRD<|Z+sm}{
zIii3xRTa4ND+~p1qO}f<C^C@m=GS50yGDnlWDoXZ2=2L$vDnHCf}uKj;4dk*8xCwD
z12-r-55;i0kEHI+G1Y+Vh~#az^#6PXEOOxnY?{S$=1zlmjt-Q(sRV)je$}CI$Sn9T
zoO6JIJdQes_&Dw#{Y4x0*SLvl5%%lvoNJL!LJW{YD`11V-uFIc=>%rf-aRrX0&Sl8
z<VA383y9RVm)6@#Qm@fHTlHVC5lD`I@tORk7%|l2y=gLyhisfo(wJTKHa2Kc&jb&h
zAVJ7wd5>kxF$u?XR;YL7WK}18fmcuf25B}Y8!;*|a&IPf2Hio*Uj}1Q^s*CXQ?>hP
z>j&YV4$oQ8?nAs;c(oi=ou}4#>D&vr^Ts(2^vgxL7UOwC<jFLeN#IuUkKaTy+=&WZ
zB4ComVf_Jh_1kQdiFiSgGQT@wq~1OU6%+fV2n;Q7=<6+wuE6o(gz<oS$~~ym6>?Z+
z$^qIEeLx~1%1DmS-!WlhI_8|{N5Y5=XM?;#*#cCClE5|7<d%WpSEhAbXkxu-ZcuTB
zmcNVJENXOzsz?OHOGQfoH%b+R>EFROxa^hE(gFV*NLx-9$MD4}4!$!<3*aS*kjAez
zv_iu`!jd72pB`L?nQb9-ZcoaEGz@0Lh=|2#7rm2-u5bh6kZq_4CZZooqEnf^r13ax
z#R<GFSxsZ6K<hVN682&_Z`vMPK`}2Z)Y5&`R$b;+DQV>#D8@TLK~)9Ha9T+JftxaF
zyeT;T=T!`gyIH`*Zh%5hIupaL9%pw&ikI6UBOtD{?bsk{F4Z+Dz{(MDd6v}^@1RVi
z^)sM`X+joL(1vv|>_RH0EDmz$0U!jY;lODH=&x(>mrFE~>1qmnSXA?WgYh|D+$6@%
z(}-_6i655pb6IAT&4=+VmHaa_h>~AZxp`5<5M&3Di*!xCO)Iq_N}0xS$g$7brbV$?
zDl)`!*yxZ0bx9Paq41Q6)Q(+qe&@TPlPYLP6LSkM__wgI_Bbz=0coYd951-qG;8;G
zBLhV@4UIuO#;Gw{SY#QoO!Ce7HViQ0(@;Er2I!C*zGp2$22viMx^Sh$MT!kZrd&x=
zq{Yx+c#<lWlEZ#W_DZ`BYHKO(>9Jx=8YXNaC!c~|r4eHq<kwcId>^C+nZb`ohfX+K
zASa_JhG1#)7cv%q$#$YNoB0XJA!wr~pDOv&y&EpN;Em|eWNZ(BJV^_q#Pby<b*Zi>
z*Zf*Auo`u2^j%lww+wsg**)zw;D_ytB<(}&TWQNJWl>!5t4h#8MPE^8dwugC<XIuK
zfV0AIZL;C0RqZ=@l+{xi^*yKZ)-^(;DbsL-nTGErOzrsUcS42$kA}lj%Cz?Y)|}#0
zPX<9VC>pkv-72?I*>fj}=%4jR{s~y-XB4DPfHTt~G5Mly`VfKD^t#N#TP%x}nh3>k
zfptQNiIOdAv_CoJzy$QR<Bgf&eA?2c$1~Qq->mP3%G|zp+VD=pkH0nj*tGm!1JwRr
zpW+{KM~sB^-${Kd=F)O1v$8D1X>M%>@6NhO`+c+ef1Lc#ae++PMhB;B9=FDI_Cd;J
zMtQgjYKqZlCutMh?VVT<Z`?QH36R9uJS__;V-wlxQPl$VeAa*nxxFgm;U&q<U@J^2
zgWlD9i{^zU))W}1p+cEjNbIOzz0M2gIs=hyF4w?r5iEZKE541;qfE}IT2R!9$59i;
zU9#nB`^;I;ExWC3tK^H;5kb;R0}G3$tA-R$O(_`pEN?JNt^rBL<RGgCf}ALauXX|0
zQBE#|N&R0E#$Y6{`<~_5thf+lJxLQUN)aTe&v5E%k0hJI3w(-#q-;(*7|QX#akdr3
zCsWM=G}y9%$Rt`SKsyyb$c=($2WrnnQVHhLCfvLdn6#A0ECcjK$l)|--veNvpd#xJ
z)H$T|rr%PmIqsGX=28|=A{sDBELK^~b&+Ayaw$$wQDm8~rs%G1io=5;Oe*2R@~gBR
zfnG;XFQVrHsaJJib2SK`ydT#7oR^wo2du5NdSjN9!w-17%j+Qo2B9~lLO6T|KDmKT
zrAk4G>2q0!N9UTGz(h8bJ)3o`G52+(b6>Ai;;SX`VPR4&EVCOGkiX{z0CZZuJ~KJ^
zmBtPjfy@)S5XJZe{qdF;-k*fQsY)qzup6B)^`|HL=~ouk)gRQ4UAobZ^kijB{p_`c
zauddnur_iSSQ-_Eup%}a7!gE3eVUVsV@g(`k;`~EjaSUPX^|~r=qu&9voX&g5Xx5!
z*E@DzdmI_3Y9C?l8YW}w3Gf9shh1VBhwV{1896qaBsPRaogLbQ%yud$KHV^q&GfPg
z%{2q4{Sf^#u~zI~sNtneUFd$ZSj79Mp`_WhT*5m6TCP!U#js{Qu$?8WM?TR@y|R*q
zIQ>Ks9`(d4_X$If)B_jp1NFdM_28xZphLV6Opd!n2ZmMLzBU{fd&lEb9Z}*pNHh~G
z2BxS@W7%@+gD`$ne>Vf3ZoY&Q?+0&yUSulSgfujoHqHD)!^^62K+a`(KueP6U*G~~
zWTgfr@jpu+?Jy@&UK<Qf8`f4IU4K;#Y#I%YPA%DwKaKoT?|*A;<Nx#?aTrEd==QD^
zQ04u#;hkXpe2U7ipG<FwPO9M&;JKM0^y9sTLv)6?vW}4*;P=7=i25cZz=`Zp5}r*J
z>hV2|_ql2HjkorPrW>la*@8a}5N^Vj$Kb^e4ev}E-mw5p-W-Zm*GXk4UOgoIRP!Q8
z)rmNrk@%Y|f98zJaeTJ)z!DE?9+LA&2NLJGxd(L7u#w9u2oL<zb!T@~!Pkwz+k1vn
zwQa8z=B6YA4DnS#9Ln_$s+JHc-YcCx7{v6bI32F|vLJ_>j8)dAZeF6#bz4-TrIiUm
zwVu|50MoWB>gv$D;dBP%x|Y!6m<6Sc+q%TcRNWBulAz}yH(BZAC{yb-$?107AbT?t
zlhJ`{erdvIDtR1;swT@h=Xe?!<!RzaEp9|sDQ&uHxPdGq$FN8b|D#H)V&vxw{(c88
z*aG44Z)Xuz!{0%v40zW0*<ul&z=6%Fa_{x<h5RN<@8zcmXOf@z8?;BLkLOy7l@F!*
zwF~pwZuPzTpb?KW7#yG0e)+>K;Mo^zgIiOT>Od+Ma`nTDDO^4A;(V*N=S@~Rd3b_t
zGr_J|=o6t?owp-2YYXtP&hl;*p?A5|tw#Ed@PDN6zmNs};&PD?mHpvGET)D2S?#<C
z|F~HCC+0mk@3%gpO;CSQofUCyw}O5(Jp8tY{>ZERuRw0(Li*Juxu>vesKsVvi9Rb4
zg+u$<peGHQqb3P@XcXvTjxGji%nedfXdilgWGwIG{4Dg;Mj(+nv)DCdoy_6H%G=Tk
zR-2$;tO;3Z9q-G;RdFypfQ_5d*j+V7J675ziCIV1Bq^1zLuJl3NPp&#V<}(i0(>&f
zI8AmaToyrcCxa(!)uP27t20tLw?^kGRP0BT6ugA(U`jbl#AII6*%ma;J%<U{qM{XK
zZ^t;07>GQG5;`z1Pe(6E1h8u7<iT8U$Y^3E>`o(@VO+~>p^DO=afLC}q?~(b&{EOE
z<orglH7}RTCM%=Suzoa4;0VOJzs<2qS=k?L&$C6;0<1_>>@zK&ONs$Fq)A-ii!7AL
zF|B%B^w!+UFeTR$p<%MLGQ;SE(W=?B?Y?JiS4BfNZ>O4~K#xCU(ajrm8)e97w7QSA
z)%_wd2rt*uG@gmdX!5tKJP2c(k8)Ym42E12@VM6$@Tj8e``#NBr9*g3ZVES?aH1Sl
zWL-tu0O6u?K1my{0`6oIz)4Q@ORW3DOPo32P&dkf*IsMaqh&_is*c1tO9;kkzRV(%
zVwHm?83;AxR(0930-)CLTt$?k9c5gd%CevcuX~Dv14Uh1teYUhH45;_Sol;NJqt|~
zltU}DPsojdXxTr&ZR(iF*w85ig)OFwopMQhf)ahL$Uv38l({u+8&66`3roOF>TxDD
z2lW`!fMwi_rQ?5~`1@L94QH?{I2{ld0t5^&9Lr#K*OxErxmem?fI*coI@hOcyjyT$
z<Su~2I$f`qT-eD#ILnCbVZap|iikH8v56|MVi7A<QlfErIxz~J`Gr_|GOn*dOmR+O
z2p5>71_GyJZJ}UZ^o1-bq?&U>YEWxj4Vi)@v}6DcV|nhc8ly`hTy-iN9pq{XN<1p&
zh^rw+PvsoZ&H5Y&e#3C@SZMPbcAz__o>tgD%6J|)9tcDCAF#b|%YHfG;i>ocVZUFG
zz4%9mCrpr;9iZ0WMA$48XmE78ZpJmn$X0W57x1mMhOQ5Q+E(NaZNrOl!A3*b(SrPs
zngw(YEMjWdv?d9J4tr|yH9Te~PuM_F2|gpU<E&i(sU<~r-02ezkHd`mUk|37{zl;>
zmc*tYt9HKuWTT`JF?btMVu$p7K!2fKng@jXEc!y7Mj~`qW2X^h@%K9Al+O%%(qVoG
zlYz<M1}H?LIOz!+5&-hlmPHg}p?r!FLdjBu5eZL73l%>251dl(n^QxphB@&A1mLRU
zYX3z@4@un^9;nrMT{^@Ht)Q4@nUW;b>Wm{nw)b68->D1(h9h!Lo5BSxQYRsl+RP@m
zMbjZEwCRmm+A$wom<pL}U0@+_bd{6-$dMv<!Zk#2k-%umEFE4T!H1&daHDOp0Zsl%
zDexrqw<j%c+n#HBlb1$r9}r?tt$b(!meZc~dE7an4<@hly}xN67(#)rU)~X+H{ek!
z<G+vzN|Fju@pYslXzx&%VL70BDlM4%E72;Wg<UA3uVb(}(+mo(Ah}P(agIq}tYA=L
z7yRT;Qt3y>kbQu(KO->>nHI%9wskCfkRlES<DiKmN<|D#RH~T-D5TupZy1z{>$MN&
zVQA}vdH4)7!8!-_6KQijj34iMKvp!DI{ObVQ2&1cheg3iG6S$?);y`0a%sU;+ps$>
zHwSou9`gp5jQlFE{%8j7WKh<;q_*N-?b+xt&#W~%w*V(S|DiVURc-RBkzJbKNx*){
zS)!jksy*u-LV}G1;uj33v0t;=TgGkq!8ac&&;C))ybg>%*jYcTU#w~6`NISM{W079
z$9m_*CiLW!CY%q_l68n*KSxcpYtQ~xdDh@udj7v#Jz5Wv&o#e=?Y~)*KkC#U#>IBO
zRELYYhA#YZFKqRIpXtID;!AN3QoO=+{PPcmL5@dRF5<71<&aOn@_AazYuaA*knPEC
zYm?PUT-1&)_E5DGJKw1cm0(|W`Kj*2t?sHF=Q|)`)3qRI{J(2K^T)ye+QD?e|0<&V
zp`lPmz3TEEe0f$%u~~S5=6=t{1RAM!OF&cG)meKa4{fK$=$}fr{-jBN(xg8&P1;0#
z{F63qM4Lu`(xyLY)7QV1|Mh9qpK{Z))28uPqD}IX9o=a?+x+pXP`5_~<3791;SX~?
zzN2x0DDu;JO!uJVHKyVAK<397hjwfG>u_GQ(`#qj&wJF5K<aL^{(BnGYMc6oIQ|Ea
z)jGi_coH>xl=I_XQW*M^&V36y*W~c~zn03>p848zr?VW^F*(Qe!!OU@sMyn`A7xhT
z!9&q#Zix>S(X6ALJ4e=V3<j-0_O@uuc2U_nD!i;?_`#Ecq9!ZvHI{d#I@S%@|5nJ&
zw+Ii#F5z-HR#zUof&sTtt3~E)+j;Ym6CB|=k;+y`r4zKACSu2AMCn6r&l6Hj*AS=j
z&PcUB!sUm?SidV-TN{&hdTsIE7P4}TP|}#ya^i`IiY##FV_9@5qI3Wb=AtMt&BZ7t
zu+y=34EHqOwUqDxl;hxV6+2g3oUhX{z7|SW7TK*3#QL&^+cH9_f}0qjlv|-xI{4HI
zo9Z5!-7y9=YQ8o!sfnQC@|QZtBEov7GH)!BZ6gX@jNOhs?Hoo%rDq8M&mWYfW6m(g
zLVoTyW`<B^)DEQrRr5ns1Jcd5RUtF^9La=<mXw)m3cq{02(zroENjYQt^kJ7$qb{-
zxlaB3Fgy{fuydUG5+gBBu(eVqrqP%6h}`erR{0s`zfB){(Y)pT)w2SEYuwREx4HA<
zlFQ#Vi~}37*X7mnB8H<=Z}<;WpO})SD4xD0l>%fn(NvGMC(EJ9mXi@x*NcU(l6f_|
zz~od_LCVP}`Zi(Bx|THt;&SSc9qH^ZR677poMA9yLGg&gB%d$xxjFZWyFIv<EA}TM
zKHu_(ASUu>EYPQW**1kC*D`}wk%&Wcs(v;3gTxjmsgR>y6<1`$=Tf4C%DgQ)^tJiO
zGAB#6Ud00YC1q}XH5uvFI`9obhS$#HUd=}2swa#!oZ!el!5?jCOZaxCD-8ZM$L^an
zXq6;7ePD(T1gIkN^tx<%Y;PKHz)HozJ`S@6FjQt~-OPF*wC3=&CLex&`~_ck11ssH
zrd+U_jX*pPuSSLKITZ#vME-M6btfm&U7Y>!78MICsofHg?4*ok8WGj3YDgLQp<yjj
z_u-5Ts}GOi>>H<6V0v}is!hA|n92#zf(g#0?92XWNo(yj1}P^>7;B#~ea%L6<dt*P
zD*{yxssf+cB`Dl9(8i39r!pxU(l1xsg9sZz>qHw>ZUL7Y3gD=*62W?Q*3HbRf$#`0
zF$^$Wq74;PU5AO7VXjpB#i=H}VD>xL(5sAXrLb(#1NRZ?l3Y^+Q(CC3o>gi=FA`Ub
z9p*B<;BD1=u0frC1W20pf+;2GEtXDL$~3)4VYyX=1v8g1lRc#jC9du7v|XU|>iW`K
ziq9f0v(#nugGXpF)km{>sxoKmsHZ;EWyYoc%5Hl4Po2v5Ppr-vS({thWN+br(IeRf
z<<)nl_B<T>ucDAR1NR|c3O*po;w;#r73T}~snFe|u%>)Lb%(LvnJyflcm`i{c#X>m
zS`Axkwe#E*aU=XyyU_}(gEM!f1qMQyv#O<odXqJP!|^FwZ3pAizW2IsN|f|_b{*N=
zhMti;eJl|j1imAK7B;u3PqUu=&}r`RY1Z!u{W@}g4H_|_Wn|SH7XaTg4JFnr@}IEE
zJ#R}ECTeWJ@q;%UNi&Bo5Fl%5sK-q`P6zRu=B?a>+^X<H+U%P8B%`MKB%`e&U2fZ5
z*|=zZ!`<oxkGAV(_a9Q1g<D<`edc^uts6c_EgT~Gc<?18t`%51hbW5VDyEP_C`Afq
z>*K)MvD4~ovfAO|PKBwQQ7fi_ZADv}Xs8w{y3TupIW%e#v3hG(*kp#?_GXdFd^By<
zsOE}lcpG62CT{gc94>F38Hqkapqt50Hnk(cXI=ZiW^7fpSg5{>X;rDxX%*w$$RgyL
zPn*Q=9PRH|`W1_nPzI%4Kg^nJ&CU3G`vEnbkI~82JZ~wb4^(_0qRWNGR*L;tHMgke
zmOlMl$ui%<xk1&#^C#cswQJ}G@=niNOUEAOaVfi8IW^q4I776M7fg1p*+>dpL`hk_
zo~D;}=W#6R#EJCe{-evEJc?a-Tk-~Cl*6_zZY2xrQIid90Gtc1)YgrM{&!V`T5rsj
zx=2GrE5a>{S2@a8AQm04tR1Hl?g1_NDiQK6u6pDRxdtF<tt#JJl^g;srYyX@!Cy?9
zNP3r=!f}f*?`J6<J<vMjQH7&hS`v-1CD9F6x2qK!QuW_>*PxP6#iY&>y0%bIg=wrx
zT`#nxYJAAcADlYXF#bMxu)>A)gb|eg(KiEjb_+(;7IM1DwrY%FEJ@8p9W2|H&4Zeu
z^Y>;;*eR=fO;hW3RZ-rsy$4l?&onM~Qw*f^5yXDZ9aep-&^eM4Ha<0!ky4i&Zg+r&
z5v~g(%}`X#o7-F41-y+02`i?aNVWQi)XupXy`BZGO>&9xJPh0Vj~U;@aJ%l!@mn^M
zFR;BzW{|~~)kCLf=P|%!$CE5>iRC-2`i11@^MGsSVuNyO4fWxalc=qS+#2#8_xq7e
z-lgN33>~QXRZK{0hiYUK1U1#Cw&5(B53-$&jGNfVxPe{kdivz*rVZ7%@xVeXcv@zO
zCoFH$u)Otl!;1Z<<+PhR;Wg~d91%x@jYp>~-oloG`Q65B0URAf%fJr^`OW&x1GXrq
zZ>XvHV>#dPZfdHfZYk-jwrfvO=snk|_i#Cqz3*C%q@@f+Ep7VYFu3G=@L--q!!yql
zMA<a?0|h>wwAF`GLcZEoBkYz^In}SZ)h?%61IJe={<0=ay3@tjpsvRAf$z`q3~S3X
zJR_XruLb8v1RsBsVm^;5WmALF+EO;Hfb0V7&58sy1{iOEN`&0ev>Lx|;ryDCIHF2F
zyAb$w#dqrPX$!gUaB8rae!sGurp>HPc<pf755#!?OWgSmFz6I_|0r1g>(GwuF|?xt
z_571|G)YYDX*D#3pM*^_wx;4VplkQF{PF47e=!>Mu>41^;q?GVeYpw*^V)YMMOpR7
zuTP4;2EZPsTu-0%$r9<eAat7$m!}9x&j8M6r$x^s2mBYKSC5gYp5b)=;3C*l>E1V}
zMEWNo>q^Krm)O3Vu=rn&hCYQp%1>V{9r=@32nuwdG|<N0W8BXvi|!)rjBD%g<!=%g
z*X#q54xbY@Jl=RutoX!Qg9=KvB5y?qJF!}o=w{#)dxlMen)8b{niJ<SiJ{eTtvq4&
zI9Dp&qtkEWNZRe^iJp~NeyW2~&++*-EJj9=3>u<HS^^Kfzcwt=-HFOHWRMiXw9}$<
z$STo)&C|<$cZzqZhn9|-9%~N}8TY*_6U&mD_!ZL#?5ngow<1w~p`Pk@7&qp72`nI1
zF(lqrEqZ3Oty<OK4MsOz4%ae{qe4)Gf$A4Wo_g8R-Y<(e?B#bMJN!iCdyiwAG{{GZ
zVo)zV)h{l>d$xlfe}JUvdCuNgXMUc#p({G(W=znF2;2mtCa4>VYCDD~Loy0l<@MF0
z)cL;mJ}rc4n8~AIxa&kXW673|zRbd*sEvtf<*3_W>fkDX#*Bg$c*t(~*LIZv>^*9t
zIx{hYvyNrtdABi|T4XgO@7H>29SlwzMpI!<6qQ7|7VJ2W>C~BL4uexa3+5u1e}<<{
zSC@2MHz?J4%_>{qq)1Y>%MznPBp<S)boEuj#&0qeNXO!12rq@k++!3!H*=+k{xV5a
z3>px#*lJjNpJsWnL!{ZIz4K^>5gZc{_?XkAa^3Na?VJ*)$6u1(ds9CzsANIt<ta5?
zQTYXkSN%tKs>DdAWsI#;u{W)bp#h+lF$LIBL_kUn2sMbve?R4cuvaD}1yeSIzmOcq
zjpWiz`U1EIS(GBn&eI?wcCCtvG;sD-nz#q&RAv+m!#!-MkU(m!Qer)JoFlU7QZwqX
zC6YTJ4Kr8*ovMnIwGIsP`0TBJb8cm20zS)>+w$<LQUPZ`0YUPEc?5(u#C#9x(x~DR
ziJpWQLj{n=8=*5-<dEDJSrIl2bc+JOA*=h5>IF=#b0)&1F-BG}I!;2B0+7wc{<1a9
zOGoC(sJy=SHoX^=av=EvCn#q-sy#Bdb;XGE`zo(DVJj!Ci73a%gSDwB!?npK!-2^*
zX^6gQpLY0^S@oKktdvOvhgJt>jd~=CcqBuIdJGlWU?w)QB0)FiP(4n(i3-!x8_zQK
zvzcgOE)qQ?UM%sta%}=61O0|@ACOO$zyZ83ag^Mi%wgR>ym<ZcgWSxH7{1tTP)*(=
z4#AMfD+!gBB*X{8#0evRSMp7cj{_{{WfEr82pbs177;Y;OeYCEZ7K&sc;`nkNdg!*
zjY3eq%?&-d@&)&OCVnFO!zG+oN2iyg;0M$aM<NE4PC)6g)5-LSQhza!E_D1qNl#K}
zX``z9Bxy>Ci=&t-J!mbi6sad=D5jPOG5;IBQ_xa7kwOtLO{~E{oYDu5I^mq&sUHZ=
zh#}s_$*0_&8_rGW>L8&60Wl0F`+ozlNO|G-H1}pDim4JFFn*45=NZUiJBS9J`XDjA
z%%T?#g3OCzd9u6f7qg%rN7-W0pJuy@{67e?yD!ey|9-Zs#y?j9zZ^)Mt3d0tES4qp
zi*wM4NTzJYM(x2+dQ0$0RC-~E5JmOnd=;lC0|7o_q@0TFMleY5JEvPaW8j-FqZGy<
z_6Rb66iPWLp>Tfdz(EeZkq{o!Ibf171>TaRpfEu<DX$CqR@d(akz3~%M`of?du*}}
zzP6e%;{~G5lNh_K+U<D>a2qLBWPXz4)u!0r1u+weJWMp`h|Dy3fVfAQogxDPmAX~m
z3(P*kn+-P<B_4@d&X$_u#dBftCKm1Pk&Px&=($C?4%iS$F{Bo&8IYTXR0K?L#yDO8
zXTD6Z{F=`r?4fK0sn30o3K6&6#@n2^pKA>)m5O+o;hECPkgQ<Xf0^m4oMJH~EFffX
z2I&^{-VCTMw=e0nROb!`$D9^I+L$~Uktl7(NR)>MIu^bFL0~E6aj6{2GE7tpNE;1c
zLZrg5CAi?ZV)BxrV$>(eYY5e39k7%E;8z$BR+8ozZm;vE=zYM-nLg)Vcl2{e<`KLz
zAYV(;UXlMqNf?1@T>84-x}xny_L-Lpz$$A>XQ-U@AyPn9lVqJM?}IwB$|D9P;%Cm`
zVZQ>DvRle?hjUxN77%JcK5pKBl>6~3fS~^7a{sw#S%ALgYb^>RH(}_fbQ0s);;f%n
zpk(dbEPy<aL1ADr_dcW01q%?WsDoh>%4UF7|2PKUqMZuCxzHSIdW2ro*b%RXs1Qfo
zYOaBp+k}{vx|dfU-v6|IkHdXuk3af(P>Q(c4Udlx&7OWy^#dsDE=um;8bvd_y6SE-
zN>*rh)BSZ&(#YhPkR#0l@rua)9c`FX87+=4hoK92{qrZj>%<O#1t7nan*{jI(=dNw
zxWcgSy^=_4v4CqpFi#z=GysK|azf@nodQN=8883wz`6{1<E3}5`WmHm*rc=$Yf7sW
z?Q>4!l51faLVxP4M-<gY``(1?$do06sB>r$A^oVi3T!@^Mp!!Qr6fCyP3qyEhiAnQ
zkc&bgTfb*L$Wjm)wW)-O$$rDqELa`DFDl8wa(6N*&U%jRUxo(C+fcW;?j1H+Vvj~m
z-wsE^rmqiNJFWTQaJWylq(>ei-5l{Hh|5sgQwWM=T9OI@(>wSE>+bpCZy+Nkg$;-N
zltDu#9+XU2Hnx}%jwy>jpxA=4`pnMJJRGQ@XqRLlqX~x&yd(WCw#lyX51@004mrIS
zB*Zc)XM%%dne&jiItf#8rs0ZS!-iq`pjYJ4B2A(cv%8RHqwZ~qd$Q;jRb=}*R9ce{
zLRbr*Cs;NHYfF$q8D-XmCTj^@dCg7XcZ*?iZmTM{c%kfNK1gi$pz;F|3t%KEVy)wq
z(pI}HVLSPp>^A`u$F<NHyU_Sdhj^(E(Yplrrdbh{3xdc<LBy#*R9)w@!(Ux$vaiH^
z8Z`hR3>11v5iGNEhi!0a7MY0_iGZaRI4hScX|$CxpEj?sZAFz@71cD!G-jVdb<vbJ
zDQ!+Cguqe4Jh`Xev#@2POrJniBV+D?!H^8U?0G*jnq4WW3+J=m$E^X4k!{GQ(pm_R
zzyj5r{0GBE+{9a(qCg4(#RLd6fYuZ{Xq+ME0<v28gL6aKSqXot^g$0M$ZU@YKE(Fb
zT&c2^W#fF6xhmi{llVEO>V6~KL^>3UJ72p+v<4|viQF!y%2pyJ>iL$z?V`!cd1-%X
zF{4*a4`10|?hRB)p$)wxFwejkvD6cxZAF`$zvc1+>q{RvmMz!1_{8B<X0*BANg2c<
zEu=u=7gCI2DNFg38cb+_Zq;8W4Rcq@JmcJNWx9Ax(QTN;78k0;-B2p)JsV)8ziICj
z<O3b6@dWQy-@A!$bIPko#a*;35wD~yUam|wXjhO)xyTvqg3_t_%B>b**V3~q;Bt+Q
zxD>X3&cg96>H4YP3BnH$Iu}b|UU;-o;{Xy?`=<uH)+PMw=67fNSwL>rtjJ1MPs!)9
z<T|JT9NE_%KX1Ou-G<Gw^-@9$;d)PzCRpB0T61Z#T#<C~Wht_Vlq_D$1f@(x!-=M=
z^0UgiAj|i1=?z&y4-46G606>hU9lW?oTXnPWKMSiWEsgD7%6n52{42$z?%LvWB<!)
z7x2sc)fPsuhjk|28aA^8X&_?UCgE5sPnP=kw_KOemA1<%Y2a8muR%GtRX=4-QNTAP
zNOuwe0Lu&bH=4~SX@+JmR7W4s#SdIvJk3J9HK2+gJ7SvLm((Z-yg^~}*CVU|GBx!-
z@WAS;1imoF1K9WUGVPjHRER=kD0KSLQly-o4O2#}FiW{^%tF3xoU>&LwEa=3lH>7T
z`rbu+k7}+uEp;5EC1mM@^8~Sm1lDAhRqtg{2~BaRVk9haH0<yHMaLJAs}=_DVY!Gf
zUBs*EEIh7uETD=hWkRmQ5n;engwFPfj<=w9M3>bJhyBC9NY`PE<)rBcD)Ao&s8O&D
zT+<14M#LzhYCRwwV$)JlsQSV`cbf<Pt@_*p<JO+vwyV-9Ibh?}$pU?{D!!kE7?U6k
zoaSgY9937PQy1Tn9gP*d#^Tw&Qx>vVBpSsTXRN^hjevCev8i?_l|oxH(3K2f8`|80
ztksdDNns&7jJg%AG;K`&f;NrDHr=|`&Jf7?owA@+{2p(V;nfhmI;~>?LDM@~X@{+s
z?z_t5&^(4C;r}Aoh6pmobD1Y?t3gfqE0ZjxGLW`i*fnEFJD)@Em1jzY65z{N)t9{M
z>tiqXn+!txYs!NVP$*si<TA7rIm<J}TXa$SePgvSzSgaw^bd|t_j=xu{$n@-Xzj!Q
zAHe?~fvN+d=?K0X!Be9HnEay6AoCD{>8nNINKwHs$I8v|YLRA3036&SH0yj#b2~0S
zly*oNoJR|nr`A)hTGa9R3l+qXKIIteWmKeBhZdu&rX_ilQ$4yK*Reqog51P#AOdww
zMV2ZLh48SBO{Hi-PLq2;L0N{H1({&`vmb_$JOe-!EQZ*G?=&#gG-t0gKAxnOw@F9k
zv30>*03E`hBuG5zB3(p6n|_xW``}4`Ctt5L2}HS)!qg@}R1;(usgM<;0m@C#g5l;#
zPeCRYsitdG&X@flD92{fbv^^?vhcr<hn40pxRiJ~;b+(5p?mZnk+LDY-=;Bn(SY_G
z8F_4tK1t$-o>P1Y#Ilg*szA3=Kv(FgLkPq&_5-CtO(G&@ax8sN>{TULzL6fpp0~#k
zA_+B<)#if;4&}r_gFPk(Qn4G3P?sA@JGlIEG{i5DBnoK7ZBHFc%AWh-feetFqah0v
z%48O=G;Z@Mh*hGCfwUYQc-M3`TFtNsv)++}PL8|<2nz@xR-Xr8NWk+{{}=M^1IhwP
z&9(MPDgeRIpV4gE%cBoK-5Dvtk~uyGtmdvgnz>jyoxJAwpdM}xXq9n5t#;eu<gILe
z52aRl9Zh(z?B>G$XUnhTM*7)Cb9`Jn2IWTHL`f3mbFHsj!vUo+2qyM`YfDNUG|<A_
zJFsi)u6f%H%s#Y9H=L`V-4g$r3O4CQ>cRt45iB|>-<!TumfnG<ThOJS2@ZF-1nkVH
z%>A57odZ~Cxma$}PzzW>jMT}N<JcrDCvx-^s}`g~N|_ehL9w!^e`K*oB^^1!nUXXN
z6`ZRIeEQO$lXXE#f_9W={=IpZwzwi~ECuaN_o2W}3<`UyR`U9O-DnP5SZhPo{l53Q
zJ@&v-1FgaAl8@EE1P?>VWlSY`!-5#3g?Oc?2C=>biEt|I+c`(+TYeexXZd9zeJR%H
zDs)qB-l|R*AuXBvcc|nkD9i-K^Dv3RCuSm*F?Lutcqsy8@0ObKt2@`rl~uU2CbU;L
zGKa88fcX}<BvV9=7tBR)YlYAVH)Kfrut=w7<G%Es!-&q}<yDcDy4^)6lKDmlPFVIJ
zUd=pB^9D&mxiPhLiUg=J>_vJg?dN#q3CEb2js?<bf%YrcWf2taspkTaoj$S@Jxzm>
z1Utvavh}*e-VZLp+eS{gb&*H4uMty#r<u3#i~57ibM1*VOJFfRSHUWJAOr879X!+w
zEu;X(eiRwv;6-Fl!*H@Y%dzF0HOp`qq=_$3Q!-RtB++$aj#MRGKakRiT5s;GM}{oN
zI4MYTu^~(*LX|*8u?57jVr&o)%&}Bws8`9sA(~fn0+-Y`ENH^v0G9z`9i8I$ZX6DO
zE#KOvrVboQC~*Kw;|{!^{H33WT!qDqz0VQcY}^W+s^NV+uX^0RjzQ|fxiERad<u|e
z1bgJ@o!sHW5*#j>h&fgc(4fFm^l@j1KUI&hoKJGFdlAR>G21?-2KXR`u3@@IP3nsl
zTa75ce`v8ag__hzjB$Bf!^uj-ozev?OTmdHVr-4ZaHW(8vNasQU%&ddCEKKL;B?SL
zBEQ-ij=jHn``g-p#|)OpEIz;sfYqfKXoPq~_lSlEMpiOWp<qbgdCZekwI%|=h_hJ?
zq?@^?6e7GW)>IOMAw3$JCgP8u${b08imHQe-ly5UnBjNaylavHm!6Z9Y@0)Dag4XE
zL|>EzFl=n<(iN1-?<o+m;{0a(!I(<SSCS5LVubcRBI=z^WV9zG=de6Wrf!RtO5Ua~
zK!VZA7fY)j=TzJ~7H}mO>C={Ac6p|z(OUBkSVZIZm3B$NAyO#EtDr^c2RZQCXkZXl
zF0~S1Ng)hswx&yLR%w6IR<BsK!*A;mz&iF@)+Z*HxlW5~2nwc+wdUGg6{@N?!I$#6
zc)J|rVE11@9UX1*<&t8f#T^-Mayet&nlB?>t3lc1xOU>zcXYfu&zf6?CYp>~F?nG{
za8hz38~BI;kZ((d+Dd>T21r$gB5t*VUys5eq=dauXzNgMHOZ26sY+<GHtMQ`0xle_
z#G&pXiY?Kw7zsD$3a-*=6@?Zal5Y!GAyY3-FSx4<Qg-wtJ5%&^v!gzxROrmE$QO_l
zET9d9V>VNH5)#HNG!p*cxlYTNlc}R1OLrV(%!@LNkm!7{Vl|LD#25?i>2yyWO=6n*
zI7g9JqLqQW$<Q;Ey{KA$IEFzdWdilGiO%*zA7!+F^#J7sgpOB=6;=_K%wvqbDaR21
zuB{?O8}Vdd6=I<R;51WBb+n8O?+{K<IHlRa8IUeOrN*n<w!_IIOYztE`s3wJMrK>B
zlmNdiV~n|RQF-g?{S66bcp@5`l;(=}BqGZuV;VfHafA;WT~9MNNRmzhi7HM9x@WGe
zpvrf3)$^{dt}bMB(bd(<zV~yKbA%6zmNNSkj{&e8&^Kp-lLzbond;O;j+kY4&6O;3
zt){u;30^&5aXWUTl^f~`g)jajrqfKU@~YAIDq$k4ZkBHHMut>mm8R*Jas48_(dI@!
zlm1f~NI5(jI<Pf?cTw`Jt5*wBnM3%b8~5l}X##6xYGuY4pzUaN8mL5nL%03BO_(;V
z$Bn#YP7pBVRTW#<_YlWbSu9aOuE@D4q|Hd@X2e~<2QI>`zJkGb84E9FJ{rp>D`hv9
z#MV-m{Q4cM+)U1ARUj$SG?rI|vrO<tB0*zOos9xgTvsbHgjkvnB&n53uOURrDor9w
zUPdZzH3TtbITx8pO|vrg%Y-9idnB`Bt7eDQ^(q8b6D=ZTWEVkVCWy`#0%5JYa`M3d
zS@ah&9bb{gJj&I@VG?<tX16_u>fMt`HGpw|Z(vVN$-_ze=Ka(+qgeXXhbu@VO0KGE
zATY<~&}1b=zTe@HMD#0Q%W9+9TK(hj!H>jR;oLz<U1EHg_lXCRE3mrBTx9+v3e#oI
zRz;Litr)KLx7syZa`6aHpiicc_xZqWda^vf0Ajm{Ok*sKBg}toim%gMr0T7GLY$-)
zmO3n`R%8WDVN2FlQcFLTo@Ty+lv#{F>dse4T|`ilJffV^gbcdo3_ohk48Ybqpj-@T
zjLfa;sHqK&MO>T{qmWFzzmbUpnxe|;8Up{TYTwuAwVG(SE)^EkrIcjufZn9iMAZhj
zop@60s}V75Y$p{b*2qy+T#MXYC8tV}T|;7ZOEoLg5$MHzDs3!i0a~dh0JH`yy+Ce2
z0v1sc11}GD60t2tg*wr`P`i_bLC|0xk=bEEq{eWvw1NsQOe<*gkOyodj@GLg|DCa%
zp)N3i7)fkmU8Rv>?5cbQWhD~UnrI4EVFblvXTJP$&8B<$FH}P^y?=!>d^+hAM0KR#
zF#IAHG3s5uDk~+MUAS`)|3+09#!OO^lxf$tux<?A+Jz5zZ>PhecF3_qIv`H+5T*wt
z^~t+sL{?ovrs3M`I>#GTb5#vHT<Go+4Z10r-(}4bb_HT=AAO69LN*yN*8+BlC#m9h
zoO%+j)=V8kL$p6_QCP(TxaV4~ZhY)kGq<p1U-vB`=fD3O9_<{jEQ;w!Q>G<(l0r0c
zq)W~lyN>|26JhGBU`duXC};{K`m??jdRh3l+_%&!oI{$jDa<3yozetEjFY5u-$WIX
zYC&@KGIk1xn0#a8o9e0y<-fJ_^ees@EAP#5iS8&ft4@P!76G+gWE#z=amdu60J?i4
z{1B%;*39Fk=qBAsHc1csB|fL@me=TmFBjgW@$=r){Jd~4xTY9@R(h7Da~fk;Mngk+
z8y#@o7Bz0u!l!oRZc$4`bPUY3drK8BlY)|wZ^+uHT&F#G3-JJORoXYLWEt&=<n{86
zAcH-y7gLZZEOlsy36-dN{FQ)7E_520bs6g})waAoC9OGhMR{wtR6<<_i;RT|rqw6+
zKt+k!V32cB37FABnBia(j2Sgy%*SeSYt<#t_g<r`L9*vag&qR}VJw}_;L3D(T~p$g
z%x1MTz-lr#2@BS|`sV8R6|0__BI#Rqx-=L|VbZE*t7)~sViiND6X4fnGwUUCfU57k
zm)@bBpHjxt4y6-%GV1Q{PmH?zhaDL7U7RtSqwS~(ZO5CS?YIeTFD+%=4xB@af`e5;
zS%50M*)cV&J($e|vwvW0wX`N;h^|I@#{yyXIQMIasbl$Nxd;;(DNST1ke0$7I$)r(
z+R`2FU}msQ3=NK<B3&v^iw?-f?{OqWxMeH|cc7FHo5x<7$6&j$Y#U2GLDj=p3QXF&
z*sLMrauj3W4$ue@X$HGA-T2IxuO5N3V}rBO!7)k0)%n@P4{p!T=s(Q5F9KcFp`jKS
zdWCL1C)XVp#wr>9r4ruVym};+cm^}P(`ZWusJ+gjTkn;~7jV&KT*XHKTeMu_70wb(
zuObw=7qXrs`uMXLX|ofhiHMezToajRL)!(f_Gju&43Pe@1T>W%Ya-1eUMenX33-;z
zE#r)F6e67P_G*E?5vsFe#bHDoO#*TP?oKV2?u~M3$SArc=F+HmJk&(7h#vwbvHTah
z!ruj>JbIo)R_I!kfNx0<v133!yLWn}4D6ul^iU+5>*wXSfM38HbE<2#ME(=|s>F>j
zRRll1{`mUm5AP(B5o%169b#el$!n^0CCQr+;4o^=uEd~%@hQo%A<gJ<8HuW2y3eT}
z(}-j@INDqMgkg5FoLQy7lU;7AwR)BWz%;>K<Bra-N7wm<RYc_kuMvFb-GKUc@5^#D
z##CB-k77zK%Qiv1Mp~y|toVRw6w6Qvc&5J$r`7$OY`&rU`+KwA|1Tr^*RI5|T_h7`
zZx26e*Nigz+%@=#{Ek~EGTHhhq+P}kGGvP^p13G1%ct%H%J@e7l+|Ln$KT-6B%}p_
zsFJ60R?q%JKFOCJ9!x;I#~hQ4K3v3@H;>6C{P!72o;&zCKfgKKwf{WZ<wq(bWI%wj
zmJxosXdYtMUg7!K3W&}*hac0&GbzLGFodpeMuYu|Y+-yyKrmnWIZ~4o*93=E&BV@o
z0iswjC;$Iq2^SRWv;rA%W;n}X4}m!UQD7Rj_X$r&eszXoE36sTCO@4`m;Owg4@XDG
zaMJYoES6v47WnfSoYqbhdi;+(h2<g&Uz{Hw9m0bkbMbEgj_OgvS19Nh)EIpS)X*la
zmc#wj1wG!CNr=A0HU#N1#)lXfa_cf)c1JO`$u**widFy;=^|Q8p?9x-eKXwOzZsq=
z0Z+Pub(Dk3fb*xQF-U&O?p2fzMG_Th)`oVkF=RIbX)r~Xa~=0a<0|rv#)pR$<Qt6<
zr5;4S;b=4*43CGKVBh#VVBdPO@~#Wsy!Eq;pxHFW$IC85DE*^lkc$@lZJ}@DX5_n&
zMm9s!YFNl5aBFyQGaejoR|wrQMYGE9s$vm~0_{ZZZMnb*nz--%hzd;|+iH|<IIbew
zaJaYM0ojiBKoC6M-vrn8HpVs3ex_)PeLS+g6!YKxFxrg3xI=)-y7ECJZo}B?ji}UK
zGop~`(ZP$)s!AdQ=VJy+kXQf%yNj@PW_aRVc%RB-=N*%)*9n&Y3B3y$(o;i`hX5!-
zq+>Mz{seN|8cy+Rr>j;b^HrM6TE_TaodAfqKu??lC;}x?@4?o8NweF04oWqE=|Y*g
z$Y==~hmHjba&POGi<ej1J&*5zHy-T`aGZC7@-3)lyn5}*ymq#0rk2-cjMaagTQeRF
z^rd<DChUKEs{4n#2>UG*Gf{07LUWiz3xBlg(qEbkJ=CF{*n>9W_){MqeHDS`j{PMJ
zqSUCQ#gd~uFM({DJXO%#=qrT?hx=7TK+&-?A{=}dk_dW1+kO7A3nIK~zsoJ$yM=2W
zH{-x3%L*se=vP*N!CO$e(rm?^PL42<@f6QR^pK|dqE$%w-08Li^F&}Y-cw}kmq6Ou
zq?}IgXJrxlZHM)$%Md@tem+a@pCaX7i+j-~XT9C|rOOci9xWD|f^sbb5dw{f+RS|j
zd_*QQO4G$<YT_&s`VGj-44s-cT-lX~5(rxY>tbZmy0!AQu5PYhZZ|NEy}b&l*f&Bw
zJq-HD`CCMpcp|b{4+tJ`pkyV7!e2X3l*8}9TAao;Pq8>~$WqG)uQy^ZEdsgAI2PR*
z$E6d)E)ToLI#4cfdB?b&ckEY1@59jstmEiA9CX^QbA5Z2tA%;EQZ&^vl=;kP_?^L>
z;(+E-)zWW1^h+3k?lrz;Zuz@tD!dyEZU`DIN&EZN;~$PHr#?IwA5=B2gS{@crQu+I
zbg+MTyot(n{I7>3<RNV1Hm}}x;(9*l18t&iCqnc~1nA>0+U2B9eDA%$<d7#B*{;*s
z+z<U6WOp%titzrtcdx~qdqqi>DhTv)?o;qu7X5BQOjE4b+D%|5lPxJ=)oWI#vQ^qT
ztkTx;c+kaudvJKLH#*qlin&oF-|CK)zl+<f4aMJgL5R=(Do)!Zs*j!Y0IWXQDwJKn
z>4cW-+HD)-S)2^-IYmy{b1ArwT=>!<TZs)<xm=Y9eRK+)cz%}oD{r1I$nY!i`d(JE
zb-#YRneMQP-c`}zV85z%9(O_Rz2oD(;n7CO{T&=vFSDqX?6#N~+yO5AHn&$Nl6&KW
z#?wY7J2^)DOq6XK__^B-0L=msw(VnsYtil)c`xo&3SN=wCMT|OS_Kfxya)PI)#<Pz
z)l~S5Owen?`-_G5llPVqCQ*Q{OV4{%`}E#uSV8nolG1269PJ-$<c1r52S41)vRz;5
zBw#!*Q$K9Rla~?bq7y&ts4czcqBuV?O4OP6aKC!U<57)Sk2*P92gidk>P#D)@^|oe
zJ^7S>7TGNE%eFCESar)GxA-3}IU1e<;1-jJtsuG$EGM2&#uu{DdT#UO>z6}s>(%Jy
zc(C2h_g*h1Y2W*)juJH|=y-$*d<7?lT_m`p!T5N8xEWGx?wEQqQe2i)xV!D<cJg5U
zJLm?JHW{!*QEFikgJ!IF1KTX9a2X}zlo;DKJHZN-K8RFgo6RCq^<1%uFv=;<;_(vD
zaa963s7OF?_p1`n@Su|teRyy<Mn<y%etZXajSOIJIouD=Lf0{>`h=>yDu+uY(DD6G
zR)&Wu4kkPH%FGi=W0NUQ+>xT~(CNI9@pCb*4($Kns4C|k9CvX}ACLC;Na5SykT-Y5
zOw#oB2`5aovY#&C*_JS6yK-tf8#t?|?Tqcxpr0>M7Gv}6=-||Q>&Gzx&3?>!4Q<eu
zpDuge%QW!4Pkw<#1>d~0U+HM39)1Tq_1iLwTm2B%E#_r+h^y?b)fL`}X1|Yuc^W^_
zuJjD5`wd9-X&Wrwbc_GO;w!)eo+Q9NO8u{FWp&$nsxMm(WitbUu7>88S<#g$KpFa&
z#;Zh7d8HH111&Nt8K-ioOwl4$-%*z4=76$A@#_6WJL$XtcKK7^BevPhF%N1o)bQ}Q
zDnkvA23?rs@zMD3XuO&F^c~z0kJNtLL9YGaok-hc>rM#x*<bj#ZE{#ka7%?Mh^UR#
zzv>hZcbz7|ZP7|qUTvl$47+$%q^uXDl#D^y&tWvfxOQDu2(cf~8Y|n-C10m7)}t`?
zYvRCYP<5e>23-ta!{dYDXm2ya*LU!oJPSTQ9T}hDMm!Q7pZQWeXZ-pDYBL(ZkmxG`
zS}M*|WwGLLL~WXjhE+<Yv=V=~AgxrtM)Ygm#ct7@!-LV_;Bce3_3z;Kc$ORTOSDG*
z>SUh%JQuBE&&@^=P)99Ma96ry;f^kHC<6(OPNzUjMYaUZyU2BibxJ%Yv;vB|DXAUS
z@aV9LMu%Mp?dbUE_;9chp&fk(55sdvo!`v`x0&er)+tD+7&J2FfEp-Jf+C^K(wC*&
zl~{X%NtHVvFFz^!->(K28+>Elmdd19V*%X2^Cc|+dsP|szzUms$k(+QLHiCKfakzO
ztJBD4n(1b?Ky(+CxkMS=29tB%O(9IT{jM|w%HAI_7wK!qj?Wp@m}y3hI*l6dSBcd4
zVARP5cs$q}9c+}lF!~OTgLlGv6U*4Tqtjq6o)T8`)-Qv$P|{98{2!7+Bv{m?Z5f?p
zvX5ER9#`85-Cv0$6|K&YPC*Bs{6&=KZTsjX0^F>y>{5|E=}ev;iM@U&oq+Lt-X)+N
zEt=y+rKTwviqW9V1=To62w2aIk~GnRV%kh96AJ#uiX`=+MnOA?e7H_|A#?g)`au-O
zCH_&&Gx%eTO#Slu(!0hAvy(<gbS;dp;(i!u!n20V3`fU6Xf|>+jJ|^_>e^qm@r_Ow
z@>zuP=hW2>9!LbGgw8p(lcu^KDl}9gnaFgonl2e=CGQzl*O<olO6KuBUIePS$A?Fw
zx*c!aMF=`N9*qZ^`9epV`?{Xy2EkrcZP7ZN@)NknRa?yXBkjM&V<qE$b@v!dyK|~O
zsf0VP)z9+b3sAYR?pNt>8k;+%o=gs&F$TZIXma7*l-ZqFIr(}BNh;$^DH%>yPi~;!
zcB$Od4GP$Qla@&+Yf0+(eUYi)?X__!GVQBM^hZ^1<OoCfJ4ft~zJt%}F{fD(QnGTT
zh0s=cr<3#Rb2^>+NxJhsO53H`P9wb&!Q9W1HpaDCd|V5^c%yS~r90d%&D&bh_YGC_
zZNt-#F-Fr-Wt^4-lD%P-{vXxQ?r7Y}>U{ujj`ue5f{!+L<2*efQ5MX6O33Fgpqk8D
zLy<e>>%H}}MSGreCsuS*q^mZx-XIijO>(T=*P}ussTS7jFCg_s=#{q;CVN=HQnx$h
zQf2UJoO@fZU)@}7KUdb|rJws@^rYC^>lMuQBu&K4^6RU+yKm)*Z<;24a6Eckn)s+@
zMI9Y)k|sX-4o;}`={MgvD7D=hbx{+|o07Xc-4LXqh~cwDr%7x1uqvS+jyhFr8655%
zA0KXHFdu&hzsnyDDOw`hZ%hLcU9TQ4ZgUAsRp*kUFmkU<O6(>QuVixo({+(9yTLTo
zbLV0T>|d1zO<<Agi>HKf9M;(XaCBHx|3*i9ox<Sv27AM!&FTSt2Y1YKxU8RM!gy6q
zvh>bxGmmzXlfOxF>+3H?1_u(aRQYlhhLO+9>69XJ!;(69boD!XFAw^Mk0*t-+G$6J
z)xg}Nqi)%)!{dYThNSQ*9-APIV}BxByIz`p@t0UO4`5L)kfYNx57C_qkO^SoAm-Eu
zt`DGVAb*wxb~*9o*PTl%gzP1E$$o3KuzgGgyJ4-c(|BAxhHg2=!=usR@qQPZ<*uoJ
zxJf0%@po{fe6wKc=Z>ZRmyDzye+L`(A5B2{dQ$7ZlFVw6h+VG!OeM;5UEx=kG=FmI
zhSywuH&;#TbqDt0(C+`qmAnH41hpIqsn?3zS(+jvdQ`T#sTe>1VmH3lEn4Ng4|EH0
zB`j6jT6POzBed!*gYBC0CHAzG-FbSl0JUlQuEbbXTX>;s3lE0_?sH%NWwg&7FDEF+
zf=ZkfLbkO4aU%8uF4o`Ji-{mK#*QHLX;1eq!ne7DUzn<{g`3n3Z|cx6+5|5e8#G9~
zFwl}DSjSD}-Y_q`<=j`z67aE4?_Sr7elJfnD05T5`3F<g8U8n!1VI4*o|*^Pc6vBI
zr4Qr5_|$$!KL><piFw}mPrd#${W@}g4cuSH?yuI!O;4#GPn#-aoAq7VUk=T~S4|JU
zvcDV;P9ITa`-g^iCJpZd&O5`-bGh%027Ud@=X7~u*3WISRPiy{SpcROhI_~26d*Y{
zub(S|c5-fpd`SiQ^`Ixz<{9XT)xJPBWi6R-nF-91P0n*3*DQ`|x7Sa|d8uu6`svLt
zXj3nWr?;Zu_F3`*BCYY`K+;_9O9q4H)P^S)Gim}vMenh*8eY%*#A`V4ZT(2YmHoT=
zD(?3q{p~;OCVWi~TTcS<YrxK@PMVn$)=&Rt9^L%F#tjFyx5<I+H67SR;x@_(%fM|X
zcT#DlWp@%M&ZRG-{9g$FoA_2ye4A_W&mj<2KL{qyW13vImiliaWW5at+1{Y{OoXiI
zo$wLFFRT!;uImdPG1o)y1P`unI8Z`YUg%au20P%7;htoY;b9{?`s*Xs+VoD-T774&
z<GsH3kvbouK#sOS*=ikGKrqLy7NJjVZ>qnel%X>&>&**QpBC(bP#@d0<NJ{4#`n4I
z4rvG<_t9jsH^r`ux|5wEht*aSX3v(yBlU}k0C6^v0af@#xm*g<yR_F56+>Mu(4j<j
zCVe3Cf|_HK>@803twBxWkw8_4YPdhm3t~J1-heN0^D$rB!MF*mlGuc47-wL%IR)Ko
zprHGkP|*D*3d&tFwp33H{%>7uvwz-J-9J7%z16sR6E_468MNHo&w9j1B%0YhKovo@
zn~1qB#;{Ma(_(B*KQY}io!W}h^y_A)i7L&b8#{<eJjv~qa~m<cqiOdhPO6RATIH-m
z8!-&+T62JtM;xFF4%#{QJI-aT4L`dDZN%HccEj+5>ch8<W+QALCtq5xca9qMPCPv;
zg1n(^IrV+Dnhe^l{d{U#nWVBU0=Fkh%Ebg0KTTt}`|7|F+d>LA^!eAi@$KGkxZMYv
z-0p+UEddb%Z;;sLbIYh8n*$ol19tos*jj4B@Wh6IXpPX_0WHkgQrCEbrJ}HPFx8Of
zK3GHpYhxqLq|!||lvO)k$lCF!pJ?iUlpvuCNEyvIXheNxt^T&M=PYY4rirBK*G&6U
zooOSB>1lk?C8lVR5_M$P)in(xT_cLI4A~9qeE>D`9$2s+t1x0Y1FTmYE>w3+UZxR2
zu$8LS%y~!(^zfK9Xe^kOu${!}ygNMB`@vJkEe+X>Bilo11CHshr;JmLhMFNIQ7mJB
z#l7|(3zA*~^m#*vyq|&Go-`1Q*;=qKe;n`euLt~hpMoH`>8nc;bpK0E3{hKp_v1Ug
zsr}L3kt5<eKMwcCxYRNU1vgh0;pR8{py6g8ZgR5^H@exnE9BR`+Yj99hKuWM<*>>%
zICId1GpbKKd9ZlR1cOJA*u#c{INIbOjvmu>b$B4}oC7J4!3DKn)u8XSc3joK%z<{m
zGVmO3MUnUPX(>Yc4WO<Lzp>M++a}YM)09STBze==wbnM%&t?BttFf!WvbuoQ+R|0l
z6nq9b>tc~g>0$p+1Hd`w9!NkrniyhwGL;r+?1F5aE2c8)E4norZ}&7a-Wm?zuV4LJ
z?1l_Lx`FG7^jm(lH5`K~xi8OvzRPPZK6@lD<A^zRRmL`(#7ceVF|X<_P)mXlXR{dO
zBT7st0JBN>+hSQeF>PjhG&D`bA3v3O9;px%{NjC@-Gf#KUvYP7*$fnmliFq&wR?Po
gH>%o#Zhpm&tahKSdq;Vy&j0QI2Pk>-Egzu)0Q2fAtN;K2

literal 0
HcmV?d00001


From e309d3ac5c38da52f877c08405367fb52ffd83c2 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Tue, 8 Nov 2022 14:32:47 +0100
Subject: [PATCH 257/269] Update Lucene to v8.9.0 in ES consumer.

---
 jcore-elasticsearch-consumer/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jcore-elasticsearch-consumer/pom.xml b/jcore-elasticsearch-consumer/pom.xml
index cd72f8d4f..6e3b89942 100644
--- a/jcore-elasticsearch-consumer/pom.xml
+++ b/jcore-elasticsearch-consumer/pom.xml
@@ -32,7 +32,7 @@
         <dependency>
             <groupId>org.apache.lucene</groupId>
             <artifactId>lucene-analyzers-common</artifactId>
-            <version>6.4.2</version>
+            <version>8.9.0</version>
         </dependency>
         <dependency>
             <groupId>org.assertj</groupId>

From c0b614300dfe1e26a5e2e50563bf87d3cd05a6c2 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 9 Nov 2022 08:21:22 +0100
Subject: [PATCH 258/269] Fix a syntax error that is a consequence of the
 Lucene version update.

---
 .../julielab/jcore/consumer/es/sharedresources/LuceneIndex.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/LuceneIndex.java b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/LuceneIndex.java
index 907c333a6..0a889a6b9 100644
--- a/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/LuceneIndex.java
+++ b/jcore-elasticsearch-consumer/src/main/java/de/julielab/jcore/consumer/es/sharedresources/LuceneIndex.java
@@ -158,7 +158,7 @@ public void open() {
     @Override
     public int size() {
         if (iw != null && iw.isOpen())
-            return iw.numDocs();
+            return iw.getDocStats().numDocs;
         else if (searcher != null)
             return searcher.getIndexReader().numDocs();
         return 0;

From d2fd352845a95621c26ea4f4efdc11992e078fe0 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 9 Nov 2022 12:39:44 +0100
Subject: [PATCH 259/269] Count in newlines when deciding on white spaces for
 PMC CasPopulator CAS document text generation.

---
 .../java/de/julielab/jcore/reader/pmc/CasPopulator.java     | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
index 373e9a5ff..5841694e8 100644
--- a/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
+++ b/jcore-pmc-reader/src/main/java/de/julielab/jcore/reader/pmc/CasPopulator.java
@@ -20,6 +20,8 @@ public class CasPopulator {
     private NxmlDocumentParser nxmlDocumentParser;
     private Iterator<URI> nxmlIterator;
     private int truncationSize;
+    private static final String LINESEP =System.getProperty("line.separator");
+
 
     public CasPopulator(Iterator<URI> nxmlIterator, Boolean omitBibReferences, int truncationSize) throws IOException {
         this.nxmlIterator = nxmlIterator;
@@ -156,8 +158,8 @@ private StringBuilder populateCas(ParsingResult result, StringBuilder sb) {
                 TextParsingResult textParsingResult = (TextParsingResult) result;
                 final String text = textParsingResult.getText();
                 // some special handling for documents that contain formatting tabs, newlines or no-break-spaces in the text
-                boolean textBeginsWithWhitespace = text.isEmpty() ? false : Character.isWhitespace(text.charAt(0));
-                boolean textEndsWithWhitespace = text.isEmpty() ? false : Character.isWhitespace(text.charAt(text.length()-1));
+                boolean textBeginsWithWhitespace = text.isEmpty() ? false : Character.isWhitespace(text.charAt(0)) && !text.startsWith(LINESEP);
+                boolean textEndsWithWhitespace = text.isEmpty() ? false : Character.isWhitespace(text.charAt(text.length() - 1)) && !text.endsWith(LINESEP);
                 boolean sbEndsWithWhitespace = sb.length() == 0 ? false : Character.isWhitespace(sb.charAt(sb.length() - 1));
                 if (textBeginsWithWhitespace && !sbEndsWithWhitespace)
                     sb.append(" ");

From bcd071f4b61efbc3eb74aa29643a658b7cf8105d Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 9 Nov 2022 14:54:49 +0100
Subject: [PATCH 260/269] Fix bug in LikelihoodAssignmentAnnotator where
 `multipleLikelihood` was not compared but assigned.

---
 .../ae/likelihoodassignment/LikelihoodAssignmentAnnotator.java  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jcore-likelihood-assignment-ae/src/main/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotator.java b/jcore-likelihood-assignment-ae/src/main/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotator.java
index 622c6cded..a4f7c3985 100644
--- a/jcore-likelihood-assignment-ae/src/main/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotator.java
+++ b/jcore-likelihood-assignment-ae/src/main/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotator.java
@@ -116,7 +116,7 @@ private void assignLikelihood(JCas aJCas) {
 			// in the sentence and create the corresponding likelihood indicator
 			LikelihoodIndicator assignedLikelihood = null;
 			if (sentHasLikelihood == true) {
-				if (multipleLikelihood = true) {
+				if (multipleLikelihood == true) {
 					// determine the lowest likelihood category in the sentence
 					NavigableMap<Integer, LikelihoodIndicator> likelihoodSubMap = likelihoodMap
 							.subMap(firstLikelihoodBegin, true,

From 8df4f6b0d887a82cd3febfd1b7ab7ec55db80d92 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 9 Nov 2022 16:29:09 +0100
Subject: [PATCH 261/269] Add the `next-concept` assignment strategy.

Christine Engelmann, who had developed the likelihood scale, the likelihood dictionary and this component, told me that the likelihood assignment strategy used in this component performed a bit worse than a simple 'assign likelihood to the direct next concept' in her evaluations. Thus, we added the alternate strategy. The strategy to use is set via a component parameter.
---
 jcore-likelihood-assignment-ae/pom.xml        |   5 +
 .../LikelihoodAssignmentAnnotator.java        | 439 ++++++++++--------
 .../desc/jcore-likelihood-assignment-ae.xml   |   9 +-
 .../LikelihoodAssignmentAnnotatorTest.java    |  38 +-
 4 files changed, 296 insertions(+), 195 deletions(-)

diff --git a/jcore-likelihood-assignment-ae/pom.xml b/jcore-likelihood-assignment-ae/pom.xml
index 0ab512b9b..7876117d8 100644
--- a/jcore-likelihood-assignment-ae/pom.xml
+++ b/jcore-likelihood-assignment-ae/pom.xml
@@ -27,6 +27,11 @@
             <groupId>de.julielab</groupId>
             <artifactId>jcore-descriptor-creator</artifactId>
         </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-utilities</artifactId>
+            <version>${jcore-utilities-version}</version>
+        </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-types</artifactId>
diff --git a/jcore-likelihood-assignment-ae/src/main/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotator.java b/jcore-likelihood-assignment-ae/src/main/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotator.java
index a4f7c3985..f9f862a44 100644
--- a/jcore-likelihood-assignment-ae/src/main/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotator.java
+++ b/jcore-likelihood-assignment-ae/src/main/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotator.java
@@ -1,209 +1,272 @@
-
 package de.julielab.jcore.ae.likelihoodassignment;
 
 import de.julielab.jcore.types.ConceptMention;
 import de.julielab.jcore.types.LikelihoodIndicator;
 import de.julielab.jcore.types.Sentence;
+import de.julielab.jcore.utility.JCoReAnnotationIndexMerger;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
 import org.apache.uima.cas.FSIterator;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
 import org.apache.uima.fit.descriptor.ResourceMetaData;
 import org.apache.uima.fit.descriptor.TypeCapability;
 import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
 import org.apache.uima.resource.ResourceInitializationException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.NavigableMap;
-import java.util.TreeMap;
+import java.util.*;
 
-@ResourceMetaData(name="JCoRe Likelihood Assignment AE", description = "Analysis Engine to assign likelihood indicators to their corresponding entities and events.")
-@TypeCapability(inputs="de.julielab.jcore.types.LikelihoodIndicator")
+@ResourceMetaData(name = "JCoRe Likelihood Assignment AE", description = "Analysis Engine to assign likelihood indicators to their corresponding entities and events.")
+@TypeCapability(inputs = "de.julielab.jcore.types.LikelihoodIndicator")
 public class LikelihoodAssignmentAnnotator extends JCasAnnotator_ImplBase {
 
-	private static final Logger LOGGER = LoggerFactory
-			.getLogger(LikelihoodAssignmentAnnotator.class);
-
-	/**
-	 * Maps sentence ends to sentence begins.
-	 */
-	private TreeMap<Integer, Integer> sentMap;
-	/**
-	 * Maps concept mentions to their begins.
-	 */
-	private TreeMap<Integer, ArrayList<ConceptMention>> conceptMap;
-	/**
-	 * Maps likelihood indicators to their begins.
-	 */
-	private TreeMap<Integer, LikelihoodIndicator> likelihoodMap;
-
-	/**
-	 * Quantifies likelihood values.
-	 */
-	private HashMap<String, Integer> likelihoodValueMap;
-
-	public void initialize(UimaContext aContext)
-			throws ResourceInitializationException {
-		super.initialize(aContext);
-
-		// ordinal scale for likelihood indicators;
-		// used when there are multiple occurrences (the lowest category is
-		// chosen)
-		likelihoodValueMap = new HashMap<>();
-		likelihoodValueMap.put("negation", 1);
-		likelihoodValueMap.put("low", 2);
-		likelihoodValueMap.put("investigation", 3);
-		likelihoodValueMap.put("moderate", 4);
-		likelihoodValueMap.put("high", 5);
-	}
-
-	@Override
-	public void process(JCas aJCas) throws AnalysisEngineProcessException {
-		assignLikelihood(aJCas);
-	}
-
-	/**
-	 * If a sentence contains a likelihood indicator, this indicator is assigned
-	 * to all concept mentions occurring in the sentence. If a sentence does not
-	 * contain a likelihood indicator, the default likelihood category (i.e.
-	 * 'assertion') is assigned to all concept mentions occurring in the
-	 * sentence. In case of multiple likelihood indicators the lowest likelihood
-	 * category is chosen.
-	 *
-	 * @param aJCas
-	 */
-	private void assignLikelihood(JCas aJCas) {
-		buildTreeMaps(aJCas);
-
-		// create default likelihood indicator for assertions (has begin = 0 and
-		// end = 0)
-		LikelihoodIndicator assertionIndicator = new LikelihoodIndicator(aJCas);
-		assertionIndicator.setLikelihood("assertion");
-		assertionIndicator.setComponentId(this.getClass().getName());
-		assertionIndicator.addToIndexes();
-
-		// iterate over sentences
-		for (int sentBegin : sentMap.keySet()) {
-			int sentEnd = sentMap.get(sentBegin);
-			boolean sentHasLikelihood = false;
-			boolean multipleLikelihood = false;
-			Integer firstLikelihoodBegin = 0;
-			Integer lastLikelihoodBegin = 0;
-
-			// determine whether the sentence contains a likelihood indicator at
-			// all and whether it even contains multiple likelihood indicators
-			firstLikelihoodBegin = likelihoodMap.ceilingKey(sentBegin);
-			if (firstLikelihoodBegin != null) {
-				if (firstLikelihoodBegin > sentEnd) {
-					sentHasLikelihood = false;
-				} else {
-					sentHasLikelihood = true;
-				}
-			}
-			if (sentHasLikelihood == true) {
-				lastLikelihoodBegin = likelihoodMap.floorKey(sentEnd);
-				if (firstLikelihoodBegin == lastLikelihoodBegin) {
-					multipleLikelihood = false;
-				} else {
-					multipleLikelihood = true;
-				}
-			}
-
-			// determine which likelihood category to assign to concept mentions
-			// in the sentence and create the corresponding likelihood indicator
-			LikelihoodIndicator assignedLikelihood = null;
-			if (sentHasLikelihood == true) {
-				if (multipleLikelihood == true) {
-					// determine the lowest likelihood category in the sentence
-					NavigableMap<Integer, LikelihoodIndicator> likelihoodSubMap = likelihoodMap
-							.subMap(firstLikelihoodBegin, true,
-									lastLikelihoodBegin, true);
-					int currentLikelihoodValue = 100;
-					for (int i : likelihoodSubMap.keySet()) {
-						LikelihoodIndicator likelihood = likelihoodSubMap
-								.get(i);
-						String likelihoodCat = likelihood.getLikelihood();
-						int likelihoodValue = likelihoodValueMap
-								.get(likelihoodCat);
-						if (likelihoodValue < currentLikelihoodValue) {
-							assignedLikelihood = likelihood;
-							currentLikelihoodValue = likelihoodValue;
-						}
-					}
-				} else {
-					LikelihoodIndicator likelihood = likelihoodMap
-							.get(firstLikelihoodBegin);
-					assignedLikelihood = likelihood;
-				}
-			} else {
-				assignedLikelihood = assertionIndicator;
-			}
-
-			// get all events in the sentence and assign the corresponding
-			// likelihood indicator
-			if (conceptMap.ceilingKey(sentBegin) != null) {
-				int firstConceptBegin = conceptMap.ceilingKey(sentBegin);
-				if (firstConceptBegin > sentEnd) {
-					continue;
-				} else {
-					int lastConceptBegin = conceptMap.floorKey(sentEnd);
-					NavigableMap<Integer, ArrayList<ConceptMention>> conceptSubMap = conceptMap
-							.subMap(firstConceptBegin, true, lastConceptBegin,
-									true);
-					for (int i : conceptSubMap.keySet()) {
-						ArrayList<ConceptMention> conceptList = conceptSubMap
-								.get(i);
-						for (ConceptMention concept : conceptList) {
-							concept.setLikelihood(assignedLikelihood);
-						}
-					}
-				}
-			}
-		}
-	}
-
-	@SuppressWarnings("rawtypes")
-	public void buildTreeMaps(JCas aJCas) {
-		FSIterator sentIt = aJCas.getAnnotationIndex(Sentence.type).iterator();
-		FSIterator conceptIt = aJCas.getAnnotationIndex(ConceptMention.type)
-				.iterator();
-		FSIterator likelihoodIt = aJCas.getAnnotationIndex(
-				LikelihoodIndicator.type).iterator();
-
-		sentMap = new TreeMap<Integer, Integer>();
-		while (sentIt.hasNext()) {
-			Sentence sent = (Sentence) sentIt.next();
-			int sentBegin = sent.getBegin();
-			int sentEnd = sent.getEnd();
-			sentMap.put(sentBegin, sentEnd);
-		}
-
-		conceptMap = new TreeMap<Integer, ArrayList<ConceptMention>>();
-		while (conceptIt.hasNext()) {
-			ConceptMention concept = (ConceptMention) conceptIt.next();
-			int conceptBegin = concept.getBegin();
-			if (conceptMap.containsKey(conceptBegin)) {
-				ArrayList<ConceptMention> conceptList = conceptMap
-						.get(conceptBegin);
-				conceptList.add(concept);
-				conceptMap.put(conceptBegin, conceptList);
-			} else {
-				ArrayList<ConceptMention> conceptList = new ArrayList<ConceptMention>();
-				conceptList.add(concept);
-				conceptMap.put(conceptBegin, conceptList);
-			}
-		}
-
-		likelihoodMap = new TreeMap<Integer, LikelihoodIndicator>();
-		while (likelihoodIt.hasNext()) {
-			LikelihoodIndicator likelihood = (LikelihoodIndicator) likelihoodIt
-					.next();
-			int likelihoodBegin = likelihood.getBegin();
-			likelihoodMap.put(likelihoodBegin, likelihood);
-		}
-	}
+    public static final String PARAM_ASSIGNMENT_STRATEGY = "AssignmentStrategy";
+    private static final Logger LOGGER = LoggerFactory
+            .getLogger(LikelihoodAssignmentAnnotator.class);
+    public static final String STRATEGY_ALL = "all";
+    public static final String STRATEGY_NEXT_CONCEPT = "next-concept";
+    @ConfigurationParameter(name = PARAM_ASSIGNMENT_STRATEGY, mandatory = false, defaultValue = STRATEGY_NEXT_CONCEPT, description = "There are two available assignment strategies for likelihood indicators to ConceptMentions, '" + STRATEGY_ALL + "' and '" + STRATEGY_NEXT_CONCEPT + "'. The first, 'all', assigns the lowest likelihood indicator in a sentence to all ConceptMention in this sentence. The second assigns a likelihood indicator only to the directly following ConceptMention in the same sentence. The latter strategy fares a bit better in evaluations carried out for the publication of this approach. Defaults to '" + STRATEGY_NEXT_CONCEPT + "'.")
+    private String assignmentStrategy;
+    /**
+     * Maps sentence ends to sentence begins.
+     */
+    private TreeMap<Integer, Integer> sentMap;
+    /**
+     * Maps concept mentions to their begins.
+     */
+    private TreeMap<Integer, ArrayList<ConceptMention>> conceptMap;
+    /**
+     * Maps likelihood indicators to their begins.
+     */
+    private TreeMap<Integer, LikelihoodIndicator> likelihoodMap;
+
+    /**
+     * Quantifies likelihood values.
+     */
+    private HashMap<String, Integer> likelihoodValueMap;
+
+    public void initialize(UimaContext aContext)
+            throws ResourceInitializationException {
+        super.initialize(aContext);
+
+        assignmentStrategy = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_ASSIGNMENT_STRATEGY)).orElse("next-concept");
+
+        // ordinal scale for likelihood indicators;
+        // used when there are multiple occurrences (the lowest category is
+        // chosen)
+        likelihoodValueMap = new HashMap<>();
+        likelihoodValueMap.put("negation", 1);
+        likelihoodValueMap.put("low", 2);
+        likelihoodValueMap.put("investigation", 3);
+        likelihoodValueMap.put("moderate", 4);
+        likelihoodValueMap.put("high", 5);
+    }
+
+    @Override
+    public void process(JCas aJCas) throws AnalysisEngineProcessException {
+        // We have two strategies available for the assignment of likelhood indicators to ConceptMentions.
+        // Either the original one, implemented in 'assignLikelihood', where likelihood indicators in a sentences are
+        // assigned to all ConceptMentions in the same sentence or a simplified one that, according to
+        // Christine Engelmann, actually fared a bit better in evaluations, where a likelihood indicator is only
+        // assigned to the next following ConceptMention, implemented in 'assignLikelihoodToNextConceptMention'.
+        if (assignmentStrategy.equalsIgnoreCase(STRATEGY_NEXT_CONCEPT))
+            assignLikelihoodToNextConceptMention(aJCas);
+        else if (assignmentStrategy.equalsIgnoreCase(STRATEGY_ALL))
+            assignLikelihood(aJCas);
+        else
+            throw new AnalysisEngineProcessException(new IllegalArgumentException("The " + PARAM_ASSIGNMENT_STRATEGY + " parameter requires one of two values, " + STRATEGY_ALL + " or " + STRATEGY_NEXT_CONCEPT + " but was set to " + assignmentStrategy + "."));
+    }
+
+    /**
+     * <p>Simple assignment strategy that sets the direct nearest previous likelihood indicator to each ConceptMention.</p>
+     * <p>No other ConceptMention must stand in between because then, a previous ConceptMention would be assigned the
+     * likelihood indicator.</p>
+     * <p>This strategy was proposed by Christine Engelmann because it fared a bit better in her evaluations than
+     * the alternative strategy implemented in {@link #assignLikelihood(JCas)}.</p>
+     *
+     * @param aJCas The CAS to do likelihood assignment in.
+     * @throws AnalysisEngineProcessException If the creation of the {@link JCoReAnnotationIndexMerger}, that is used internally, fails.
+     */
+    private void assignLikelihoodToNextConceptMention(JCas aJCas) throws AnalysisEngineProcessException {
+        // create default likelihood indicator for assertions (has begin = 0 and
+        // end = 0)
+        LikelihoodIndicator assertionIndicator = new LikelihoodIndicator(aJCas);
+        assertionIndicator.setLikelihood("assertion");
+        assertionIndicator.setComponentId(this.getClass().getName());
+        assertionIndicator.addToIndexes();
+
+        for (Sentence sentence : aJCas.<Sentence>getAnnotationIndex(Sentence.type)) {
+            // We use the annotation merger that gives us a sorted sequence of annotations of specified types.
+            // Then, we must only assign for each concept the directly preceding likelihood annotation, if there is one.
+            JCoReAnnotationIndexMerger merger;
+            try {
+                merger = new JCoReAnnotationIndexMerger(Set.of(ConceptMention.type, LikelihoodIndicator.type), true, sentence, aJCas);
+            } catch (ClassNotFoundException e) {
+                LOGGER.error("Could not create JCoReAnnotationIndexMerger", e);
+                throw new AnalysisEngineProcessException(e);
+            }
+            Annotation lastAnnotation = null;
+            while (merger.incrementAnnotation()) {
+                final Annotation annotation = (Annotation) merger.getAnnotation();
+                ConceptMention cm = null;
+                if (annotation instanceof ConceptMention) {
+                    cm = (ConceptMention) annotation;
+                    // default likelihood is assertion
+                    cm.setLikelihood(assertionIndicator);
+                }
+                // check if there is a likelihood anntotion preceeding the ConceptMention in this sentence without
+                // another ConceptMention in between
+                if (lastAnnotation != null && lastAnnotation instanceof LikelihoodIndicator && cm != null) {
+                    LikelihoodIndicator likelihood = (LikelihoodIndicator) lastAnnotation;
+                    cm.setLikelihood(likelihood);
+                }
+                lastAnnotation = annotation;
+            }
+        }
+    }
+
+    /**
+     * If a sentence contains a likelihood indicator, this indicator is assigned
+     * to all concept mentions occurring in the sentence. If a sentence does not
+     * contain a likelihood indicator, the default likelihood category (i.e.
+     * 'assertion') is assigned to all concept mentions occurring in the
+     * sentence. In case of multiple likelihood indicators the lowest likelihood
+     * category is chosen.
+     *
+     * @param aJCas The CAS to do likelihood assignment in.
+     */
+    private void assignLikelihood(JCas aJCas) {
+        buildTreeMaps(aJCas);
+
+        // create default likelihood indicator for assertions (has begin = 0 and
+        // end = 0)
+        LikelihoodIndicator assertionIndicator = new LikelihoodIndicator(aJCas);
+        assertionIndicator.setLikelihood("assertion");
+        assertionIndicator.setComponentId(this.getClass().getName());
+        assertionIndicator.addToIndexes();
+
+        // iterate over sentences
+        for (int sentBegin : sentMap.keySet()) {
+            int sentEnd = sentMap.get(sentBegin);
+            boolean sentHasLikelihood = false;
+            boolean multipleLikelihood = false;
+            Integer firstLikelihoodBegin = 0;
+            Integer lastLikelihoodBegin = 0;
+
+            // determine whether the sentence contains a likelihood indicator at
+            // all and whether it even contains multiple likelihood indicators
+            firstLikelihoodBegin = likelihoodMap.ceilingKey(sentBegin);
+            if (firstLikelihoodBegin != null) {
+                if (firstLikelihoodBegin > sentEnd) {
+                    sentHasLikelihood = false;
+                } else {
+                    sentHasLikelihood = true;
+                }
+            }
+            if (sentHasLikelihood == true) {
+                lastLikelihoodBegin = likelihoodMap.floorKey(sentEnd);
+                if (firstLikelihoodBegin == lastLikelihoodBegin) {
+                    multipleLikelihood = false;
+                } else {
+                    multipleLikelihood = true;
+                }
+            }
+
+            // determine which likelihood category to assign to concept mentions
+            // in the sentence and create the corresponding likelihood indicator
+            LikelihoodIndicator assignedLikelihood = null;
+            if (sentHasLikelihood == true) {
+                if (multipleLikelihood == true) {
+                    // determine the lowest likelihood category in the sentence
+                    NavigableMap<Integer, LikelihoodIndicator> likelihoodSubMap = likelihoodMap
+                            .subMap(firstLikelihoodBegin, true,
+                                    lastLikelihoodBegin, true);
+                    int currentLikelihoodValue = 100;
+                    for (int i : likelihoodSubMap.keySet()) {
+                        LikelihoodIndicator likelihood = likelihoodSubMap
+                                .get(i);
+                        String likelihoodCat = likelihood.getLikelihood();
+                        int likelihoodValue = likelihoodValueMap
+                                .get(likelihoodCat);
+                        if (likelihoodValue < currentLikelihoodValue) {
+                            assignedLikelihood = likelihood;
+                            currentLikelihoodValue = likelihoodValue;
+                        }
+                    }
+                } else {
+                    LikelihoodIndicator likelihood = likelihoodMap
+                            .get(firstLikelihoodBegin);
+                    assignedLikelihood = likelihood;
+                }
+            } else {
+                assignedLikelihood = assertionIndicator;
+            }
+
+            // get all events in the sentence and assign the corresponding
+            // likelihood indicator
+            if (conceptMap.ceilingKey(sentBegin) != null) {
+                int firstConceptBegin = conceptMap.ceilingKey(sentBegin);
+                if (firstConceptBegin > sentEnd) {
+                    continue;
+                } else {
+                    int lastConceptBegin = conceptMap.floorKey(sentEnd);
+                    NavigableMap<Integer, ArrayList<ConceptMention>> conceptSubMap = conceptMap
+                            .subMap(firstConceptBegin, true, lastConceptBegin,
+                                    true);
+                    for (int i : conceptSubMap.keySet()) {
+                        ArrayList<ConceptMention> conceptList = conceptSubMap
+                                .get(i);
+                        for (ConceptMention concept : conceptList) {
+                            concept.setLikelihood(assignedLikelihood);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    @SuppressWarnings("rawtypes")
+    public void buildTreeMaps(JCas aJCas) {
+        FSIterator sentIt = aJCas.getAnnotationIndex(Sentence.type).iterator();
+        FSIterator conceptIt = aJCas.getAnnotationIndex(ConceptMention.type)
+                .iterator();
+        FSIterator likelihoodIt = aJCas.getAnnotationIndex(
+                LikelihoodIndicator.type).iterator();
+
+        sentMap = new TreeMap<>();
+        while (sentIt.hasNext()) {
+            Sentence sent = (Sentence) sentIt.next();
+            int sentBegin = sent.getBegin();
+            int sentEnd = sent.getEnd();
+            sentMap.put(sentBegin, sentEnd);
+        }
+
+        conceptMap = new TreeMap<>();
+        while (conceptIt.hasNext()) {
+            ConceptMention concept = (ConceptMention) conceptIt.next();
+            int conceptBegin = concept.getBegin();
+            if (conceptMap.containsKey(conceptBegin)) {
+                ArrayList<ConceptMention> conceptList = conceptMap
+                        .get(conceptBegin);
+                conceptList.add(concept);
+                conceptMap.put(conceptBegin, conceptList);
+            } else {
+                ArrayList<ConceptMention> conceptList = new ArrayList<>();
+                conceptList.add(concept);
+                conceptMap.put(conceptBegin, conceptList);
+            }
+        }
+
+        likelihoodMap = new TreeMap<>();
+        while (likelihoodIt.hasNext()) {
+            LikelihoodIndicator likelihood = (LikelihoodIndicator) likelihoodIt
+                    .next();
+            int likelihoodBegin = likelihood.getBegin();
+            likelihoodMap.put(likelihoodBegin, likelihood);
+        }
+    }
 
 }
diff --git a/jcore-likelihood-assignment-ae/src/main/resources/de/julielab/jcore/ae/likelihoodassignment/desc/jcore-likelihood-assignment-ae.xml b/jcore-likelihood-assignment-ae/src/main/resources/de/julielab/jcore/ae/likelihoodassignment/desc/jcore-likelihood-assignment-ae.xml
index 1a6b9b081..dc6fba717 100644
--- a/jcore-likelihood-assignment-ae/src/main/resources/de/julielab/jcore/ae/likelihoodassignment/desc/jcore-likelihood-assignment-ae.xml
+++ b/jcore-likelihood-assignment-ae/src/main/resources/de/julielab/jcore/ae/likelihoodassignment/desc/jcore-likelihood-assignment-ae.xml
@@ -7,7 +7,14 @@
         <name>JCoRe Likelihood Assignment AE</name>
         <description>Analysis Engine to assign likelihood indicators to their corresponding entities and events.</description>
         <version>2.6.0-SNAPSHOT</version>
-        <configurationParameters />
+        <configurationParameters>
+            <configurationParameter>
+                <name>AssignmentStrategy</name>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+        </configurationParameters>
         <configurationParameterSettings />
         <typeSystemDescription>
             <imports>
diff --git a/jcore-likelihood-assignment-ae/src/test/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotatorTest.java b/jcore-likelihood-assignment-ae/src/test/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotatorTest.java
index 6fe9746f5..357b0bb3e 100644
--- a/jcore-likelihood-assignment-ae/src/test/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotatorTest.java
+++ b/jcore-likelihood-assignment-ae/src/test/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotatorTest.java
@@ -9,9 +9,7 @@
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.JFSIndexRepository;
 import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceSpecifier;
 import org.apache.uima.util.InvalidXMLException;
-import org.apache.uima.util.XMLInputSource;
 import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -71,10 +69,7 @@ public void initCas(JCas aJCas) {
     @Test
     @SuppressWarnings({ "rawtypes"})
     public void testProcess() throws ResourceInitializationException, IOException, InvalidXMLException {
-
-        XMLInputSource assignmentXML = null;
-        ResourceSpecifier assignmentSpec = null;
-        AnalysisEngine assignmentAnnotator = AnalysisEngineFactory.createEngine(DESCRIPTOR);
+        AnalysisEngine assignmentAnnotator = AnalysisEngineFactory.createEngine(DESCRIPTOR, LikelihoodAssignmentAnnotator.PARAM_ASSIGNMENT_STRATEGY, LikelihoodAssignmentAnnotator.STRATEGY_ALL);
 
         JCas aJCas = null;
         try {
@@ -119,4 +114,35 @@ public String getPredictedAssignments(Iterator conceptIter) {
 
         return conceptLikelihood;
     }
+
+    @Test
+    public void testAssignNextStrategy() throws Exception {
+        AnalysisEngine assignmentAnnotator = AnalysisEngineFactory.createEngine(DESCRIPTOR, LikelihoodAssignmentAnnotator.PARAM_ASSIGNMENT_STRATEGY, LikelihoodAssignmentAnnotator.STRATEGY_NEXT_CONCEPT);
+        final JCas jCas = assignmentAnnotator.newJCas();
+        jCas.setDocumentText("Our data suggest that it is highly probable that the interaction occurred, however not the other one.");
+        new Sentence(jCas, 0, jCas.getDocumentText().length()).addToIndexes();
+
+        LikelihoodIndicator suggest = new LikelihoodIndicator(jCas, 9, 16);
+        suggest.setLikelihood("moderate");
+        suggest.addToIndexes();
+
+        LikelihoodIndicator highly = new LikelihoodIndicator(jCas, 28, 43);
+        highly.setLikelihood("high");
+        highly.addToIndexes();
+
+        ConceptMention interaction = new ConceptMention(jCas, 53, 64);
+        interaction.addToIndexes();
+
+        LikelihoodIndicator not = new LikelihoodIndicator(jCas, 83, 86);
+        not.setLikelihood("negation");
+        not.addToIndexes();
+
+        ConceptMention theOtherOne = new ConceptMention(jCas, 87, 100);
+        theOtherOne.addToIndexes();
+
+        assignmentAnnotator.process(jCas);
+
+        assertEquals(highly, interaction.getLikelihood());
+        assertEquals( not, theOtherOne.getLikelihood());
+    }
 }

From ca784a0c34bf20ef86a5c437f267956b3cc2812f Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 9 Nov 2022 17:45:57 +0100
Subject: [PATCH 262/269] Allow the specification of the ConceptMention
 sub-type to assign likelihood indicators to.

This is required because when we use the next-concept assignment on events it happens very often that an event argument receives the assignment instead of the EventMention instance.
---
 .../LikelihoodAssignmentAnnotator.java        | 37 ++++++++++++-----
 .../desc/jcore-likelihood-assignment-ae.xml   |  8 ++++
 .../LikelihoodAssignmentAnnotatorTest.java    | 41 +++++++++++++++++--
 3 files changed, 73 insertions(+), 13 deletions(-)

diff --git a/jcore-likelihood-assignment-ae/src/main/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotator.java b/jcore-likelihood-assignment-ae/src/main/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotator.java
index f9f862a44..7d7de8bba 100644
--- a/jcore-likelihood-assignment-ae/src/main/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotator.java
+++ b/jcore-likelihood-assignment-ae/src/main/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotator.java
@@ -4,6 +4,7 @@
 import de.julielab.jcore.types.LikelihoodIndicator;
 import de.julielab.jcore.types.Sentence;
 import de.julielab.jcore.utility.JCoReAnnotationIndexMerger;
+import de.julielab.jcore.utility.JCoReAnnotationTools;
 import org.apache.uima.UimaContext;
 import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
 import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
@@ -11,6 +12,7 @@
 import org.apache.uima.fit.descriptor.ConfigurationParameter;
 import org.apache.uima.fit.descriptor.ResourceMetaData;
 import org.apache.uima.fit.descriptor.TypeCapability;
+import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.tcas.Annotation;
 import org.apache.uima.resource.ResourceInitializationException;
@@ -24,12 +26,15 @@
 public class LikelihoodAssignmentAnnotator extends JCasAnnotator_ImplBase {
 
     public static final String PARAM_ASSIGNMENT_STRATEGY = "AssignmentStrategy";
-    private static final Logger LOGGER = LoggerFactory
-            .getLogger(LikelihoodAssignmentAnnotator.class);
+    public static final String PARAM_CONCEPT_TYPE_NAME = "ConceptTypeName";
     public static final String STRATEGY_ALL = "all";
     public static final String STRATEGY_NEXT_CONCEPT = "next-concept";
+    private static final Logger LOGGER = LoggerFactory
+            .getLogger(LikelihoodAssignmentAnnotator.class);
     @ConfigurationParameter(name = PARAM_ASSIGNMENT_STRATEGY, mandatory = false, defaultValue = STRATEGY_NEXT_CONCEPT, description = "There are two available assignment strategies for likelihood indicators to ConceptMentions, '" + STRATEGY_ALL + "' and '" + STRATEGY_NEXT_CONCEPT + "'. The first, 'all', assigns the lowest likelihood indicator in a sentence to all ConceptMention in this sentence. The second assigns a likelihood indicator only to the directly following ConceptMention in the same sentence. The latter strategy fares a bit better in evaluations carried out for the publication of this approach. Defaults to '" + STRATEGY_NEXT_CONCEPT + "'.")
     private String assignmentStrategy;
+    @ConfigurationParameter(name = PARAM_CONCEPT_TYPE_NAME, mandatory = false, defaultValue = "de.julielab.jcore.types.ConceptMention", description = "The qualified UIMA type name for the concept annotation for which likelihood assignment should be performed. Must be a subclass of de.julielab.jcore.types.ConceptMention. Defaults to de.julielab.jcore.types.ConceptMention.")
+    private String conceptTypeName;
     /**
      * Maps sentence ends to sentence begins.
      */
@@ -47,12 +52,14 @@ public class LikelihoodAssignmentAnnotator extends JCasAnnotator_ImplBase {
      * Quantifies likelihood values.
      */
     private HashMap<String, Integer> likelihoodValueMap;
+    private ConceptMention conceptTypeTemplate;
 
     public void initialize(UimaContext aContext)
             throws ResourceInitializationException {
         super.initialize(aContext);
 
         assignmentStrategy = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_ASSIGNMENT_STRATEGY)).orElse("next-concept");
+        conceptTypeName = (String) Optional.ofNullable(aContext.getConfigParameterValue(PARAM_CONCEPT_TYPE_NAME)).orElse(ConceptMention.class.getCanonicalName());
 
         // ordinal scale for likelihood indicators;
         // used when there are multiple occurrences (the lowest category is
@@ -67,6 +74,14 @@ public void initialize(UimaContext aContext)
 
     @Override
     public void process(JCas aJCas) throws AnalysisEngineProcessException {
+        if (conceptTypeTemplate == null) {
+            try {
+                conceptTypeTemplate = (ConceptMention) JCoReAnnotationTools.getAnnotationByClassName(aJCas, conceptTypeName);
+            } catch (Exception e) {
+                LOGGER.error("Could not obtain the specified concept UIMA type with name " + conceptTypeName + ".", e);
+                throw new AnalysisEngineProcessException(e);
+            }
+        }
         // We have two strategies available for the assignment of likelhood indicators to ConceptMentions.
         // Either the original one, implemented in 'assignLikelihood', where likelihood indicators in a sentences are
         // assigned to all ConceptMentions in the same sentence or a simplified one that, according to
@@ -103,27 +118,29 @@ private void assignLikelihoodToNextConceptMention(JCas aJCas) throws AnalysisEng
             // Then, we must only assign for each concept the directly preceding likelihood annotation, if there is one.
             JCoReAnnotationIndexMerger merger;
             try {
-                merger = new JCoReAnnotationIndexMerger(Set.of(ConceptMention.type, LikelihoodIndicator.type), true, sentence, aJCas);
+                merger = new JCoReAnnotationIndexMerger(Set.of(JCasUtil.getAnnotationType(aJCas, conceptTypeTemplate.getClass()), JCasUtil.getAnnotationType(aJCas, LikelihoodIndicator.class)), true, sentence, aJCas);
             } catch (ClassNotFoundException e) {
                 LOGGER.error("Could not create JCoReAnnotationIndexMerger", e);
                 throw new AnalysisEngineProcessException(e);
             }
-            Annotation lastAnnotation = null;
+            LikelihoodIndicator previousLikelihood = null;
             while (merger.incrementAnnotation()) {
                 final Annotation annotation = (Annotation) merger.getAnnotation();
                 ConceptMention cm = null;
-                if (annotation instanceof ConceptMention) {
+                if (conceptTypeTemplate.getClass().isAssignableFrom(annotation.getClass())) {
                     cm = (ConceptMention) annotation;
                     // default likelihood is assertion
                     cm.setLikelihood(assertionIndicator);
                 }
                 // check if there is a likelihood anntotion preceeding the ConceptMention in this sentence without
                 // another ConceptMention in between
-                if (lastAnnotation != null && lastAnnotation instanceof LikelihoodIndicator && cm != null) {
-                    LikelihoodIndicator likelihood = (LikelihoodIndicator) lastAnnotation;
-                    cm.setLikelihood(likelihood);
+                if (previousLikelihood != null && cm != null) {
+                    cm.setLikelihood(previousLikelihood);
+                    // this likelihood indicator has been "consumed"
+                    previousLikelihood = null;
                 }
-                lastAnnotation = annotation;
+                if (annotation instanceof LikelihoodIndicator)
+                    previousLikelihood = (LikelihoodIndicator) annotation;
             }
         }
     }
@@ -231,7 +248,7 @@ private void assignLikelihood(JCas aJCas) {
     @SuppressWarnings("rawtypes")
     public void buildTreeMaps(JCas aJCas) {
         FSIterator sentIt = aJCas.getAnnotationIndex(Sentence.type).iterator();
-        FSIterator conceptIt = aJCas.getAnnotationIndex(ConceptMention.type)
+        FSIterator conceptIt = aJCas.getAnnotationIndex(conceptTypeTemplate.type)
                 .iterator();
         FSIterator likelihoodIt = aJCas.getAnnotationIndex(
                 LikelihoodIndicator.type).iterator();
diff --git a/jcore-likelihood-assignment-ae/src/main/resources/de/julielab/jcore/ae/likelihoodassignment/desc/jcore-likelihood-assignment-ae.xml b/jcore-likelihood-assignment-ae/src/main/resources/de/julielab/jcore/ae/likelihoodassignment/desc/jcore-likelihood-assignment-ae.xml
index dc6fba717..b768176d2 100644
--- a/jcore-likelihood-assignment-ae/src/main/resources/de/julielab/jcore/ae/likelihoodassignment/desc/jcore-likelihood-assignment-ae.xml
+++ b/jcore-likelihood-assignment-ae/src/main/resources/de/julielab/jcore/ae/likelihoodassignment/desc/jcore-likelihood-assignment-ae.xml
@@ -10,6 +10,14 @@
         <configurationParameters>
             <configurationParameter>
                 <name>AssignmentStrategy</name>
+                <description>There are two available assignment strategies for likelihood indicators to ConceptMentions, 'all' and 'next-concept'. The first, 'all', assigns the lowest likelihood indicator in a sentence to all ConceptMention in this sentence. The second assigns a likelihood indicator only to the directly following ConceptMention in the same sentence. The latter strategy fares a bit better in evaluations carried out for the publication of this approach. Defaults to 'next-concept'.</description>
+                <type>String</type>
+                <multiValued>false</multiValued>
+                <mandatory>false</mandatory>
+            </configurationParameter>
+            <configurationParameter>
+                <name>ConceptTypeName</name>
+                <description>The qualified UIMA type name for the concept annotation for which likelihood assignment should be performed. Must be a subclass of de.julielab.jcore.types.ConceptMention. Defaults to de.julielab.jcore.types.ConceptMention.</description>
                 <type>String</type>
                 <multiValued>false</multiValued>
                 <mandatory>false</mandatory>
diff --git a/jcore-likelihood-assignment-ae/src/test/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotatorTest.java b/jcore-likelihood-assignment-ae/src/test/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotatorTest.java
index 357b0bb3e..8479136af 100644
--- a/jcore-likelihood-assignment-ae/src/test/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotatorTest.java
+++ b/jcore-likelihood-assignment-ae/src/test/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotatorTest.java
@@ -1,9 +1,7 @@
 
 package de.julielab.jcore.ae.likelihoodassignment;
 
-import de.julielab.jcore.types.ConceptMention;
-import de.julielab.jcore.types.LikelihoodIndicator;
-import de.julielab.jcore.types.Sentence;
+import de.julielab.jcore.types.*;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
 import org.apache.uima.jcas.JCas;
@@ -145,4 +143,41 @@ public void testAssignNextStrategy() throws Exception {
         assertEquals(highly, interaction.getLikelihood());
         assertEquals( not, theOtherOne.getLikelihood());
     }
+
+    @Test
+    public void testAssignNextStrategySpecificConceptType() throws Exception {
+        // Here we test that the interaction type EventMention gets the likelihood assignment and not
+        // the entity argument because that is also a ConceptMention which gets assigned by default.
+        AnalysisEngine assignmentAnnotator = AnalysisEngineFactory.createEngine(DESCRIPTOR,
+                LikelihoodAssignmentAnnotator.PARAM_ASSIGNMENT_STRATEGY, LikelihoodAssignmentAnnotator.STRATEGY_NEXT_CONCEPT,
+                LikelihoodAssignmentAnnotator.PARAM_CONCEPT_TYPE_NAME, EventMention.class.getCanonicalName());
+        final JCas jCas = assignmentAnnotator.newJCas();
+        jCas.setDocumentText("Our data suggest one entity interacts with another but there is phosphorylation.");
+        new Sentence(jCas, 0, jCas.getDocumentText().length()).addToIndexes();
+
+        LikelihoodIndicator suggest = new LikelihoodIndicator(jCas, 9, 16);
+        suggest.setLikelihood("moderate");
+        suggest.addToIndexes();
+
+        EntityMention oneEntity = new EntityMention(jCas, 17, 27);
+        oneEntity.addToIndexes();
+
+        EventMention interacts = new EventMention(jCas, 28, 37);
+        interacts.addToIndexes();
+
+        EntityMention another = new EntityMention(jCas, 43, 50);
+        another.addToIndexes();
+
+        EventMention phosphorylation = new EventMention(jCas, 64, 79);
+        phosphorylation.addToIndexes();
+
+        assignmentAnnotator.process(jCas);
+
+        // only the EventMentions should be assigned likelihoods.
+        assertEquals(null, oneEntity.getLikelihood());
+        assertEquals( suggest, interacts.getLikelihood());
+        assertEquals(null, another.getLikelihood());
+        // due to the next-concept strategy, this mention should receive the default assertion likelihood
+        assertEquals("assertion", phosphorylation.getLikelihood().getLikelihood());
+    }
 }

From 0635236ee017eddfe0d865bb01cf2bf0b398b74e Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 9 Nov 2022 17:47:04 +0100
Subject: [PATCH 263/269] Minor change.

---
 .../de/julielab/jcore/utility/JCoReAnnotationIndexMerger.java | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/jcore-utilities/src/main/java/de/julielab/jcore/utility/JCoReAnnotationIndexMerger.java b/jcore-utilities/src/main/java/de/julielab/jcore/utility/JCoReAnnotationIndexMerger.java
index 2923b7bd7..d016b5eb7 100644
--- a/jcore-utilities/src/main/java/de/julielab/jcore/utility/JCoReAnnotationIndexMerger.java
+++ b/jcore-utilities/src/main/java/de/julielab/jcore/utility/JCoReAnnotationIndexMerger.java
@@ -120,7 +120,6 @@ public boolean incrementAnnotation() {
     }
 
     protected boolean moveIterator(boolean initialize) {
-        int minBegin = Integer.MAX_VALUE;
         if (!initialize) {
             annotationIterators.get(currentIndex).moveToNext();
             firstToken = false;
@@ -135,6 +134,9 @@ protected boolean moveIterator(boolean initialize) {
                 return true;
             }
         }
+
+        // find the iterator with the lowest-begin-offset annotation and set currentIndex accordingly
+        int minBegin = Integer.MAX_VALUE;
         for (int i = 0; i < annotationIterators.size(); i++) {
             FSIterator<? extends TOP> it = annotationIterators.get(i);
             if (initialize)

From 184875db2798cb93b2133d52072b80faaaf43fee Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 9 Nov 2022 18:42:41 +0100
Subject: [PATCH 264/269] Handle the assignment to multiple concept mentions at
 the same offsets.

As may be the case for EventMentions where multiple events may be induced by the same trigger word.
---
 .../LikelihoodAssignmentAnnotator.java        | 17 ++++++++++---
 .../LikelihoodAssignmentAnnotatorTest.java    |  4 +++
 .../LikelihoodDetectionAnnotatorTest.java     | 25 +++++++++++++++----
 3 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/jcore-likelihood-assignment-ae/src/main/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotator.java b/jcore-likelihood-assignment-ae/src/main/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotator.java
index 7d7de8bba..4c31a62f9 100644
--- a/jcore-likelihood-assignment-ae/src/main/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotator.java
+++ b/jcore-likelihood-assignment-ae/src/main/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotator.java
@@ -124,6 +124,9 @@ private void assignLikelihoodToNextConceptMention(JCas aJCas) throws AnalysisEng
                 throw new AnalysisEngineProcessException(e);
             }
             LikelihoodIndicator previousLikelihood = null;
+            boolean previousLikelihoodConsumed = false;
+            int lastAssignedCmBegin = 0;
+            int lastAssignedCmEnd = 0;
             while (merger.incrementAnnotation()) {
                 final Annotation annotation = (Annotation) merger.getAnnotation();
                 ConceptMention cm = null;
@@ -133,14 +136,20 @@ private void assignLikelihoodToNextConceptMention(JCas aJCas) throws AnalysisEng
                     cm.setLikelihood(assertionIndicator);
                 }
                 // check if there is a likelihood anntotion preceeding the ConceptMention in this sentence without
-                // another ConceptMention in between
-                if (previousLikelihood != null && cm != null) {
+                // another ConceptMention in between - except when multiple ConceptMentions exist in the same offsets
+                // which is possible for EventMentions that exist on the EventTrigger annotation. The trigger may
+                // refer to multiple events.
+                if (cm != null && (previousLikelihood != null && (!previousLikelihoodConsumed || (lastAssignedCmBegin == cm.getBegin() && lastAssignedCmEnd == cm.getEnd())))) {
                     cm.setLikelihood(previousLikelihood);
                     // this likelihood indicator has been "consumed"
-                    previousLikelihood = null;
+                    previousLikelihoodConsumed = true;
+                    lastAssignedCmBegin = cm.getBegin();
+                    lastAssignedCmEnd = cm.getEnd();
                 }
-                if (annotation instanceof LikelihoodIndicator)
+                if (annotation instanceof LikelihoodIndicator) {
                     previousLikelihood = (LikelihoodIndicator) annotation;
+                    previousLikelihoodConsumed = false;
+                }
             }
         }
     }
diff --git a/jcore-likelihood-assignment-ae/src/test/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotatorTest.java b/jcore-likelihood-assignment-ae/src/test/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotatorTest.java
index 8479136af..34861be6c 100644
--- a/jcore-likelihood-assignment-ae/src/test/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotatorTest.java
+++ b/jcore-likelihood-assignment-ae/src/test/java/de/julielab/jcore/ae/likelihoodassignment/LikelihoodAssignmentAnnotatorTest.java
@@ -164,6 +164,9 @@ public void testAssignNextStrategySpecificConceptType() throws Exception {
 
         EventMention interacts = new EventMention(jCas, 28, 37);
         interacts.addToIndexes();
+        // this is here to test that the assignment to same-offset annotations works
+        EventMention interacts2 = new EventMention(jCas, 28, 37);
+        interacts2.addToIndexes();
 
         EntityMention another = new EntityMention(jCas, 43, 50);
         another.addToIndexes();
@@ -176,6 +179,7 @@ public void testAssignNextStrategySpecificConceptType() throws Exception {
         // only the EventMentions should be assigned likelihoods.
         assertEquals(null, oneEntity.getLikelihood());
         assertEquals( suggest, interacts.getLikelihood());
+        assertEquals( suggest, interacts2.getLikelihood());
         assertEquals(null, another.getLikelihood());
         // due to the next-concept strategy, this mention should receive the default assertion likelihood
         assertEquals("assertion", phosphorylation.getLikelihood().getLikelihood());
diff --git a/jcore-likelihood-detection-ae/src/test/java/de/julielab/jcore/ae/likelihooddetection/LikelihoodDetectionAnnotatorTest.java b/jcore-likelihood-detection-ae/src/test/java/de/julielab/jcore/ae/likelihooddetection/LikelihoodDetectionAnnotatorTest.java
index 814ce9755..eee8b0d8e 100644
--- a/jcore-likelihood-detection-ae/src/test/java/de/julielab/jcore/ae/likelihooddetection/LikelihoodDetectionAnnotatorTest.java
+++ b/jcore-likelihood-detection-ae/src/test/java/de/julielab/jcore/ae/likelihooddetection/LikelihoodDetectionAnnotatorTest.java
@@ -5,18 +5,18 @@
 import de.julielab.jcore.types.Token;
 import org.apache.uima.analysis_engine.AnalysisEngine;
 import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.util.JCasUtil;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.JFSIndexRepository;
 import org.apache.uima.resource.ResourceInitializationException;
-import org.apache.uima.resource.ResourceSpecifier;
 import org.apache.uima.util.InvalidXMLException;
-import org.apache.uima.util.XMLInputSource;
 import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.Iterator;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
@@ -68,9 +68,6 @@ public void initCas(JCas aJCas) {
     @Test
     @SuppressWarnings("rawtypes")
     public void testProcess() throws ResourceInitializationException, IOException, InvalidXMLException {
-
-        XMLInputSource likelihoodXML = null;
-        ResourceSpecifier likelihoodSpec = null;
         AnalysisEngine likelihoodAnnotator = AnalysisEngineFactory.createEngine(DESCRIPTOR);
         JCas aJCas = null;
         try {
@@ -127,4 +124,22 @@ private ArrayList<String> getPredictedIndicators(Iterator likelihoodIter) {
         prediction.add(predictedCategories);
         return prediction;
     }
+
+    /** Smoke test: runs likelihood detection on one example sentence and prints the detected indicators. */
+    @Test
+    public void test() throws Exception {
+        String text = "Genome-wide expression analyses indicate that TAZ/YAP, TEADs, and TGFβ-induced signals coordinate a specific pro-tumorigenic transcriptional program";
+        AnalysisEngine likelihoodAnnotator = AnalysisEngineFactory.createEngine(DESCRIPTOR);
+        // Let CAS creation failures propagate and fail the test; a null CAS must never reach process().
+        JCas aJCas = likelihoodAnnotator.newJCas();
+        // The example sentence has to be put into the CAS, otherwise the engine processes an empty document.
+        // NOTE(review): the annotator may additionally require Token annotations (cf. initCas) - confirm.
+        aJCas.setDocumentText(text);
+        likelihoodAnnotator.process(aJCas);
+
+        final Collection<LikelihoodIndicator> select = JCasUtil.select(aJCas, LikelihoodIndicator.class);
+        for (var s : select) {
+            System.out.println(s.getCoveredText());
+        }
+    }
 }

From c04a5951e3b17491e27d99f747c5f7100bdd8c94 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Mon, 14 Nov 2022 16:36:11 +0100
Subject: [PATCH 265/269] Resolve #154.

---
 jcore-annotation-removal-ae/LICENSE           |  26 ++
 jcore-annotation-removal-ae/README.md         |  34 ++
 jcore-annotation-removal-ae/component.meta    |  20 +
 jcore-annotation-removal-ae/pom.xml           |  55 +++
 .../AnnotationRemovalAnnotator.java           |  51 +++
 .../desc/jcore-annotation-removal-ae.xml      |  33 ++
 .../AnnotationRemovalAnnotatorTest.java       |  60 +++
 pom.xml                                       | 354 ++++++++++++------
 8 files changed, 516 insertions(+), 117 deletions(-)
 create mode 100644 jcore-annotation-removal-ae/LICENSE
 create mode 100644 jcore-annotation-removal-ae/README.md
 create mode 100644 jcore-annotation-removal-ae/component.meta
 create mode 100644 jcore-annotation-removal-ae/pom.xml
 create mode 100644 jcore-annotation-removal-ae/src/main/java/de/julielab/jcore/ae/annotationremoval/AnnotationRemovalAnnotator.java
 create mode 100644 jcore-annotation-removal-ae/src/main/resources/de/julielab/jcore/ae/annotationremoval/desc/jcore-annotation-removal-ae.xml
 create mode 100644 jcore-annotation-removal-ae/src/test/java/de/julielab/jcore/ae/annotationremoval/AnnotationRemovalAnnotatorTest.java

diff --git a/jcore-annotation-removal-ae/LICENSE b/jcore-annotation-removal-ae/LICENSE
new file mode 100644
index 000000000..fbbd41e05
--- /dev/null
+++ b/jcore-annotation-removal-ae/LICENSE
@@ -0,0 +1,26 @@
+BSD 2-Clause License
+
+Copyright (c) 2017, JULIE Lab
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/jcore-annotation-removal-ae/README.md b/jcore-annotation-removal-ae/README.md
new file mode 100644
index 000000000..563b7ad3e
--- /dev/null
+++ b/jcore-annotation-removal-ae/README.md
@@ -0,0 +1,34 @@
+# JCoRe Annotation Removal AE
+
+**Descriptor Path**:
+```
+de.julielab.jcore.ae.annotationremoval.desc.jcore-annotation-removal-ae
+```
+
+Removes annotations from the CAS that belong to one of the types specified as a parameter value in the descriptor.
+
+
+
+**1. Parameters**
+
+| Parameter Name | Parameter Type | Mandatory | Multivalued | Description |
+|----------------|----------------|-----------|-------------|-------------|
+| AnnotationTypes | String | true | true | List of qualified UIMA type names for which all annotations should be removed from each CAS. |
+
+
+**2. Predefined Settings**
+
+| Parameter Name | Parameter Syntax | Example |
+|----------------|------------------|---------|
+| AnnotationTypes | Fully qualified UIMA type name, one per array element | `de.julielab.jcore.types.Token` |
+
+
+**3. Capabilities**
+
+| Type | Input | Output |
+|------|:-----:|:------:|
+| de.julielab.jcore.types.TYPE |  | `+` |
+| de.julielab.jcore.types.ace.TYPE | `+` |  |
+
+
+[1] Some Literature?
diff --git a/jcore-annotation-removal-ae/component.meta b/jcore-annotation-removal-ae/component.meta
new file mode 100644
index 000000000..0666d9f0f
--- /dev/null
+++ b/jcore-annotation-removal-ae/component.meta
@@ -0,0 +1,20 @@
+{
+    "categories": [
+        "ae"
+    ],
+    "description": "Removes annotations from the CAS that belong to one of the types specified as a parameter value in the descriptor.",
+    "descriptors": [
+        {
+            "category": "ae",
+            "location": "de.julielab.jcore.ae.annotationremoval.desc.jcore-annotation-removal-ae"
+        }
+    ],
+    "exposable": true,
+    "group": "general",
+    "maven-artifact": {
+        "artifactId": "jcore-annotation-removal-ae",
+        "groupId": "de.julielab",
+        "version": "2.6.0-SNAPSHOT"
+    },
+    "name": "JCoRe Annotation Removal AE"
+}
diff --git a/jcore-annotation-removal-ae/pom.xml b/jcore-annotation-removal-ae/pom.xml
new file mode 100644
index 000000000..f5152245a
--- /dev/null
+++ b/jcore-annotation-removal-ae/pom.xml
@@ -0,0 +1,55 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>jcore-annotation-removal-ae</artifactId>
+    <packaging>jar</packaging>
+    <groupId>de.julielab</groupId>
+
+    <parent>
+        <groupId>de.julielab</groupId>
+        <artifactId>jcore-base</artifactId>
+        <version>2.6.0-SNAPSHOT</version>
+    </parent>
+
+    <version>2.6.0-SNAPSHOT</version>
+
+    <dependencies>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-types</artifactId>
+            <version>${jcore-types-version}</version>
+        </dependency>
+        <dependency>
+            <groupId>de.julielab</groupId>
+            <artifactId>jcore-descriptor-creator</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>org.junit.jupiter</groupId>
+            <artifactId>junit-jupiter-engine</artifactId>
+        </dependency>
+    </dependencies>
+    <name>JCoRe Annotation Removal AE</name>
+    <organization>
+        <name>JULIE Lab Jena, Germany</name>
+        <url>http://www.julielab.de</url>
+    </organization>
+    <url>https://github.com/JULIELab/jcore-base/tree/master/jcore-annotation-removal-ae</url>
+    <description>Removes annotations from the CAS that belong to one of the types specified as a parameter value in the descriptor.</description>
+    <licenses>
+        <license>
+            <name>BSD 2-Clause</name>
+            <url>https://opensource.org/licenses/BSD-2-Clause</url>
+        </license>
+    </licenses>
+</project>
diff --git a/jcore-annotation-removal-ae/src/main/java/de/julielab/jcore/ae/annotationremoval/AnnotationRemovalAnnotator.java b/jcore-annotation-removal-ae/src/main/java/de/julielab/jcore/ae/annotationremoval/AnnotationRemovalAnnotator.java
new file mode 100644
index 000000000..019f06e02
--- /dev/null
+++ b/jcore-annotation-removal-ae/src/main/java/de/julielab/jcore/ae/annotationremoval/AnnotationRemovalAnnotator.java
@@ -0,0 +1,51 @@
+
+package de.julielab.jcore.ae.annotationremoval;
+
+import org.apache.uima.UimaContext;
+import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
+import org.apache.uima.cas.Type;
+import org.apache.uima.fit.descriptor.ConfigurationParameter;
+import org.apache.uima.fit.descriptor.ResourceMetaData;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+
+@ResourceMetaData(name="JCoRe Annotation Removal AE", description = "Removes annotations from the CAS that belong to one of the types specified as a parameter value in the descriptor.", vendor = "JULIE Lab Jena, Germany")
+public class AnnotationRemovalAnnotator extends JCasAnnotator_ImplBase {
+	public static final String PARAM_ANNOTATION_TYPES = "AnnotationTypes";
+	private final static Logger log = LoggerFactory.getLogger(AnnotationRemovalAnnotator.class);
+
+	@ConfigurationParameter(name=PARAM_ANNOTATION_TYPES, description="List of qualified UIMA type names for which all annotations should be removed from each CAS.")
+	private String[] annotationTypesForRemoval;
+
+	/** Called once at component creation: reads the type names to remove and rejects a missing or empty list. */
+	@Override
+	public void initialize(final UimaContext aContext) throws ResourceInitializationException {
+		super.initialize(aContext);
+		annotationTypesForRemoval = (String[]) aContext.getConfigParameterValue(PARAM_ANNOTATION_TYPES);
+		if (annotationTypesForRemoval == null || annotationTypesForRemoval.length == 0)
+			throw new ResourceInitializationException(new IllegalArgumentException("The list of annotations for removal, given through parameter '" + PARAM_ANNOTATION_TYPES + "' is empty."));
+	}
+
+	/**
+	 * Called for each document: removes every annotation found in the annotation index of each configured type.
+	 * @throws IllegalArgumentException if a configured type name cannot be resolved in the CAS type system
+	 */
+	@Override
+	public void process(final JCas aJCas) {
+		for (String annotationTypeName : annotationTypesForRemoval) {
+			final Type type = aJCas.getTypeSystem().getType(annotationTypeName);
+			if (type == null)
+				throw new IllegalArgumentException("The type '" + annotationTypeName + "' given through parameter '" + PARAM_ANNOTATION_TYPES + "' was not found in the type system.");
+			// Collect first, then remove; a fresh list per type keeps earlier types from being removed again.
+			final List<Annotation> removalList = new ArrayList<>();
+			aJCas.getAnnotationIndex(type).forEach(removalList::add);
+			removalList.forEach(Annotation::removeFromIndexes);
+		}
+	}
+}
diff --git a/jcore-annotation-removal-ae/src/main/resources/de/julielab/jcore/ae/annotationremoval/desc/jcore-annotation-removal-ae.xml b/jcore-annotation-removal-ae/src/main/resources/de/julielab/jcore/ae/annotationremoval/desc/jcore-annotation-removal-ae.xml
new file mode 100644
index 000000000..141f3ef1d
--- /dev/null
+++ b/jcore-annotation-removal-ae/src/main/resources/de/julielab/jcore/ae/annotationremoval/desc/jcore-annotation-removal-ae.xml
@@ -0,0 +1,33 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
+  <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
+  <primitive>true</primitive>
+  <annotatorImplementationName>de.julielab.jcore.ae.annotationremoval.AnnotationRemovalAnnotator</annotatorImplementationName>
+  <analysisEngineMetaData>
+    <name>JCoRe Annotation Removal AE</name>
+    <description>Removes annotations from the CAS that belong to one of the types specified as a parameter value in the descriptor.</description>
+    <vendor>JULIE Lab Jena, Germany</vendor>
+    <configurationParameters>
+      <configurationParameter>
+        <name>AnnotationTypes</name>
+        <description>List of qualified UIMA type names for which all annotations should be removed from each CAS.</description>
+        <type>String</type>
+        <multiValued>true</multiValued>
+        <mandatory>true</mandatory>
+      </configurationParameter>
+    </configurationParameters>
+    <configurationParameterSettings/>
+    <typeSystemDescription>
+      <imports>
+        <import name="de.julielab.jcore.types.jcore-all-types"/>
+      </imports>
+    </typeSystemDescription>
+    <fsIndexCollection/>
+    <capabilities/>
+    <operationalProperties>
+      <modifiesCas>true</modifiesCas>
+      <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
+      <outputsNewCASes>false</outputsNewCASes>
+    </operationalProperties>
+  </analysisEngineMetaData>
+</analysisEngineDescription>
\ No newline at end of file
diff --git a/jcore-annotation-removal-ae/src/test/java/de/julielab/jcore/ae/annotationremoval/AnnotationRemovalAnnotatorTest.java b/jcore-annotation-removal-ae/src/test/java/de/julielab/jcore/ae/annotationremoval/AnnotationRemovalAnnotatorTest.java
new file mode 100644
index 000000000..a401c969f
--- /dev/null
+++ b/jcore-annotation-removal-ae/src/test/java/de/julielab/jcore/ae/annotationremoval/AnnotationRemovalAnnotatorTest.java
@@ -0,0 +1,60 @@
+package de.julielab.jcore.ae.annotationremoval;
+
+import de.julielab.jcore.types.Gene;
+import de.julielab.jcore.types.Sentence;
+import de.julielab.jcore.types.Token;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.fit.factory.AnalysisEngineFactory;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.junit.jupiter.api.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+
+/**
+ * Unit tests for jcore-annotation-removal-ae.
+ */
+public class AnnotationRemovalAnnotatorTest {
+    private final static Logger log = LoggerFactory.getLogger(AnnotationRemovalAnnotatorTest.class);
+
+    @Test
+    public void testAnnotator() throws Exception {
+        final AnalysisEngine engine = AnalysisEngineFactory.createEngine("de.julielab.jcore.ae.annotationremoval.desc.jcore-annotation-removal-ae",
+                AnnotationRemovalAnnotator.PARAM_ANNOTATION_TYPES, new String[]{"de.julielab.jcore.types.Token", "de.julielab.jcore.types.Gene"});
+        final JCas jCas = engine.newJCas();
+        jCas.setDocumentText("There is a gene in this sentence.");
+        addTokens(jCas);
+        new Sentence(jCas, 0, jCas.getDocumentText().length()).addToIndexes();
+        new Gene(jCas, 11, 15).addToIndexes();
+
+        // Check that the annotations we just created are actually there.
+        assertFalse(JCasUtil.select(jCas, Sentence.class).isEmpty());
+        assertFalse(JCasUtil.select(jCas, Token.class).isEmpty());
+        assertFalse(JCasUtil.select(jCas, Gene.class).isEmpty());
+
+        engine.process(jCas);
+
+        // And now check that the annotations that should be removed are really gone.
+        assertFalse(JCasUtil.select(jCas, Sentence.class).isEmpty());
+        assertTrue(JCasUtil.select(jCas, Token.class).isEmpty());
+        assertTrue(JCasUtil.select(jCas, Gene.class).isEmpty());
+    }
+
+    /**
+     * Adds one Token per alphanumerical run and one per punctuation character of the document text.
+     */
+    private void addTokens(JCas jCas) {
+        // Both token kinds are matched by a single alternation, so each find() reads the matcher that advanced.
+        Matcher tokens = Pattern.compile("[A-Za-z0-9]+|\\p{Punct}").matcher(jCas.getDocumentText());
+        while (tokens.find()) {
+            new Token(jCas, tokens.start(), tokens.end()).addToIndexes();
+        }
+    }
+}
diff --git a/pom.xml b/pom.xml
index f7f2eabfc..f290a5ae4 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,586 +1,706 @@
 <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
-          
+            
+  
   
   
   <modelVersion>4.0.0</modelVersion>
-          
+            
+  
   
   
   <parent>
-                    
+                        
+    
     
     
     <groupId>de.julielab</groupId>
-                    
+                        
+    
     
     
     <artifactId>jcore-parent</artifactId>
-                    
+                        
+    
     
     
     <version>2.5.2-SNAPSHOT</version>
-                  
+                      
+  
   
   
   </parent>
-          
+            
+  
   
   
   <artifactId>jcore-base</artifactId>
-          
+            
+  
   
   
   <packaging>pom</packaging>
-          
+            
+  
   
   
   <name>JCoRe Base</name>
-          
+            
+  
   
   
   <description>The POM for the JCoRe Base projects.</description>
-          
+            
+  
   
   
   <version>2.6.0-SNAPSHOT</version>
-          
+            
+  
   
   
   <organization>
-                    
+                        
+    
     
     
     <name>JULIE Lab, Germany</name>
-                    
+                        
+    
     
     
     <url>http://www.julielab.de</url>
-                  
+                      
+  
   
   
   </organization>
-          
+            
+  
   
   
   <licenses>
-                    
+                        
+    
     
     
     <license>
-                              
+                                    
+      
       
       
       <name>BSD-2-Clause</name>
-                              
+                                    
+      
       
       
       <url>https://opensource.org/licenses/BSD-2-Clause</url>
-                            
+                                  
+    
     
     
     </license>
-                  
+                      
+  
   
   
   </licenses>
-          
+            
+  
   
   
   <url>https://github.com/JULIELab/jcore-base</url>
-          
+            
+  
   
   
   <dependencies>
-                    
+                        
+    
     
     
     <dependency>
-                              
+                                    
+      
       
       
       <groupId>org.apache.uima</groupId>
-                              
+                                    
+      
       
       
       <artifactId>uimaj-core</artifactId>
-                              
+                                    
+      
       
       
       <version>${uima-version}</version>
-                            
+                                  
+    
     
     
     </dependency>
-                    
+                        
+    
     
     
     <dependency>
-                              
+                                    
+      
       
       
       <groupId>org.apache.uima</groupId>
-                              
+                                    
+      
       
       
       <artifactId>uimafit-core</artifactId>
-                              
+                                    
+      
       
       
       <version>${uimafit-version}</version>
-                            
+                                  
+    
     
     
     </dependency>
-                  
+                      
+  
   
   
   </dependencies>
-          
+            
+  
   
   
   <modules>
-                    
+                        
+    
     
     
     <module>jcore-annotation-adder-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-ace-reader</module>
-                    
+                        
+    
     
     
     <module>jcore-acronym-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-acronym-writer</module>
-                    
+                        
+    
     
     
     <module>jcore-banner-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-bc2gm-reader</module>
-                    
+                        
+    
     
     
     <module>jcore-bc2gmformat-writer</module>
-                    
+                        
+    
     
     
     <module>jcore-biolemmatizer-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-bionlpformat-consumer</module>
-                    
+                        
+    
     
     
     <module>jcore-bionlpformat-reader</module>
-                    
+                        
+    
     
     
     <module>jcore-biosem-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-conll-consumer</module>
-                    
+                        
+    
     
     
     <module>jcore-coordination-baseline-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-cord19-reader</module>
-                    
+                        
+    
     
     
     <module>jcore-coreference-writer</module>
-                    
+                        
+    
     
     
     <module>jcore-ct-reader</module>
-                    
+                        
+    
     
     
     <module>jcore-db-checkpoint-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-descriptor-creator</module>
-                    
+                        
+    
     
     
     <module>jcore-dta-reader</module>
-                    
+                        
+    
     
     
     <module>jcore-ec-code-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-elasticsearch-consumer</module>
-                    
+                        
+    
     
     
     <module>jcore-embedding-writer</module>
-                    
+                        
+    
     
     
     <module>jcore-event-flattener-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-feature-value-replacement-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-file-reader</module>
-                    
+                        
+    
     
     
     <module>jcore-flair-ner-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-flair-token-embedding-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-flow-controllers</module>
-                    
+                        
+    
     
     
     <module>jcore-gnp-bioc-reader</module>
-                    
+                        
+    
     
     
     <module>jcore-gnp-bioc-writer</module>
-                    
+                        
+    
     
     
     <module>jcore-iexml-consumer</module>
-                    
+                        
+    
     
     
     <module>jcore-iexml-reader</module>
-                    
+                        
+    
     
     
     <module>jcore-ign-reader</module>
-                    
+                        
+    
     
     
     <module>jcore-iob-consumer</module>
-                    
+                        
+    
     
     
     <module>jcore-jnet-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-jpos-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-jsbd-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-jtbd-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-julielab-entity-evaluator-consumer</module>
-                    
+                        
+    
     
     
     <module>jcore-likelihood-assignment-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-likelihood-detection-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-line-multiplier</module>
-                    
+                        
+    
     
     
     <module>jcore-lingpipegazetteer-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-lingpipe-porterstemmer-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-lingscope-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-linnaeus-species-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-mantra-xml-types</module>
-                    
+                        
+    
     
     
     <module>jcore-medxn-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-msdoc-reader</module>
-                    
+                        
+    
     
     
     <module>jcore-mstparser-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-muc7-reader</module>
-                    
+                        
+    
     
     
     <module>jcore-mutationfinder-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-neo4j-relations-consumer</module>
-                    
+                        
+    
     
     
     <module>jcore-opennlp-chunk-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-opennlp-parser-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-opennlp-postag-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-opennlp-sentence-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-opennlp-token-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-ppd-writer</module>
-                    
+                        
+    
     
     
     <module>jcore-pmc-reader</module>
-                    
+                        
+    
     
     
     <module>jcore-pubtator-reader</module>
-                    
+                        
+    
     
     
     <module>jcore-stanford-lemmatizer-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-topic-indexing-ae</module>
-                    
+                        
+    
     
     
     <module>jcore-topics-writer</module>
-                    
+                        
+    
     
     
     <module>jcore-txt-consumer</module>
-                    
+                        
+    
     
     
     <module>jcore-types</module>
-                    
+                        
+    
     
     
     <module>jcore-utilities</module>
-                    
+                        
+    
     
     
     <module>jcore-xml-mapper</module>
-                    
+                        
+    
     
     
     <module>jcore-xml-reader</module>
-                    
+                        
+    
     
     
     <module>jcore-xmi-reader</module>
-                    
+                        
+    
     
     
     <module>jcore-xmi-writer</module>
-                    
+                        
+    
     
     
     <module>jedis-parent</module>
-                    
+                        
+    
     
     
     <module>jcore-jedis-integration-tests</module>
-                    
+                        
+    
     
     
     <module>jcore-mmax2-reader</module>
-              
+                  
+    
     
     
     <module>jcore-nlmgene-reader</module>
-          
+              
+    
     
     <module>jcore-gnormplus-ae</module>
+          
+    
+    <module>jcore-annotation-removal-ae</module>
       
   </modules>
-          
+            
+  
   
   
   <scm>
-                    
+                        
+    
     
     
     <connection>scm:git:https://github.com/JULIELab/jcore-base
         </connection>
-                    
+                        
+    
     
     
     <developerConnection>scm:git:https://github.com/JULIELab/jcore-base</developerConnection>
-                    
+                        
+    
     
     
     <url>scm:git:https://github.com/JULIELab/jcore-base</url>
-                  
+                      
+  
   
   
   </scm>
-        
+          
+
 
 
From 7ac663bdac425318b891f09ca168af7858d8bea3 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 16 Nov 2022 09:28:08 +0100
Subject: [PATCH 266/269] Version bumps

---
 .../consumer/es/ElasticSearchConsumerIT.java  | 21 ++++++++++++++++++-
 jcore-msdoc-reader/pom.xml                    |  6 +++---
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/jcore-elasticsearch-consumer/src/test/java/de/julielab/jcore/consumer/es/ElasticSearchConsumerIT.java b/jcore-elasticsearch-consumer/src/test/java/de/julielab/jcore/consumer/es/ElasticSearchConsumerIT.java
index a3b6507c9..b07d0ca86 100644
--- a/jcore-elasticsearch-consumer/src/test/java/de/julielab/jcore/consumer/es/ElasticSearchConsumerIT.java
+++ b/jcore-elasticsearch-consumer/src/test/java/de/julielab/jcore/consumer/es/ElasticSearchConsumerIT.java
@@ -64,9 +64,10 @@ public void testMinimal() throws Exception {
         final AnalysisEngine consumer = AnalysisEngineFactory.createEngine(ElasticSearchConsumer.class,
                 ElasticSearchConsumer.PARAM_INDEX_NAME, TEST_INDEX,
                 ElasticSearchConsumer.PARAM_URLS, "http://localhost:" + es.getMappedPort(9200),
-                ElasticSearchConsumer.PARAM_FIELD_GENERATORS, new String[]{"de.julielab.jcore.consumer.es.ElasticSearchConsumerIT$TestFieldGenerator"});
+                ElasticSearchConsumer.PARAM_FIELD_GENERATORS, new String[]{"de.julielab.jcore.consumer.es.ElasticSearchConsumerIT$MinimalTestFieldGenerator"});
         consumer.process(jCas);
         consumer.collectionProcessComplete();
+        Thread.sleep(4000);
         final URL url = new URL("http://localhost:" + es.getMappedPort(9200) + "/" + TEST_INDEX + "/_doc/987");
         final ObjectMapper om = new ObjectMapper();
         final Map<?, ?> map = om.readValue(url.openStream(), Map.class);
@@ -153,5 +154,23 @@ public Document addFields(JCas aJCas, Document doc) {
         }
     }
 
+    /**
+     * This class is passed by name as parameter to the test consumer AE.
+     */
+    public static class MinimalTestFieldGenerator extends FieldGenerator {
+        public MinimalTestFieldGenerator(FilterRegistry filterRegistry) {
+            super(filterRegistry);
+        }
+
+        @Override
+        public Document addFields(JCas aJCas, Document doc) {
+            final String docId = JCoReTools.getDocId(aJCas);
+            doc.setId(docId);
+            // we need any field or the document won't be indexed
+            doc.addField("text", "Some text.");
+            return doc;
+        }
+    }
+
 
 }
diff --git a/jcore-msdoc-reader/pom.xml b/jcore-msdoc-reader/pom.xml
index c162caa94..cf462d0cd 100644
--- a/jcore-msdoc-reader/pom.xml
+++ b/jcore-msdoc-reader/pom.xml
@@ -31,19 +31,19 @@
         <dependency>
             <groupId>org.apache.poi</groupId>
             <artifactId>poi</artifactId>
-            <version>3.15</version>
+            <version>5.2.1</version>
         </dependency>
         <dependency>
             <groupId>org.apache.poi</groupId>
             <artifactId>poi-scratchpad</artifactId>
-            <version>3.15</version>
+            <version>5.2.1</version>
         </dependency>
 
 
         <dependency>
             <groupId>org.apache.poi</groupId>
             <artifactId>poi-ooxml</artifactId>
-            <version>3.16</version>
+            <version>5.2.1</version>
         </dependency>
         <dependency>
             <groupId>org.junit.jupiter</groupId>

From 1d40b9bc9b31db9c2aa58a9ed8b77133347f8b2b Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 16 Nov 2022 20:02:13 +0100
Subject: [PATCH 267/269] Version 2.6.0.

---
 jcore-ace-reader/component.meta               |  2 +-
 jcore-ace-reader/pom.xml                      |  2 +-
 .../reader/ace/desc/jcore-ace-reader.xml      |  2 +-
 jcore-acronym-ae/component.meta               |  2 +-
 jcore-acronym-ae/pom.xml                      |  2 +-
 .../acronymtagger/desc/jcore-acronym-ae.xml   |  2 +-
 .../desc/JulesToolsAEDescriptor.xml           |  2 +-
 .../desc/jcore-acronymtagger-test.xml         |  2 +-
 .../types/StemNetSemanticsTypeSystem.xml      |  2 +-
 jcore-acronym-writer/component.meta           |  2 +-
 jcore-acronym-writer/pom.xml                  |  4 +-
 .../acronyms/desc/jcore-acronym-writer.xml    |  2 +-
 jcore-annotation-adder-ae/component.meta      |  2 +-
 jcore-annotation-adder-ae/pom.xml             |  2 +-
 .../desc/jcore-annotation-adder-ae.xml        |  2 +-
 jcore-annotation-removal-ae/component.meta    |  2 +-
 jcore-annotation-removal-ae/pom.xml           |  4 +-
 .../desc/jcore-annotation-removal-ae.xml      | 13 ++--
 jcore-banner-ae/component.meta                |  2 +-
 jcore-banner-ae/pom.xml                       |  2 +-
 .../jcore/ae/banner/desc/jcore-banner-ae.xml  | 12 ++--
 .../src/main/resources/desc/BANNERAE.xml      |  2 +-
 .../src/main/resources/desc/bannerTS.xml      |  2 +-
 jcore-bc2gm-reader/component.meta             |  2 +-
 jcore-bc2gm-reader/pom.xml                    |  2 +-
 .../reader/bc2gm/desc/jcore-bc2gm-reader.xml  |  2 +-
 jcore-bc2gmformat-writer/component.meta       |  2 +-
 jcore-bc2gmformat-writer/pom.xml              |  2 +-
 .../desc/jcore-bc2gmformat-writer.xml         |  2 +-
 jcore-biolemmatizer-ae/component.meta         |  2 +-
 jcore-biolemmatizer-ae/pom.xml                |  2 +-
 .../desc/jcore-biolemmatizer-ae.xml           |  2 +-
 jcore-bionlpformat-consumer/component.meta    |  2 +-
 jcore-bionlpformat-consumer/pom.xml           |  2 +-
 ...pformat-consumer-biomedical-sharedtask.xml |  2 +-
 .../jcore-bionlpformat-consumer-medical.xml   |  2 +-
 .../jcore-bionlpformat-consumer-segment.xml   |  2 +-
 .../test/resources/types/jcore-all-types.xml  |  2 +-
 .../types/jcore-semantics-biology-types.xml   |  2 +-
 jcore-bionlpformat-reader/component.meta      |  2 +-
 jcore-bionlpformat-reader/pom.xml             |  2 +-
 ...nlpformat-reader-biomedical-sharedtask.xml |  2 +-
 .../jcore-bionlpformat-reader-medical.xml     |  2 +-
 .../jcore-bionlpformat-reader-segment.xml     |  2 +-
 .../bionlpformat/desc/EventReaderTest.xml     |  2 +-
 jcore-biosem-ae/component.meta                |  2 +-
 jcore-biosem-ae/pom.xml                       |  8 +--
 jcore-conll-consumer/component.meta           |  2 +-
 jcore-conll-consumer/pom.xml                  |  2 +-
 .../conll/desc/jcore-conll-consumer.xml       |  2 +-
 jcore-coordination-baseline-ae/component.meta |  2 +-
 jcore-coordination-baseline-ae/pom.xml        |  2 +-
 ...core-coordination-baseline-ae-conjunct.xml |  2 +-
 ...-coordination-baseline-ae-coordination.xml |  2 +-
 .../jcore-coordination-baseline-ae-eee.xml    |  2 +-
 ...core-coordination-baseline-ae-ellipsis.xml |  2 +-
 .../resources/desc/ConjunctAnnotatorTest.xml  |  2 +-
 .../desc/CoordinationAnnotatorTest.xml        |  2 +-
 .../test/resources/desc/EEEAnnotatorTest.xml  |  2 +-
 .../resources/desc/EllipsisAnnotatorTest.xml  |  2 +-
 jcore-cord19-reader/component.meta            |  2 +-
 jcore-cord19-reader/pom.xml                   |  2 +-
 .../desc/jcore-cord19-multiplier-reader.xml   |  2 +-
 .../cord19/desc/jcore-cord19-multiplier.xml   |  2 +-
 jcore-coreference-writer/component.meta       |  2 +-
 jcore-coreference-writer/pom.xml              |  4 +-
 .../desc/jcore-coreference-writer.xml         |  2 +-
 jcore-ct-reader/component.meta                |  2 +-
 jcore-ct-reader/pom.xml                       |  2 +-
 .../ct/desc/jcore-clinicaltrials-reader.xml   |  2 +-
 jcore-db-checkpoint-ae/component.meta         |  2 +-
 jcore-db-checkpoint-ae/pom.xml                |  2 +-
 .../desc/jcore-db-checkpoint-ae.xml           |  2 +-
 .../desc/jcore-db-checkpoint-consumer.xml     |  2 +-
 jcore-db-reader/component.meta                |  2 +-
 jcore-db-reader/pom.xml                       |  4 +-
 .../db/desc/jcore-db-multiplier-reader.xml    |  2 +-
 jcore-descriptor-creator/component.meta       |  2 +-
 jcore-descriptor-creator/pom.xml              |  2 +-
 .../de.julielab.jcore.ae.testae.TestAE.xml    | 62 +++++++++----------
 ...ore.consumer.testconsumer.Testconsumer.xml | 62 +++++++++----------
 ...ltiplier.testmultiplier.TestMultiplier.xml | 62 +++++++++----------
 ...lab.jcore.reader.testreader.TestReader.xml | 62 +++++++++----------
 jcore-dta-reader/component.meta               |  2 +-
 jcore-dta-reader/pom.xml                      |  2 +-
 .../reader/dta/desc/jcore-dta-reader.xml      |  2 +-
 jcore-ec-code-ae/component.meta               |  2 +-
 jcore-ec-code-ae/pom.xml                      |  2 +-
 jcore-elasticsearch-consumer/component.meta   |  2 +-
 jcore-elasticsearch-consumer/pom.xml          |  2 +-
 .../consumer/es/desc/jcore-json-writer.xml    |  2 +-
 .../julielab/jcore/consumer/es/testTypes.xml  |  2 +-
 jcore-embedding-writer/component.meta         |  2 +-
 jcore-embedding-writer/pom.xml                |  2 +-
 .../ew/desc/jcore-embedding-writer.xml        |  2 +-
 jcore-event-flattener-ae/component.meta       |  2 +-
 jcore-event-flattener-ae/pom.xml              |  2 +-
 .../desc/jcore-event-flattener-ae.xml         |  2 +-
 .../component.meta                            |  2 +-
 jcore-feature-value-replacement-ae/pom.xml    |  2 +-
 .../jcore-feature-value-replacement-ae.xml    |  2 +-
 jcore-file-reader/component.meta              |  2 +-
 jcore-file-reader/pom.xml                     |  2 +-
 .../reader/file/desc/jcore-file-reader.xml    | 18 +++---
 jcore-flair-ner-ae/component.meta             |  2 +-
 jcore-flair-ner-ae/pom.xml                    |  6 +-
 .../ae/flairner/desc/jcore-flair-ner-ae.xml   |  2 +-
 jcore-flair-token-embedding-ae/component.meta |  2 +-
 jcore-flair-token-embedding-ae/pom.xml        |  4 +-
 .../desc/jcore-flair-token-embedding-ae.xml   |  2 +-
 jcore-flow-controllers/component.meta         |  2 +-
 jcore-flow-controllers/pom.xml                |  2 +-
 jcore-gnormplus-ae/component.meta             |  2 +-
 jcore-gnormplus-ae/pom.xml                    | 16 ++---
 .../jcore/ae/gnp/desc/jcore-gnormplus-ae.xml  | 17 ++---
 .../desc/jcore-gnormplus-bioc-multiplier.xml  | 15 ++---
 .../jcore-gnormplus-pmc-db-multiplier.xml     | 13 ++--
 .../jcore-gnormplus-xmi-db-multiplier.xml     | 19 +++---
 .../jcore-gnormplus-xml-db-multiplier.xml     | 19 +++---
 jcore-gnp-bioc-reader/component.meta          |  6 +-
 jcore-gnp-bioc-reader/pom.xml                 |  4 +-
 .../desc/jcore-bnp-bioc-multiplier-reader.xml |  9 +--
 .../reader/desc/jcore-bnp-bioc-multiplier.xml | 21 ++++---
 jcore-gnp-bioc-writer/component.meta          |  2 +-
 jcore-gnp-bioc-writer/pom.xml                 |  2 +-
 .../gnp/desc/jcore-gnp-bioc-writer.xml        | 17 ++---
 jcore-iexml-consumer/component.meta           |  2 +-
 jcore-iexml-consumer/pom.xml                  |  4 +-
 .../iexml/desc/jcore-iexml-consumer.xml       |  2 +-
 jcore-iexml-reader/component.meta             |  2 +-
 jcore-iexml-reader/pom.xml                    |  4 +-
 .../reader/iexml/desc/jcore-iexml-reader.xml  |  2 +-
 jcore-ign-reader/component.meta               |  2 +-
 jcore-ign-reader/pom.xml                      |  2 +-
 .../reader/ign/desc/jcore-ign-reader.xml      |  2 +-
 jcore-iob-consumer/component.meta             |  2 +-
 jcore-iob-consumer/pom.xml                    |  2 +-
 .../cas2iob/desc/jcore-iob-consumer.xml       |  7 ++-
 .../cas2iob/desc/ToIOBConsumerTest.xml        |  2 +-
 .../consumer/cas2iob/types/TestTypeSystem.xml |  2 +-
 jcore-jedis-integration-tests/pom.xml         | 10 +--
 .../jcore/ae/jemas/desc/jcore-jemas-ae.xml    |  2 +-
 jcore-jnet-ae/component.meta                  |  2 +-
 jcore-jnet-ae/pom.xml                         |  2 +-
 .../jcore/ae/jnet/desc/jcore-jnet-ae.xml      |  2 +-
 .../ae/jnet/uima/EntityAnnotatorTest.xml      |  2 +-
 .../jcore/ae/jnet/uima/tsDescriptor.xml       |  2 +-
 jcore-jpos-ae/component.meta                  |  2 +-
 jcore-jpos-ae/pom.xml                         |  2 +-
 .../jcore/ae/jpos/desc/jcore-jpos.xml         |  2 +-
 .../test/resources/POSTagAnnotatorTest.xml    |  2 +-
 jcore-jsbd-ae/component.meta                  |  2 +-
 jcore-jsbd-ae/pom.xml                         |  2 +-
 .../jcore/ae/jsbd/desc/jcore-jsbd-ae.xml      |  2 +-
 .../ae/jsbd/desc/SentenceAnnotatorTest.xml    |  2 +-
 .../SentenceAnnotator_with-scope_Test.xml     |  2 +-
 .../ae/jsbd/desc/paragraph-scope-type.xml     |  2 +-
 jcore-jtbd-ae/component.meta                  |  2 +-
 jcore-jtbd-ae/pom.xml                         |  4 +-
 .../jcore/ae/jtbd/desc/jcore-jtbd.xml         |  2 +-
 .../jcore/ae/jtbd/desc/TokenAnnotatorTest.xml |  2 +-
 .../component.meta                            |  2 +-
 .../pom.xml                                   |  4 +-
 ...ore-julielab-entity-evaluator-consumer.xml |  9 +--
 jcore-likelihood-assignment-ae/component.meta |  2 +-
 jcore-likelihood-assignment-ae/pom.xml        |  2 +-
 .../desc/jcore-likelihood-assignment-ae.xml   |  2 +-
 jcore-likelihood-detection-ae/component.meta  |  2 +-
 jcore-likelihood-detection-ae/pom.xml         |  2 +-
 .../desc/jcore-likelihood-detection-ae.xml    |  2 +-
 jcore-line-multiplier/component.meta          |  2 +-
 jcore-line-multiplier/pom.xml                 |  2 +-
 .../line/desc/jcore-line-multiplier-ae.xml    |  2 +-
 .../line/desc/jcore-line-multiplier-ae.xml    |  2 +-
 .../component.meta                            |  2 +-
 jcore-lingpipe-porterstemmer-ae/pom.xml       |  2 +-
 .../desc/jcore-lingpipe-porterstemmer-ae.xml  |  2 +-
 jcore-lingpipegazetteer-ae/component.meta     |  2 +-
 jcore-lingpipegazetteer-ae/pom.xml            |  2 +-
 ...ipe-gazetteer-ae-configurable-resource.xml |  4 +-
 .../desc/jcore-lingpipe-gazetteer-ae.xml      |  2 +-
 .../ApproxGazetteerAnnotatorTest.xml          |  2 +-
 .../resources/ExactGazetteerAnnotatorTest.xml |  2 +-
 jcore-lingscope-ae/component.meta             |  2 +-
 jcore-lingscope-ae/pom.xml                    |  2 +-
 .../ae/lingscope/desc/jcore-lingscope-ae.xml  |  2 +-
 jcore-linnaeus-species-ae/component.meta      |  2 +-
 jcore-linnaeus-species-ae/pom.xml             |  2 +-
 .../ae/linnaeus/desc/jcore-linnaeus-ae.xml    |  2 +-
 jcore-mantra-xml-types/pom.xml                |  2 +-
 jcore-medxn-ae/component.meta                 |  2 +-
 jcore-medxn-ae/pom.xml                        |  2 +-
 .../jcore/ae/medxn/desc/MedNormAE.xml         |  2 +-
 .../desc/jcore-medxn-ae-attributes-german.xml |  2 +-
 .../desc/jcore-medxn-ae-extractor-german.xml  |  2 +-
 jcore-mmax2-reader/component.meta             |  4 +-
 jcore-mmax2-reader/pom.xml                    |  6 +-
 .../cr/mmax2/desc/jcore-mmax2-reader.xml      | 13 ++--
 jcore-msdoc-reader/component.meta             |  2 +-
 jcore-msdoc-reader/pom.xml                    |  2 +-
 .../reader/msdoc/desc/jcore-msdoc-reader.xml  |  2 +-
 jcore-mstparser-ae/component.meta             |  2 +-
 jcore-mstparser-ae/pom.xml                    |  2 +-
 .../ae/mstparser/desc/jcore-mstparser.xml     |  2 +-
 .../desc/MSTParserDescriptorTest.xml          |  2 +-
 jcore-muc7-reader/component.meta              |  2 +-
 jcore-muc7-reader/pom.xml                     |  2 +-
 .../reader/muc7/desc/jcore-muc7-reader.xml    |  2 +-
 .../reader/muc7/desc/jcore-muc7-reader.xml    |  2 +-
 jcore-mutationfinder-ae/component.meta        |  2 +-
 jcore-mutationfinder-ae/pom.xml               |  2 +-
 .../desc/jcore-mutationfinder-ae.xml          |  2 +-
 jcore-neo4j-relations-consumer/component.meta |  2 +-
 jcore-neo4j-relations-consumer/pom.xml        |  8 +--
 .../desc/jcore-neo4j-relations-consumer.xml   | 16 ++---
 jcore-nlmgene-reader/component.meta           |  2 +-
 jcore-nlmgene-reader/pom.xml                  |  4 +-
 .../nlmgene/desc/jcore-nlmgene-reader.xml     | 19 +++---
 jcore-opennlp-chunk-ae/component.meta         |  2 +-
 jcore-opennlp-chunk-ae/pom.xml                |  2 +-
 .../src/test/resources/ChunkAnnotatorTest.xml |  2 +-
 .../ChunkAnnotatorTestDefaultMappings.xml     |  2 +-
 jcore-opennlp-parser-ae/component.meta        |  2 +-
 jcore-opennlp-parser-ae/pom.xml               |  2 +-
 .../desc/jcore-opennlpparser.xml              |  2 +-
 .../desc/jcore-opennlpparser-test.xml         |  2 +-
 jcore-opennlp-postag-ae/component.meta        |  2 +-
 jcore-opennlp-postag-ae/pom.xml               |  2 +-
 .../desc/jcore-opennlppostag.xml              |  2 +-
 .../test/resources/PosTagAnnotatorTest.xml    |  2 +-
 jcore-opennlp-sentence-ae/component.meta      |  2 +-
 jcore-opennlp-sentence-ae/pom.xml             |  2 +-
 .../test/resources/SentenceAnnotatorTest.xml  |  2 +-
 jcore-opennlp-token-ae/component.meta         |  2 +-
 .../desc/TokenAnnotator.xml                   |  2 +-
 jcore-opennlp-token-ae/pom.xml                |  2 +-
 .../src/test/resources/TokenAnnotatorTest.xml |  2 +-
 jcore-pmc-db-reader/component.meta            |  6 +-
 jcore-pmc-db-reader/pom.xml                   |  6 +-
 .../desc/jcore-pmc-db-multiplier-reader.xml   |  2 +-
 .../pmc/desc/jcore-pmc-db-multiplier.xml      |  7 ++-
 jcore-pmc-reader/component.meta               |  2 +-
 jcore-pmc-reader/pom.xml                      |  2 +-
 .../pmc/desc/jcore-pmc-multiplier.xml         |  2 +-
 .../pmc/desc/jcore-pmc-multiplier-reader.xml  |  2 +-
 .../reader/pmc/desc/jcore-pmc-reader.xml      |  2 +-
 jcore-ppd-writer/component.meta               |  2 +-
 jcore-ppd-writer/pom.xml                      |  2 +-
 .../consumer/ppd/desc/jcore-ppd-writer.xml    |  2 +-
 jcore-pubtator-reader/component.meta          |  2 +-
 jcore-pubtator-reader/pom.xml                 |  2 +-
 .../pubtator/desc/jcore-pubtator-reader.xml   |  2 +-
 jcore-stanford-lemmatizer-ae/component.meta   |  2 +-
 jcore-stanford-lemmatizer-ae/pom.xml          |  2 +-
 .../lemma/desc/jcore-stanford-lemmatizer.xml  |  2 +-
 .../desc/jcore-stanford-lemmatizer-ae.xml     |  2 +-
 jcore-topic-indexing-ae/component.meta        |  2 +-
 jcore-topic-indexing-ae/pom.xml               |  4 +-
 .../desc/jcore-topic-indexing-ae.xml          |  2 +-
 jcore-topics-writer/component.meta            |  2 +-
 jcore-topics-writer/pom.xml                   |  2 +-
 .../topics/desc/jcore-topics-writer.xml       |  2 +-
 jcore-txt-consumer/component.meta             |  2 +-
 jcore-txt-consumer/pom.xml                    |  2 +-
 .../consumer/txt/desc/jcore-txt-consumer.xml  |  2 +-
 jcore-types/pom.xml                           |  2 +-
 .../jcore-dbtable-multiplier-types.xml        |  2 +-
 .../jcore-uri-multiplier-types.xml            |  2 +-
 .../types/extensions/jcore-ace-types.xml      |  2 +-
 .../jcore-document-meta-extension-types.xml   |  2 +-
 .../types/extensions/jcore-dta-types.xml      |  2 +-
 .../extensions/jcore-evaluation-types.xml     |  2 +-
 .../types/extensions/jcore-mantra-types.xml   |  2 +-
 .../types/extensions/jcore-medical-types.xml  |  2 +-
 .../types/extensions/jcore-mmax-types.xml     |  2 +-
 .../types/extensions/jcore-muc7-types.xml     |  2 +-
 .../extensions/jcore-semantics-ace-types.xml  |  2 +-
 .../jcore-semantics-bootstrep-types.xml       |  2 +-
 ...core-semantics-mention-extension-types.xml |  2 +-
 .../jcore-semantics-stemnet-types.xml         |  2 +-
 .../extensions/jcore-wikipedia-types.xml      |  2 +-
 .../jcore/types/jcore-affect-types.xml        |  2 +-
 .../julielab/jcore/types/jcore-all-types.xml  |  2 +-
 .../jcore/types/jcore-basic-types.xml         |  2 +-
 .../jcore/types/jcore-casflow-types.xml       |  2 +-
 .../jcore/types/jcore-discourse-types.xml     |  2 +-
 ...core-document-meta-clinicaltrial-types.xml |  2 +-
 .../jcore-document-meta-pubmed-types.xml      |  2 +-
 .../jcore/types/jcore-document-meta-types.xml |  2 +-
 ...document-structure-clinicaltrial-types.xml |  2 +-
 .../jcore-document-structure-pubmed-types.xml |  2 +-
 .../types/jcore-document-structure-types.xml  |  2 +-
 .../jcore/types/jcore-morpho-syntax-types.xml |  2 +-
 .../types/jcore-semantics-biology-types.xml   |  2 +-
 .../types/jcore-semantics-concept-types.xml   |  2 +-
 .../types/jcore-semantics-mention-types.xml   |  2 +-
 .../priorities/jcore-type-priorities.xml      |  2 +-
 jcore-utilities/pom.xml                       |  2 +-
 .../src/test/resources/AETestDescriptor.xml   |  2 +-
 jcore-xmi-db-reader/component.meta            |  2 +-
 jcore-xmi-db-reader/pom.xml                   |  8 +--
 .../desc/jcore-xmi-db-multiplier-reader.xml   |  2 +-
 .../xmi/desc/jcore-xmi-db-multiplier.xml      |  9 +--
 .../reader/xmi/desc/jcore-xmi-db-reader.xml   |  4 +-
 jcore-xmi-db-writer/component.meta            |  2 +-
 jcore-xmi-db-writer/pom.xml                   |  4 +-
 .../consumer/xmi/desc/jcore-xmi-db-writer.xml |  4 +-
 jcore-xmi-reader/component.meta               |  2 +-
 jcore-xmi-reader/pom.xml                      |  2 +-
 .../reader/xmi/desc/jcore-xmi-reader.xml      |  2 +-
 jcore-xmi-writer/component.meta               |  2 +-
 jcore-xmi-writer/pom.xml                      |  2 +-
 .../consumer/xmi/desc/jcore-xmi-writer.xml    |  2 +-
 .../jcore/consumer/xmi/CasToXmiConsumer.xml   |  2 +-
 jcore-xml-db-reader/component.meta            |  2 +-
 jcore-xml-db-reader/pom.xml                   |  6 +-
 .../reader/xml/desc/jcore-xml-db-reader.xml   |  2 +-
 jcore-xml-mapper/pom.xml                      |  2 +-
 .../test/resources/XMLReaderDescriptor.xml    |  2 +-
 ...Descriptor_medline_Unicode_outside_BMP.xml |  2 +-
 ...aderDescriptor_medline_missingInputDir.xml |  2 +-
 ...XMLReaderDescriptor_medline_singleFile.xml |  2 +-
 ...MLReaderDescriptor_medline_singleFile2.xml |  2 +-
 jcore-xml-reader/component.meta               |  2 +-
 jcore-xml-reader/pom.xml                      |  4 +-
 .../reader/xml/desc/XMLMultiplierReader.xml   |  2 +-
 ...edlineReaderDescriptor_missingInputDir.xml |  2 +-
 .../test/resources/PubmedXMLMultiplier.xml    |  2 +-
 .../test/resources/XMLMultiplierReader.xml    |  2 +-
 jedis-parent/pom.xml                          |  2 +-
 pom.xml                                       |  2 +-
 331 files changed, 617 insertions(+), 602 deletions(-)

diff --git a/jcore-ace-reader/component.meta b/jcore-ace-reader/component.meta
index 0ed4db39b..ac1392e63 100644
--- a/jcore-ace-reader/component.meta
+++ b/jcore-ace-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-ace-reader",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe ACE Reader"
 }
diff --git a/jcore-ace-reader/pom.xml b/jcore-ace-reader/pom.xml
index b983bbb5a..c4fa13273 100644
--- a/jcore-ace-reader/pom.xml
+++ b/jcore-ace-reader/pom.xml
@@ -13,7 +13,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-ace-reader/src/main/resources/de/julielab/jcore/reader/ace/desc/jcore-ace-reader.xml b/jcore-ace-reader/src/main/resources/de/julielab/jcore/reader/ace/desc/jcore-ace-reader.xml
index a1eae5b5b..576236d5c 100644
--- a/jcore-ace-reader/src/main/resources/de/julielab/jcore/reader/ace/desc/jcore-ace-reader.xml
+++ b/jcore-ace-reader/src/main/resources/de/julielab/jcore/reader/ace/desc/jcore-ace-reader.xml
@@ -5,7 +5,7 @@
     <processingResourceMetaData>
         <name>AceReader</name>
         <description>Descriptor automatically generated by uimaFIT</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
         <configurationParameters>
             <configurationParameter>
diff --git a/jcore-acronym-ae/component.meta b/jcore-acronym-ae/component.meta
index 5e9a4da4c..b7c013133 100644
--- a/jcore-acronym-ae/component.meta
+++ b/jcore-acronym-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-acronym-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Acronym Tagger"
 }
diff --git a/jcore-acronym-ae/pom.xml b/jcore-acronym-ae/pom.xml
index b4fc4640d..b5e1c0d89 100644
--- a/jcore-acronym-ae/pom.xml
+++ b/jcore-acronym-ae/pom.xml
@@ -14,7 +14,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-acronym-ae/src/main/resources/de/julielab/jcore/ae/acronymtagger/desc/jcore-acronym-ae.xml b/jcore-acronym-ae/src/main/resources/de/julielab/jcore/ae/acronymtagger/desc/jcore-acronym-ae.xml
index 2ca072f45..df6b3d9cc 100755
--- a/jcore-acronym-ae/src/main/resources/de/julielab/jcore/ae/acronymtagger/desc/jcore-acronym-ae.xml
+++ b/jcore-acronym-ae/src/main/resources/de/julielab/jcore/ae/acronymtagger/desc/jcore-acronym-ae.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe AcronymAnnotator</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-acronym-ae/src/test/resources/de/julielab/jcore/ae/acronymtagger/desc/JulesToolsAEDescriptor.xml b/jcore-acronym-ae/src/test/resources/de/julielab/jcore/ae/acronymtagger/desc/JulesToolsAEDescriptor.xml
index 1e2c24294..05179e6b2 100644
--- a/jcore-acronym-ae/src/test/resources/de/julielab/jcore/ae/acronymtagger/desc/JulesToolsAEDescriptor.xml
+++ b/jcore-acronym-ae/src/test/resources/de/julielab/jcore/ae/acronymtagger/desc/JulesToolsAEDescriptor.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JulesToolsDescriptor</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor />
     <configurationParameters />
     <configurationParameterSettings />
diff --git a/jcore-acronym-ae/src/test/resources/de/julielab/jcore/ae/acronymtagger/desc/jcore-acronymtagger-test.xml b/jcore-acronym-ae/src/test/resources/de/julielab/jcore/ae/acronymtagger/desc/jcore-acronymtagger-test.xml
index 60c613aaf..f9a981135 100755
--- a/jcore-acronym-ae/src/test/resources/de/julielab/jcore/ae/acronymtagger/desc/jcore-acronymtagger-test.xml
+++ b/jcore-acronym-ae/src/test/resources/de/julielab/jcore/ae/acronymtagger/desc/jcore-acronymtagger-test.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe AcronymAnnotator</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-acronym-ae/src/test/resources/de/julielab/jcore/ae/acronymtagger/types/StemNetSemanticsTypeSystem.xml b/jcore-acronym-ae/src/test/resources/de/julielab/jcore/ae/acronymtagger/types/StemNetSemanticsTypeSystem.xml
index 5b37032f1..d918bfcba 100644
--- a/jcore-acronym-ae/src/test/resources/de/julielab/jcore/ae/acronymtagger/types/StemNetSemanticsTypeSystem.xml
+++ b/jcore-acronym-ae/src/test/resources/de/julielab/jcore/ae/acronymtagger/types/StemNetSemanticsTypeSystem.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
 <name>StemNetSemanticsTypeSystem</name>
 <description />
-<version>2.6.0-SNAPSHOT</version>
+<version>2.6.0</version>
         <vendor>http://www.julielab.de</vendor>
 <imports>
 <import location="JulieTypeSystem.xml" />
diff --git a/jcore-acronym-writer/component.meta b/jcore-acronym-writer/component.meta
index 6869b7664..7cdcd3451 100644
--- a/jcore-acronym-writer/component.meta
+++ b/jcore-acronym-writer/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-acronym-writer",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Acronym Writer"
 }
diff --git a/jcore-acronym-writer/pom.xml b/jcore-acronym-writer/pom.xml
index 7924aaae0..287448025 100644
--- a/jcore-acronym-writer/pom.xml
+++ b/jcore-acronym-writer/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <dependencies>
@@ -38,7 +38,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-types</artifactId>
-            <version>${jcore-version}</version>
+            <version>${jcore-types-version}</version>
         </dependency>
         <dependency>
             <groupId>org.junit.jupiter</groupId>
diff --git a/jcore-acronym-writer/src/main/resources/de/julielab/jcore/consumer/acronyms/desc/jcore-acronym-writer.xml b/jcore-acronym-writer/src/main/resources/de/julielab/jcore/consumer/acronyms/desc/jcore-acronym-writer.xml
index 26840e7c6..31ce7af9a 100644
--- a/jcore-acronym-writer/src/main/resources/de/julielab/jcore/consumer/acronyms/desc/jcore-acronym-writer.xml
+++ b/jcore-acronym-writer/src/main/resources/de/julielab/jcore/consumer/acronyms/desc/jcore-acronym-writer.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe Acronym Writer</name>
         <description>Writes acronym annotation to a text file.</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <configurationParameters>
             <configurationParameter>
                 <name>OutputFile</name>
diff --git a/jcore-annotation-adder-ae/component.meta b/jcore-annotation-adder-ae/component.meta
index 3978e1017..fe12dbf50 100644
--- a/jcore-annotation-adder-ae/component.meta
+++ b/jcore-annotation-adder-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-annotation-adder-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Annotation Adder"
 }
diff --git a/jcore-annotation-adder-ae/pom.xml b/jcore-annotation-adder-ae/pom.xml
index 88318f459..7cdc4c465 100644
--- a/jcore-annotation-adder-ae/pom.xml
+++ b/jcore-annotation-adder-ae/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-annotation-adder-ae/src/main/resources/de/julielab/jcore/ae/annotationadder/desc/jcore-annotation-adder-ae.xml b/jcore-annotation-adder-ae/src/main/resources/de/julielab/jcore/ae/annotationadder/desc/jcore-annotation-adder-ae.xml
index 20ea1f3d1..585e4eeb7 100644
--- a/jcore-annotation-adder-ae/src/main/resources/de/julielab/jcore/ae/annotationadder/desc/jcore-annotation-adder-ae.xml
+++ b/jcore-annotation-adder-ae/src/main/resources/de/julielab/jcore/ae/annotationadder/desc/jcore-annotation-adder-ae.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe Annotation Adder</name>
         <description>This component helps to import annotations made on the exact CAS document text by an external process back into the CAS. To this end, the component is prepared to read several data formats. Currently, simple offset-based annotations are supported with configurable UIMA types. The component supports character and token based offsets.</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <configurationParameters>
             <configurationParameter>
                 <name>OffsetMode</name>
diff --git a/jcore-annotation-removal-ae/component.meta b/jcore-annotation-removal-ae/component.meta
index 0666d9f0f..04e9d8c1e 100644
--- a/jcore-annotation-removal-ae/component.meta
+++ b/jcore-annotation-removal-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-annotation-removal-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Annotation Removal AE"
 }
diff --git a/jcore-annotation-removal-ae/pom.xml b/jcore-annotation-removal-ae/pom.xml
index f5152245a..e434a54b2 100644
--- a/jcore-annotation-removal-ae/pom.xml
+++ b/jcore-annotation-removal-ae/pom.xml
@@ -10,10 +10,10 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
-    <version>2.6.0-SNAPSHOT</version>
+    
 
     <dependencies>
         <dependency>
diff --git a/jcore-annotation-removal-ae/src/main/resources/de/julielab/jcore/ae/annotationremoval/desc/jcore-annotation-removal-ae.xml b/jcore-annotation-removal-ae/src/main/resources/de/julielab/jcore/ae/annotationremoval/desc/jcore-annotation-removal-ae.xml
index 141f3ef1d..3cebc1704 100644
--- a/jcore-annotation-removal-ae/src/main/resources/de/julielab/jcore/ae/annotationremoval/desc/jcore-annotation-removal-ae.xml
+++ b/jcore-annotation-removal-ae/src/main/resources/de/julielab/jcore/ae/annotationremoval/desc/jcore-annotation-removal-ae.xml
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="UTF-8"?>
+<?xml version='1.0' encoding='UTF-8'?>
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
   <primitive>true</primitive>
@@ -6,7 +6,8 @@
   <analysisEngineMetaData>
     <name>JCoRe Annotation Removal AE</name>
     <description>Removes annotations from the CAS that belong to one of the types specified as a parameter value in the descriptor.</description>
-    <vendor>JULIE Lab Jena, Germany</vendor>
+    <version>2.6.0</version>
+        <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
         <name>AnnotationTypes</name>
@@ -16,14 +17,14 @@
         <mandatory>true</mandatory>
       </configurationParameter>
     </configurationParameters>
-    <configurationParameterSettings/>
+    <configurationParameterSettings />
     <typeSystemDescription>
       <imports>
-        <import name="de.julielab.jcore.types.jcore-all-types"/>
+        <import name="de.julielab.jcore.types.jcore-all-types" />
       </imports>
     </typeSystemDescription>
-    <fsIndexCollection/>
-    <capabilities/>
+    <fsIndexCollection />
+    <capabilities />
     <operationalProperties>
       <modifiesCas>true</modifiesCas>
       <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
diff --git a/jcore-banner-ae/component.meta b/jcore-banner-ae/component.meta
index 2a01d6ff1..4ba9b7c9e 100644
--- a/jcore-banner-ae/component.meta
+++ b/jcore-banner-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-banner-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Banner"
 }
diff --git a/jcore-banner-ae/pom.xml b/jcore-banner-ae/pom.xml
index d155af6b0..a4f8e8d32 100644
--- a/jcore-banner-ae/pom.xml
+++ b/jcore-banner-ae/pom.xml
@@ -74,7 +74,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <relativePath>../pom.xml</relativePath>
     </parent>
     <licenses>
diff --git a/jcore-banner-ae/src/main/resources/de/julielab/jcore/ae/banner/desc/jcore-banner-ae.xml b/jcore-banner-ae/src/main/resources/de/julielab/jcore/ae/banner/desc/jcore-banner-ae.xml
index 1b6be9026..13f6c7d30 100644
--- a/jcore-banner-ae/src/main/resources/de/julielab/jcore/ae/banner/desc/jcore-banner-ae.xml
+++ b/jcore-banner-ae/src/main/resources/de/julielab/jcore/ae/banner/desc/jcore-banner-ae.xml
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="UTF-8"?>
+<?xml version='1.0' encoding='UTF-8'?>
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
   <primitive>true</primitive>
@@ -6,8 +6,8 @@
   <analysisEngineMetaData>
     <name>de.julielab.jcore.ae.banner.BANNERAnnotator</name>
     <description>Descriptor automatically generated by uimaFIT</description>
-    <version>unknown</version>
-    <vendor>de.julielab.jcore.ae.banner</vendor>
+    <version>2.6.0</version>
+        <vendor>de.julielab.jcore.ae.banner</vendor>
     <configurationParameters>
       <configurationParameter>
         <name>ConfigFile</name>
@@ -31,7 +31,7 @@
         <mandatory>false</mandatory>
       </configurationParameter>
     </configurationParameters>
-    <configurationParameterSettings/>
+    <configurationParameterSettings />
     <typeSystemDescription>
       <imports>
         <import name="de.julielab.jcore.types.jcore-document-structure-types" />
@@ -40,7 +40,7 @@
         <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types" />
       </imports>
     </typeSystemDescription>
-    <fsIndexCollection/>
+    <fsIndexCollection />
     <capabilities>
       <capability>
         <inputs>
@@ -49,7 +49,7 @@
         <outputs>
           <type>de.julielab.jcore.types.Gene</type>
         </outputs>
-        <languagesSupported/>
+        <languagesSupported />
       </capability>
     </capabilities>
     <operationalProperties>
diff --git a/jcore-banner-ae/src/main/resources/desc/BANNERAE.xml b/jcore-banner-ae/src/main/resources/desc/BANNERAE.xml
index 05b35368f..fb2981574 100644
--- a/jcore-banner-ae/src/main/resources/desc/BANNERAE.xml
+++ b/jcore-banner-ae/src/main/resources/desc/BANNERAE.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>BANNERAE</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-banner-ae/src/main/resources/desc/bannerTS.xml b/jcore-banner-ae/src/main/resources/desc/bannerTS.xml
index 70aaf0715..a78fd02a0 100644
--- a/jcore-banner-ae/src/main/resources/desc/bannerTS.xml
+++ b/jcore-banner-ae/src/main/resources/desc/bannerTS.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>bannerTS</name>
   <description>basic typesystem started by sid</description>
-  <version>2.6.0-SNAPSHOT</version>
+  <version>2.6.0</version>
         <vendor />
   <types>
     <typeDescription>
diff --git a/jcore-bc2gm-reader/component.meta b/jcore-bc2gm-reader/component.meta
index 3b60c95ed..49b7f8c1a 100644
--- a/jcore-bc2gm-reader/component.meta
+++ b/jcore-bc2gm-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-bc2gm-reader",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe BioCreative II Gene Mention Reader"
 }
diff --git a/jcore-bc2gm-reader/pom.xml b/jcore-bc2gm-reader/pom.xml
index f8579d215..9cc023682 100644
--- a/jcore-bc2gm-reader/pom.xml
+++ b/jcore-bc2gm-reader/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-bc2gm-reader/src/main/resources/de/julielab/jcore/reader/bc2gm/desc/jcore-bc2gm-reader.xml b/jcore-bc2gm-reader/src/main/resources/de/julielab/jcore/reader/bc2gm/desc/jcore-bc2gm-reader.xml
index b3b40d26c..7b932fbf9 100644
--- a/jcore-bc2gm-reader/src/main/resources/de/julielab/jcore/reader/bc2gm/desc/jcore-bc2gm-reader.xml
+++ b/jcore-bc2gm-reader/src/main/resources/de/julielab/jcore/reader/bc2gm/desc/jcore-bc2gm-reader.xml
@@ -5,7 +5,7 @@
     <processingResourceMetaData>
         <name>JCoRe BioCreative II Gene Mention reader</name>
         <description>This component reads gene annotated sentences in the BioCreative II Gene Mention challenge format. Each CAS will contain one annotated sentence.</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <configurationParameters>
             <configurationParameter>
                 <name>SentencesFile</name>
diff --git a/jcore-bc2gmformat-writer/component.meta b/jcore-bc2gmformat-writer/component.meta
index 2b7c90e41..ee98994c8 100644
--- a/jcore-bc2gmformat-writer/component.meta
+++ b/jcore-bc2gmformat-writer/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-bc2gmformat-writer",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe BioCreative II Gene Mention Format Writer"
 }
diff --git a/jcore-bc2gmformat-writer/pom.xml b/jcore-bc2gmformat-writer/pom.xml
index 75acd4004..2f531a820 100644
--- a/jcore-bc2gmformat-writer/pom.xml
+++ b/jcore-bc2gmformat-writer/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-bc2gmformat-writer/src/main/resources/de/julielab/jcore/consumer/bc2gmformat/desc/jcore-bc2gmformat-writer.xml b/jcore-bc2gmformat-writer/src/main/resources/de/julielab/jcore/consumer/bc2gmformat/desc/jcore-bc2gmformat-writer.xml
index 811375d76..0504d2b1b 100644
--- a/jcore-bc2gmformat-writer/src/main/resources/de/julielab/jcore/consumer/bc2gmformat/desc/jcore-bc2gmformat-writer.xml
+++ b/jcore-bc2gmformat-writer/src/main/resources/de/julielab/jcore/consumer/bc2gmformat/desc/jcore-bc2gmformat-writer.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe BioCreative II Gene Mention Format writer</name>
         <description>This component writes gene annotations in the CAS to the format employed by the BioCreative II Gene Mention challenge.</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <configurationParameters>
             <configurationParameter>
                 <name>OutputDirectory</name>
diff --git a/jcore-biolemmatizer-ae/component.meta b/jcore-biolemmatizer-ae/component.meta
index 2b698fcb5..4e79fc201 100644
--- a/jcore-biolemmatizer-ae/component.meta
+++ b/jcore-biolemmatizer-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-biolemmatizer-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe BioLemmatizer"
 }
diff --git a/jcore-biolemmatizer-ae/pom.xml b/jcore-biolemmatizer-ae/pom.xml
index cc11be3b1..62e6a6234 100644
--- a/jcore-biolemmatizer-ae/pom.xml
+++ b/jcore-biolemmatizer-ae/pom.xml
@@ -8,7 +8,7 @@
 	<parent>
 		<groupId>de.julielab</groupId>
 		<artifactId>jcore-base</artifactId>
-		<version>2.6.0-SNAPSHOT</version>
+		<version>2.6.0</version>
 	</parent>
 
 	<dependencies>
diff --git a/jcore-biolemmatizer-ae/src/main/resources/de/julielab/jcore/ae/biolemmatizer/desc/jcore-biolemmatizer-ae.xml b/jcore-biolemmatizer-ae/src/main/resources/de/julielab/jcore/ae/biolemmatizer/desc/jcore-biolemmatizer-ae.xml
index 9fe2de8b8..9acb95f57 100644
--- a/jcore-biolemmatizer-ae/src/main/resources/de/julielab/jcore/ae/biolemmatizer/desc/jcore-biolemmatizer-ae.xml
+++ b/jcore-biolemmatizer-ae/src/main/resources/de/julielab/jcore/ae/biolemmatizer/desc/jcore-biolemmatizer-ae.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>BioLemmatizer</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters />
     <configurationParameterSettings />
diff --git a/jcore-bionlpformat-consumer/component.meta b/jcore-bionlpformat-consumer/component.meta
index 4071c4a18..e13edd578 100644
--- a/jcore-bionlpformat-consumer/component.meta
+++ b/jcore-bionlpformat-consumer/component.meta
@@ -22,7 +22,7 @@
     "maven-artifact": {
         "artifactId": "jcore-bionlpformat-consumer",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe BioNLP Format Consumer"
 }
diff --git a/jcore-bionlpformat-consumer/pom.xml b/jcore-bionlpformat-consumer/pom.xml
index a2cdae928..676993028 100644
--- a/jcore-bionlpformat-consumer/pom.xml
+++ b/jcore-bionlpformat-consumer/pom.xml
@@ -6,7 +6,7 @@
     <parent>
         <artifactId>jcore-base</artifactId>
         <groupId>de.julielab</groupId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     <dependencies>
         <dependency>
diff --git a/jcore-bionlpformat-consumer/src/main/resources/de/julielab/jcore/consumer/bionlpformat/desc/jcore-bionlpformat-consumer-biomedical-sharedtask.xml b/jcore-bionlpformat-consumer/src/main/resources/de/julielab/jcore/consumer/bionlpformat/desc/jcore-bionlpformat-consumer-biomedical-sharedtask.xml
index 3d358227d..5b908ba63 100644
--- a/jcore-bionlpformat-consumer/src/main/resources/de/julielab/jcore/consumer/bionlpformat/desc/jcore-bionlpformat-consumer-biomedical-sharedtask.xml
+++ b/jcore-bionlpformat-consumer/src/main/resources/de/julielab/jcore/consumer/bionlpformat/desc/jcore-bionlpformat-consumer-biomedical-sharedtask.xml
@@ -5,7 +5,7 @@
   <analysisEngineMetaData>
     <name>JCoRe BioNLP Event Consumer</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-bionlpformat-consumer/src/main/resources/de/julielab/jcore/consumer/bionlpformat/desc/jcore-bionlpformat-consumer-medical.xml b/jcore-bionlpformat-consumer/src/main/resources/de/julielab/jcore/consumer/bionlpformat/desc/jcore-bionlpformat-consumer-medical.xml
index 547769316..0cb5ea0e1 100644
--- a/jcore-bionlpformat-consumer/src/main/resources/de/julielab/jcore/consumer/bionlpformat/desc/jcore-bionlpformat-consumer-medical.xml
+++ b/jcore-bionlpformat-consumer/src/main/resources/de/julielab/jcore/consumer/bionlpformat/desc/jcore-bionlpformat-consumer-medical.xml
@@ -5,7 +5,7 @@
   <analysisEngineMetaData>
     <name>JCoRe BioNLP Format Event Consumer (Medical)</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-bionlpformat-consumer/src/main/resources/de/julielab/jcore/consumer/bionlpformat/desc/jcore-bionlpformat-consumer-segment.xml b/jcore-bionlpformat-consumer/src/main/resources/de/julielab/jcore/consumer/bionlpformat/desc/jcore-bionlpformat-consumer-segment.xml
index be36250a4..57287e038 100644
--- a/jcore-bionlpformat-consumer/src/main/resources/de/julielab/jcore/consumer/bionlpformat/desc/jcore-bionlpformat-consumer-segment.xml
+++ b/jcore-bionlpformat-consumer/src/main/resources/de/julielab/jcore/consumer/bionlpformat/desc/jcore-bionlpformat-consumer-segment.xml
@@ -7,7 +7,7 @@
     <analysisEngineMetaData>
         <name>JCoRe BioNLP Format Segment Consumer</name>
         <description />
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <vendor />
         <configurationParameters>
             <configurationParameter>
diff --git a/jcore-bionlpformat-consumer/src/test/resources/types/jcore-all-types.xml b/jcore-bionlpformat-consumer/src/test/resources/types/jcore-all-types.xml
index 7c320da41..76d19c9c8 100644
--- a/jcore-bionlpformat-consumer/src/test/resources/types/jcore-all-types.xml
+++ b/jcore-bionlpformat-consumer/src/test/resources/types/jcore-all-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>JCoRe All Types</name>
   <description>This is just a convenience file, assembling all JCoRe types</description>
-  <version>2.6.0-SNAPSHOT</version>
+  <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <imports>
     <import name="de.julielab.jcore.types.jcore-basic-types" />
diff --git a/jcore-bionlpformat-consumer/src/test/resources/types/jcore-semantics-biology-types.xml b/jcore-bionlpformat-consumer/src/test/resources/types/jcore-semantics-biology-types.xml
index c01c57fe9..a525162fe 100644
--- a/jcore-bionlpformat-consumer/src/test/resources/types/jcore-semantics-biology-types.xml
+++ b/jcore-bionlpformat-consumer/src/test/resources/types/jcore-semantics-biology-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>JCoRe Semantics Biology Types</name>
   <description>The type system contains types of the biomedical domain.</description>
-  <version>2.6.0-SNAPSHOT</version>
+  <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <imports>
     <import name="de.julielab.jcore.types.jcore-semantics-mention-types" />
diff --git a/jcore-bionlpformat-reader/component.meta b/jcore-bionlpformat-reader/component.meta
index 229346ad7..60e877ec5 100644
--- a/jcore-bionlpformat-reader/component.meta
+++ b/jcore-bionlpformat-reader/component.meta
@@ -22,7 +22,7 @@
     "maven-artifact": {
         "artifactId": "jcore-bionlpformat-reader",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe BioNLP Format Reader"
 }
diff --git a/jcore-bionlpformat-reader/pom.xml b/jcore-bionlpformat-reader/pom.xml
index 94aa1584f..1c966f9e8 100644
--- a/jcore-bionlpformat-reader/pom.xml
+++ b/jcore-bionlpformat-reader/pom.xml
@@ -6,7 +6,7 @@
     <parent>
         <artifactId>jcore-base</artifactId>
         <groupId>de.julielab</groupId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-bionlpformat-reader/src/main/resources/de/julielab/jcore/reader/bionlpformat/desc/jcore-bionlpformat-reader-biomedical-sharedtask.xml b/jcore-bionlpformat-reader/src/main/resources/de/julielab/jcore/reader/bionlpformat/desc/jcore-bionlpformat-reader-biomedical-sharedtask.xml
index 0ba9c91cf..66a5945ca 100644
--- a/jcore-bionlpformat-reader/src/main/resources/de/julielab/jcore/reader/bionlpformat/desc/jcore-bionlpformat-reader-biomedical-sharedtask.xml
+++ b/jcore-bionlpformat-reader/src/main/resources/de/julielab/jcore/reader/bionlpformat/desc/jcore-bionlpformat-reader-biomedical-sharedtask.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>JCoRe BioNLP Event Reader</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-bionlpformat-reader/src/main/resources/de/julielab/jcore/reader/bionlpformat/desc/jcore-bionlpformat-reader-medical.xml b/jcore-bionlpformat-reader/src/main/resources/de/julielab/jcore/reader/bionlpformat/desc/jcore-bionlpformat-reader-medical.xml
index 810dfac8c..602240c4e 100644
--- a/jcore-bionlpformat-reader/src/main/resources/de/julielab/jcore/reader/bionlpformat/desc/jcore-bionlpformat-reader-medical.xml
+++ b/jcore-bionlpformat-reader/src/main/resources/de/julielab/jcore/reader/bionlpformat/desc/jcore-bionlpformat-reader-medical.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>BioNLP Format Reader Medical</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-bionlpformat-reader/src/main/resources/de/julielab/jcore/reader/bionlpformat/desc/jcore-bionlpformat-reader-segment.xml b/jcore-bionlpformat-reader/src/main/resources/de/julielab/jcore/reader/bionlpformat/desc/jcore-bionlpformat-reader-segment.xml
index 1f4944403..7ed45b45a 100644
--- a/jcore-bionlpformat-reader/src/main/resources/de/julielab/jcore/reader/bionlpformat/desc/jcore-bionlpformat-reader-segment.xml
+++ b/jcore-bionlpformat-reader/src/main/resources/de/julielab/jcore/reader/bionlpformat/desc/jcore-bionlpformat-reader-segment.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>BioNLP Format Reader Segment</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-bionlpformat-reader/src/test/resources/de/julielab/jcore/reader/bionlpformat/desc/EventReaderTest.xml b/jcore-bionlpformat-reader/src/test/resources/de/julielab/jcore/reader/bionlpformat/desc/EventReaderTest.xml
index 3813fdc7d..33b41c2c8 100644
--- a/jcore-bionlpformat-reader/src/test/resources/de/julielab/jcore/reader/bionlpformat/desc/EventReaderTest.xml
+++ b/jcore-bionlpformat-reader/src/test/resources/de/julielab/jcore/reader/bionlpformat/desc/EventReaderTest.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>EventReader</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-biosem-ae/component.meta b/jcore-biosem-ae/component.meta
index efff383f6..08c8a1bba 100644
--- a/jcore-biosem-ae/component.meta
+++ b/jcore-biosem-ae/component.meta
@@ -9,7 +9,7 @@
     "maven-artifact": {
         "artifactId": "jcore-biosem-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe BioSem Event Annotator"
 }
diff --git a/jcore-biosem-ae/pom.xml b/jcore-biosem-ae/pom.xml
index 5e86a75ff..7a667db9b 100644
--- a/jcore-biosem-ae/pom.xml
+++ b/jcore-biosem-ae/pom.xml
@@ -5,7 +5,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     <artifactId>jcore-biosem-ae</artifactId>
     <name>JCoRe BioSem Event Annotator</name>
@@ -32,13 +32,13 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-bionlpformat-reader</artifactId>
-            <version>2.6.0-SNAPSHOT</version>
+            <version>2.6.0</version>
             <scope>test</scope>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>biosem-event-extractor</artifactId>
-            <version>1.1.8-SNAPSHOT</version>
+            <version>1.1.7</version>
             <exclusions>
                 <exclusion>
                     <groupId>commons-cli</groupId>
@@ -54,7 +54,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-bionlpformat-consumer</artifactId>
-            <version>2.6.0-SNAPSHOT</version>
+            <version>2.6.0</version>
             <scope>test</scope>
         </dependency>
         <dependency>
diff --git a/jcore-conll-consumer/component.meta b/jcore-conll-consumer/component.meta
index 87ff59f38..2e94ca29d 100644
--- a/jcore-conll-consumer/component.meta
+++ b/jcore-conll-consumer/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-conll-consumer",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe CONLL Consumer"
 }
diff --git a/jcore-conll-consumer/pom.xml b/jcore-conll-consumer/pom.xml
index bbab62b95..cff35237d 100644
--- a/jcore-conll-consumer/pom.xml
+++ b/jcore-conll-consumer/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     <artifactId>jcore-conll-consumer</artifactId>
 
diff --git a/jcore-conll-consumer/src/main/resources/de/julielab/jcore/consumer/conll/desc/jcore-conll-consumer.xml b/jcore-conll-consumer/src/main/resources/de/julielab/jcore/consumer/conll/desc/jcore-conll-consumer.xml
index 854c345d4..288790254 100644
--- a/jcore-conll-consumer/src/main/resources/de/julielab/jcore/consumer/conll/desc/jcore-conll-consumer.xml
+++ b/jcore-conll-consumer/src/main/resources/de/julielab/jcore/consumer/conll/desc/jcore-conll-consumer.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe Conll Consumer</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-coordination-baseline-ae/component.meta b/jcore-coordination-baseline-ae/component.meta
index 361310479..622f6ef43 100644
--- a/jcore-coordination-baseline-ae/component.meta
+++ b/jcore-coordination-baseline-ae/component.meta
@@ -26,7 +26,7 @@
     "maven-artifact": {
         "artifactId": "jcore-coordination-baseline-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Coordination Tagger Baseline"
 }
diff --git a/jcore-coordination-baseline-ae/pom.xml b/jcore-coordination-baseline-ae/pom.xml
index 0b54fac37..64cc11f48 100644
--- a/jcore-coordination-baseline-ae/pom.xml
+++ b/jcore-coordination-baseline-ae/pom.xml
@@ -13,7 +13,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     <dependencies>
         <dependency>
diff --git a/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-conjunct.xml b/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-conjunct.xml
index 40bb374a8..4da7a5bbe 100644
--- a/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-conjunct.xml
+++ b/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-conjunct.xml
@@ -6,7 +6,7 @@
 <analysisEngineMetaData>
 <name>JCoRe ConjunctAnnotator</name>
 <description />
-<version>2.6.0-SNAPSHOT</version>
+<version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
 <configurationParameters />
 <configurationParameterSettings />
diff --git a/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-coordination.xml b/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-coordination.xml
index 55b4377d0..706c3df7e 100644
--- a/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-coordination.xml
+++ b/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-coordination.xml
@@ -6,7 +6,7 @@
 <analysisEngineMetaData>
 <name>JCoRe CoordinationAnnotator</name>
 <description />
-<version>2.6.0-SNAPSHOT</version>
+<version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
 <configurationParameters />
 <configurationParameterSettings />
diff --git a/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-eee.xml b/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-eee.xml
index 434bfd967..41bb97345 100644
--- a/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-eee.xml
+++ b/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-eee.xml
@@ -6,7 +6,7 @@
 <analysisEngineMetaData>
 <name>JCoRe EEEAnnotator</name>
 <description />
-<version>2.6.0-SNAPSHOT</version>
+<version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
 <configurationParameters />
 <configurationParameterSettings />
diff --git a/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-ellipsis.xml b/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-ellipsis.xml
index a508d4ab7..bb4bfb5c1 100644
--- a/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-ellipsis.xml
+++ b/jcore-coordination-baseline-ae/src/main/resources/de/julielab/jcore/ae/coordbaseline/desc/jcore-coordination-baseline-ae-ellipsis.xml
@@ -6,7 +6,7 @@
 <analysisEngineMetaData>
 <name>JCoRe EllipsisAnnotator</name>
 <description />
-<version>2.6.0-SNAPSHOT</version>
+<version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
 <configurationParameters />
 <configurationParameterSettings />
diff --git a/jcore-coordination-baseline-ae/src/test/resources/desc/ConjunctAnnotatorTest.xml b/jcore-coordination-baseline-ae/src/test/resources/desc/ConjunctAnnotatorTest.xml
index be03ff4bb..29f9e5d35 100644
--- a/jcore-coordination-baseline-ae/src/test/resources/desc/ConjunctAnnotatorTest.xml
+++ b/jcore-coordination-baseline-ae/src/test/resources/desc/ConjunctAnnotatorTest.xml
@@ -6,7 +6,7 @@
 <analysisEngineMetaData>
 <name>ConjunctAnnotator</name>
 <description />
-<version>2.6.0-SNAPSHOT</version>
+<version>2.6.0</version>
         <vendor />
 <configurationParameters />
 <configurationParameterSettings />
diff --git a/jcore-coordination-baseline-ae/src/test/resources/desc/CoordinationAnnotatorTest.xml b/jcore-coordination-baseline-ae/src/test/resources/desc/CoordinationAnnotatorTest.xml
index a256a83b6..c3245f36b 100644
--- a/jcore-coordination-baseline-ae/src/test/resources/desc/CoordinationAnnotatorTest.xml
+++ b/jcore-coordination-baseline-ae/src/test/resources/desc/CoordinationAnnotatorTest.xml
@@ -6,7 +6,7 @@
 <analysisEngineMetaData>
 <name>CoordinationAnnotator</name>
 <description />
-<version>2.6.0-SNAPSHOT</version>
+<version>2.6.0</version>
         <vendor />
 <configurationParameters />
 <configurationParameterSettings />
diff --git a/jcore-coordination-baseline-ae/src/test/resources/desc/EEEAnnotatorTest.xml b/jcore-coordination-baseline-ae/src/test/resources/desc/EEEAnnotatorTest.xml
index 4b470443d..4fa87c0a9 100644
--- a/jcore-coordination-baseline-ae/src/test/resources/desc/EEEAnnotatorTest.xml
+++ b/jcore-coordination-baseline-ae/src/test/resources/desc/EEEAnnotatorTest.xml
@@ -6,7 +6,7 @@
 <analysisEngineMetaData>
 <name>EEEAnnotator</name>
 <description />
-<version>2.6.0-SNAPSHOT</version>
+<version>2.6.0</version>
         <vendor />
 <configurationParameters />
 <configurationParameterSettings />
diff --git a/jcore-coordination-baseline-ae/src/test/resources/desc/EllipsisAnnotatorTest.xml b/jcore-coordination-baseline-ae/src/test/resources/desc/EllipsisAnnotatorTest.xml
index 422a96e06..85ce7558b 100644
--- a/jcore-coordination-baseline-ae/src/test/resources/desc/EllipsisAnnotatorTest.xml
+++ b/jcore-coordination-baseline-ae/src/test/resources/desc/EllipsisAnnotatorTest.xml
@@ -6,7 +6,7 @@
 <analysisEngineMetaData>
 <name>EllipsisAnnotator</name>
 <description />
-<version>2.6.0-SNAPSHOT</version>
+<version>2.6.0</version>
         <vendor />
 <configurationParameters />
 <configurationParameterSettings />
diff --git a/jcore-cord19-reader/component.meta b/jcore-cord19-reader/component.meta
index 66bd41580..fd42cd349 100644
--- a/jcore-cord19-reader/component.meta
+++ b/jcore-cord19-reader/component.meta
@@ -19,7 +19,7 @@
     "maven-artifact": {
         "artifactId": "jcore-cord19-reader",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe CORD-19 Reader"
 }
diff --git a/jcore-cord19-reader/pom.xml b/jcore-cord19-reader/pom.xml
index 89f6b0fb4..833b22db6 100644
--- a/jcore-cord19-reader/pom.xml
+++ b/jcore-cord19-reader/pom.xml
@@ -10,7 +10,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-cord19-reader/src/main/resources/de/julielab/jcore/reader/cord19/desc/jcore-cord19-multiplier-reader.xml b/jcore-cord19-reader/src/main/resources/de/julielab/jcore/reader/cord19/desc/jcore-cord19-multiplier-reader.xml
index fc54b7b2e..4cdd4203f 100644
--- a/jcore-cord19-reader/src/main/resources/de/julielab/jcore/reader/cord19/desc/jcore-cord19-multiplier-reader.xml
+++ b/jcore-cord19-reader/src/main/resources/de/julielab/jcore/reader/cord19/desc/jcore-cord19-multiplier-reader.xml
@@ -5,7 +5,7 @@
     <processingResourceMetaData>
         <name>JCoRe CORD-19 Multiplier Reader</name>
         <description>This component reads file paths to JSON files and the CORD-19 (https://pages.semanticscholar.org/coronavirus-research) meta data file to send them to CAS multipliers.</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
         <configurationParameters>
             <configurationParameter>
diff --git a/jcore-cord19-reader/src/main/resources/de/julielab/jcore/reader/cord19/desc/jcore-cord19-multiplier.xml b/jcore-cord19-reader/src/main/resources/de/julielab/jcore/reader/cord19/desc/jcore-cord19-multiplier.xml
index 812eeb5c6..c3da5e650 100644
--- a/jcore-cord19-reader/src/main/resources/de/julielab/jcore/reader/cord19/desc/jcore-cord19-multiplier.xml
+++ b/jcore-cord19-reader/src/main/resources/de/julielab/jcore/reader/cord19/desc/jcore-cord19-multiplier.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe CORD-19 CAS Multiplier</name>
         <description>This component reads the CORD-19 (https://pages.semanticscholar.org/coronavirus-research) JSON format into UIMA CAS instances.</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
         <configurationParameters />
         <configurationParameterSettings />
diff --git a/jcore-coreference-writer/component.meta b/jcore-coreference-writer/component.meta
index 77f18d497..bbfba5b64 100644
--- a/jcore-coreference-writer/component.meta
+++ b/jcore-coreference-writer/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-coreference-writer",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Coreference Writer"
 }
diff --git a/jcore-coreference-writer/pom.xml b/jcore-coreference-writer/pom.xml
index 1bafb6e13..ad4aac828 100644
--- a/jcore-coreference-writer/pom.xml
+++ b/jcore-coreference-writer/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <dependencies>
@@ -38,7 +38,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-types</artifactId>
-            <version>${jcore-version}</version>
+            <version>${jcore-types-version}</version>
         </dependency>
         <dependency>
             <groupId>org.junit.jupiter</groupId>
diff --git a/jcore-coreference-writer/src/main/resources/de/julielab/jcore/consumer/coreference/desc/jcore-coreference-writer.xml b/jcore-coreference-writer/src/main/resources/de/julielab/jcore/consumer/coreference/desc/jcore-coreference-writer.xml
index 855be5b78..b31bb30bb 100644
--- a/jcore-coreference-writer/src/main/resources/de/julielab/jcore/consumer/coreference/desc/jcore-coreference-writer.xml
+++ b/jcore-coreference-writer/src/main/resources/de/julielab/jcore/consumer/coreference/desc/jcore-coreference-writer.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe Coreference Writer</name>
         <description>Writes coreference annotation to a text file.</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <configurationParameters>
             <configurationParameter>
                 <name>OutputFile</name>
diff --git a/jcore-ct-reader/component.meta b/jcore-ct-reader/component.meta
index 309b82f92..6e0600b4f 100644
--- a/jcore-ct-reader/component.meta
+++ b/jcore-ct-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-ct-reader",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Clinical Trials Reader"
 }
diff --git a/jcore-ct-reader/pom.xml b/jcore-ct-reader/pom.xml
index 0630444f6..4ea1f5969 100644
--- a/jcore-ct-reader/pom.xml
+++ b/jcore-ct-reader/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-ct-reader/src/main/resources/de/julielab/jcore/reader/ct/desc/jcore-clinicaltrials-reader.xml b/jcore-ct-reader/src/main/resources/de/julielab/jcore/reader/ct/desc/jcore-clinicaltrials-reader.xml
index 33e4a0f03..b0eaa2ae4 100644
--- a/jcore-ct-reader/src/main/resources/de/julielab/jcore/reader/ct/desc/jcore-clinicaltrials-reader.xml
+++ b/jcore-ct-reader/src/main/resources/de/julielab/jcore/reader/ct/desc/jcore-clinicaltrials-reader.xml
@@ -5,7 +5,7 @@
     <processingResourceMetaData>
         <name>JCoRe Clinical Trials Reader</name>
         <description>This component reads the XML format provided by ClinicalTrials.gov. To this end, the JCoRe type system contains a number of types specifically created for this kind of document. Note that the CAS text created by this reader might be confusing without checking the corresponding annotations. This is due to the fact that the CT XML contains multiple enumerations which are not very well reflected in plain text. Also, enumerations with subitems, such as the outcomes, are not displayed in the expected groups of items. Instead, each item type is displayed separately. This could be changed, if necessary. Since all items are correctly annotated by their category, this might not even be an issue, depending on the downstream tasks.</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <configurationParameters>
             <configurationParameter>
                 <name>InputDirectory</name>
diff --git a/jcore-db-checkpoint-ae/component.meta b/jcore-db-checkpoint-ae/component.meta
index 958bc8f17..db83ca2a7 100644
--- a/jcore-db-checkpoint-ae/component.meta
+++ b/jcore-db-checkpoint-ae/component.meta
@@ -19,7 +19,7 @@
     "maven-artifact": {
         "artifactId": "jcore-db-checkpoint-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Database Checkpoint AE"
 }
diff --git a/jcore-db-checkpoint-ae/pom.xml b/jcore-db-checkpoint-ae/pom.xml
index f7ed71533..13fede4b9 100644
--- a/jcore-db-checkpoint-ae/pom.xml
+++ b/jcore-db-checkpoint-ae/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jedis-parent</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <relativePath>../jedis-parent</relativePath>
     </parent>
     
diff --git a/jcore-db-checkpoint-ae/src/main/resources/de/julielab/jcore/ae/checkpoint/desc/jcore-db-checkpoint-ae.xml b/jcore-db-checkpoint-ae/src/main/resources/de/julielab/jcore/ae/checkpoint/desc/jcore-db-checkpoint-ae.xml
index 8264367e1..6340c7355 100644
--- a/jcore-db-checkpoint-ae/src/main/resources/de/julielab/jcore/ae/checkpoint/desc/jcore-db-checkpoint-ae.xml
+++ b/jcore-db-checkpoint-ae/src/main/resources/de/julielab/jcore/ae/checkpoint/desc/jcore-db-checkpoint-ae.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe Database Checkpoint AE</name>
         <description>This component can be used when using a JCoRe database reader that reads from a CoStoSys/JeDIS subset. Enters the configured component name in the 'last component' column. Can also mark documents as being completely processed.</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <configurationParameters>
             <configurationParameter>
                 <name>CheckpointName</name>
diff --git a/jcore-db-checkpoint-ae/src/main/resources/de/julielab/jcore/ae/checkpoint/desc/jcore-db-checkpoint-consumer.xml b/jcore-db-checkpoint-ae/src/main/resources/de/julielab/jcore/ae/checkpoint/desc/jcore-db-checkpoint-consumer.xml
index 59b0bf054..be7df82ea 100644
--- a/jcore-db-checkpoint-ae/src/main/resources/de/julielab/jcore/ae/checkpoint/desc/jcore-db-checkpoint-consumer.xml
+++ b/jcore-db-checkpoint-ae/src/main/resources/de/julielab/jcore/ae/checkpoint/desc/jcore-db-checkpoint-consumer.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe Database Checkpoint Writer</name>
         <description>This component can be used when using a JCoRe database reader that reads from a CoStoSys/JeDIS subset. Enters the configured component name in the 'last component' column. Can also mark documents as being completely processed.</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <configurationParameters>
             <configurationParameter>
                 <name>CheckpointName</name>
diff --git a/jcore-db-reader/component.meta b/jcore-db-reader/component.meta
index 78b3ba1ad..1272e620f 100644
--- a/jcore-db-reader/component.meta
+++ b/jcore-db-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-db-reader",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Database Reader"
 }
diff --git a/jcore-db-reader/pom.xml b/jcore-db-reader/pom.xml
index fd3a657b1..577dca679 100644
--- a/jcore-db-reader/pom.xml
+++ b/jcore-db-reader/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <artifactId>jedis-parent</artifactId>
         <groupId>de.julielab</groupId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <relativePath>../jedis-parent</relativePath>
     </parent>
     <modelVersion>4.0.0</modelVersion>
@@ -45,7 +45,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-xml-mapper</artifactId>
-            <version>2.6.0-SNAPSHOT</version>
+            <version>2.6.0</version>
             <scope>test</scope>
         </dependency>
         <dependency>
diff --git a/jcore-db-reader/src/main/resources/de/julielab/jcore/reader/db/desc/jcore-db-multiplier-reader.xml b/jcore-db-reader/src/main/resources/de/julielab/jcore/reader/db/desc/jcore-db-multiplier-reader.xml
index 593b1ef99..81fd1c7a5 100644
--- a/jcore-db-reader/src/main/resources/de/julielab/jcore/reader/db/desc/jcore-db-multiplier-reader.xml
+++ b/jcore-db-reader/src/main/resources/de/julielab/jcore/reader/db/desc/jcore-db-multiplier-reader.xml
@@ -10,7 +10,7 @@
             sent by this reader. The component leverages the corpus storage system (CoStoSys) for this purpose and is
             part of the Jena Document Information System, JeDIS.
         </description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <configurationParameters>
             <configurationParameter>
                 <name>ResetTable</name>
diff --git a/jcore-descriptor-creator/component.meta b/jcore-descriptor-creator/component.meta
index 6eae55fd0..ac2d6ce7d 100644
--- a/jcore-descriptor-creator/component.meta
+++ b/jcore-descriptor-creator/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-descriptor-creator",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Descriptor Creator"
 }
diff --git a/jcore-descriptor-creator/pom.xml b/jcore-descriptor-creator/pom.xml
index 0c410747d..5c82749e4 100644
--- a/jcore-descriptor-creator/pom.xml
+++ b/jcore-descriptor-creator/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     
     <artifactId>jcore-descriptor-creator</artifactId>
diff --git a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/ae/testae/desc/de.julielab.jcore.ae.testae.TestAE.xml b/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/ae/testae/desc/de.julielab.jcore.ae.testae.TestAE.xml
index 3cf0a3a39..173ce62f6 100644
--- a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/ae/testae/desc/de.julielab.jcore.ae.testae.TestAE.xml
+++ b/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/ae/testae/desc/de.julielab.jcore.ae.testae.TestAE.xml
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="UTF-8"?>
+<?xml version='1.0' encoding='UTF-8'?>
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <primitive>true</primitive>
@@ -6,41 +6,41 @@
     <analysisEngineMetaData>
         <name>de.julielab.jcore.ae.testae.TestAE</name>
         <description>Descriptor automatically generated by uimaFIT</description>
-        <version>unknown</version>
+        <version>2.6.0</version>
         <vendor>de.julielab.jcore.ae.testae</vendor>
-        <configurationParameters/>
-        <configurationParameterSettings/>
+        <configurationParameters />
+        <configurationParameterSettings />
         <typeSystemDescription>
             <imports>
-                <import name="de.julielab.jcore.types.extensions.jcore-mantra-types"/>
-                <import name="de.julielab.jcore.types.jcore-document-meta-types"/>
-                <import name="de.julielab.jcore.types.jcore-semantics-biology-types"/>
-                <import name="de.julielab.jcore.types.jcore-semantics-concept-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-mmax-types"/>
-                <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-muc7-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-ace-types"/>
-                <import name="de.julielab.jcore.types.jcore-document-structure-types"/>
-                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
-                <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-wikipedia-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types"/>
-                <import name="de.julielab.jcore.types.jcore-semantics-mention-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-medical-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-dta-types"/>
-                <import name="de.julielab.jcore.types.jcore-discourse-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-ace-types"/>
-                <import name="de.julielab.jcore.types.jcore-morpho-syntax-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-stemnet-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-bootstrep-types"/>
-                <import name="de.julielab.jcore.types.jcore-basic-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types"/>
-                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-evaluation-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-mantra-types" />
+                <import name="de.julielab.jcore.types.jcore-document-meta-types" />
+                <import name="de.julielab.jcore.types.jcore-semantics-biology-types" />
+                <import name="de.julielab.jcore.types.jcore-semantics-concept-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-mmax-types" />
+                <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-muc7-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-ace-types" />
+                <import name="de.julielab.jcore.types.jcore-document-structure-types" />
+                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types" />
+                <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-wikipedia-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types" />
+                <import name="de.julielab.jcore.types.jcore-semantics-mention-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-medical-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-dta-types" />
+                <import name="de.julielab.jcore.types.jcore-discourse-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-ace-types" />
+                <import name="de.julielab.jcore.types.jcore-morpho-syntax-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-stemnet-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-bootstrep-types" />
+                <import name="de.julielab.jcore.types.jcore-basic-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types" />
+                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-evaluation-types" />
             </imports>
         </typeSystemDescription>
-        <fsIndexCollection/>
-        <capabilities/>
+        <fsIndexCollection />
+        <capabilities />
         <operationalProperties>
             <modifiesCas>true</modifiesCas>
             <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
diff --git a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/consumer/testconsumer/desc/de.julielab.jcore.consumer.testconsumer.Testconsumer.xml b/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/consumer/testconsumer/desc/de.julielab.jcore.consumer.testconsumer.Testconsumer.xml
index cf47fdd0f..f26725794 100644
--- a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/consumer/testconsumer/desc/de.julielab.jcore.consumer.testconsumer.Testconsumer.xml
+++ b/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/consumer/testconsumer/desc/de.julielab.jcore.consumer.testconsumer.Testconsumer.xml
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="UTF-8"?>
+<?xml version='1.0' encoding='UTF-8'?>
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <primitive>true</primitive>
@@ -6,41 +6,41 @@
     <analysisEngineMetaData>
         <name>de.julielab.jcore.consumer.testconsumer.Testconsumer</name>
         <description>Descriptor automatically generated by uimaFIT</description>
-        <version>unknown</version>
+        <version>2.6.0</version>
         <vendor>de.julielab.jcore.consumer.testconsumer</vendor>
-        <configurationParameters/>
-        <configurationParameterSettings/>
+        <configurationParameters />
+        <configurationParameterSettings />
         <typeSystemDescription>
             <imports>
-                <import name="de.julielab.jcore.types.extensions.jcore-mantra-types"/>
-                <import name="de.julielab.jcore.types.jcore-document-meta-types"/>
-                <import name="de.julielab.jcore.types.jcore-semantics-biology-types"/>
-                <import name="de.julielab.jcore.types.jcore-semantics-concept-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-mmax-types"/>
-                <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-muc7-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-ace-types"/>
-                <import name="de.julielab.jcore.types.jcore-document-structure-types"/>
-                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
-                <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-wikipedia-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types"/>
-                <import name="de.julielab.jcore.types.jcore-semantics-mention-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-medical-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-dta-types"/>
-                <import name="de.julielab.jcore.types.jcore-discourse-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-ace-types"/>
-                <import name="de.julielab.jcore.types.jcore-morpho-syntax-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-stemnet-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-bootstrep-types"/>
-                <import name="de.julielab.jcore.types.jcore-basic-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types"/>
-                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-evaluation-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-mantra-types" />
+                <import name="de.julielab.jcore.types.jcore-document-meta-types" />
+                <import name="de.julielab.jcore.types.jcore-semantics-biology-types" />
+                <import name="de.julielab.jcore.types.jcore-semantics-concept-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-mmax-types" />
+                <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-muc7-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-ace-types" />
+                <import name="de.julielab.jcore.types.jcore-document-structure-types" />
+                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types" />
+                <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-wikipedia-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types" />
+                <import name="de.julielab.jcore.types.jcore-semantics-mention-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-medical-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-dta-types" />
+                <import name="de.julielab.jcore.types.jcore-discourse-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-ace-types" />
+                <import name="de.julielab.jcore.types.jcore-morpho-syntax-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-stemnet-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-bootstrep-types" />
+                <import name="de.julielab.jcore.types.jcore-basic-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types" />
+                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-evaluation-types" />
             </imports>
         </typeSystemDescription>
-        <fsIndexCollection/>
-        <capabilities/>
+        <fsIndexCollection />
+        <capabilities />
         <operationalProperties>
             <modifiesCas>true</modifiesCas>
             <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
diff --git a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/multiplier/testmultiplier/desc/de.julielab.jcore.multiplier.testmultiplier.TestMultiplier.xml b/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/multiplier/testmultiplier/desc/de.julielab.jcore.multiplier.testmultiplier.TestMultiplier.xml
index 703b7b436..9eae20293 100644
--- a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/multiplier/testmultiplier/desc/de.julielab.jcore.multiplier.testmultiplier.TestMultiplier.xml
+++ b/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/multiplier/testmultiplier/desc/de.julielab.jcore.multiplier.testmultiplier.TestMultiplier.xml
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="UTF-8"?>
+<?xml version='1.0' encoding='UTF-8'?>
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <primitive>true</primitive>
@@ -6,41 +6,41 @@
     <analysisEngineMetaData>
         <name>de.julielab.jcore.multiplier.testmultiplier.TestMultiplier</name>
         <description>Descriptor automatically generated by uimaFIT</description>
-        <version>unknown</version>
+        <version>2.6.0</version>
         <vendor>de.julielab.jcore.multiplier.testmultiplier</vendor>
-        <configurationParameters/>
-        <configurationParameterSettings/>
+        <configurationParameters />
+        <configurationParameterSettings />
         <typeSystemDescription>
             <imports>
-                <import name="de.julielab.jcore.types.extensions.jcore-mantra-types"/>
-                <import name="de.julielab.jcore.types.jcore-document-meta-types"/>
-                <import name="de.julielab.jcore.types.jcore-semantics-biology-types"/>
-                <import name="de.julielab.jcore.types.jcore-semantics-concept-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-mmax-types"/>
-                <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-muc7-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-ace-types"/>
-                <import name="de.julielab.jcore.types.jcore-document-structure-types"/>
-                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
-                <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-wikipedia-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types"/>
-                <import name="de.julielab.jcore.types.jcore-semantics-mention-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-medical-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-dta-types"/>
-                <import name="de.julielab.jcore.types.jcore-discourse-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-ace-types"/>
-                <import name="de.julielab.jcore.types.jcore-morpho-syntax-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-stemnet-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-bootstrep-types"/>
-                <import name="de.julielab.jcore.types.jcore-basic-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types"/>
-                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-evaluation-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-mantra-types" />
+                <import name="de.julielab.jcore.types.jcore-document-meta-types" />
+                <import name="de.julielab.jcore.types.jcore-semantics-biology-types" />
+                <import name="de.julielab.jcore.types.jcore-semantics-concept-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-mmax-types" />
+                <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-muc7-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-ace-types" />
+                <import name="de.julielab.jcore.types.jcore-document-structure-types" />
+                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types" />
+                <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-wikipedia-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types" />
+                <import name="de.julielab.jcore.types.jcore-semantics-mention-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-medical-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-dta-types" />
+                <import name="de.julielab.jcore.types.jcore-discourse-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-ace-types" />
+                <import name="de.julielab.jcore.types.jcore-morpho-syntax-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-stemnet-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-bootstrep-types" />
+                <import name="de.julielab.jcore.types.jcore-basic-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types" />
+                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-evaluation-types" />
             </imports>
         </typeSystemDescription>
-        <fsIndexCollection/>
-        <capabilities/>
+        <fsIndexCollection />
+        <capabilities />
         <operationalProperties>
             <modifiesCas>true</modifiesCas>
             <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
diff --git a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/reader/testreader/desc/de.julielab.jcore.reader.testreader.TestReader.xml b/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/reader/testreader/desc/de.julielab.jcore.reader.testreader.TestReader.xml
index 24cc9ac66..baf0587c5 100644
--- a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/reader/testreader/desc/de.julielab.jcore.reader.testreader.TestReader.xml
+++ b/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/reader/testreader/desc/de.julielab.jcore.reader.testreader.TestReader.xml
@@ -1,45 +1,45 @@
-<?xml version="1.0" encoding="UTF-8"?>
+<?xml version='1.0' encoding='UTF-8'?>
 <collectionReaderDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <implementationName>de.julielab.jcore.reader.testreader.TestReader</implementationName>
     <processingResourceMetaData>
         <name>de.julielab.jcore.reader.testreader.TestReader</name>
         <description>Descriptor automatically generated by uimaFIT</description>
-        <version>unknown</version>
+        <version>2.6.0</version>
         <vendor>de.julielab.jcore.reader.testreader</vendor>
-        <configurationParameters/>
-        <configurationParameterSettings/>
+        <configurationParameters />
+        <configurationParameterSettings />
         <typeSystemDescription>
             <imports>
-                <import name="de.julielab.jcore.types.extensions.jcore-mantra-types"/>
-                <import name="de.julielab.jcore.types.jcore-document-meta-types"/>
-                <import name="de.julielab.jcore.types.jcore-semantics-biology-types"/>
-                <import name="de.julielab.jcore.types.jcore-semantics-concept-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-mmax-types"/>
-                <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-muc7-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-ace-types"/>
-                <import name="de.julielab.jcore.types.jcore-document-structure-types"/>
-                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
-                <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-wikipedia-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types"/>
-                <import name="de.julielab.jcore.types.jcore-semantics-mention-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-medical-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-dta-types"/>
-                <import name="de.julielab.jcore.types.jcore-discourse-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-ace-types"/>
-                <import name="de.julielab.jcore.types.jcore-morpho-syntax-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-stemnet-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-bootstrep-types"/>
-                <import name="de.julielab.jcore.types.jcore-basic-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types"/>
-                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-evaluation-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-mantra-types" />
+                <import name="de.julielab.jcore.types.jcore-document-meta-types" />
+                <import name="de.julielab.jcore.types.jcore-semantics-biology-types" />
+                <import name="de.julielab.jcore.types.jcore-semantics-concept-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-mmax-types" />
+                <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-muc7-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-ace-types" />
+                <import name="de.julielab.jcore.types.jcore-document-structure-types" />
+                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types" />
+                <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-wikipedia-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types" />
+                <import name="de.julielab.jcore.types.jcore-semantics-mention-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-medical-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-dta-types" />
+                <import name="de.julielab.jcore.types.jcore-discourse-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-ace-types" />
+                <import name="de.julielab.jcore.types.jcore-morpho-syntax-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-stemnet-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-bootstrep-types" />
+                <import name="de.julielab.jcore.types.jcore-basic-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types" />
+                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-evaluation-types" />
             </imports>
         </typeSystemDescription>
-        <fsIndexCollection/>
-        <capabilities/>
+        <fsIndexCollection />
+        <capabilities />
         <operationalProperties>
             <modifiesCas>true</modifiesCas>
             <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
diff --git a/jcore-dta-reader/component.meta b/jcore-dta-reader/component.meta
index ee9b729df..c097a90ef 100644
--- a/jcore-dta-reader/component.meta
+++ b/jcore-dta-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-dta-reader",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe DTA Reader"
 }
diff --git a/jcore-dta-reader/pom.xml b/jcore-dta-reader/pom.xml
index 7f5b51af2..706a0f2c5 100644
--- a/jcore-dta-reader/pom.xml
+++ b/jcore-dta-reader/pom.xml
@@ -9,7 +9,7 @@
 	<parent>
 		<groupId>de.julielab</groupId>
 		<artifactId>jcore-base</artifactId>
-		<version>2.6.0-SNAPSHOT</version>
+		<version>2.6.0</version>
 	</parent>
 	<build>
 		<resources>
diff --git a/jcore-dta-reader/src/main/resources/de/julielab/jcore/reader/dta/desc/jcore-dta-reader.xml b/jcore-dta-reader/src/main/resources/de/julielab/jcore/reader/dta/desc/jcore-dta-reader.xml
index 8bc431330..c6827e0d4 100644
--- a/jcore-dta-reader/src/main/resources/de/julielab/jcore/reader/dta/desc/jcore-dta-reader.xml
+++ b/jcore-dta-reader/src/main/resources/de/julielab/jcore/reader/dta/desc/jcore-dta-reader.xml
@@ -5,7 +5,7 @@
 	<processingResourceMetaData>
 		<name>JCoRe DTA Reader</name>
 		<description />
-		<version>2.6.0-SNAPSHOT</version>
+		<version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
 		<configurationParameters>
 			<configurationParameter>
diff --git a/jcore-ec-code-ae/component.meta b/jcore-ec-code-ae/component.meta
index 22af189d5..4bfd58959 100644
--- a/jcore-ec-code-ae/component.meta
+++ b/jcore-ec-code-ae/component.meta
@@ -9,7 +9,7 @@
     "maven-artifact": {
         "artifactId": "jcore-ecn-code-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Enzyme Commission Number AE"
 }
diff --git a/jcore-ec-code-ae/pom.xml b/jcore-ec-code-ae/pom.xml
index 6f0c55f60..d234bff87 100644
--- a/jcore-ec-code-ae/pom.xml
+++ b/jcore-ec-code-ae/pom.xml
@@ -10,7 +10,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     <dependencies>
         <dependency>
diff --git a/jcore-elasticsearch-consumer/component.meta b/jcore-elasticsearch-consumer/component.meta
index b2f0e7a71..366bc2715 100644
--- a/jcore-elasticsearch-consumer/component.meta
+++ b/jcore-elasticsearch-consumer/component.meta
@@ -18,7 +18,7 @@
     "maven-artifact": {
         "artifactId": "jcore-elasticsearch-consumer",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe ElasticSearch Consumer"
 }
diff --git a/jcore-elasticsearch-consumer/pom.xml b/jcore-elasticsearch-consumer/pom.xml
index 9fa242c0c..c54f0c3a5 100644
--- a/jcore-elasticsearch-consumer/pom.xml
+++ b/jcore-elasticsearch-consumer/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     <artifactId>jcore-elasticsearch-consumer</artifactId>
     <name>JCoRe ElasticSearch Consumer</name>
diff --git a/jcore-elasticsearch-consumer/src/main/resources/de/julielab/jcore/consumer/es/desc/jcore-json-writer.xml b/jcore-elasticsearch-consumer/src/main/resources/de/julielab/jcore/consumer/es/desc/jcore-json-writer.xml
index 485ebb2ce..47d8daa69 100644
--- a/jcore-elasticsearch-consumer/src/main/resources/de/julielab/jcore/consumer/es/desc/jcore-json-writer.xml
+++ b/jcore-elasticsearch-consumer/src/main/resources/de/julielab/jcore/consumer/es/desc/jcore-json-writer.xml
@@ -5,7 +5,7 @@
     <annotatorImplementationName>de.julielab.jcore.consumer.es.JsonWriter</annotatorImplementationName>
     <analysisEngineMetaData>
         <name>JCoRe JSON Writer</name>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <configurationParameters>
             <configurationParameter>
                 <name>OutputDestination</name>
diff --git a/jcore-elasticsearch-consumer/src/test/resources/de/julielab/jcore/consumer/es/testTypes.xml b/jcore-elasticsearch-consumer/src/test/resources/de/julielab/jcore/consumer/es/testTypes.xml
index dfdd4d093..7ff4dc049 100644
--- a/jcore-elasticsearch-consumer/src/test/resources/de/julielab/jcore/consumer/es/testTypes.xml
+++ b/jcore-elasticsearch-consumer/src/test/resources/de/julielab/jcore/consumer/es/testTypes.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>testTypes</name>
   <description>Some types suited for unit tests.</description>
-  <version>2.6.0-SNAPSHOT</version>
+  <version>2.6.0</version>
         <vendor />
   <imports>
     <import name="de.julielab.jcore.types.jcore-document-meta-types" />
diff --git a/jcore-embedding-writer/component.meta b/jcore-embedding-writer/component.meta
index 0c6301641..bfa25267d 100644
--- a/jcore-embedding-writer/component.meta
+++ b/jcore-embedding-writer/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-embedding-writer",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Embedding Writer"
 }
diff --git a/jcore-embedding-writer/pom.xml b/jcore-embedding-writer/pom.xml
index d294419fd..b5896bbf2 100644
--- a/jcore-embedding-writer/pom.xml
+++ b/jcore-embedding-writer/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-embedding-writer/src/main/resources/de/julielab/jcore/consumer/ew/desc/jcore-embedding-writer.xml b/jcore-embedding-writer/src/main/resources/de/julielab/jcore/consumer/ew/desc/jcore-embedding-writer.xml
index 46f458d8b..491922b81 100644
--- a/jcore-embedding-writer/src/main/resources/de/julielab/jcore/consumer/ew/desc/jcore-embedding-writer.xml
+++ b/jcore-embedding-writer/src/main/resources/de/julielab/jcore/consumer/ew/desc/jcore-embedding-writer.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe Flair Embedding Writer</name>
     <description>Given a Flair compatible embedding and a UIMA annotation type, this component prints the embeddings of tokens annotated with the annotation to a file.</description>
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <configurationParameters>
       <configurationParameter>
         <name>UseGzip</name>
diff --git a/jcore-event-flattener-ae/component.meta b/jcore-event-flattener-ae/component.meta
index afc1e729e..2fc02a11f 100644
--- a/jcore-event-flattener-ae/component.meta
+++ b/jcore-event-flattener-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-event-flattener-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Event Flattener AE"
 }
diff --git a/jcore-event-flattener-ae/pom.xml b/jcore-event-flattener-ae/pom.xml
index 91788a532..cb83be2ac 100644
--- a/jcore-event-flattener-ae/pom.xml
+++ b/jcore-event-flattener-ae/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     <artifactId>jcore-event-flattener-ae</artifactId>
     <name>JCoRe Event Flattener AE</name>
diff --git a/jcore-event-flattener-ae/src/main/resources/de/julielab/jcore/ae/eventflattener/desc/jcore-event-flattener-ae.xml b/jcore-event-flattener-ae/src/main/resources/de/julielab/jcore/ae/eventflattener/desc/jcore-event-flattener-ae.xml
index 296872b61..a7af948d2 100644
--- a/jcore-event-flattener-ae/src/main/resources/de/julielab/jcore/ae/eventflattener/desc/jcore-event-flattener-ae.xml
+++ b/jcore-event-flattener-ae/src/main/resources/de/julielab/jcore/ae/eventflattener/desc/jcore-event-flattener-ae.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>de.julielab.jcore.ae.eventflattener.EventFlattener</name>
         <description>Descriptor automatically generated by uimaFIT</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <vendor>de.julielab.jcore.ae.eventflattener</vendor>
         <configurationParameters />
         <configurationParameterSettings />
diff --git a/jcore-feature-value-replacement-ae/component.meta b/jcore-feature-value-replacement-ae/component.meta
index dfb623568..2451095b0 100644
--- a/jcore-feature-value-replacement-ae/component.meta
+++ b/jcore-feature-value-replacement-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-feature-value-replacement-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Feature Value Replacement AE"
 }
diff --git a/jcore-feature-value-replacement-ae/pom.xml b/jcore-feature-value-replacement-ae/pom.xml
index 8395dfefd..e68ec49e8 100644
--- a/jcore-feature-value-replacement-ae/pom.xml
+++ b/jcore-feature-value-replacement-ae/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     <artifactId>jcore-feature-value-replacement-ae</artifactId>
     <name>JCoRe Feature Value Replacement AE</name>
diff --git a/jcore-feature-value-replacement-ae/src/main/resources/de/julielab/jcore/ae/fvr/desc/jcore-feature-value-replacement-ae.xml b/jcore-feature-value-replacement-ae/src/main/resources/de/julielab/jcore/ae/fvr/desc/jcore-feature-value-replacement-ae.xml
index 42c3e36a8..8676f0848 100644
--- a/jcore-feature-value-replacement-ae/src/main/resources/de/julielab/jcore/ae/fvr/desc/jcore-feature-value-replacement-ae.xml
+++ b/jcore-feature-value-replacement-ae/src/main/resources/de/julielab/jcore/ae/fvr/desc/jcore-feature-value-replacement-ae.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>de.julielab.jcore.ae.fvr.FeatureValueReplacementAnnotator</name>
         <description>Descriptor automatically generated by uimaFIT</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <vendor>de.julielab.jcore.ae.fvr</vendor>
         <configurationParameters>
             <configurationParameter>
diff --git a/jcore-file-reader/component.meta b/jcore-file-reader/component.meta
index 9aabd9c66..a166fe8b5 100644
--- a/jcore-file-reader/component.meta
+++ b/jcore-file-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-file-reader",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe File Reader"
 }
diff --git a/jcore-file-reader/pom.xml b/jcore-file-reader/pom.xml
index 58ddcfcc6..38940e7ef 100644
--- a/jcore-file-reader/pom.xml
+++ b/jcore-file-reader/pom.xml
@@ -5,7 +5,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     <artifactId>jcore-file-reader</artifactId>
     <name>JCoRe File Reader</name>
diff --git a/jcore-file-reader/src/main/resources/de/julielab/jcore/reader/file/desc/jcore-file-reader.xml b/jcore-file-reader/src/main/resources/de/julielab/jcore/reader/file/desc/jcore-file-reader.xml
index 1297241d1..a5ed06515 100644
--- a/jcore-file-reader/src/main/resources/de/julielab/jcore/reader/file/desc/jcore-file-reader.xml
+++ b/jcore-file-reader/src/main/resources/de/julielab/jcore/reader/file/desc/jcore-file-reader.xml
@@ -4,8 +4,8 @@
     <implementationName>de.julielab.jcore.reader.file.main.FileReader</implementationName>
     <processingResourceMetaData>
         <name>JCoRe File Reader</name>
-        <description/>
-        <version>2.6.0-SNAPSHOT</version>
+        <description />
+        <version>2.6.0</version>
         <vendor>JULIELab Jena, Germany</vendor>
         <configurationParameters>
             <configurationParameter>
@@ -145,20 +145,20 @@
         </configurationParameterSettings>
         <typeSystemDescription>
             <imports>
-                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
-                <import name="de.julielab.jcore.types.jcore-morpho-syntax-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types" />
+                <import name="de.julielab.jcore.types.jcore-morpho-syntax-types" />
             </imports>
         </typeSystemDescription>
-        <typePriorities/>
-        <fsIndexCollection/>
+        <typePriorities />
+        <fsIndexCollection />
         <capabilities>
             <capability>
-                <inputs/>
+                <inputs />
                 <outputs>
                     <type allAnnotatorFeatures="true">de.julielab.jcore.types.pubmed.Header</type>
                     <type allAnnotatorFeatures="true">de.julielab.jcore.types.Date</type>
                 </outputs>
-                <languagesSupported/>
+                <languagesSupported />
             </capability>
         </capabilities>
         <operationalProperties>
@@ -167,5 +167,5 @@
             <outputsNewCASes>true</outputsNewCASes>
         </operationalProperties>
     </processingResourceMetaData>
-    <resourceManagerConfiguration/>
+    <resourceManagerConfiguration />
 </collectionReaderDescription>
\ No newline at end of file
diff --git a/jcore-flair-ner-ae/component.meta b/jcore-flair-ner-ae/component.meta
index 09250babf..ee7f7bc6c 100644
--- a/jcore-flair-ner-ae/component.meta
+++ b/jcore-flair-ner-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-flair-ner-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Flair NER AE"
 }
diff --git a/jcore-flair-ner-ae/pom.xml b/jcore-flair-ner-ae/pom.xml
index f608f17a3..7c2dd185c 100644
--- a/jcore-flair-ner-ae/pom.xml
+++ b/jcore-flair-ner-ae/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <dependencies>
@@ -21,7 +21,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>java-stdio-ipc</artifactId>
-            <version>1.0.2</version>
+            <version>1.0.3</version>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
@@ -43,7 +43,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-annotation-adder-ae</artifactId>
-            <version>2.6.0-SNAPSHOT</version>
+            <version>2.6.0</version>
         </dependency>
         <dependency>
             <groupId>ch.qos.logback</groupId>
diff --git a/jcore-flair-ner-ae/src/main/resources/de/julielab/jcore/ae/flairner/desc/jcore-flair-ner-ae.xml b/jcore-flair-ner-ae/src/main/resources/de/julielab/jcore/ae/flairner/desc/jcore-flair-ner-ae.xml
index bccfd8ddc..f4ca36655 100644
--- a/jcore-flair-ner-ae/src/main/resources/de/julielab/jcore/ae/flairner/desc/jcore-flair-ner-ae.xml
+++ b/jcore-flair-ner-ae/src/main/resources/de/julielab/jcore/ae/flairner/desc/jcore-flair-ner-ae.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe Flair Named Entity Recognizer</name>
         <description>This component starts a child process to a python interpreter and loads a Flair sequence tagging model. Sentences are taken from the CAS, sent to Flair for tagging and the results are written into the CAS. The annotation type to use can be configured. It must be a subtype of de.julielab.jcore.types.EntityMention. The tag of each entity is written to the specificType feature.</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <configurationParameters>
             <configurationParameter>
                 <name>AnnotationType</name>
diff --git a/jcore-flair-token-embedding-ae/component.meta b/jcore-flair-token-embedding-ae/component.meta
index cc7ef4681..da21368ce 100644
--- a/jcore-flair-token-embedding-ae/component.meta
+++ b/jcore-flair-token-embedding-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-flair-token-embedding-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Flair Token Embedding Annotator"
 }
diff --git a/jcore-flair-token-embedding-ae/pom.xml b/jcore-flair-token-embedding-ae/pom.xml
index f05e54f89..18e2e2a77 100644
--- a/jcore-flair-token-embedding-ae/pom.xml
+++ b/jcore-flair-token-embedding-ae/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <dependencies>
@@ -30,7 +30,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>java-stdio-ipc</artifactId>
-            <version>1.0.1</version>
+            <version>1.0.3</version>
         </dependency>
         <dependency>
             <groupId>org.junit.jupiter</groupId>
diff --git a/jcore-flair-token-embedding-ae/src/main/resources/de/julielab/jcore/ae/fte/desc/jcore-flair-token-embedding-ae.xml b/jcore-flair-token-embedding-ae/src/main/resources/de/julielab/jcore/ae/fte/desc/jcore-flair-token-embedding-ae.xml
index 3b342d593..343cbce3b 100644
--- a/jcore-flair-token-embedding-ae/src/main/resources/de/julielab/jcore/ae/fte/desc/jcore-flair-token-embedding-ae.xml
+++ b/jcore-flair-token-embedding-ae/src/main/resources/de/julielab/jcore/ae/fte/desc/jcore-flair-token-embedding-ae.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe Flair Token Embedding Annotator</name>
     <description>Adds the Flair compatible embedding vectors to the token annotations.</description>
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <configurationParameters>
       <configurationParameter>
         <name>EmbeddingPath</name>
diff --git a/jcore-flow-controllers/component.meta b/jcore-flow-controllers/component.meta
index d8f783bd8..bbae688c0 100644
--- a/jcore-flow-controllers/component.meta
+++ b/jcore-flow-controllers/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-flow-controllers",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Flow Controllers"
 }
diff --git a/jcore-flow-controllers/pom.xml b/jcore-flow-controllers/pom.xml
index fe3e3ff4e..1bd6ede1d 100644
--- a/jcore-flow-controllers/pom.xml
+++ b/jcore-flow-controllers/pom.xml
@@ -5,7 +5,7 @@
     <parent>
         <artifactId>jcore-base</artifactId>
         <groupId>de.julielab</groupId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     <modelVersion>4.0.0</modelVersion>
 
diff --git a/jcore-gnormplus-ae/component.meta b/jcore-gnormplus-ae/component.meta
index b44cae343..ae9063b04 100644
--- a/jcore-gnormplus-ae/component.meta
+++ b/jcore-gnormplus-ae/component.meta
@@ -31,7 +31,7 @@
     "maven-artifact": {
         "artifactId": "jcore-gnormplus-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe GNormPlus Annotator"
 }
diff --git a/jcore-gnormplus-ae/pom.xml b/jcore-gnormplus-ae/pom.xml
index 0056bb402..ab8d4d208 100644
--- a/jcore-gnormplus-ae/pom.xml
+++ b/jcore-gnormplus-ae/pom.xml
@@ -10,41 +10,41 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
-    <version>2.6.0-SNAPSHOT</version>
+    
 
     <dependencies>
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-gnp-bioc-writer</artifactId>
-            <version>${project.parent.version}</version>
+            <version>2.6.0</version>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-gnp-bioc-reader</artifactId>
-            <version>${project.parent.version}</version>
+            <version>2.6.0</version>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-xmi-db-reader</artifactId>
-            <version>${project.parent.version}</version>
+            <version>2.6.0</version>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-xml-db-reader</artifactId>
-            <version>${project.parent.version}</version>
+            <version>2.6.0</version>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-pmc-db-reader</artifactId>
-            <version>${project.parent.version}</version>
+            <version>2.6.0</version>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>julielab-gnormplus</artifactId>
-            <version>1.0.0-SNAPSHOT</version>
+            <version>1.0.0</version>
         </dependency>
         <dependency>
             <groupId>ch.qos.logback</groupId>
diff --git a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/ae/gnp/desc/jcore-gnormplus-ae.xml b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/ae/gnp/desc/jcore-gnormplus-ae.xml
index 9cd5e46a5..ae82b1037 100644
--- a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/ae/gnp/desc/jcore-gnormplus-ae.xml
+++ b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/ae/gnp/desc/jcore-gnormplus-ae.xml
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="UTF-8"?>
+<?xml version='1.0' encoding='UTF-8'?>
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
   <primitive>true</primitive>
@@ -6,7 +6,8 @@
   <analysisEngineMetaData>
     <name>JCoRe GNormPlus Annotator</name>
     <description>Wrapper for the JULIE Lab variant of the GNormPlus gene ID mapper.</description>
-    <vendor>JULIE Lab Jena, Germany</vendor>
+    <version>2.6.0</version>
+        <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
         <name>AddGenes</name>
@@ -60,20 +61,20 @@
     </configurationParameterSettings>
     <typeSystemDescription>
       <imports>
-        <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
-        <import name="de.julielab.jcore.types.jcore-semantics-biology-types"/>
-        <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
+        <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types" />
+        <import name="de.julielab.jcore.types.jcore-semantics-biology-types" />
+        <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types" />
       </imports>
     </typeSystemDescription>
-    <fsIndexCollection/>
+    <fsIndexCollection />
     <capabilities>
       <capability>
-        <inputs/>
+        <inputs />
         <outputs>
           <type>de.julielab.jcore.types.ConceptMention</type>
           <type>de.julielab.jcore.types.Organism</type>
         </outputs>
-        <languagesSupported/>
+        <languagesSupported />
       </capability>
     </capabilities>
     <operationalProperties>
diff --git a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-bioc-multiplier.xml b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-bioc-multiplier.xml
index 174371c8b..ba7fc2a1c 100644
--- a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-bioc-multiplier.xml
+++ b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-bioc-multiplier.xml
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="UTF-8"?>
+<?xml version='1.0' encoding='UTF-8'?>
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <primitive>true</primitive>
@@ -6,6 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe GNormPlus BioC Multiplier</name>
         <description>A CAS multiplier to be used with the GNormPlus BioC Format multiplier reader. It wraps the JULIE Lab variant of the GNormPlus gene ID mapper. It is a multiplier because this enables batch-processing of documents with GNormPlus which makes the processing more efficient.</description>
+        <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
         <configurationParameters>
             <configurationParameter>
@@ -74,20 +75,20 @@
         </configurationParameterSettings>
         <typeSystemDescription>
             <imports>
-                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
-                <import name="de.julielab.jcore.types.jcore-semantics-biology-types"/>
-                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types" />
+                <import name="de.julielab.jcore.types.jcore-semantics-biology-types" />
+                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types" />
             </imports>
         </typeSystemDescription>
-        <fsIndexCollection/>
+        <fsIndexCollection />
         <capabilities>
             <capability>
-                <inputs/>
+                <inputs />
                 <outputs>
                     <type>de.julielab.jcore.types.ConceptMention</type>
                     <type>de.julielab.jcore.types.Organism</type>
                 </outputs>
-                <languagesSupported/>
+                <languagesSupported />
             </capability>
         </capabilities>
         <operationalProperties>
diff --git a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-pmc-db-multiplier.xml b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-pmc-db-multiplier.xml
index a5639b8a7..29212813a 100644
--- a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-pmc-db-multiplier.xml
+++ b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-pmc-db-multiplier.xml
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="UTF-8"?>
+<?xml version='1.0' encoding='UTF-8'?>
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <primitive>true</primitive>
@@ -6,6 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe GNormPlus PMC Database Multiplier</name>
         <description>A CAS multiplier to be used with the DB PMC multiplier reader in place of the DB PMC multiplier. It wraps the JULIE Lab variant of the GNormPlus gene ID mapper. It is a multiplier because this enables batch-processing of documents with GNormPlus which makes the processing more efficient.</description>
+        <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
         <configurationParameters>
             <configurationParameter>
@@ -118,19 +119,19 @@
                 <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types" />
                 <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types" />
                 <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types"/>
-                <import name="de.julielab.jcore.types.jcore-casflow-types"/>
+                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types" />
+                <import name="de.julielab.jcore.types.jcore-casflow-types" />
             </imports>
         </typeSystemDescription>
-        <fsIndexCollection/>
+        <fsIndexCollection />
         <capabilities>
             <capability>
-                <inputs/>
+                <inputs />
                 <outputs>
                     <type>de.julielab.jcore.types.ConceptMention</type>
                     <type>de.julielab.jcore.types.Organism</type>
                 </outputs>
-                <languagesSupported/>
+                <languagesSupported />
             </capability>
         </capabilities>
         <operationalProperties>
diff --git a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xmi-db-multiplier.xml b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xmi-db-multiplier.xml
index 9b9c310f0..cc3c750ea 100644
--- a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xmi-db-multiplier.xml
+++ b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xmi-db-multiplier.xml
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="UTF-8"?>
+<?xml version='1.0' encoding='UTF-8'?>
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <primitive>true</primitive>
@@ -6,6 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe GNormPlus XMI Database Multiplier</name>
         <description>A CAS multiplier to be used with the DB XMI multiplier reader. It wraps the JULIE Lab variant of the GNormPlus gene ID mapper. It is a multiplier because this enables batch-processing of documents with GNormPlus which makes the processing more efficient.</description>
+        <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
         <configurationParameters>
             <configurationParameter>
@@ -80,22 +81,22 @@
         </configurationParameterSettings>
         <typeSystemDescription>
             <imports>
-                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
-                <import name="de.julielab.jcore.types.jcore-semantics-biology-types"/>
-                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types"/>
-                <import name="de.julielab.jcore.types.jcore-casflow-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types" />
+                <import name="de.julielab.jcore.types.jcore-semantics-biology-types" />
+                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types" />
+                <import name="de.julielab.jcore.types.jcore-casflow-types" />
             </imports>
         </typeSystemDescription>
-        <fsIndexCollection/>
+        <fsIndexCollection />
         <capabilities>
             <capability>
-                <inputs/>
+                <inputs />
                 <outputs>
                     <type>de.julielab.jcore.types.ConceptMention</type>
                     <type>de.julielab.jcore.types.Organism</type>
                 </outputs>
-                <languagesSupported/>
+                <languagesSupported />
             </capability>
         </capabilities>
         <operationalProperties>
diff --git a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xml-db-multiplier.xml b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xml-db-multiplier.xml
index 36c71c403..f300ca78c 100644
--- a/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xml-db-multiplier.xml
+++ b/jcore-gnormplus-ae/src/main/resources/de/julielab/jcore/multiplier/gnp/desc/jcore-gnormplus-xml-db-multiplier.xml
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="UTF-8"?>
+<?xml version='1.0' encoding='UTF-8'?>
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <primitive>true</primitive>
@@ -6,6 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe GNormPlus XML Database Multiplier</name>
         <description>A CAS multiplier to be used with the DB XML multiplier reader in place of the DB XML multiplier. It wraps the JULIE Lab variant of the GNormPlus gene ID mapper. It is a multiplier because this enables batch-processing of documents with GNormPlus which makes the processing more efficient.</description>
+        <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
         <configurationParameters>
             <configurationParameter>
@@ -123,22 +124,22 @@
         </configurationParameterSettings>
         <typeSystemDescription>
             <imports>
-                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
-                <import name="de.julielab.jcore.types.jcore-semantics-biology-types"/>
-                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types"/>
-                <import name="de.julielab.jcore.types.jcore-casflow-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types" />
+                <import name="de.julielab.jcore.types.jcore-semantics-biology-types" />
+                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types" />
+                <import name="de.julielab.jcore.types.jcore-casflow-types" />
             </imports>
         </typeSystemDescription>
-        <fsIndexCollection/>
+        <fsIndexCollection />
         <capabilities>
             <capability>
-                <inputs/>
+                <inputs />
                 <outputs>
                     <type>de.julielab.jcore.types.ConceptMention</type>
                     <type>de.julielab.jcore.types.Organism</type>
                 </outputs>
-                <languagesSupported/>
+                <languagesSupported />
             </capability>
         </capabilities>
         <operationalProperties>
diff --git a/jcore-gnp-bioc-reader/component.meta b/jcore-gnp-bioc-reader/component.meta
index 91006a646..de865415a 100644
--- a/jcore-gnp-bioc-reader/component.meta
+++ b/jcore-gnp-bioc-reader/component.meta
@@ -1,7 +1,7 @@
 {
     "categories": [
-        "reader",
-        "multiplier"
+        "multiplier",
+        "reader"
     ],
     "description": "A reader for the BioC format used by GNormPlus. Reads the text and the annotations, both species and genes.",
     "descriptors": [
@@ -19,7 +19,7 @@
     "maven-artifact": {
         "artifactId": "jcore-gnp-bioc-reader",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe GNormPlus BioC Reader"
 }
diff --git a/jcore-gnp-bioc-reader/pom.xml b/jcore-gnp-bioc-reader/pom.xml
index ccbfad282..01593d1c7 100644
--- a/jcore-gnp-bioc-reader/pom.xml
+++ b/jcore-gnp-bioc-reader/pom.xml
@@ -10,7 +10,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <dependencies>
@@ -54,7 +54,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>costosys</artifactId>
-            <version>[1.6.0, )</version>
+            <version>1.5.2</version>
         </dependency>
     </dependencies>
     <name>JCoRe GNormPlus BioC Reader</name>
diff --git a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier-reader.xml b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier-reader.xml
index 7081ae596..947a31d8b 100644
--- a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier-reader.xml
+++ b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier-reader.xml
@@ -1,10 +1,11 @@
-<?xml version="1.0" encoding="UTF-8"?>
+<?xml version='1.0' encoding='UTF-8'?>
 <collectionReaderDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <implementationName>de.julielab.jcore.reader.GNormPlusFormatMultiplierReader</implementationName>
     <processingResourceMetaData>
         <name>JCoRe GNormPlus Format Multiplier Reader</name>
         <description>A reader for the BioC XML format used by GNormPlus. Requires the matching multiplier.</description>
+        <version>2.6.0</version>
         <configurationParameters>
             <configurationParameter>
                 <name>InputPath</name>
@@ -44,11 +45,11 @@
         </configurationParameterSettings>
         <typeSystemDescription>
             <imports>
-                <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types"/>
+                <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types" />
             </imports>
         </typeSystemDescription>
-        <fsIndexCollection/>
-        <capabilities/>
+        <fsIndexCollection />
+        <capabilities />
         <operationalProperties>
             <modifiesCas>true</modifiesCas>
             <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
diff --git a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml
index a742ca577..bbbf18e18 100644
--- a/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml
+++ b/jcore-gnp-bioc-reader/src/main/resources/de/julielab/jcore/reader/desc/jcore-bnp-bioc-multiplier.xml
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="UTF-8"?>
+<?xml version='1.0' encoding='UTF-8'?>
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <primitive>true</primitive>
@@ -6,6 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe GNormPlus BioC Format Multiplier</name>
         <description>Multiplier for GNormPlusFormatMultiplierReader. Takes URIs pointing to BioC collection files that contain annotations created by GNormPlus. For each such file, reads all documents and returns CASes for them until all documents in all collections have been read into a CAS.</description>
+        <version>2.6.0</version>
         <configurationParameters>
             <configurationParameter>
                 <name>CostosysConfigFile</name>
@@ -22,25 +23,25 @@
                 <mandatory>false</mandatory>
             </configurationParameter>
         </configurationParameters>
-        <configurationParameterSettings/>
+        <configurationParameterSettings />
         <typeSystemDescription>
             <imports>
-                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
-                <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types"/>
-                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
-                <import name="de.julielab.jcore.types.jcore-xmi-splitter-types"/>
-                <import name="de.julielab.jcore.types.jcore-semantics-biology-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types" />
+                <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types" />
+                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types" />
+                <import name="de.julielab.jcore.types.jcore-xmi-splitter-types" />
+                <import name="de.julielab.jcore.types.jcore-semantics-biology-types" />
             </imports>
         </typeSystemDescription>
-        <fsIndexCollection/>
+        <fsIndexCollection />
         <capabilities>
             <capability>
-                <inputs/>
+                <inputs />
                 <outputs>
                     <type>de.julielab.jcore.types.Gene</type>
                     <type>de.julielab.jcore.types.Organism</type>
                 </outputs>
-                <languagesSupported/>
+                <languagesSupported />
             </capability>
         </capabilities>
         <operationalProperties>
diff --git a/jcore-gnp-bioc-writer/component.meta b/jcore-gnp-bioc-writer/component.meta
index 78c499835..f8b942bf4 100644
--- a/jcore-gnp-bioc-writer/component.meta
+++ b/jcore-gnp-bioc-writer/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-gnp-bioc-writer",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe GNormPlus BioC Writer"
 }
diff --git a/jcore-gnp-bioc-writer/pom.xml b/jcore-gnp-bioc-writer/pom.xml
index 93aa158ea..12e5354c1 100644
--- a/jcore-gnp-bioc-writer/pom.xml
+++ b/jcore-gnp-bioc-writer/pom.xml
@@ -10,7 +10,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-gnp-bioc-writer/src/main/resources/de/julielab/jcore/consumer/gnp/desc/jcore-gnp-bioc-writer.xml b/jcore-gnp-bioc-writer/src/main/resources/de/julielab/jcore/consumer/gnp/desc/jcore-gnp-bioc-writer.xml
index 54b6a2046..06a19dd75 100644
--- a/jcore-gnp-bioc-writer/src/main/resources/de/julielab/jcore/consumer/gnp/desc/jcore-gnp-bioc-writer.xml
+++ b/jcore-gnp-bioc-writer/src/main/resources/de/julielab/jcore/consumer/gnp/desc/jcore-gnp-bioc-writer.xml
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="UTF-8"?>
+<?xml version='1.0' encoding='UTF-8'?>
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <primitive>true</primitive>
@@ -6,6 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe GNormPlus BioC Writer</name>
         <description>Writes CAS documents into the BioC XML format used by the gene tagger and normalizer GNormPlus.</description>
+        <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
         <configurationParameters>
             <configurationParameter>
@@ -60,17 +61,17 @@
         </configurationParameterSettings>
         <typeSystemDescription>
             <imports>
-                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
-                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
-                <import name="de.julielab.jcore.types.jcore-semantics-biology-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types" />
+                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types" />
+                <import name="de.julielab.jcore.types.jcore-semantics-biology-types" />
             </imports>
         </typeSystemDescription>
-        <fsIndexCollection/>
+        <fsIndexCollection />
         <capabilities>
             <capability>
-                <inputs/>
-                <outputs/>
-                <languagesSupported/>
+                <inputs />
+                <outputs />
+                <languagesSupported />
             </capability>
         </capabilities>
         <operationalProperties>
diff --git a/jcore-iexml-consumer/component.meta b/jcore-iexml-consumer/component.meta
index 0ec142ad7..6156d38c4 100644
--- a/jcore-iexml-consumer/component.meta
+++ b/jcore-iexml-consumer/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-iexml-consumer",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe IEXML Consumer"
 }
diff --git a/jcore-iexml-consumer/pom.xml b/jcore-iexml-consumer/pom.xml
index 5d7a199a2..5337141d4 100644
--- a/jcore-iexml-consumer/pom.xml
+++ b/jcore-iexml-consumer/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     <description>Generates stand-off IEXML files as used in the Mantra challenge.</description>
 
@@ -74,7 +74,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-mantra-xml-types</artifactId>
-            <version>2.6.0-SNAPSHOT</version>
+            <version>2.6.0</version>
         </dependency>
         <dependency>
             <groupId>org.junit.jupiter</groupId>
diff --git a/jcore-iexml-consumer/src/main/resources/de/julielab/jcore/consumer/iexml/desc/jcore-iexml-consumer.xml b/jcore-iexml-consumer/src/main/resources/de/julielab/jcore/consumer/iexml/desc/jcore-iexml-consumer.xml
index 98c581be2..7c2ac53ad 100644
--- a/jcore-iexml-consumer/src/main/resources/de/julielab/jcore/consumer/iexml/desc/jcore-iexml-consumer.xml
+++ b/jcore-iexml-consumer/src/main/resources/de/julielab/jcore/consumer/iexml/desc/jcore-iexml-consumer.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>JCoRe IEXML Consumer</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-iexml-reader/component.meta b/jcore-iexml-reader/component.meta
index 15d5600c1..5054c474a 100644
--- a/jcore-iexml-reader/component.meta
+++ b/jcore-iexml-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-iexml-reader",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe IEXML Reader"
 }
diff --git a/jcore-iexml-reader/pom.xml b/jcore-iexml-reader/pom.xml
index 3d1e90378..20516c98e 100644
--- a/jcore-iexml-reader/pom.xml
+++ b/jcore-iexml-reader/pom.xml
@@ -10,7 +10,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <build>
@@ -75,7 +75,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-mantra-xml-types</artifactId>
-            <version>2.6.0-SNAPSHOT</version>
+            <version>2.6.0</version>
         </dependency>
         <dependency>
             <groupId>org.junit.jupiter</groupId>
diff --git a/jcore-iexml-reader/src/main/resources/de/julielab/jcore/reader/iexml/desc/jcore-iexml-reader.xml b/jcore-iexml-reader/src/main/resources/de/julielab/jcore/reader/iexml/desc/jcore-iexml-reader.xml
index 933482a5a..f26160c4b 100644
--- a/jcore-iexml-reader/src/main/resources/de/julielab/jcore/reader/iexml/desc/jcore-iexml-reader.xml
+++ b/jcore-iexml-reader/src/main/resources/de/julielab/jcore/reader/iexml/desc/jcore-iexml-reader.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>JCoRe IEXML Reader</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-ign-reader/component.meta b/jcore-ign-reader/component.meta
index 798abe608..82c8dd0a4 100644
--- a/jcore-ign-reader/component.meta
+++ b/jcore-ign-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-ign-reader",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe IGN Reader"
 }
diff --git a/jcore-ign-reader/pom.xml b/jcore-ign-reader/pom.xml
index 8bd754eaa..96be0ee0d 100644
--- a/jcore-ign-reader/pom.xml
+++ b/jcore-ign-reader/pom.xml
@@ -5,7 +5,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     <artifactId>jcore-ign-reader</artifactId>
 
diff --git a/jcore-ign-reader/src/main/resources/de/julielab/jcore/reader/ign/desc/jcore-ign-reader.xml b/jcore-ign-reader/src/main/resources/de/julielab/jcore/reader/ign/desc/jcore-ign-reader.xml
index 3205766bd..e0cf416e5 100644
--- a/jcore-ign-reader/src/main/resources/de/julielab/jcore/reader/ign/desc/jcore-ign-reader.xml
+++ b/jcore-ign-reader/src/main/resources/de/julielab/jcore/reader/ign/desc/jcore-ign-reader.xml
@@ -7,7 +7,7 @@
     <description>The IGNReader reads IGN corpus files in BioC-format.
 
 There are XML files comprising the actual text (as well as passage and sentence annotations) and there are separate XML files comprising the annotations.</description>
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-iob-consumer/component.meta b/jcore-iob-consumer/component.meta
index 9e0e62410..e8ae24666 100644
--- a/jcore-iob-consumer/component.meta
+++ b/jcore-iob-consumer/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-iob-consumer",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe CAS to IOB Consumer"
 }
diff --git a/jcore-iob-consumer/pom.xml b/jcore-iob-consumer/pom.xml
index e4751ee20..74fcec1f2 100644
--- a/jcore-iob-consumer/pom.xml
+++ b/jcore-iob-consumer/pom.xml
@@ -10,7 +10,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-iob-consumer/src/main/resources/de/julielab/jcore/consumer/cas2iob/desc/jcore-iob-consumer.xml b/jcore-iob-consumer/src/main/resources/de/julielab/jcore/consumer/cas2iob/desc/jcore-iob-consumer.xml
index aa07c6d66..3f8e58712 100644
--- a/jcore-iob-consumer/src/main/resources/de/julielab/jcore/consumer/cas2iob/desc/jcore-iob-consumer.xml
+++ b/jcore-iob-consumer/src/main/resources/de/julielab/jcore/consumer/cas2iob/desc/jcore-iob-consumer.xml
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="UTF-8"?>
+<?xml version='1.0' encoding='UTF-8'?>
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <primitive>true</primitive>
@@ -6,6 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe IOB Writer</name>
         <description>This component help to write CAS entity or chunk annotations into a text file in IOB format.</description>
+        <version>2.6.0</version>
         <configurationParameters>
             <configurationParameter>
                 <name>outFolder</name>
@@ -92,8 +93,8 @@
                 <import name="de.julielab.jcore.types.jcore-document-structure-types" />
             </imports>
         </typeSystemDescription>
-        <fsIndexCollection/>
-        <capabilities/>
+        <fsIndexCollection />
+        <capabilities />
         <operationalProperties>
             <modifiesCas>true</modifiesCas>
             <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
diff --git a/jcore-iob-consumer/src/test/resources/de/julielab/jcore/consumer/cas2iob/desc/ToIOBConsumerTest.xml b/jcore-iob-consumer/src/test/resources/de/julielab/jcore/consumer/cas2iob/desc/ToIOBConsumerTest.xml
index deb5a9318..aef1816a7 100644
--- a/jcore-iob-consumer/src/test/resources/de/julielab/jcore/consumer/cas2iob/desc/ToIOBConsumerTest.xml
+++ b/jcore-iob-consumer/src/test/resources/de/julielab/jcore/consumer/cas2iob/desc/ToIOBConsumerTest.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>ToIOBConsumerTest</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-iob-consumer/src/test/resources/de/julielab/jcore/consumer/cas2iob/types/TestTypeSystem.xml b/jcore-iob-consumer/src/test/resources/de/julielab/jcore/consumer/cas2iob/types/TestTypeSystem.xml
index 0a7a01cf3..05c84c7b1 100644
--- a/jcore-iob-consumer/src/test/resources/de/julielab/jcore/consumer/cas2iob/types/TestTypeSystem.xml
+++ b/jcore-iob-consumer/src/test/resources/de/julielab/jcore/consumer/cas2iob/types/TestTypeSystem.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
 <name>TestTypeSystem</name>
 <description>including julie morpho-syntax and semantics</description>
-<version>2.6.0-SNAPSHOT</version>
+<version>2.6.0</version>
         <vendor />
 <imports>
 <import name="de.julielab.jcore.types.jcore-morpho-syntax-types" />
diff --git a/jcore-jedis-integration-tests/pom.xml b/jcore-jedis-integration-tests/pom.xml
index 2bcc39022..79aa622d6 100644
--- a/jcore-jedis-integration-tests/pom.xml
+++ b/jcore-jedis-integration-tests/pom.xml
@@ -5,7 +5,7 @@
     <parent>
         <artifactId>jedis-parent</artifactId>
         <groupId>de.julielab</groupId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <relativePath>../jedis-parent</relativePath>
     </parent>
     <modelVersion>4.0.0</modelVersion>
@@ -16,22 +16,22 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-xml-db-reader</artifactId>
-            <version>${project.parent.version}</version>
+            <version>2.6.0</version>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-xmi-db-writer</artifactId>
-            <version>${project.parent.version}</version>
+            <version>2.6.0</version>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-db-checkpoint-ae</artifactId>
-            <version>${project.parent.version}</version>
+            <version>2.6.0</version>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-flow-controllers</artifactId>
-            <version>${project.parent.version}</version>
+            <version>2.6.0</version>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
diff --git a/jcore-jemas-ae/src/main/resources/de/julielab/jcore/ae/jemas/desc/jcore-jemas-ae.xml b/jcore-jemas-ae/src/main/resources/de/julielab/jcore/ae/jemas/desc/jcore-jemas-ae.xml
index c44952183..3d2f6c9fd 100644
--- a/jcore-jemas-ae/src/main/resources/de/julielab/jcore/ae/jemas/desc/jcore-jemas-ae.xml
+++ b/jcore-jemas-ae/src/main/resources/de/julielab/jcore/ae/jemas/desc/jcore-jemas-ae.xml
@@ -5,7 +5,7 @@
   <analysisEngineMetaData>
     <name>JCoRe JEmAS</name>
     <description>A UIMA-based implementation of the core functionality of JEmAS, the Jena Emotion Analysis System.</description>
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-jnet-ae/component.meta b/jcore-jnet-ae/component.meta
index 74ba27806..b39b004c5 100644
--- a/jcore-jnet-ae/component.meta
+++ b/jcore-jnet-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-jnet-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe JNET AE"
 }
diff --git a/jcore-jnet-ae/pom.xml b/jcore-jnet-ae/pom.xml
index a1e73f5fd..d67df3ba0 100644
--- a/jcore-jnet-ae/pom.xml
+++ b/jcore-jnet-ae/pom.xml
@@ -11,7 +11,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <build>
diff --git a/jcore-jnet-ae/src/main/resources/de/julielab/jcore/ae/jnet/desc/jcore-jnet-ae.xml b/jcore-jnet-ae/src/main/resources/de/julielab/jcore/ae/jnet/desc/jcore-jnet-ae.xml
index 8f602da33..f4b666e6d 100644
--- a/jcore-jnet-ae/src/main/resources/de/julielab/jcore/ae/jnet/desc/jcore-jnet-ae.xml
+++ b/jcore-jnet-ae/src/main/resources/de/julielab/jcore/ae/jnet/desc/jcore-jnet-ae.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe JNET AE</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-jnet-ae/src/test/resources/de/julielab/jcore/ae/jnet/uima/EntityAnnotatorTest.xml b/jcore-jnet-ae/src/test/resources/de/julielab/jcore/ae/jnet/uima/EntityAnnotatorTest.xml
index 34cfdc1e9..b07631439 100644
--- a/jcore-jnet-ae/src/test/resources/de/julielab/jcore/ae/jnet/uima/EntityAnnotatorTest.xml
+++ b/jcore-jnet-ae/src/test/resources/de/julielab/jcore/ae/jnet/uima/EntityAnnotatorTest.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>EntityTaggerAnnotator</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>julielab</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-jnet-ae/src/test/resources/de/julielab/jcore/ae/jnet/uima/tsDescriptor.xml b/jcore-jnet-ae/src/test/resources/de/julielab/jcore/ae/jnet/uima/tsDescriptor.xml
index a71ebef34..6bfe94e8e 100644
--- a/jcore-jnet-ae/src/test/resources/de/julielab/jcore/ae/jnet/uima/tsDescriptor.xml
+++ b/jcore-jnet-ae/src/test/resources/de/julielab/jcore/ae/jnet/uima/tsDescriptor.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
 <name>aceComplete</name>
 <description />
-<version>2.6.0-SNAPSHOT</version>
+<version>2.6.0</version>
         <vendor />
 <imports>
         <import name="de.julielab.jcore.types.jcore-basic-types" />
diff --git a/jcore-jpos-ae/component.meta b/jcore-jpos-ae/component.meta
index eb0b7ae53..6cacfad71 100644
--- a/jcore-jpos-ae/component.meta
+++ b/jcore-jpos-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-jpos-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe JPOS AE"
 }
diff --git a/jcore-jpos-ae/pom.xml b/jcore-jpos-ae/pom.xml
index 4f195e62d..04e41a7e3 100644
--- a/jcore-jpos-ae/pom.xml
+++ b/jcore-jpos-ae/pom.xml
@@ -11,7 +11,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <build>
diff --git a/jcore-jpos-ae/src/main/resources/de/julielab/jcore/ae/jpos/desc/jcore-jpos.xml b/jcore-jpos-ae/src/main/resources/de/julielab/jcore/ae/jpos/desc/jcore-jpos.xml
index 37870472c..e8777ae38 100644
--- a/jcore-jpos-ae/src/main/resources/de/julielab/jcore/ae/jpos/desc/jcore-jpos.xml
+++ b/jcore-jpos-ae/src/main/resources/de/julielab/jcore/ae/jpos/desc/jcore-jpos.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe JPOS AE</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
    <configurationParameters>
       <configurationParameter>
diff --git a/jcore-jpos-ae/src/test/resources/POSTagAnnotatorTest.xml b/jcore-jpos-ae/src/test/resources/POSTagAnnotatorTest.xml
index 5a179961d..3c7f1b099 100644
--- a/jcore-jpos-ae/src/test/resources/POSTagAnnotatorTest.xml
+++ b/jcore-jpos-ae/src/test/resources/POSTagAnnotatorTest.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JPOSAnnotator</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab</vendor>
    <configurationParameters>
       <configurationParameter>
diff --git a/jcore-jsbd-ae/component.meta b/jcore-jsbd-ae/component.meta
index 5da0bb330..5ab9a4df2 100644
--- a/jcore-jsbd-ae/component.meta
+++ b/jcore-jsbd-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-jsbd-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Sentence Annotator"
 }
diff --git a/jcore-jsbd-ae/pom.xml b/jcore-jsbd-ae/pom.xml
index c23dc7e7c..b0b6524c2 100644
--- a/jcore-jsbd-ae/pom.xml
+++ b/jcore-jsbd-ae/pom.xml
@@ -11,7 +11,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <build>
diff --git a/jcore-jsbd-ae/src/main/resources/de/julielab/jcore/ae/jsbd/desc/jcore-jsbd-ae.xml b/jcore-jsbd-ae/src/main/resources/de/julielab/jcore/ae/jsbd/desc/jcore-jsbd-ae.xml
index 409bda28e..b1293df62 100644
--- a/jcore-jsbd-ae/src/main/resources/de/julielab/jcore/ae/jsbd/desc/jcore-jsbd-ae.xml
+++ b/jcore-jsbd-ae/src/main/resources/de/julielab/jcore/ae/jsbd/desc/jcore-jsbd-ae.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>de.julielab.jcore.ae.jsbd.main.SentenceAnnotator</name>
         <description>Descriptor automatically generated by uimaFIT</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <vendor>de.julielab.jcore.ae.jsbd.main</vendor>
         <configurationParameters>
             <configurationParameter>
diff --git a/jcore-jsbd-ae/src/test/resources/de/julielab/jcore/ae/jsbd/desc/SentenceAnnotatorTest.xml b/jcore-jsbd-ae/src/test/resources/de/julielab/jcore/ae/jsbd/desc/SentenceAnnotatorTest.xml
index 1e1aaa26e..0bcda6a91 100644
--- a/jcore-jsbd-ae/src/test/resources/de/julielab/jcore/ae/jsbd/desc/SentenceAnnotatorTest.xml
+++ b/jcore-jsbd-ae/src/test/resources/de/julielab/jcore/ae/jsbd/desc/SentenceAnnotatorTest.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe Sentence Annotator</name>
     <description>This is the UIMA Wrapper for the JULIE Sentence Boundary Detector.</description>
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-jsbd-ae/src/test/resources/de/julielab/jcore/ae/jsbd/desc/SentenceAnnotator_with-scope_Test.xml b/jcore-jsbd-ae/src/test/resources/de/julielab/jcore/ae/jsbd/desc/SentenceAnnotator_with-scope_Test.xml
index 835faf684..41089e381 100644
--- a/jcore-jsbd-ae/src/test/resources/de/julielab/jcore/ae/jsbd/desc/SentenceAnnotator_with-scope_Test.xml
+++ b/jcore-jsbd-ae/src/test/resources/de/julielab/jcore/ae/jsbd/desc/SentenceAnnotator_with-scope_Test.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe Sentence Annotator</name>
     <description>This is the UIMA Wrapper for the JULIE Sentence Boundary Detector.</description>
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-jsbd-ae/src/test/resources/de/julielab/jcore/ae/jsbd/desc/paragraph-scope-type.xml b/jcore-jsbd-ae/src/test/resources/de/julielab/jcore/ae/jsbd/desc/paragraph-scope-type.xml
index cd826ac73..8b95a7994 100644
--- a/jcore-jsbd-ae/src/test/resources/de/julielab/jcore/ae/jsbd/desc/paragraph-scope-type.xml
+++ b/jcore-jsbd-ae/src/test/resources/de/julielab/jcore/ae/jsbd/desc/paragraph-scope-type.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>test-entity-type.xml</name>
   <description>A mini type system with one type only, used for testing consistency preservation</description>
-  <version>2.6.0-SNAPSHOT</version>
+  <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <imports>
     <import name="de.julielab.jcore.types.jcore-basic-types" />
diff --git a/jcore-jtbd-ae/component.meta b/jcore-jtbd-ae/component.meta
index aa682f5da..0cd1c8929 100644
--- a/jcore-jtbd-ae/component.meta
+++ b/jcore-jtbd-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-jtbd-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Token Annotator"
 }
diff --git a/jcore-jtbd-ae/pom.xml b/jcore-jtbd-ae/pom.xml
index d4a7430a4..e811fa22f 100644
--- a/jcore-jtbd-ae/pom.xml
+++ b/jcore-jtbd-ae/pom.xml
@@ -10,7 +10,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <build>
@@ -89,7 +89,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-utilities</artifactId>
-            <version>${project.parent.version}</version>
+            <version>${jcore-utilities-version}</version>
         </dependency>
         <dependency>
             <groupId>cc.mallet</groupId>
diff --git a/jcore-jtbd-ae/src/main/resources/de/julielab/jcore/ae/jtbd/desc/jcore-jtbd.xml b/jcore-jtbd-ae/src/main/resources/de/julielab/jcore/ae/jtbd/desc/jcore-jtbd.xml
index a207b07d1..3e8e5a5e0 100644
--- a/jcore-jtbd-ae/src/main/resources/de/julielab/jcore/ae/jtbd/desc/jcore-jtbd.xml
+++ b/jcore-jtbd-ae/src/main/resources/de/julielab/jcore/ae/jtbd/desc/jcore-jtbd.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe Token Annotator</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-jtbd-ae/src/test/resources/de/julielab/jcore/ae/jtbd/desc/TokenAnnotatorTest.xml b/jcore-jtbd-ae/src/test/resources/de/julielab/jcore/ae/jtbd/desc/TokenAnnotatorTest.xml
index 083790957..415da5d4c 100644
--- a/jcore-jtbd-ae/src/test/resources/de/julielab/jcore/ae/jtbd/desc/TokenAnnotatorTest.xml
+++ b/jcore-jtbd-ae/src/test/resources/de/julielab/jcore/ae/jtbd/desc/TokenAnnotatorTest.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe Token Annotator</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-julielab-entity-evaluator-consumer/component.meta b/jcore-julielab-entity-evaluator-consumer/component.meta
index dc65ea34a..78d9a4f68 100644
--- a/jcore-julielab-entity-evaluator-consumer/component.meta
+++ b/jcore-julielab-entity-evaluator-consumer/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-julielab-entity-evaluator-consumer",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe JULIE Lab Entity Evaluator Converter"
 }
diff --git a/jcore-julielab-entity-evaluator-consumer/pom.xml b/jcore-julielab-entity-evaluator-consumer/pom.xml
index e0e543814..4b5547be5 100644
--- a/jcore-julielab-entity-evaluator-consumer/pom.xml
+++ b/jcore-julielab-entity-evaluator-consumer/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     <artifactId>jcore-julielab-entity-evaluator-consumer</artifactId>
     <name>JCoRe JULIE Lab Entity Evaluator Converter</name>
@@ -18,7 +18,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>julielab-entity-evaluator</artifactId>
-            <version>1.2.0</version>
+            <version>1.3.0</version>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
diff --git a/jcore-julielab-entity-evaluator-consumer/src/main/resources/de/julielab/jcore/consumer/entityevaluator/desc/jcore-julielab-entity-evaluator-consumer.xml b/jcore-julielab-entity-evaluator-consumer/src/main/resources/de/julielab/jcore/consumer/entityevaluator/desc/jcore-julielab-entity-evaluator-consumer.xml
index 6e95e4205..f46b9c244 100644
--- a/jcore-julielab-entity-evaluator-consumer/src/main/resources/de/julielab/jcore/consumer/entityevaluator/desc/jcore-julielab-entity-evaluator-consumer.xml
+++ b/jcore-julielab-entity-evaluator-consumer/src/main/resources/de/julielab/jcore/consumer/entityevaluator/desc/jcore-julielab-entity-evaluator-consumer.xml
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="UTF-8"?>
+<?xml version='1.0' encoding='UTF-8'?>
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <primitive>true</primitive>
@@ -6,6 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe Entity Evaluator and TSV Consumer</name>
         <description>This component was originally created to output the tab separated format used the JULIE Entity Evaluator. However, this component can be used to create a TSV file from any annotation or annotation set. The component allows to define columns by specifying the annotation type to draw feature values from and a feature path that specifies the location of the desired feature. All feature paths will be applied to each configured annotation, returning null values if an annotation does not exhibit a value for a column's feature path.</description>
+        <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
         <configurationParameters>
             <configurationParameter>
@@ -128,9 +129,9 @@
                 </value>
             </nameValuePair>
         </configurationParameterSettings>
-        <typeSystemDescription/>
-        <fsIndexCollection/>
-        <capabilities/>
+        <typeSystemDescription />
+        <fsIndexCollection />
+        <capabilities />
         <operationalProperties>
             <modifiesCas>true</modifiesCas>
             <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
diff --git a/jcore-likelihood-assignment-ae/component.meta b/jcore-likelihood-assignment-ae/component.meta
index 1055a51ab..f73f0297a 100644
--- a/jcore-likelihood-assignment-ae/component.meta
+++ b/jcore-likelihood-assignment-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-likelihood-assignment-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Likelihood Assignment AE"
 }
diff --git a/jcore-likelihood-assignment-ae/pom.xml b/jcore-likelihood-assignment-ae/pom.xml
index 7876117d8..d28f1775b 100644
--- a/jcore-likelihood-assignment-ae/pom.xml
+++ b/jcore-likelihood-assignment-ae/pom.xml
@@ -10,7 +10,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-likelihood-assignment-ae/src/main/resources/de/julielab/jcore/ae/likelihoodassignment/desc/jcore-likelihood-assignment-ae.xml b/jcore-likelihood-assignment-ae/src/main/resources/de/julielab/jcore/ae/likelihoodassignment/desc/jcore-likelihood-assignment-ae.xml
index b768176d2..2db5339a6 100644
--- a/jcore-likelihood-assignment-ae/src/main/resources/de/julielab/jcore/ae/likelihoodassignment/desc/jcore-likelihood-assignment-ae.xml
+++ b/jcore-likelihood-assignment-ae/src/main/resources/de/julielab/jcore/ae/likelihoodassignment/desc/jcore-likelihood-assignment-ae.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe Likelihood Assignment AE</name>
         <description>Analysis Engine to assign likelihood indicators to their corresponding entities and events.</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <configurationParameters>
             <configurationParameter>
                 <name>AssignmentStrategy</name>
diff --git a/jcore-likelihood-detection-ae/component.meta b/jcore-likelihood-detection-ae/component.meta
index 3f80906be..068a3ab10 100644
--- a/jcore-likelihood-detection-ae/component.meta
+++ b/jcore-likelihood-detection-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-likelihood-detection-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Likelihood Detection AE"
 }
diff --git a/jcore-likelihood-detection-ae/pom.xml b/jcore-likelihood-detection-ae/pom.xml
index 1bee1538d..0fc7e7fff 100644
--- a/jcore-likelihood-detection-ae/pom.xml
+++ b/jcore-likelihood-detection-ae/pom.xml
@@ -10,7 +10,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-likelihood-detection-ae/src/main/resources/de/julielab/jcore/ae/likelihooddetection/desc/jcore-likelihood-detection-ae.xml b/jcore-likelihood-detection-ae/src/main/resources/de/julielab/jcore/ae/likelihooddetection/desc/jcore-likelihood-detection-ae.xml
index bbd5b55bf..9e3a492f4 100644
--- a/jcore-likelihood-detection-ae/src/main/resources/de/julielab/jcore/ae/likelihooddetection/desc/jcore-likelihood-detection-ae.xml
+++ b/jcore-likelihood-detection-ae/src/main/resources/de/julielab/jcore/ae/likelihooddetection/desc/jcore-likelihood-detection-ae.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe Likelihood Detection AE</name>
         <description>Analysis Engine to detect epistemic modal expressions and assign the appropriate likelihood category.</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <configurationParameters>
             <configurationParameter>
                 <name>LikelihoodDict</name>
diff --git a/jcore-line-multiplier/component.meta b/jcore-line-multiplier/component.meta
index 864a9954c..38394f9cd 100644
--- a/jcore-line-multiplier/component.meta
+++ b/jcore-line-multiplier/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-line-multiplier",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Line Multiplier"
 }
diff --git a/jcore-line-multiplier/pom.xml b/jcore-line-multiplier/pom.xml
index 2bd30a4d4..650c68038 100644
--- a/jcore-line-multiplier/pom.xml
+++ b/jcore-line-multiplier/pom.xml
@@ -10,7 +10,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-line-multiplier/src/main/resources/de/julielab/jcore/multiplier/line/desc/jcore-line-multiplier-ae.xml b/jcore-line-multiplier/src/main/resources/de/julielab/jcore/multiplier/line/desc/jcore-line-multiplier-ae.xml
index 524ca369e..f58d9d2ed 100644
--- a/jcore-line-multiplier/src/main/resources/de/julielab/jcore/multiplier/line/desc/jcore-line-multiplier-ae.xml
+++ b/jcore-line-multiplier/src/main/resources/de/julielab/jcore/multiplier/line/desc/jcore-line-multiplier-ae.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe Line Multiplier</name>
     <description>Splits incoming CAS document texts on line breaks and returns one CAS for each non-blank line.</description>
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <configurationParameters>
       <configurationParameter>
         <name>NumberLinesPerCAS</name>
diff --git a/jcore-line-multiplier/target/classes/de/julielab/jcore/multiplier/line/desc/jcore-line-multiplier-ae.xml b/jcore-line-multiplier/target/classes/de/julielab/jcore/multiplier/line/desc/jcore-line-multiplier-ae.xml
index 524ca369e..f58d9d2ed 100644
--- a/jcore-line-multiplier/target/classes/de/julielab/jcore/multiplier/line/desc/jcore-line-multiplier-ae.xml
+++ b/jcore-line-multiplier/target/classes/de/julielab/jcore/multiplier/line/desc/jcore-line-multiplier-ae.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe Line Multiplier</name>
     <description>Splits incoming CAS document texts on line breaks and returns one CAS for each non-blank line.</description>
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <configurationParameters>
       <configurationParameter>
         <name>NumberLinesPerCAS</name>
diff --git a/jcore-lingpipe-porterstemmer-ae/component.meta b/jcore-lingpipe-porterstemmer-ae/component.meta
index af7dce999..843a38e95 100644
--- a/jcore-lingpipe-porterstemmer-ae/component.meta
+++ b/jcore-lingpipe-porterstemmer-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-lingpipe-porterstemmer-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Lingpipe Porter Stemmer AE"
 }
diff --git a/jcore-lingpipe-porterstemmer-ae/pom.xml b/jcore-lingpipe-porterstemmer-ae/pom.xml
index 615c960a0..6cd1f56ca 100644
--- a/jcore-lingpipe-porterstemmer-ae/pom.xml
+++ b/jcore-lingpipe-porterstemmer-ae/pom.xml
@@ -5,7 +5,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     <artifactId>jcore-lingpipe-porterstemmer-ae</artifactId>
     <name>JCoRe Lingpipe Porter Stemmer AE</name>
diff --git a/jcore-lingpipe-porterstemmer-ae/src/main/resources/de/julielab/jcore/ae/lingpipe/porterstemmer/desc/jcore-lingpipe-porterstemmer-ae.xml b/jcore-lingpipe-porterstemmer-ae/src/main/resources/de/julielab/jcore/ae/lingpipe/porterstemmer/desc/jcore-lingpipe-porterstemmer-ae.xml
index d24a10c0d..c432b936e 100644
--- a/jcore-lingpipe-porterstemmer-ae/src/main/resources/de/julielab/jcore/ae/lingpipe/porterstemmer/desc/jcore-lingpipe-porterstemmer-ae.xml
+++ b/jcore-lingpipe-porterstemmer-ae/src/main/resources/de/julielab/jcore/ae/lingpipe/porterstemmer/desc/jcore-lingpipe-porterstemmer-ae.xml
@@ -5,7 +5,7 @@
   <analysisEngineMetaData>
     <name>JCoRe Lingpipe Porterstemmer AE</name>
     <description>Adds a StemmedForm to each token in the CAS. The offsets and the value feature of each StemmedForm are set to the stem as returned by the Porter stemmer algorithm as implemented by Lingpipe.</description>
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab, Germany</vendor>
     <configurationParameters />
     <configurationParameterSettings />
diff --git a/jcore-lingpipegazetteer-ae/component.meta b/jcore-lingpipegazetteer-ae/component.meta
index 4ba7d7658..6b1d1c0bf 100644
--- a/jcore-lingpipegazetteer-ae/component.meta
+++ b/jcore-lingpipegazetteer-ae/component.meta
@@ -18,7 +18,7 @@
     "maven-artifact": {
         "artifactId": "jcore-lingpipe-gazetteer-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Lingpipe Gazetteer AE"
 }
diff --git a/jcore-lingpipegazetteer-ae/pom.xml b/jcore-lingpipegazetteer-ae/pom.xml
index 3941b37bd..3046249fb 100644
--- a/jcore-lingpipegazetteer-ae/pom.xml
+++ b/jcore-lingpipegazetteer-ae/pom.xml
@@ -11,7 +11,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
 
diff --git a/jcore-lingpipegazetteer-ae/src/main/resources/de/julielab/jcore/ae/lingpipegazetteer/desc/jcore-lingpipe-gazetteer-ae-configurable-resource.xml b/jcore-lingpipegazetteer-ae/src/main/resources/de/julielab/jcore/ae/lingpipegazetteer/desc/jcore-lingpipe-gazetteer-ae-configurable-resource.xml
index c070abd9e..6c8aad79c 100644
--- a/jcore-lingpipegazetteer-ae/src/main/resources/de/julielab/jcore/ae/lingpipegazetteer/desc/jcore-lingpipe-gazetteer-ae-configurable-resource.xml
+++ b/jcore-lingpipegazetteer-ae/src/main/resources/de/julielab/jcore/ae/lingpipegazetteer/desc/jcore-lingpipe-gazetteer-ae-configurable-resource.xml
@@ -16,7 +16,7 @@
             embedded into the descriptor. The current parameter settings will work but may be changed. Refer to
             https://github.com/JULIELab/jcore-base/tree/master/jcore-lingpipegazetteer-ae for more information.
         </description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <vendor>julielab</vendor>
         <configurationParameters>
             <configurationParameter>
@@ -51,7 +51,7 @@
         </configurationParameterSettings>
         <typeSystemDescription>
             <imports>
-                <import name="de.julielab.jcore.types.jcore-semantics-mention-types"/>
+                <import name="de.julielab.jcore.types.jcore-semantics-mention-types" />
             </imports>
         </typeSystemDescription>
         <typePriorities />
diff --git a/jcore-lingpipegazetteer-ae/src/main/resources/de/julielab/jcore/ae/lingpipegazetteer/desc/jcore-lingpipe-gazetteer-ae.xml b/jcore-lingpipegazetteer-ae/src/main/resources/de/julielab/jcore/ae/lingpipegazetteer/desc/jcore-lingpipe-gazetteer-ae.xml
index e448c764c..4882f43cb 100644
--- a/jcore-lingpipegazetteer-ae/src/main/resources/de/julielab/jcore/ae/lingpipegazetteer/desc/jcore-lingpipe-gazetteer-ae.xml
+++ b/jcore-lingpipegazetteer-ae/src/main/resources/de/julielab/jcore/ae/lingpipegazetteer/desc/jcore-lingpipe-gazetteer-ae.xml
@@ -14,7 +14,7 @@
             and some parameter settings for dictionary processing and tagging. Refer to
             https://github.com/JULIELab/jcore-base/tree/master/jcore-lingpipegazetteer-ae for more information.
         </description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <vendor>julielab</vendor>
         <configurationParameters>
             <configurationParameter>
diff --git a/jcore-lingpipegazetteer-ae/src/test/resources/ApproxGazetteerAnnotatorTest.xml b/jcore-lingpipegazetteer-ae/src/test/resources/ApproxGazetteerAnnotatorTest.xml
index 9e4cc5a3d..3f778c495 100644
--- a/jcore-lingpipegazetteer-ae/src/test/resources/ApproxGazetteerAnnotatorTest.xml
+++ b/jcore-lingpipegazetteer-ae/src/test/resources/ApproxGazetteerAnnotatorTest.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>GazetteerAnnotator</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>julielab</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-lingpipegazetteer-ae/src/test/resources/ExactGazetteerAnnotatorTest.xml b/jcore-lingpipegazetteer-ae/src/test/resources/ExactGazetteerAnnotatorTest.xml
index 2c6e0779a..c1d7f4f90 100644
--- a/jcore-lingpipegazetteer-ae/src/test/resources/ExactGazetteerAnnotatorTest.xml
+++ b/jcore-lingpipegazetteer-ae/src/test/resources/ExactGazetteerAnnotatorTest.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>GazetteerAnnotator</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>julielab</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-lingscope-ae/component.meta b/jcore-lingscope-ae/component.meta
index 3a73c19a3..c8ad54900 100644
--- a/jcore-lingscope-ae/component.meta
+++ b/jcore-lingscope-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-lingscope-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Lingscope AE"
 }
diff --git a/jcore-lingscope-ae/pom.xml b/jcore-lingscope-ae/pom.xml
index 60cdb8dd6..e73d0386b 100644
--- a/jcore-lingscope-ae/pom.xml
+++ b/jcore-lingscope-ae/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-lingscope-ae/src/main/resources/de/julielab/jcore/ae/lingscope/desc/jcore-lingscope-ae.xml b/jcore-lingscope-ae/src/main/resources/de/julielab/jcore/ae/lingscope/desc/jcore-lingscope-ae.xml
index dea73edd6..8442297cb 100644
--- a/jcore-lingscope-ae/src/main/resources/de/julielab/jcore/ae/lingscope/desc/jcore-lingscope-ae.xml
+++ b/jcore-lingscope-ae/src/main/resources/de/julielab/jcore/ae/lingscope/desc/jcore-lingscope-ae.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe Lingscope AE</name>
         <description>This component uses the Lingscope negation/hedge detection algorithm and models to annotate negation/hedge cues and the scope to which the cues apply.</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <configurationParameters>
             <configurationParameter>
                 <name>CueModel</name>
diff --git a/jcore-linnaeus-species-ae/component.meta b/jcore-linnaeus-species-ae/component.meta
index 8bc1674bb..5484fb351 100644
--- a/jcore-linnaeus-species-ae/component.meta
+++ b/jcore-linnaeus-species-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-linnaeus-species-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Linnaeus Species Tagger"
 }
diff --git a/jcore-linnaeus-species-ae/pom.xml b/jcore-linnaeus-species-ae/pom.xml
index 354a3c751..78e432a3b 100644
--- a/jcore-linnaeus-species-ae/pom.xml
+++ b/jcore-linnaeus-species-ae/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     <artifactId>jcore-linnaeus-species-ae</artifactId>
     <name>JCoRe Linnaeus Species Tagger</name>
diff --git a/jcore-linnaeus-species-ae/src/main/resources/de/julielab/jcore/ae/linnaeus/desc/jcore-linnaeus-ae.xml b/jcore-linnaeus-species-ae/src/main/resources/de/julielab/jcore/ae/linnaeus/desc/jcore-linnaeus-ae.xml
index d3ab9d56b..4668483c6 100644
--- a/jcore-linnaeus-species-ae/src/main/resources/de/julielab/jcore/ae/linnaeus/desc/jcore-linnaeus-ae.xml
+++ b/jcore-linnaeus-species-ae/src/main/resources/de/julielab/jcore/ae/linnaeus/desc/jcore-linnaeus-ae.xml
@@ -5,7 +5,7 @@
     <annotatorImplementationName>de.julielab.jcore.ae.linnaeus.LinnaeusSpeciesAnnotator</annotatorImplementationName>
     <analysisEngineMetaData>
         <name>JCore LINNAEUS Species AE</name>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <configurationParameters />
         <configurationParameterSettings />
         <typeSystemDescription>
diff --git a/jcore-mantra-xml-types/pom.xml b/jcore-mantra-xml-types/pom.xml
index ea6b45d42..d44972ddf 100644
--- a/jcore-mantra-xml-types/pom.xml
+++ b/jcore-mantra-xml-types/pom.xml
@@ -6,7 +6,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     <name>JCoRe Mantra XML Types</name>
     <url>https://github.com/JULIELab/jcore-base/tree/master/jcore-mantra-xml-types</url>
diff --git a/jcore-medxn-ae/component.meta b/jcore-medxn-ae/component.meta
index c1c026762..1abfe0206 100644
--- a/jcore-medxn-ae/component.meta
+++ b/jcore-medxn-ae/component.meta
@@ -22,7 +22,7 @@
     "maven-artifact": {
         "artifactId": "jcore-medxn-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe MedXN"
 }
diff --git a/jcore-medxn-ae/pom.xml b/jcore-medxn-ae/pom.xml
index 0eaff3697..28ff0577c 100644
--- a/jcore-medxn-ae/pom.xml
+++ b/jcore-medxn-ae/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     <artifactId>jcore-medxn-ae</artifactId>
     <name>JCoRe MedXN</name>
diff --git a/jcore-medxn-ae/src/main/resources/de/julielab/jcore/ae/medxn/desc/MedNormAE.xml b/jcore-medxn-ae/src/main/resources/de/julielab/jcore/ae/medxn/desc/MedNormAE.xml
index be6bb7375..c96350f68 100644
--- a/jcore-medxn-ae/src/main/resources/de/julielab/jcore/ae/medxn/desc/MedNormAE.xml
+++ b/jcore-medxn-ae/src/main/resources/de/julielab/jcore/ae/medxn/desc/MedNormAE.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>de.julielab.jcore.medxn.ae.desc.MedNormAE</name>
     <description>make a normalized medication description based on  RxNorm standard </description>
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor />
     <configurationParameters />
     <configurationParameterSettings />
diff --git a/jcore-medxn-ae/src/main/resources/de/julielab/jcore/ae/medxn/desc/jcore-medxn-ae-attributes-german.xml b/jcore-medxn-ae/src/main/resources/de/julielab/jcore/ae/medxn/desc/jcore-medxn-ae-attributes-german.xml
index 199f1607d..54ca2cf90 100644
--- a/jcore-medxn-ae/src/main/resources/de/julielab/jcore/ae/medxn/desc/jcore-medxn-ae-attributes-german.xml
+++ b/jcore-medxn-ae/src/main/resources/de/julielab/jcore/ae/medxn/desc/jcore-medxn-ae-attributes-german.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>de.julielab.jcore.medxn.ae.desc.MedAttrAE</name>
     <description>medication attribute tagger using regEx</description>
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor />
     <configurationParameters searchStrategy="language_fallback" />
     <configurationParameterSettings />
diff --git a/jcore-medxn-ae/src/main/resources/de/julielab/jcore/ae/medxn/desc/jcore-medxn-ae-extractor-german.xml b/jcore-medxn-ae/src/main/resources/de/julielab/jcore/ae/medxn/desc/jcore-medxn-ae-extractor-german.xml
index 25468e126..282491298 100644
--- a/jcore-medxn-ae/src/main/resources/de/julielab/jcore/ae/medxn/desc/jcore-medxn-ae-extractor-german.xml
+++ b/jcore-medxn-ae/src/main/resources/de/julielab/jcore/ae/medxn/desc/jcore-medxn-ae-extractor-german.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>de.julielab.jcore.medxn.ae.desc.jcore-medxn-ae-extractor-german</name>
     <description>Associate medication and the corresponding attributes</description>
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor />
     <configurationParameters />
     <configurationParameterSettings />
diff --git a/jcore-mmax2-reader/component.meta b/jcore-mmax2-reader/component.meta
index d0e5293fb..e1e8462db 100644
--- a/jcore-mmax2-reader/component.meta
+++ b/jcore-mmax2-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-mmax2-reader",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
-    "name": "JCoRe MMAX2 reader"
+    "name": "JCoRe MMAX2 reader."
 }
diff --git a/jcore-mmax2-reader/pom.xml b/jcore-mmax2-reader/pom.xml
index 39f6d714e..30e662ad4 100644
--- a/jcore-mmax2-reader/pom.xml
+++ b/jcore-mmax2-reader/pom.xml
@@ -10,10 +10,10 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
-    <version>2.6.0-SNAPSHOT</version>
+    
 
     <dependencies>
         <dependency>
@@ -37,7 +37,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>julielab-mmax-to-iob-iexml-converter</artifactId>
-            <version>1.0.2-SNAPSHOT</version>
+            <version></version>
         </dependency>
         <dependency>
             <groupId>org.apache.commons</groupId>
diff --git a/jcore-mmax2-reader/src/main/resources/de/julielab/jcore/cr/mmax2/desc/jcore-mmax2-reader.xml b/jcore-mmax2-reader/src/main/resources/de/julielab/jcore/cr/mmax2/desc/jcore-mmax2-reader.xml
index 6d5978b54..b25622530 100644
--- a/jcore-mmax2-reader/src/main/resources/de/julielab/jcore/cr/mmax2/desc/jcore-mmax2-reader.xml
+++ b/jcore-mmax2-reader/src/main/resources/de/julielab/jcore/cr/mmax2/desc/jcore-mmax2-reader.xml
@@ -1,11 +1,12 @@
-<?xml version="1.0" encoding="UTF-8"?>
+<?xml version='1.0' encoding='UTF-8'?>
 <collectionReaderDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
   <implementationName>de.julielab.jcore.cr.mmax2.MMAX2Reader</implementationName>
   <processingResourceMetaData>
     <name>JCoRe MMAX2 reader</name>
     <description>Collection reader for MMAX2 annotation projects.</description>
-    <vendor>JULIE Lab Jena, Germany</vendor>
+    <version>2.6.0</version>
+        <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
         <name>RemoveOverlappingShorterAnnotations</name>
@@ -53,12 +54,12 @@
     </configurationParameterSettings>
     <typeSystemDescription>
       <imports>
-        <import name="de.julielab.jcore.types.jcore-morpho-syntax-types"/>
-        <import name="de.julielab.jcore.types.jcore-document-meta-types"/>
+        <import name="de.julielab.jcore.types.jcore-morpho-syntax-types" />
+        <import name="de.julielab.jcore.types.jcore-document-meta-types" />
       </imports>
     </typeSystemDescription>
-    <fsIndexCollection/>
-    <capabilities/>
+    <fsIndexCollection />
+    <capabilities />
     <operationalProperties>
       <modifiesCas>true</modifiesCas>
       <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
diff --git a/jcore-msdoc-reader/component.meta b/jcore-msdoc-reader/component.meta
index eac523555..33c76c42e 100644
--- a/jcore-msdoc-reader/component.meta
+++ b/jcore-msdoc-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-msdoc-reader",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe MSdoc Reader"
 }
diff --git a/jcore-msdoc-reader/pom.xml b/jcore-msdoc-reader/pom.xml
index cf462d0cd..abed82145 100644
--- a/jcore-msdoc-reader/pom.xml
+++ b/jcore-msdoc-reader/pom.xml
@@ -5,7 +5,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     <artifactId>jcore-msdoc-reader</artifactId>
     <name>JCoRe MSdoc Reader</name>
diff --git a/jcore-msdoc-reader/src/main/resources/de/julielab/jcore/reader/msdoc/desc/jcore-msdoc-reader.xml b/jcore-msdoc-reader/src/main/resources/de/julielab/jcore/reader/msdoc/desc/jcore-msdoc-reader.xml
index 146d1f488..992334132 100644
--- a/jcore-msdoc-reader/src/main/resources/de/julielab/jcore/reader/msdoc/desc/jcore-msdoc-reader.xml
+++ b/jcore-msdoc-reader/src/main/resources/de/julielab/jcore/reader/msdoc/desc/jcore-msdoc-reader.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>JCoRe MSdoc Reader</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIELab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-mstparser-ae/component.meta b/jcore-mstparser-ae/component.meta
index ba2e43335..906bf56e4 100644
--- a/jcore-mstparser-ae/component.meta
+++ b/jcore-mstparser-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-mstparser-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe MST Parser AE"
 }
diff --git a/jcore-mstparser-ae/pom.xml b/jcore-mstparser-ae/pom.xml
index 08d948e99..eec1d63d5 100644
--- a/jcore-mstparser-ae/pom.xml
+++ b/jcore-mstparser-ae/pom.xml
@@ -54,7 +54,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <relativePath>..</relativePath>
     </parent>
     <dependencies>
diff --git a/jcore-mstparser-ae/src/main/resources/de/julielab/jcore/ae/mstparser/desc/jcore-mstparser.xml b/jcore-mstparser-ae/src/main/resources/de/julielab/jcore/ae/mstparser/desc/jcore-mstparser.xml
index 36ef089e1..30da736ac 100644
--- a/jcore-mstparser-ae/src/main/resources/de/julielab/jcore/ae/mstparser/desc/jcore-mstparser.xml
+++ b/jcore-mstparser-ae/src/main/resources/de/julielab/jcore/ae/mstparser/desc/jcore-mstparser.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe MST Parser Annotator</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-mstparser-ae/src/test/resources/de/julielab/jcore/ae/mstparser/desc/MSTParserDescriptorTest.xml b/jcore-mstparser-ae/src/test/resources/de/julielab/jcore/ae/mstparser/desc/MSTParserDescriptorTest.xml
index 9442a4955..9f66c4074 100644
--- a/jcore-mstparser-ae/src/test/resources/de/julielab/jcore/ae/mstparser/desc/MSTParserDescriptorTest.xml
+++ b/jcore-mstparser-ae/src/test/resources/de/julielab/jcore/ae/mstparser/desc/MSTParserDescriptorTest.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe MST Parser Annotator</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-muc7-reader/component.meta b/jcore-muc7-reader/component.meta
index 7e16b6b2c..ae898f70c 100644
--- a/jcore-muc7-reader/component.meta
+++ b/jcore-muc7-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-muc7-reader",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe MUC7 Reader"
 }
diff --git a/jcore-muc7-reader/pom.xml b/jcore-muc7-reader/pom.xml
index b06e1cbbf..9fbc80750 100644
--- a/jcore-muc7-reader/pom.xml
+++ b/jcore-muc7-reader/pom.xml
@@ -13,7 +13,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     <dependencies>
         <dependency>
diff --git a/jcore-muc7-reader/src/main/resources/de/julielab/jcore/reader/muc7/desc/jcore-muc7-reader.xml b/jcore-muc7-reader/src/main/resources/de/julielab/jcore/reader/muc7/desc/jcore-muc7-reader.xml
index be43fa1c1..deecc0e59 100644
--- a/jcore-muc7-reader/src/main/resources/de/julielab/jcore/reader/muc7/desc/jcore-muc7-reader.xml
+++ b/jcore-muc7-reader/src/main/resources/de/julielab/jcore/reader/muc7/desc/jcore-muc7-reader.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>JCoRe MUC7 Reader</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-muc7-reader/src/test/resources/de/julielab/jcore/reader/muc7/desc/jcore-muc7-reader.xml b/jcore-muc7-reader/src/test/resources/de/julielab/jcore/reader/muc7/desc/jcore-muc7-reader.xml
index e089a5ab2..d3359b86a 100644
--- a/jcore-muc7-reader/src/test/resources/de/julielab/jcore/reader/muc7/desc/jcore-muc7-reader.xml
+++ b/jcore-muc7-reader/src/test/resources/de/julielab/jcore/reader/muc7/desc/jcore-muc7-reader.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>JCoRe MUC7 Reader</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-mutationfinder-ae/component.meta b/jcore-mutationfinder-ae/component.meta
index 6a13f809a..a72f76a2c 100644
--- a/jcore-mutationfinder-ae/component.meta
+++ b/jcore-mutationfinder-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-mutationfinder-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Mutation Finder AE"
 }
diff --git a/jcore-mutationfinder-ae/pom.xml b/jcore-mutationfinder-ae/pom.xml
index b6d707627..bf2fc7bd4 100644
--- a/jcore-mutationfinder-ae/pom.xml
+++ b/jcore-mutationfinder-ae/pom.xml
@@ -5,7 +5,7 @@
     <parent>
         <artifactId>jcore-base</artifactId>
         <groupId>de.julielab</groupId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     <modelVersion>4.0.0</modelVersion>
     <name>JCoRe Mutation Finder AE</name>
diff --git a/jcore-mutationfinder-ae/src/main/resources/de/julielab/jcore/ae/mutationfinder/desc/jcore-mutationfinder-ae.xml b/jcore-mutationfinder-ae/src/main/resources/de/julielab/jcore/ae/mutationfinder/desc/jcore-mutationfinder-ae.xml
index 4bde7de35..7cf388c99 100644
--- a/jcore-mutationfinder-ae/src/main/resources/de/julielab/jcore/ae/mutationfinder/desc/jcore-mutationfinder-ae.xml
+++ b/jcore-mutationfinder-ae/src/main/resources/de/julielab/jcore/ae/mutationfinder/desc/jcore-mutationfinder-ae.xml
@@ -7,7 +7,7 @@
         <name>JCoRe Mutation Annotator</name>
         <description>An analysis engine to recognize mentions of gene point mutations in document text. This is a wrapper around the original MutationFinder (http://mutationfinder.sourceforge.net/), published in the following paper: MutationFinder: A high-performance system for extracting point mutation mentions from text
 J. Gregory Caporaso, William A. Baumgartner Jr., David A. Randolph, K. Bretonnel Cohen, and Lawrence Hunter; Bioinformatics, 2007 23(14):1862-1865; doi:10.1093/bioinformatics/btm235;</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <vendor>JULIE Lab, Germany</vendor>
         <configurationParameters />
         <configurationParameterSettings />
diff --git a/jcore-neo4j-relations-consumer/component.meta b/jcore-neo4j-relations-consumer/component.meta
index e988fe410..43cb60101 100644
--- a/jcore-neo4j-relations-consumer/component.meta
+++ b/jcore-neo4j-relations-consumer/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-neo4j-relations-consumer",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Neo4j Relations Consumer"
 }
diff --git a/jcore-neo4j-relations-consumer/pom.xml b/jcore-neo4j-relations-consumer/pom.xml
index 8dbc00784..670b0449c 100644
--- a/jcore-neo4j-relations-consumer/pom.xml
+++ b/jcore-neo4j-relations-consumer/pom.xml
@@ -10,7 +10,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
 
@@ -37,7 +37,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-db-checkpoint-ae</artifactId>
-            <version>2.6.0-SNAPSHOT</version>
+            <version>2.6.0</version>
         </dependency>
         <dependency>
             <groupId>org.neo4j.test</groupId>
@@ -48,12 +48,12 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>julielab-neo4j-plugins-concepts-representation</artifactId>
-            <version>3.1.0-SNAPSHOT</version>
+            <version>3.0.1</version>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>julielab-neo4j-plugins-concepts</artifactId>
-            <version>3.1.0-SNAPSHOT</version>
+            <version>3.0.1</version>
             <scope>test</scope>
         </dependency>
         <dependency>
diff --git a/jcore-neo4j-relations-consumer/src/main/resources/de/julielab/jcore/consumer/neo4jrelations/desc/jcore-neo4j-relations-consumer.xml b/jcore-neo4j-relations-consumer/src/main/resources/de/julielab/jcore/consumer/neo4jrelations/desc/jcore-neo4j-relations-consumer.xml
index bebaa2135..4e1449c27 100644
--- a/jcore-neo4j-relations-consumer/src/main/resources/de/julielab/jcore/consumer/neo4jrelations/desc/jcore-neo4j-relations-consumer.xml
+++ b/jcore-neo4j-relations-consumer/src/main/resources/de/julielab/jcore/consumer/neo4jrelations/desc/jcore-neo4j-relations-consumer.xml
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="UTF-8"?>
+<?xml version='1.0' encoding='UTF-8'?>
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <primitive>true</primitive>
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe Neo4j Relations Consumer</name>
         <description>This component assumes that a Neo4j server with an installed julieliab-neo4j-plugins-concepts plugin installed. It then sends FlattenedRelation instances with more then one arguments to Neo4j. Note that this requires the event arguments to have a ResourceEntry list to obtain database concept IDs from.</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <vendor>JULIE Lab, Germany</vendor>
         <copyright>JULIE Lab</copyright>
         <configurationParameters>
@@ -63,20 +63,20 @@
         </configurationParameterSettings>
         <typeSystemDescription>
             <imports>
-                <import name="de.julielab.jcore.types.jcore-semantics-biology-types"/>
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types"/>
+                <import name="de.julielab.jcore.types.jcore-semantics-biology-types" />
+                <import name="de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types" />
                 <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types" />
-                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types" />
             </imports>
         </typeSystemDescription>
-        <fsIndexCollection/>
+        <fsIndexCollection />
         <capabilities>
             <capability>
                 <inputs>
                     <type>de.julielab.jcore.types.EventMention</type>
                 </inputs>
-                <outputs/>
-                <languagesSupported/>
+                <outputs />
+                <languagesSupported />
             </capability>
         </capabilities>
         <operationalProperties>
diff --git a/jcore-nlmgene-reader/component.meta b/jcore-nlmgene-reader/component.meta
index cca571781..57b636559 100644
--- a/jcore-nlmgene-reader/component.meta
+++ b/jcore-nlmgene-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-nlmgene-reader",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe NLM-Gene Reader"
 }
diff --git a/jcore-nlmgene-reader/pom.xml b/jcore-nlmgene-reader/pom.xml
index ba1cf2294..ce98b1697 100644
--- a/jcore-nlmgene-reader/pom.xml
+++ b/jcore-nlmgene-reader/pom.xml
@@ -10,10 +10,10 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
-    <version>2.6.0-SNAPSHOT</version>
+    
 
     <dependencies>
         <dependency>
diff --git a/jcore-nlmgene-reader/src/main/resources/de/julielab/jcore/reader/nlmgene/desc/jcore-nlmgene-reader.xml b/jcore-nlmgene-reader/src/main/resources/de/julielab/jcore/reader/nlmgene/desc/jcore-nlmgene-reader.xml
index 3f8940a5a..9bf087ce7 100644
--- a/jcore-nlmgene-reader/src/main/resources/de/julielab/jcore/reader/nlmgene/desc/jcore-nlmgene-reader.xml
+++ b/jcore-nlmgene-reader/src/main/resources/de/julielab/jcore/reader/nlmgene/desc/jcore-nlmgene-reader.xml
@@ -1,11 +1,12 @@
-<?xml version="1.0" encoding="UTF-8"?>
+<?xml version='1.0' encoding='UTF-8'?>
 <collectionReaderDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
   <implementationName>de.julielab.jcore.reader.nlmgene.NLMGeneReader</implementationName>
   <processingResourceMetaData>
     <name>JCoRe NLM-Gene Reader</name>
     <description>Collection reader for the BioC format of the NLM-Gene corpus.</description>
-    <vendor>JULIE Lab Jena, Germany</vendor>
+    <version>2.6.0</version>
+        <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
         <name>InputDirectory</name>
@@ -22,23 +23,23 @@
         <mandatory>false</mandatory>
       </configurationParameter>
     </configurationParameters>
-    <configurationParameterSettings/>
+    <configurationParameterSettings />
     <typeSystemDescription>
       <imports>
-        <import name="de.julielab.jcore.types.jcore-semantics-biology-types"/>
-        <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types"/>
-        <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
+        <import name="de.julielab.jcore.types.jcore-semantics-biology-types" />
+        <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types" />
+        <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types" />
       </imports>
     </typeSystemDescription>
-    <fsIndexCollection/>
+    <fsIndexCollection />
     <capabilities>
       <capability>
-        <inputs/>
+        <inputs />
         <outputs>
           <type>de.julielab.jcore.types.Gene</type>
           <type>de.julielab.jcore.types.ResourceEntry</type>
         </outputs>
-        <languagesSupported/>
+        <languagesSupported />
       </capability>
     </capabilities>
     <operationalProperties>
diff --git a/jcore-opennlp-chunk-ae/component.meta b/jcore-opennlp-chunk-ae/component.meta
index 5254bb51d..472b579c9 100644
--- a/jcore-opennlp-chunk-ae/component.meta
+++ b/jcore-opennlp-chunk-ae/component.meta
@@ -9,7 +9,7 @@
     "maven-artifact": {
         "artifactId": "jcore-opennlp-chunk-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Open NLP Chunker"
 }
diff --git a/jcore-opennlp-chunk-ae/pom.xml b/jcore-opennlp-chunk-ae/pom.xml
index c89174100..6ef500507 100644
--- a/jcore-opennlp-chunk-ae/pom.xml
+++ b/jcore-opennlp-chunk-ae/pom.xml
@@ -14,7 +14,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     <dependencies>
         <dependency>
diff --git a/jcore-opennlp-chunk-ae/src/test/resources/ChunkAnnotatorTest.xml b/jcore-opennlp-chunk-ae/src/test/resources/ChunkAnnotatorTest.xml
index dc8612e2d..a3e373d28 100644
--- a/jcore-opennlp-chunk-ae/src/test/resources/ChunkAnnotatorTest.xml
+++ b/jcore-opennlp-chunk-ae/src/test/resources/ChunkAnnotatorTest.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>jcore-openlp-chunk-ae</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>julielab</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-opennlp-chunk-ae/src/test/resources/ChunkAnnotatorTestDefaultMappings.xml b/jcore-opennlp-chunk-ae/src/test/resources/ChunkAnnotatorTestDefaultMappings.xml
index 9f2a29ee1..d4281c635 100644
--- a/jcore-opennlp-chunk-ae/src/test/resources/ChunkAnnotatorTestDefaultMappings.xml
+++ b/jcore-opennlp-chunk-ae/src/test/resources/ChunkAnnotatorTestDefaultMappings.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>jcore-openlp-chunk-ae</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>julielab</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-opennlp-parser-ae/component.meta b/jcore-opennlp-parser-ae/component.meta
index c73a0bec7..cdb9e0e2f 100644
--- a/jcore-opennlp-parser-ae/component.meta
+++ b/jcore-opennlp-parser-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-opennlp-parser-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe OpenNLP Constituency Parser"
 }
diff --git a/jcore-opennlp-parser-ae/pom.xml b/jcore-opennlp-parser-ae/pom.xml
index 07da362a9..18be8ac33 100644
--- a/jcore-opennlp-parser-ae/pom.xml
+++ b/jcore-opennlp-parser-ae/pom.xml
@@ -12,7 +12,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     <build>
         <plugins>
diff --git a/jcore-opennlp-parser-ae/src/main/resources/de/julielab/jcore/ae/opennlpparser/desc/jcore-opennlpparser.xml b/jcore-opennlp-parser-ae/src/main/resources/de/julielab/jcore/ae/opennlpparser/desc/jcore-opennlpparser.xml
index afa247920..e98a8e50f 100644
--- a/jcore-opennlp-parser-ae/src/main/resources/de/julielab/jcore/ae/opennlpparser/desc/jcore-opennlpparser.xml
+++ b/jcore-opennlp-parser-ae/src/main/resources/de/julielab/jcore/ae/opennlpparser/desc/jcore-opennlpparser.xml
@@ -6,7 +6,7 @@
 <analysisEngineMetaData>
 <name>JCoRe OpenNLP Constituency Parser AE</name>
 <description />
-<version>2.6.0-SNAPSHOT</version>
+<version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
 <configurationParameters>
 <configurationParameter>
diff --git a/jcore-opennlp-parser-ae/src/test/resources/de/julielab/jcore/ae/opennlpparser/desc/jcore-opennlpparser-test.xml b/jcore-opennlp-parser-ae/src/test/resources/de/julielab/jcore/ae/opennlpparser/desc/jcore-opennlpparser-test.xml
index 60e9d9e45..cacd88573 100644
--- a/jcore-opennlp-parser-ae/src/test/resources/de/julielab/jcore/ae/opennlpparser/desc/jcore-opennlpparser-test.xml
+++ b/jcore-opennlp-parser-ae/src/test/resources/de/julielab/jcore/ae/opennlpparser/desc/jcore-opennlpparser-test.xml
@@ -6,7 +6,7 @@
 <analysisEngineMetaData>
 <name>JCoRe OpenNLP Parser Test</name>
 <description />
-<version>2.6.0-SNAPSHOT</version>
+<version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
 <configurationParameters>
 <configurationParameter>
diff --git a/jcore-opennlp-postag-ae/component.meta b/jcore-opennlp-postag-ae/component.meta
index 4996af5fb..792b8ced1 100644
--- a/jcore-opennlp-postag-ae/component.meta
+++ b/jcore-opennlp-postag-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-opennlp-postag-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe OpenNLP POS Tagger"
 }
diff --git a/jcore-opennlp-postag-ae/pom.xml b/jcore-opennlp-postag-ae/pom.xml
index 77ca254eb..d59112f75 100644
--- a/jcore-opennlp-postag-ae/pom.xml
+++ b/jcore-opennlp-postag-ae/pom.xml
@@ -12,7 +12,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     <build>
         <pluginManagement>
diff --git a/jcore-opennlp-postag-ae/src/main/resources/de/julielab/jcore/ae/opennlppostag/desc/jcore-opennlppostag.xml b/jcore-opennlp-postag-ae/src/main/resources/de/julielab/jcore/ae/opennlppostag/desc/jcore-opennlppostag.xml
index ce2228cca..b7927c192 100644
--- a/jcore-opennlp-postag-ae/src/main/resources/de/julielab/jcore/ae/opennlppostag/desc/jcore-opennlppostag.xml
+++ b/jcore-opennlp-postag-ae/src/main/resources/de/julielab/jcore/ae/opennlppostag/desc/jcore-opennlppostag.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>JCoRe OpenNLP POS Tagger</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-opennlp-postag-ae/src/test/resources/PosTagAnnotatorTest.xml b/jcore-opennlp-postag-ae/src/test/resources/PosTagAnnotatorTest.xml
index ff5e2768a..c40b894e7 100644
--- a/jcore-opennlp-postag-ae/src/test/resources/PosTagAnnotatorTest.xml
+++ b/jcore-opennlp-postag-ae/src/test/resources/PosTagAnnotatorTest.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>jcore-opennlp-postag-ae</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-opennlp-sentence-ae/component.meta b/jcore-opennlp-sentence-ae/component.meta
index 7980c80b1..15519490d 100644
--- a/jcore-opennlp-sentence-ae/component.meta
+++ b/jcore-opennlp-sentence-ae/component.meta
@@ -9,7 +9,7 @@
     "maven-artifact": {
         "artifactId": "jcore-opennlp-sentence-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe OpenNLP Sentence Splitter"
 }
diff --git a/jcore-opennlp-sentence-ae/pom.xml b/jcore-opennlp-sentence-ae/pom.xml
index d1c2cd2c3..bfbbabdf0 100644
--- a/jcore-opennlp-sentence-ae/pom.xml
+++ b/jcore-opennlp-sentence-ae/pom.xml
@@ -12,7 +12,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-opennlp-sentence-ae/src/test/resources/SentenceAnnotatorTest.xml b/jcore-opennlp-sentence-ae/src/test/resources/SentenceAnnotatorTest.xml
index 249d1030e..49bd07b2d 100644
--- a/jcore-opennlp-sentence-ae/src/test/resources/SentenceAnnotatorTest.xml
+++ b/jcore-opennlp-sentence-ae/src/test/resources/SentenceAnnotatorTest.xml
@@ -6,7 +6,7 @@
 <analysisEngineMetaData>
 <name>jcore-opennlp-sentence-ae</name>
 <description>sentence splitter based on opennlp</description>
-<version>2.6.0-SNAPSHOT</version>
+<version>2.6.0</version>
         <vendor />
 <configurationParameters>
 <configurationParameter>
diff --git a/jcore-opennlp-token-ae/component.meta b/jcore-opennlp-token-ae/component.meta
index f394a600a..02ee26d5d 100644
--- a/jcore-opennlp-token-ae/component.meta
+++ b/jcore-opennlp-token-ae/component.meta
@@ -9,7 +9,7 @@
     "maven-artifact": {
         "artifactId": "jcore-opennlp-token-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe OpenNLP Tokenizer"
 }
diff --git a/jcore-opennlp-token-ae/desc/TokenAnnotator.xml b/jcore-opennlp-token-ae/desc/TokenAnnotator.xml
index ea840ac0c..459b6dac9 100644
--- a/jcore-opennlp-token-ae/desc/TokenAnnotator.xml
+++ b/jcore-opennlp-token-ae/desc/TokenAnnotator.xml
@@ -6,7 +6,7 @@
 <analysisEngineMetaData>
 <name>jcore-opennlp-token-ae</name>
 <description />
-<version>2.6.0-SNAPSHOT</version>
+<version>2.6.0</version>
         <vendor />
 <configurationParameters>
 <configurationParameter>
diff --git a/jcore-opennlp-token-ae/pom.xml b/jcore-opennlp-token-ae/pom.xml
index 419b52446..5343425aa 100644
--- a/jcore-opennlp-token-ae/pom.xml
+++ b/jcore-opennlp-token-ae/pom.xml
@@ -12,7 +12,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     <dependencies>
         <dependency>
diff --git a/jcore-opennlp-token-ae/src/test/resources/TokenAnnotatorTest.xml b/jcore-opennlp-token-ae/src/test/resources/TokenAnnotatorTest.xml
index 749f145df..490c73e69 100644
--- a/jcore-opennlp-token-ae/src/test/resources/TokenAnnotatorTest.xml
+++ b/jcore-opennlp-token-ae/src/test/resources/TokenAnnotatorTest.xml
@@ -6,7 +6,7 @@
 <analysisEngineMetaData>
 <name>jcore-opennlp-token-ae</name>
 <description />
-<version>2.6.0-SNAPSHOT</version>
+<version>2.6.0</version>
         <vendor />
 <configurationParameters>
 <configurationParameter>
diff --git a/jcore-pmc-db-reader/component.meta b/jcore-pmc-db-reader/component.meta
index 667465029..3cb02b74f 100644
--- a/jcore-pmc-db-reader/component.meta
+++ b/jcore-pmc-db-reader/component.meta
@@ -1,7 +1,7 @@
 {
     "categories": [
-        "reader",
-        "multiplier"
+        "multiplier",
+        "reader"
     ],
     "description": "JeDIS database reader for PMC base documents.",
     "descriptors": [
@@ -19,7 +19,7 @@
     "maven-artifact": {
         "artifactId": "jcore-pmc-db-reader",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe PubMed Central DB Reader"
 }
diff --git a/jcore-pmc-db-reader/pom.xml b/jcore-pmc-db-reader/pom.xml
index 5efb1a8b2..65e909de5 100644
--- a/jcore-pmc-db-reader/pom.xml
+++ b/jcore-pmc-db-reader/pom.xml
@@ -10,7 +10,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jedis-parent</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <relativePath>../jedis-parent</relativePath>
     </parent>
 
@@ -31,12 +31,12 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-db-reader</artifactId>
-            <version>2.6.0-SNAPSHOT</version>
+            <version>2.6.0</version>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-pmc-reader</artifactId>
-            <version>${project.parent.version}</version>
+            <version>2.6.0</version>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
diff --git a/jcore-pmc-db-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-db-multiplier-reader.xml b/jcore-pmc-db-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-db-multiplier-reader.xml
index 6bfd2a7c3..66c46729a 100644
--- a/jcore-pmc-db-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-db-multiplier-reader.xml
+++ b/jcore-pmc-db-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-db-multiplier-reader.xml
@@ -10,7 +10,7 @@
             sent by this reader. The component leverages the corpus storage system (CoStoSys) for this purpose and is
             part of the Jena Document Information System, JeDIS.
         </description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
         <configurationParameters>
             <configurationParameter>
diff --git a/jcore-pmc-db-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-db-multiplier.xml b/jcore-pmc-db-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-db-multiplier.xml
index 324fdf489..c9f9ca13d 100644
--- a/jcore-pmc-db-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-db-multiplier.xml
+++ b/jcore-pmc-db-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-db-multiplier.xml
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="UTF-8"?>
+<?xml version='1.0' encoding='UTF-8'?>
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <primitive>true</primitive>
@@ -6,6 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe PMC Database Multiplier</name>
         <description>A multiplier that receives document IDs to read from a database table from the DBMultiplierReader. The reader also delivers the path to the corpus storage system (CoStoSys) configuration and additional tables for joining with the main data table. This multiplier class is abstract and cannot be used directly.Extending classes must implement the next() method to actually read documents from the database and populate CASes with them. This component is a part of the Jena Document Information System, JeDIS.</description>
+        <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
         <copyright>JULIE Lab Jena, Germany</copyright>
         <configurationParameters>
@@ -67,8 +68,8 @@
                 <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types" />
             </imports>
         </typeSystemDescription>
-        <fsIndexCollection/>
-        <capabilities/>
+        <fsIndexCollection />
+        <capabilities />
         <operationalProperties>
             <modifiesCas>true</modifiesCas>
             <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
diff --git a/jcore-pmc-reader/component.meta b/jcore-pmc-reader/component.meta
index 6cfbb0efc..9f322da43 100644
--- a/jcore-pmc-reader/component.meta
+++ b/jcore-pmc-reader/component.meta
@@ -23,7 +23,7 @@
     "maven-artifact": {
         "artifactId": "jcore-pmc-reader",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe PubMed Central Reader"
 }
diff --git a/jcore-pmc-reader/pom.xml b/jcore-pmc-reader/pom.xml
index 1db051d3c..a8369ce37 100644
--- a/jcore-pmc-reader/pom.xml
+++ b/jcore-pmc-reader/pom.xml
@@ -10,7 +10,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-pmc-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-multiplier.xml b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-multiplier.xml
index aafb02d0a..c8dbc0610 100644
--- a/jcore-pmc-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-multiplier.xml
+++ b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/multiplier/pmc/desc/jcore-pmc-multiplier.xml
@@ -8,7 +8,7 @@
         <description>This multiplier expect to receive URIs to NXML documents in the form of JCoReURI feature
             structures. All JCoReURI FS in the annotation indexes are read and output as new CASes.
         </description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <configurationParameters />
         <configurationParameterSettings />
         <typeSystemDescription>
diff --git a/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/desc/jcore-pmc-multiplier-reader.xml b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/desc/jcore-pmc-multiplier-reader.xml
index 5f1655fc7..81a3f80d4 100644
--- a/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/desc/jcore-pmc-multiplier-reader.xml
+++ b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/desc/jcore-pmc-multiplier-reader.xml
@@ -5,7 +5,7 @@
     <processingResourceMetaData>
         <name>JCoRe Pubmed Central Multiplier Reader</name>
         <description>Reads a directory of NXML files, possibly assembled into ZIP archives. Requires the Pubmed Central Multiplier to follow in the pipeline. This reader only sends URIs referencing the NXML files to the multiplier that then does the parsing.</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <configurationParameters>
             <configurationParameter>
                 <name>SendCasToLast</name>
diff --git a/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/desc/jcore-pmc-reader.xml b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/desc/jcore-pmc-reader.xml
index 478806bfb..6df46fc6d 100644
--- a/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/desc/jcore-pmc-reader.xml
+++ b/jcore-pmc-reader/src/main/resources/de/julielab/jcore/reader/pmc/desc/jcore-pmc-reader.xml
@@ -5,7 +5,7 @@
     <processingResourceMetaData>
         <name>JCoRe Pubmed Central Reader</name>
         <description>Reads Pubmed Central documents from the NXML format</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <configurationParameters>
             <configurationParameter>
                 <name>Input</name>
diff --git a/jcore-ppd-writer/component.meta b/jcore-ppd-writer/component.meta
index 9264bc68e..457dfc7a3 100644
--- a/jcore-ppd-writer/component.meta
+++ b/jcore-ppd-writer/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-ppd-writer",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Piped Format Writer"
 }
diff --git a/jcore-ppd-writer/pom.xml b/jcore-ppd-writer/pom.xml
index c5dc78e43..e5649259e 100644
--- a/jcore-ppd-writer/pom.xml
+++ b/jcore-ppd-writer/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-ppd-writer/src/main/resources/de/julielab/jcore/consumer/ppd/desc/jcore-ppd-writer.xml b/jcore-ppd-writer/src/main/resources/de/julielab/jcore/consumer/ppd/desc/jcore-ppd-writer.xml
index fdf9505bf..11c5b3e39 100644
--- a/jcore-ppd-writer/src/main/resources/de/julielab/jcore/consumer/ppd/desc/jcore-ppd-writer.xml
+++ b/jcore-ppd-writer/src/main/resources/de/julielab/jcore/consumer/ppd/desc/jcore-ppd-writer.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe PPD Writer</name>
         <description>This component writes CAS annotation data to the pipe-separated format. For example, writing tokens with their PoS would result in text like 'The|DET tree|NN is|VBZ green|ADJ'. The component can be configured for an arbitrary number of annotations to be added to each token.</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <configurationParameters>
             <configurationParameter>
                 <name>TypeToLabelMappings</name>
diff --git a/jcore-pubtator-reader/component.meta b/jcore-pubtator-reader/component.meta
index ea6504c28..26181da79 100644
--- a/jcore-pubtator-reader/component.meta
+++ b/jcore-pubtator-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-pubtator-reader",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe PubTator Reader"
 }
diff --git a/jcore-pubtator-reader/pom.xml b/jcore-pubtator-reader/pom.xml
index 3440bb1fc..c311f8fac 100644
--- a/jcore-pubtator-reader/pom.xml
+++ b/jcore-pubtator-reader/pom.xml
@@ -12,7 +12,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <relativePath>..</relativePath>
     </parent>
     <dependencies>
diff --git a/jcore-pubtator-reader/src/main/resources/de/julielab/jcore/reader/pubtator/desc/jcore-pubtator-reader.xml b/jcore-pubtator-reader/src/main/resources/de/julielab/jcore/reader/pubtator/desc/jcore-pubtator-reader.xml
index 76985dfd7..58cd8067a 100644
--- a/jcore-pubtator-reader/src/main/resources/de/julielab/jcore/reader/pubtator/desc/jcore-pubtator-reader.xml
+++ b/jcore-pubtator-reader/src/main/resources/de/julielab/jcore/reader/pubtator/desc/jcore-pubtator-reader.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>jcore-pubtator-reader</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-stanford-lemmatizer-ae/component.meta b/jcore-stanford-lemmatizer-ae/component.meta
index 2da48cfa7..10ad8a71e 100644
--- a/jcore-stanford-lemmatizer-ae/component.meta
+++ b/jcore-stanford-lemmatizer-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-stanford-lemmatizer-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Stanford Lemmatizer"
 }
diff --git a/jcore-stanford-lemmatizer-ae/pom.xml b/jcore-stanford-lemmatizer-ae/pom.xml
index 71790e6a6..01f130a8c 100644
--- a/jcore-stanford-lemmatizer-ae/pom.xml
+++ b/jcore-stanford-lemmatizer-ae/pom.xml
@@ -5,7 +5,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     <artifactId>jcore-stanford-lemmatizer-ae</artifactId>
     <name>JCoRe Stanford Lemmatizer</name>
diff --git a/jcore-stanford-lemmatizer-ae/src/main/resources/de/julielab/jcore/ae/stanford/lemma/desc/jcore-stanford-lemmatizer.xml b/jcore-stanford-lemmatizer-ae/src/main/resources/de/julielab/jcore/ae/stanford/lemma/desc/jcore-stanford-lemmatizer.xml
index 2e4b3d99a..b1ff669c4 100644
--- a/jcore-stanford-lemmatizer-ae/src/main/resources/de/julielab/jcore/ae/stanford/lemma/desc/jcore-stanford-lemmatizer.xml
+++ b/jcore-stanford-lemmatizer-ae/src/main/resources/de/julielab/jcore/ae/stanford/lemma/desc/jcore-stanford-lemmatizer.xml
@@ -5,7 +5,7 @@
   <analysisEngineMetaData>
     <name>JCoRe Stanford Lemmatizer</name>
     <description>This is the UIMA Wrapper for the Stanford CoreNLP Lemmatizer component.</description>
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor />
     <configurationParameters />
     <configurationParameterSettings />
diff --git a/jcore-stanford-lemmatizer-ae/src/test/resources/de/julielab/jcore/ae/stanford/lemma/desc/jcore-stanford-lemmatizer-ae.xml b/jcore-stanford-lemmatizer-ae/src/test/resources/de/julielab/jcore/ae/stanford/lemma/desc/jcore-stanford-lemmatizer-ae.xml
index 653ecbe8e..867658d58 100644
--- a/jcore-stanford-lemmatizer-ae/src/test/resources/de/julielab/jcore/ae/stanford/lemma/desc/jcore-stanford-lemmatizer-ae.xml
+++ b/jcore-stanford-lemmatizer-ae/src/test/resources/de/julielab/jcore/ae/stanford/lemma/desc/jcore-stanford-lemmatizer-ae.xml
@@ -7,7 +7,7 @@
 		<name>jcore-stanford-lemmatizer-ae</name>
 		<description>This is the UIMA Wrapper for the Stanford CoreNLP
 			Lemmatizer component.</description>
-		<version>2.6.0-SNAPSHOT</version>
+		<version>2.6.0</version>
         <vendor />
 		<configurationParameters />
 		<configurationParameterSettings />
diff --git a/jcore-topic-indexing-ae/component.meta b/jcore-topic-indexing-ae/component.meta
index c6cdef338..a43efef61 100644
--- a/jcore-topic-indexing-ae/component.meta
+++ b/jcore-topic-indexing-ae/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-topic-indexing-ae",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe-Topic-Labeling-AE"
 }
diff --git a/jcore-topic-indexing-ae/pom.xml b/jcore-topic-indexing-ae/pom.xml
index 5137fab5d..e399f4c39 100644
--- a/jcore-topic-indexing-ae/pom.xml
+++ b/jcore-topic-indexing-ae/pom.xml
@@ -9,7 +9,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <dependencies>
@@ -53,7 +53,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-xmi-reader</artifactId>
-            <version>2.6.0-SNAPSHOT</version>
+            <version>2.6.0</version>
             <scope>test</scope>
         </dependency>
         <dependency>
diff --git a/jcore-topic-indexing-ae/src/main/resources/de/julielab/jcore/ae/topicindexing/desc/jcore-topic-indexing-ae.xml b/jcore-topic-indexing-ae/src/main/resources/de/julielab/jcore/ae/topicindexing/desc/jcore-topic-indexing-ae.xml
index 6db4c1c87..078ceb6b4 100644
--- a/jcore-topic-indexing-ae/src/main/resources/de/julielab/jcore/ae/topicindexing/desc/jcore-topic-indexing-ae.xml
+++ b/jcore-topic-indexing-ae/src/main/resources/de/julielab/jcore/ae/topicindexing/desc/jcore-topic-indexing-ae.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe Topic Indexer</name>
         <description>This component assigns topics relative to a given topic model to the encoutered documents. The topic model is one trained by the julielab-topic-modeling project.</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <configurationParameters>
             <configurationParameter>
                 <name>TopicModelConfig</name>
diff --git a/jcore-topics-writer/component.meta b/jcore-topics-writer/component.meta
index 32ac48b74..f501752f1 100644
--- a/jcore-topics-writer/component.meta
+++ b/jcore-topics-writer/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-topics-writer",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe Topics Writer"
 }
diff --git a/jcore-topics-writer/pom.xml b/jcore-topics-writer/pom.xml
index 644c48f2b..d570cd667 100644
--- a/jcore-topics-writer/pom.xml
+++ b/jcore-topics-writer/pom.xml
@@ -10,7 +10,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-topics-writer/src/main/resources/de/julielab/jcore/consumer/topics/desc/jcore-topics-writer.xml b/jcore-topics-writer/src/main/resources/de/julielab/jcore/consumer/topics/desc/jcore-topics-writer.xml
index 23aab97c9..a1ed20a19 100644
--- a/jcore-topics-writer/src/main/resources/de/julielab/jcore/consumer/topics/desc/jcore-topics-writer.xml
+++ b/jcore-topics-writer/src/main/resources/de/julielab/jcore/consumer/topics/desc/jcore-topics-writer.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe Topics Writer</name>
         <description>Writes the topic weights, given the jcore-topic-indexing-ae running before, into a simple text file. Thus, the output consists of a sequency of double numbers encodes as strings, separated by tab characters. The topic ID is just the 0-based index of each number, from left to right in the written file. The first entry of each file is the document ID.</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <configurationParameters>
             <configurationParameter>
                 <name>OutputDirectory</name>
diff --git a/jcore-txt-consumer/component.meta b/jcore-txt-consumer/component.meta
index c10e83c2c..818930805 100644
--- a/jcore-txt-consumer/component.meta
+++ b/jcore-txt-consumer/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-txt-consumer",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe TXT Consumer"
 }
diff --git a/jcore-txt-consumer/pom.xml b/jcore-txt-consumer/pom.xml
index 3c4fdb273..6c2e384cf 100644
--- a/jcore-txt-consumer/pom.xml
+++ b/jcore-txt-consumer/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     <artifactId>jcore-txt-consumer</artifactId>
     <name>JCoRe TXT Consumer</name>
diff --git a/jcore-txt-consumer/src/main/resources/de/julielab/jcore/consumer/txt/desc/jcore-txt-consumer.xml b/jcore-txt-consumer/src/main/resources/de/julielab/jcore/consumer/txt/desc/jcore-txt-consumer.xml
index 55deaaea9..7146fd7f7 100644
--- a/jcore-txt-consumer/src/main/resources/de/julielab/jcore/consumer/txt/desc/jcore-txt-consumer.xml
+++ b/jcore-txt-consumer/src/main/resources/de/julielab/jcore/consumer/txt/desc/jcore-txt-consumer.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe Text Consumer</name>
         <description>Stores the CAS document text in files. Either in tokenized sentences plus optional PoS tags or just the original document text. The text files can also be stored in GZIP format or batch-wise in ZIP archives.</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <vendor>de.julielab.jcore.consumer.txt</vendor>
         <configurationParameters>
             <configurationParameter>
diff --git a/jcore-types/pom.xml b/jcore-types/pom.xml
index 99b9f0134..b2a52a8f8 100644
--- a/jcore-types/pom.xml
+++ b/jcore-types/pom.xml
@@ -8,7 +8,7 @@
 	<parent>
 		<groupId>de.julielab</groupId>
 		<artifactId>jcore-base</artifactId>
-		<version>2.6.0-SNAPSHOT</version>
+		<version>2.6.0</version>
 	</parent>
 	
 	<build>
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/casmultiplier/jcore-dbtable-multiplier-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/casmultiplier/jcore-dbtable-multiplier-types.xml
index 06aa1902b..fd003f5c2 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/casmultiplier/jcore-dbtable-multiplier-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/casmultiplier/jcore-dbtable-multiplier-types.xml
@@ -10,7 +10,7 @@
         base document and annotations that have been previously created and stored in separate tables. This is
         part of the Jena Document Information System, JeDIS.
     </description>
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <types>
         <typeDescription>
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/casmultiplier/jcore-uri-multiplier-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/casmultiplier/jcore-uri-multiplier-types.xml
index fe06797e8..bb457ab06 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/casmultiplier/jcore-uri-multiplier-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/casmultiplier/jcore-uri-multiplier-types.xml
@@ -4,7 +4,7 @@
   <description>This is a type system for usage with a CAS multiplier. It should not be included into the
   jcore-all-types type system. This particular type system holds a single URI that points to the resource that
   should be split into CASes by the multiplier.</description>
-  <version>2.6.0-SNAPSHOT</version>
+  <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <types>
     <typeDescription>
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-ace-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-ace-types.xml
index d5e851681..39357e325 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-ace-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-ace-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
 <name>JCoRe ACE Types</name>
 <description>The jcore-ace-types TS represents the complete ACE Annotation in CAS format.</description>
-<version>2.6.0-SNAPSHOT</version>
+<version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
 <imports>
 <import location="../jcore-basic-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-document-meta-extension-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-document-meta-extension-types.xml
index ec60f5b33..dc727cdff 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-document-meta-extension-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-document-meta-extension-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>JCoRe Document Meta Types</name>
   <description>The types of this type system reflect meta data about documents for rather specific use cases.</description>
-  <version>2.6.0-SNAPSHOT</version>
+  <version>2.6.0</version>
         <vendor />
   <types>
     <typeDescription>
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-dta-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-dta-types.xml
index 89d99ec9a..5abb1cc26 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-dta-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-dta-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
 	<name>jcore-dta-types</name>
 	<description />
-	<version>2.6.0-SNAPSHOT</version>
+	<version>2.6.0</version>
         <vendor />
 	<types>
 			<typeDescription>
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-evaluation-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-evaluation-types.xml
index e02013a5f..285c5600b 100755
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-evaluation-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-evaluation-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>JCoRe Evaluation Types</name>
   <description>This type system is an extension of the JCoRe type system to cover evaluation Annotations like missing or additional annotations for evaluation purposes.</description>
-  <version>2.6.0-SNAPSHOT</version>
+  <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <types>
     <typeDescription>
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-mantra-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-mantra-types.xml
index 32ce435a4..fa4afe671 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-mantra-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-mantra-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>JCoRe Mantra Types</name>
   <description>The type system contains types for working with documents in the context of the MANTRA project.</description>
-  <version>2.6.0-SNAPSHOT</version>
+  <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <imports>
     <import location="../jcore-document-structure-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-medical-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-medical-types.xml
index af5a69392..6f181906a 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-medical-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-medical-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <name>jcore-medical-types</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor />
   <imports>
     <import location="../jcore-semantics-mention-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-mmax-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-mmax-types.xml
index fbec38980..06ef422dd 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-mmax-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-mmax-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>JCoRe MMAX Types</name>
   <description>The type system contains types for the import of MMAX2 annotations.</description>
-  <version>2.6.0-SNAPSHOT</version>
+  <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <types>
     <typeDescription>
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-muc7-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-muc7-types.xml
index 8aa7a5303..2bed1a349 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-muc7-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-muc7-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
 <name>JCoRe MUC7 Type System</name>
 <description>This type system contains types covering annotations for the MUC7 data.</description>
-<version>2.6.0-SNAPSHOT</version>
+<version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
 <imports>
 <import location="../jcore-document-meta-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-ace-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-ace-types.xml
index 2a2059bf3..2d834a6df 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-ace-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-ace-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
 <name>JCoRe Semantics ACE Types</name>
 <description>The type system contains ACE types of the ACE taxonomy.</description>
-<version>2.6.0-SNAPSHOT</version>
+<version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
 <imports>
 <import location="../jcore-semantics-mention-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-bootstrep-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-bootstrep-types.xml
index a2f40fc70..37a80cc11 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-bootstrep-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-bootstrep-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>JCoRe Semantics BOOTStrep Types</name>
   <description>The type system is an extension of the JCoRe core type system for types required in the context of the BOOTStrep project.</description>
-  <version>2.6.0-SNAPSHOT</version>
+  <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <imports>
     <import location="../jcore-semantics-biology-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-mention-extension-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-mention-extension-types.xml
index 62cabb2d4..377618374 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-mention-extension-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-mention-extension-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <name>JCoRe Semantics Mention Types Extension</name>
     <description>JCoRe type extensions to the JCoRe Semantics Mention types. Required for some processing or representation, these types do not extend the actual semantics of the core type system.</description>
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <imports>
     <import location="../jcore-semantics-mention-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-stemnet-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-stemnet-types.xml
index 68f6711bd..6238a9640 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-stemnet-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-semantics-stemnet-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>JCoRe Semantics StemNet Typs</name>
   <description>The type system contains types of the StemNet project.</description>
-  <version>2.6.0-SNAPSHOT</version>
+  <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <imports>
     <import location="../jcore-semantics-biology-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-wikipedia-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-wikipedia-types.xml
index cb9265d5b..1bea4abdf 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-wikipedia-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/extensions/jcore-wikipedia-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>JCoRe Wikipedia Types</name>
   <description>The type system contains types for the annotation of meta information of Wikipedia pages.</description>
-  <version>2.6.0-SNAPSHOT</version>
+  <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <imports>
     <import location="../jcore-document-structure-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-affect-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-affect-types.xml
index 69183e809..92a6adc0e 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-affect-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-affect-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <name>jcore-affect-types</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor />
   <imports>
     <import location="jcore-document-meta-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-all-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-all-types.xml
index d2d038014..6922c15db 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-all-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-all-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <name>JCoRe All Types</name>
     <description>This is just a convenience file, assembling all JCoRe types</description>
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <imports>
         <import location="jcore-basic-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-basic-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-basic-types.xml
index c1105adcc..8cfc25831 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-basic-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-basic-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>JCoRe Basic Types</name>
   <description>The type system contains the basic annotation types.</description>
-  <version>2.6.0-SNAPSHOT</version>
+  <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <types>
     <typeDescription>
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-casflow-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-casflow-types.xml
index 1f371bdf6..bc335c293 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-casflow-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-casflow-types.xml
@@ -5,7 +5,7 @@
         FlowControllers. The types herein serve to indicate which components should be visited for the CAS
         carrying annotations of this type.
     </description>
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <types>
         <typeDescription>
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-discourse-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-discourse-types.xml
index 01d7e272e..e8897be62 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-discourse-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-discourse-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>JCoRe Discourse Types</name>
   <description>Discourse types such as coreference relations.</description>
-  <version>2.6.0-SNAPSHOT</version>
+  <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <imports>
     <import location="jcore-basic-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-meta-clinicaltrial-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-meta-clinicaltrial-types.xml
index c39e6dd15..24abc85ec 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-meta-clinicaltrial-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-meta-clinicaltrial-types.xml
@@ -5,7 +5,7 @@
         document meta information (bibliographical and content information),
         especially for PubMed abstracts.
     </description>
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <imports>
         <import location="jcore-document-meta-pubmed-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-meta-pubmed-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-meta-pubmed-types.xml
index 2deb2853d..64bbd5e32 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-meta-pubmed-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-meta-pubmed-types.xml
@@ -5,7 +5,7 @@
         document meta information (bibliographical and content information),
         especially for PubMed abstracts.
     </description>
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <imports>
         <import location="jcore-document-meta-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-meta-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-meta-types.xml
index 2865894e7..f0324e628 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-meta-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-meta-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>JCoRe Document Meta Types</name>
   <description>The type system contains types for the annotation of document meta information (bibliographical and content information).</description>
-  <version>2.6.0-SNAPSHOT</version>
+  <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <imports>
     <import location="jcore-basic-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-structure-clinicaltrial-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-structure-clinicaltrial-types.xml
index 77b328da5..4422696fe 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-structure-clinicaltrial-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-structure-clinicaltrial-types.xml
@@ -4,7 +4,7 @@
     <description>This type system contains document structure types specific to the clinical trails XML format as
         retrieved from https://clinicaltrials.gov/.
     </description>
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <imports>
         <import location="jcore-document-structure-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-structure-pubmed-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-structure-pubmed-types.xml
index b575084d5..d13edaf0d 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-structure-pubmed-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-structure-pubmed-types.xml
@@ -4,7 +4,7 @@
     <description>This type system contains document structure types specific to PubMed or MEDLINE, e.g. detailed
         descriptions of structured abstracts.
     </description>
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <imports>
         <import location="jcore-document-structure-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-structure-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-structure-types.xml
index 5159c11aa..4e8fcf501 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-structure-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-document-structure-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>JCoRe Document Structure Types</name>
   <description>The type system contains the types for the annotation of document sutructure, e.g. titles, abstract text, captions etc.</description>
-  <version>2.6.0-SNAPSHOT</version>
+  <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <imports>
     <import location="jcore-basic-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-morpho-syntax-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-morpho-syntax-types.xml
index 0ff447c77..72adcfed0 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-morpho-syntax-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-morpho-syntax-types.xml
@@ -4,7 +4,7 @@
     <description>The type system contains types for the annotation of morpho-syntactic and syntactic analysis
         (constituncy-based and dependecy-based parsing) results.
     </description>
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <imports>
         <import location="jcore-basic-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-semantics-biology-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-semantics-biology-types.xml
index e421aa1c6..e7bdf766c 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-semantics-biology-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-semantics-biology-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <name>JCoRe Semantics Biology Types</name>
     <description>The type system contains types of the biomedical domain.</description>
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
     <imports>
         <import location="jcore-semantics-mention-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-semantics-concept-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-semantics-concept-types.xml
index 7b4b3d008..7bfb0c3ad 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-semantics-concept-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-semantics-concept-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
 <name>JCoRe Semantics Concept Types</name>
 <description>The type system contains core semantic types definitions such as entity, relation and event.</description>
-<version>2.6.0-SNAPSHOT</version>
+<version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
 <imports>
 <import location="jcore-semantics-mention-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-semantics-mention-types.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-semantics-mention-types.xml
index 87718af50..64bc7b357 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-semantics-mention-types.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/jcore-semantics-mention-types.xml
@@ -2,7 +2,7 @@
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>JCoRe Semantics Mention Types</name>
   <description>The type system contains core semantic types definitions such as entity, relation and event. The types in this type system refer to actual text occurrences.</description>
-  <version>2.6.0-SNAPSHOT</version>
+  <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
   <imports>
     <import location="jcore-basic-types.xml" />
diff --git a/jcore-types/src/main/resources/de/julielab/jcore/types/priorities/jcore-type-priorities.xml b/jcore-types/src/main/resources/de/julielab/jcore/types/priorities/jcore-type-priorities.xml
index 53c5d882f..b8ee7c4b1 100644
--- a/jcore-types/src/main/resources/de/julielab/jcore/types/priorities/jcore-type-priorities.xml
+++ b/jcore-types/src/main/resources/de/julielab/jcore/types/priorities/jcore-type-priorities.xml
@@ -2,7 +2,7 @@
 <typePriorities xmlns="http://uima.apache.org/resourceSpecifier">
     <name>jcore-type-priorities</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor />
 	<priorityList>
 	    <type>de.julielab.jcore.types.Title</type>
diff --git a/jcore-utilities/pom.xml b/jcore-utilities/pom.xml
index 5a6ad681f..77fa5f3b5 100644
--- a/jcore-utilities/pom.xml
+++ b/jcore-utilities/pom.xml
@@ -10,7 +10,7 @@
 	<parent>
 		<groupId>de.julielab</groupId>
 		<artifactId>jcore-base</artifactId>
-		<version>2.6.0-SNAPSHOT</version>
+		<version>2.6.0</version>
 	</parent>
 	
 	<dependencies>
diff --git a/jcore-utilities/src/test/resources/AETestDescriptor.xml b/jcore-utilities/src/test/resources/AETestDescriptor.xml
index ab602e7c9..f438cd6b6 100644
--- a/jcore-utilities/src/test/resources/AETestDescriptor.xml
+++ b/jcore-utilities/src/test/resources/AETestDescriptor.xml
@@ -6,7 +6,7 @@
 <analysisEngineMetaData>
 <name>JulesToolsDescriptor</name>
 <description />
-<version>2.6.0-SNAPSHOT</version>
+<version>2.6.0</version>
         <vendor />
 <configurationParameters />
 <configurationParameterSettings />
diff --git a/jcore-xmi-db-reader/component.meta b/jcore-xmi-db-reader/component.meta
index c7c922807..e49317b51 100644
--- a/jcore-xmi-db-reader/component.meta
+++ b/jcore-xmi-db-reader/component.meta
@@ -23,7 +23,7 @@
     "maven-artifact": {
         "artifactId": "jcore-xmi-db-reader",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe XMI Database Reader"
 }
diff --git a/jcore-xmi-db-reader/pom.xml b/jcore-xmi-db-reader/pom.xml
index 6cd48ce47..fa3b75799 100644
--- a/jcore-xmi-db-reader/pom.xml
+++ b/jcore-xmi-db-reader/pom.xml
@@ -5,7 +5,7 @@
     <parent>
         <artifactId>jedis-parent</artifactId>
         <groupId>de.julielab</groupId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <relativePath>../jedis-parent</relativePath>
     </parent>
     <artifactId>jcore-xmi-db-reader</artifactId>
@@ -18,7 +18,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-db-reader</artifactId>
-            <version>2.6.0-SNAPSHOT</version>
+            <version>2.6.0</version>
         </dependency>
         <dependency>
             <groupId>org.testng</groupId>
@@ -59,13 +59,13 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-xml-db-reader</artifactId>
-            <version>2.6.0-SNAPSHOT</version>
+            <version>2.6.0</version>
             <scope>test</scope>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-xmi-db-writer</artifactId>
-            <version>2.6.0-SNAPSHOT</version>
+            <version>2.6.0</version>
             <scope>test</scope>
         </dependency>
     </dependencies>
diff --git a/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier-reader.xml b/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier-reader.xml
index 9ef28be72..fa03c02ba 100644
--- a/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier-reader.xml
+++ b/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier-reader.xml
@@ -5,7 +5,7 @@
     <processingResourceMetaData>
         <name>JCoRe XMI Database Multiplier Reader</name>
         <description>This is an extension of the DBMultiplierReader to handle JeDIS XMI annotation module data.</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <configurationParameters>
             <configurationParameter>
                 <name>ReadsBaseDocument</name>
diff --git a/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml b/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml
index 007e3ee33..7ba47c81b 100644
--- a/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml
+++ b/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-multiplier.xml
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="UTF-8"?>
+<?xml version='1.0' encoding='UTF-8'?>
 <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
     <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
     <primitive>true</primitive>
@@ -6,6 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe XMI Database Multiplier</name>
         <description>A multiplier that receives document IDs to read from a database table from the DBMultiplierReader. The reader also delivers the path to the corpus storage system (CoStoSys) configuration and additional tables for joining with the main data table. This multiplier class is abstract and cannot be used directly.Extending classes must implement the next() method to actually read documents from the database and populate CASes with them. This component is a part of the Jena Document Information System, JeDIS.</description>
+        <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
         <copyright>JULIE Lab Jena, Germany</copyright>
         <configurationParameters>
@@ -34,13 +35,13 @@
         </configurationParameterSettings>
         <typeSystemDescription>
             <imports>
-                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types" />
                 <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types" />
                 <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types" />
             </imports>
         </typeSystemDescription>
-        <fsIndexCollection/>
-        <capabilities/>
+        <fsIndexCollection />
+        <capabilities />
         <operationalProperties>
             <modifiesCas>true</modifiesCas>
             <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
diff --git a/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-reader.xml b/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-reader.xml
index fb634e618..bc148ea7e 100644
--- a/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-reader.xml
+++ b/jcore-xmi-db-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-db-reader.xml
@@ -5,7 +5,7 @@
     <processingResourceMetaData>
         <name>JCoRe XMI Database Reader</name>
         <description>A database readerthat expects serialized UIMA CAS objects in XMI format as input. The reader has the capability to read segmented annotation graphs that have been stored by the jcore-xmi-db-writer. This component is part of the Jena Document Information System, JeDIS.</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
         <configurationParameters>
             <configurationParameter>
@@ -169,7 +169,7 @@
         <typeSystemDescription>
             <imports>
                 <import name="de.julielab.jcore.types.jcore-xmi-splitter-types" />
-<!--                <import name="de.julielab.jcore.types.jcore-all-types" />-->
+
             </imports>
         </typeSystemDescription>
         <fsIndexCollection />
diff --git a/jcore-xmi-db-writer/component.meta b/jcore-xmi-db-writer/component.meta
index 3c65e61ac..55d656ba9 100644
--- a/jcore-xmi-db-writer/component.meta
+++ b/jcore-xmi-db-writer/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-xmi-db-writer",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe XMI Database Writer"
 }
diff --git a/jcore-xmi-db-writer/pom.xml b/jcore-xmi-db-writer/pom.xml
index 2b4a326f4..b959967ea 100644
--- a/jcore-xmi-db-writer/pom.xml
+++ b/jcore-xmi-db-writer/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <artifactId>jedis-parent</artifactId>
         <groupId>de.julielab</groupId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <relativePath>../jedis-parent</relativePath>
     </parent>
     <artifactId>jcore-xmi-db-writer</artifactId>
@@ -144,7 +144,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-db-checkpoint-ae</artifactId>
-            <version>2.6.0-SNAPSHOT</version>
+            <version>2.6.0</version>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
diff --git a/jcore-xmi-db-writer/src/main/resources/de/julielab/jcore/consumer/xmi/desc/jcore-xmi-db-writer.xml b/jcore-xmi-db-writer/src/main/resources/de/julielab/jcore/consumer/xmi/desc/jcore-xmi-db-writer.xml
index 01f4ca1e3..da64061af 100644
--- a/jcore-xmi-db-writer/src/main/resources/de/julielab/jcore/consumer/xmi/desc/jcore-xmi-db-writer.xml
+++ b/jcore-xmi-db-writer/src/main/resources/de/julielab/jcore/consumer/xmi/desc/jcore-xmi-db-writer.xml
@@ -6,7 +6,7 @@
     <analysisEngineMetaData>
         <name>JCoRe XMI Database Writer</name>
         <description>This component is capable of storing the standard UIMA serialization of documents in one or even multiple database tables. The UIMA serialization format is XMI, an XML format that expressed an annotation graph. This component either stores the whole annotation graph in XMI format in a database row, together with the document ID. Alternatively, it makes use of the jcore-xmi-splitter to segment the annotation graph with respect to a user specified list of annotation types. Then, the XMI data of each annotation type is extracted from the document XMI data and stored in a separate table. The tables are created automatically according to the primary key of the active table schema in the Corpus Storage System (CoStoSys) configuration file that is also given as a parameter. The jcore-xmi-db-reader is capable of reading this kind of distributed annotation graph and reassemble a valid XMI document which then cas be deserialized into a CAS. This consumer is UIMA DUCC compatible. It requires the collection reader to forward the work item CAS to the consumer. This is required so the consumer knows that a work item has been finished and that all cached data - in this case the XMI data - should be flushed. This is important! Without the forwarding of the work item CAS, the last batch of cached XMI data will not be written into the database. This component is part of the Jena Document Information System, JeDIS.</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <vendor>JULIE Lab Jena, Germany</vendor>
         <configurationParameters>
             <configurationParameter>
@@ -190,7 +190,7 @@
         </configurationParameterSettings>
         <typeSystemDescription>
             <imports>
-                <import name="de.julielab.jcore.types.jcore-document-meta-types"/>
+                <import name="de.julielab.jcore.types.jcore-document-meta-types" />
                 <import name="de.julielab.jcore.types.jcore-xmi-splitter-types" />
                 <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types" />
                 <import name="org.apache.uima.ducc.FlowControllerTS" />
diff --git a/jcore-xmi-reader/component.meta b/jcore-xmi-reader/component.meta
index 347606dc4..57ad76f80 100644
--- a/jcore-xmi-reader/component.meta
+++ b/jcore-xmi-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-xmi-reader",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe XMI Collection Reader"
 }
diff --git a/jcore-xmi-reader/pom.xml b/jcore-xmi-reader/pom.xml
index e7630643a..a6017493b 100644
--- a/jcore-xmi-reader/pom.xml
+++ b/jcore-xmi-reader/pom.xml
@@ -13,7 +13,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
 
diff --git a/jcore-xmi-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-reader.xml b/jcore-xmi-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-reader.xml
index c6c747371..d21e7b29b 100644
--- a/jcore-xmi-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-reader.xml
+++ b/jcore-xmi-reader/src/main/resources/de/julielab/jcore/reader/xmi/desc/jcore-xmi-reader.xml
@@ -6,7 +6,7 @@
     <name>XmiCollectionReader</name>
     <description>A CollectionReader which reads CAS data stored as XMI files from the file system. The reader grounds on IBM's XmiCollectionReader delivered with older versions of UIMA and has been extended by the Julie Lab team at the University of Jena.
 This XMI reader is capable of reading (g)zipped XMI files and is able to recursively search subdirectories of a delivered root directory for XMI files.</description>
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-xmi-writer/component.meta b/jcore-xmi-writer/component.meta
index ef645b6dd..7afe174fc 100644
--- a/jcore-xmi-writer/component.meta
+++ b/jcore-xmi-writer/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-xmi-writer",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe XMI Writer"
 }
diff --git a/jcore-xmi-writer/pom.xml b/jcore-xmi-writer/pom.xml
index 586126e26..0babbc06a 100644
--- a/jcore-xmi-writer/pom.xml
+++ b/jcore-xmi-writer/pom.xml
@@ -11,7 +11,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     <dependencies>
         <dependency>
diff --git a/jcore-xmi-writer/src/main/resources/de/julielab/jcore/consumer/xmi/desc/jcore-xmi-writer.xml b/jcore-xmi-writer/src/main/resources/de/julielab/jcore/consumer/xmi/desc/jcore-xmi-writer.xml
index aaeb7196c..a4af702ed 100644
--- a/jcore-xmi-writer/src/main/resources/de/julielab/jcore/consumer/xmi/desc/jcore-xmi-writer.xml
+++ b/jcore-xmi-writer/src/main/resources/de/julielab/jcore/consumer/xmi/desc/jcore-xmi-writer.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>XMIWriter</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-xmi-writer/src/test/resources/de/julielab/jcore/consumer/xmi/CasToXmiConsumer.xml b/jcore-xmi-writer/src/test/resources/de/julielab/jcore/consumer/xmi/CasToXmiConsumer.xml
index c11ac0001..7538342bb 100644
--- a/jcore-xmi-writer/src/test/resources/de/julielab/jcore/consumer/xmi/CasToXmiConsumer.xml
+++ b/jcore-xmi-writer/src/test/resources/de/julielab/jcore/consumer/xmi/CasToXmiConsumer.xml
@@ -6,7 +6,7 @@
   <analysisEngineMetaData>
     <name>XMIWriter</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-xml-db-reader/component.meta b/jcore-xml-db-reader/component.meta
index 0d7fce2f6..37ac82af4 100644
--- a/jcore-xml-db-reader/component.meta
+++ b/jcore-xml-db-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-xml-db-reader",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe XML Database Reader"
 }
diff --git a/jcore-xml-db-reader/pom.xml b/jcore-xml-db-reader/pom.xml
index 24dd2febd..99ecaa819 100644
--- a/jcore-xml-db-reader/pom.xml
+++ b/jcore-xml-db-reader/pom.xml
@@ -15,7 +15,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jedis-parent</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <relativePath>../jedis-parent</relativePath>
     </parent>
 
@@ -23,7 +23,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-db-reader</artifactId>
-            <version>2.6.0-SNAPSHOT</version>
+            <version>2.6.0</version>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
@@ -51,7 +51,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-xml-mapper</artifactId>
-            <version>2.6.0-SNAPSHOT</version>
+            <version>2.6.0</version>
         </dependency>
         <dependency>
             <groupId>de.julielab</groupId>
diff --git a/jcore-xml-db-reader/src/main/resources/de/julielab/jcore/reader/xml/desc/jcore-xml-db-reader.xml b/jcore-xml-db-reader/src/main/resources/de/julielab/jcore/reader/xml/desc/jcore-xml-db-reader.xml
index a7d8fe03c..a4539bcc8 100644
--- a/jcore-xml-db-reader/src/main/resources/de/julielab/jcore/reader/xml/desc/jcore-xml-db-reader.xml
+++ b/jcore-xml-db-reader/src/main/resources/de/julielab/jcore/reader/xml/desc/jcore-xml-db-reader.xml
@@ -7,7 +7,7 @@
         <description>A collection reader that receives XML document data from a PostgreSQL database. It employs the
         jcore-xml-mapper to populate UIMA CAS instances with the XML data according to a mapping file. For the same
         functionality without using a database, refer to the jcore-xml-reader.</description>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
         <vendor>JULIE Lab, Germany</vendor>
         <configurationParameters>
             <configurationParameter>
diff --git a/jcore-xml-mapper/pom.xml b/jcore-xml-mapper/pom.xml
index c2fa73802..85aa0825f 100644
--- a/jcore-xml-mapper/pom.xml
+++ b/jcore-xml-mapper/pom.xml
@@ -13,7 +13,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
 
     <dependencies>
diff --git a/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor.xml b/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor.xml
index a1bebd5a0..99d571eff 100644
--- a/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor.xml
+++ b/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>XMLReader</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_Unicode_outside_BMP.xml b/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_Unicode_outside_BMP.xml
index 0ce228185..417fa726e 100755
--- a/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_Unicode_outside_BMP.xml
+++ b/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_Unicode_outside_BMP.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>XMLReader</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_missingInputDir.xml b/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_missingInputDir.xml
index b501db9fa..bb46aef5a 100644
--- a/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_missingInputDir.xml
+++ b/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_missingInputDir.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>XMLReader</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_singleFile.xml b/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_singleFile.xml
index af51a64c1..caa322dba 100644
--- a/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_singleFile.xml
+++ b/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_singleFile.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>XMLReader</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_singleFile2.xml b/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_singleFile2.xml
index f1872c028..a7f19bbf8 100644
--- a/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_singleFile2.xml
+++ b/jcore-xml-mapper/src/test/resources/XMLReaderDescriptor_medline_singleFile2.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>XMLReader</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-xml-reader/component.meta b/jcore-xml-reader/component.meta
index 97de60fef..9f7f54c1e 100644
--- a/jcore-xml-reader/component.meta
+++ b/jcore-xml-reader/component.meta
@@ -14,7 +14,7 @@
     "maven-artifact": {
         "artifactId": "jcore-xml-reader",
         "groupId": "de.julielab",
-        "version": "2.6.0-SNAPSHOT"
+        "version": "2.6.0"
     },
     "name": "JCoRe XML Reader"
 }
diff --git a/jcore-xml-reader/pom.xml b/jcore-xml-reader/pom.xml
index 7d2ec2b1f..348c8879e 100644
--- a/jcore-xml-reader/pom.xml
+++ b/jcore-xml-reader/pom.xml
@@ -5,7 +5,7 @@
     <parent>
         <groupId>de.julielab</groupId>
         <artifactId>jcore-base</artifactId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     <artifactId>jcore-xml-reader</artifactId>
     <name>JCoRe XML Reader</name>
@@ -14,7 +14,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>jcore-xml-mapper</artifactId>
-            <version>2.6.0-SNAPSHOT</version>
+            <version>2.6.0</version>
         </dependency>
         <dependency>
             <groupId>org.slf4j</groupId>
diff --git a/jcore-xml-reader/src/main/resources/de/julielab/jcore/reader/xml/desc/XMLMultiplierReader.xml b/jcore-xml-reader/src/main/resources/de/julielab/jcore/reader/xml/desc/XMLMultiplierReader.xml
index f13e7b82c..be9956b4d 100644
--- a/jcore-xml-reader/src/main/resources/de/julielab/jcore/reader/xml/desc/XMLMultiplierReader.xml
+++ b/jcore-xml-reader/src/main/resources/de/julielab/jcore/reader/xml/desc/XMLMultiplierReader.xml
@@ -8,7 +8,7 @@
       This reader is to be used with the JCoRe XML CAS Multiplier. The reader merely distributes the files to
       be read. The actual parsing is done by the multiplier.
     </description>
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-xml-reader/src/test/resources/MedlineReaderDescriptor_missingInputDir.xml b/jcore-xml-reader/src/test/resources/MedlineReaderDescriptor_missingInputDir.xml
index f1aaab0c6..68d33c44e 100644
--- a/jcore-xml-reader/src/test/resources/MedlineReaderDescriptor_missingInputDir.xml
+++ b/jcore-xml-reader/src/test/resources/MedlineReaderDescriptor_missingInputDir.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>MedlineReaderDescriptor_missingInputDir</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jcore-xml-reader/src/test/resources/PubmedXMLMultiplier.xml b/jcore-xml-reader/src/test/resources/PubmedXMLMultiplier.xml
index 964ccdf74..1a8b378ab 100644
--- a/jcore-xml-reader/src/test/resources/PubmedXMLMultiplier.xml
+++ b/jcore-xml-reader/src/test/resources/PubmedXMLMultiplier.xml
@@ -5,7 +5,7 @@
   <analysisEngineMetaData>
     <name>PubmedXMLMultiplierDescriptor</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor />
     <configurationParameters searchStrategy="language_fallback">
       <configurationParameter>
diff --git a/jcore-xml-reader/src/test/resources/XMLMultiplierReader.xml b/jcore-xml-reader/src/test/resources/XMLMultiplierReader.xml
index 5d7c405fb..c32e2ae7e 100644
--- a/jcore-xml-reader/src/test/resources/XMLMultiplierReader.xml
+++ b/jcore-xml-reader/src/test/resources/XMLMultiplierReader.xml
@@ -5,7 +5,7 @@
   <processingResourceMetaData>
     <name>MedlineReaderDescriptor_missingInputDir</name>
     <description />
-    <version>2.6.0-SNAPSHOT</version>
+    <version>2.6.0</version>
         <vendor />
     <configurationParameters>
       <configurationParameter>
diff --git a/jedis-parent/pom.xml b/jedis-parent/pom.xml
index 51791107a..b09f97f48 100644
--- a/jedis-parent/pom.xml
+++ b/jedis-parent/pom.xml
@@ -4,7 +4,7 @@
     <parent>
         <artifactId>jcore-base</artifactId>
         <groupId>de.julielab</groupId>
-        <version>2.6.0-SNAPSHOT</version>
+        <version>2.6.0</version>
     </parent>
     <packaging>pom</packaging>
     <modelVersion>4.0.0</modelVersion>
diff --git a/pom.xml b/pom.xml
index f290a5ae4..d76a2e989 100644
--- a/pom.xml
+++ b/pom.xml
@@ -65,7 +65,7 @@
   
   
-  <version>2.6.0-SNAPSHOT</version>
+  <version>2.6.0</version>
             
   
From 9d717f3cfbb5a51c3b43d839e3c06554e11393a1 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 16 Nov 2022 20:10:56 +0100
Subject: [PATCH 268/269] Adding missing versions.

---
 jcore-mmax2-reader/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/jcore-mmax2-reader/pom.xml b/jcore-mmax2-reader/pom.xml
index 30e662ad4..812127c84 100644
--- a/jcore-mmax2-reader/pom.xml
+++ b/jcore-mmax2-reader/pom.xml
@@ -37,7 +37,7 @@
         <dependency>
             <groupId>de.julielab</groupId>
             <artifactId>julielab-mmax-to-iob-iexml-converter</artifactId>
-            <version></version>
+            <version>1.0.2</version>
         </dependency>
         <dependency>
             <groupId>org.apache.commons</groupId>

From fef2034643b2d8b0feafb5d41b7480ba5a038bc2 Mon Sep 17 00:00:00 2001
From: khituras <chew@gmx.net>
Date: Wed, 16 Nov 2022 20:49:20 +0100
Subject: [PATCH 269/269] Fix a test, bump jcore-parent version.

---
 .../jcore/misc/DescriptorCreatorTest.java     |  6 ++-
 .../de.julielab.jcore.ae.testae.TestAE.xml    | 50 -------------------
 ...ore.consumer.testconsumer.Testconsumer.xml | 50 -------------------
 ...ltiplier.testmultiplier.TestMultiplier.xml | 50 -------------------
 ...lab.jcore.reader.testreader.TestReader.xml | 49 ------------------
 pom.xml                                       |  2 +-
 6 files changed, 5 insertions(+), 202 deletions(-)
 delete mode 100644 jcore-descriptor-creator/src/test/resources/de/julielab/jcore/ae/testae/desc/de.julielab.jcore.ae.testae.TestAE.xml
 delete mode 100644 jcore-descriptor-creator/src/test/resources/de/julielab/jcore/consumer/testconsumer/desc/de.julielab.jcore.consumer.testconsumer.Testconsumer.xml
 delete mode 100644 jcore-descriptor-creator/src/test/resources/de/julielab/jcore/multiplier/testmultiplier/desc/de.julielab.jcore.multiplier.testmultiplier.TestMultiplier.xml
 delete mode 100644 jcore-descriptor-creator/src/test/resources/de/julielab/jcore/reader/testreader/desc/de.julielab.jcore.reader.testreader.TestReader.xml

diff --git a/jcore-descriptor-creator/src/test/java/de/julielab/jcore/misc/DescriptorCreatorTest.java b/jcore-descriptor-creator/src/test/java/de/julielab/jcore/misc/DescriptorCreatorTest.java
index 41a146892..31961e62d 100644
--- a/jcore-descriptor-creator/src/test/java/de/julielab/jcore/misc/DescriptorCreatorTest.java
+++ b/jcore-descriptor-creator/src/test/java/de/julielab/jcore/misc/DescriptorCreatorTest.java
@@ -1,6 +1,7 @@
 package de.julielab.jcore.misc;
 
 import de.julielab.java.utilities.IOStreamUtilities;
+import org.apache.commons.io.FileUtils;
 import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
@@ -9,6 +10,7 @@
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.nio.file.Path;
+import java.util.Arrays;
 import java.util.stream.Stream;
 
 import static java.util.stream.Collectors.joining;
@@ -20,13 +22,13 @@ public class DescriptorCreatorTest {
 	@BeforeAll
 	@AfterAll
 	public static void shutdown() throws IOException {
-		//FileUtils.deleteDirectory(new File(Arrays.asList("src", "test", "resources", "de").stream().collect(joining(File.separator))));
+		FileUtils.deleteDirectory(new File(Arrays.asList("src", "test", "resources", "de").stream().collect(joining(File.separator))));
 	}
 	@Test
 	public void testRun() throws Exception {
 		DescriptorCreator creator = new DescriptorCreator();
 		String outputRoot = "src" + File.separator + "test" + File.separator + "resources" + File.separator;
-		creator.run(outputRoot);	
+		creator.run("de.julielab.jcore", outputRoot);
 		File crDir = new File(outputRoot + Stream.of("de", "julielab", "jcore", "reader", "testreader", "desc").collect(joining(File.separator)));
 		File aeDir = new File(outputRoot + Stream.of("de", "julielab", "jcore", "ae", "testae", "desc").collect(joining(File.separator)));
 		File consumerDir = new File(outputRoot + Stream.of("de", "julielab", "jcore", "consumer", "testconsumer", "desc").collect(joining(File.separator)));
diff --git a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/ae/testae/desc/de.julielab.jcore.ae.testae.TestAE.xml b/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/ae/testae/desc/de.julielab.jcore.ae.testae.TestAE.xml
deleted file mode 100644
index 173ce62f6..000000000
--- a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/ae/testae/desc/de.julielab.jcore.ae.testae.TestAE.xml
+++ /dev/null
@@ -1,50 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
-    <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
-    <primitive>true</primitive>
-    <annotatorImplementationName>de.julielab.jcore.ae.testae.TestAE</annotatorImplementationName>
-    <analysisEngineMetaData>
-        <name>de.julielab.jcore.ae.testae.TestAE</name>
-        <description>Descriptor automatically generated by uimaFIT</description>
-        <version>2.6.0</version>
-        <vendor>de.julielab.jcore.ae.testae</vendor>
-        <configurationParameters />
-        <configurationParameterSettings />
-        <typeSystemDescription>
-            <imports>
-                <import name="de.julielab.jcore.types.extensions.jcore-mantra-types" />
-                <import name="de.julielab.jcore.types.jcore-document-meta-types" />
-                <import name="de.julielab.jcore.types.jcore-semantics-biology-types" />
-                <import name="de.julielab.jcore.types.jcore-semantics-concept-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-mmax-types" />
-                <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-muc7-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-ace-types" />
-                <import name="de.julielab.jcore.types.jcore-document-structure-types" />
-                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types" />
-                <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-wikipedia-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types" />
-                <import name="de.julielab.jcore.types.jcore-semantics-mention-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-medical-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-dta-types" />
-                <import name="de.julielab.jcore.types.jcore-discourse-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-ace-types" />
-                <import name="de.julielab.jcore.types.jcore-morpho-syntax-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-stemnet-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-bootstrep-types" />
-                <import name="de.julielab.jcore.types.jcore-basic-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types" />
-                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-evaluation-types" />
-            </imports>
-        </typeSystemDescription>
-        <fsIndexCollection />
-        <capabilities />
-        <operationalProperties>
-            <modifiesCas>true</modifiesCas>
-            <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
-            <outputsNewCASes>false</outputsNewCASes>
-        </operationalProperties>
-    </analysisEngineMetaData>
-</analysisEngineDescription>
\ No newline at end of file
diff --git a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/consumer/testconsumer/desc/de.julielab.jcore.consumer.testconsumer.Testconsumer.xml b/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/consumer/testconsumer/desc/de.julielab.jcore.consumer.testconsumer.Testconsumer.xml
deleted file mode 100644
index f26725794..000000000
--- a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/consumer/testconsumer/desc/de.julielab.jcore.consumer.testconsumer.Testconsumer.xml
+++ /dev/null
@@ -1,50 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
-    <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
-    <primitive>true</primitive>
-    <annotatorImplementationName>de.julielab.jcore.consumer.testconsumer.Testconsumer</annotatorImplementationName>
-    <analysisEngineMetaData>
-        <name>de.julielab.jcore.consumer.testconsumer.Testconsumer</name>
-        <description>Descriptor automatically generated by uimaFIT</description>
-        <version>2.6.0</version>
-        <vendor>de.julielab.jcore.consumer.testconsumer</vendor>
-        <configurationParameters />
-        <configurationParameterSettings />
-        <typeSystemDescription>
-            <imports>
-                <import name="de.julielab.jcore.types.extensions.jcore-mantra-types" />
-                <import name="de.julielab.jcore.types.jcore-document-meta-types" />
-                <import name="de.julielab.jcore.types.jcore-semantics-biology-types" />
-                <import name="de.julielab.jcore.types.jcore-semantics-concept-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-mmax-types" />
-                <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-muc7-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-ace-types" />
-                <import name="de.julielab.jcore.types.jcore-document-structure-types" />
-                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types" />
-                <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-wikipedia-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types" />
-                <import name="de.julielab.jcore.types.jcore-semantics-mention-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-medical-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-dta-types" />
-                <import name="de.julielab.jcore.types.jcore-discourse-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-ace-types" />
-                <import name="de.julielab.jcore.types.jcore-morpho-syntax-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-stemnet-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-bootstrep-types" />
-                <import name="de.julielab.jcore.types.jcore-basic-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types" />
-                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-evaluation-types" />
-            </imports>
-        </typeSystemDescription>
-        <fsIndexCollection />
-        <capabilities />
-        <operationalProperties>
-            <modifiesCas>true</modifiesCas>
-            <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
-            <outputsNewCASes>false</outputsNewCASes>
-        </operationalProperties>
-    </analysisEngineMetaData>
-</analysisEngineDescription>
\ No newline at end of file
diff --git a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/multiplier/testmultiplier/desc/de.julielab.jcore.multiplier.testmultiplier.TestMultiplier.xml b/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/multiplier/testmultiplier/desc/de.julielab.jcore.multiplier.testmultiplier.TestMultiplier.xml
deleted file mode 100644
index 9eae20293..000000000
--- a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/multiplier/testmultiplier/desc/de.julielab.jcore.multiplier.testmultiplier.TestMultiplier.xml
+++ /dev/null
@@ -1,50 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier">
-    <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
-    <primitive>true</primitive>
-    <annotatorImplementationName>de.julielab.jcore.multiplier.testmultiplier.TestMultiplier</annotatorImplementationName>
-    <analysisEngineMetaData>
-        <name>de.julielab.jcore.multiplier.testmultiplier.TestMultiplier</name>
-        <description>Descriptor automatically generated by uimaFIT</description>
-        <version>2.6.0</version>
-        <vendor>de.julielab.jcore.multiplier.testmultiplier</vendor>
-        <configurationParameters />
-        <configurationParameterSettings />
-        <typeSystemDescription>
-            <imports>
-                <import name="de.julielab.jcore.types.extensions.jcore-mantra-types" />
-                <import name="de.julielab.jcore.types.jcore-document-meta-types" />
-                <import name="de.julielab.jcore.types.jcore-semantics-biology-types" />
-                <import name="de.julielab.jcore.types.jcore-semantics-concept-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-mmax-types" />
-                <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-muc7-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-ace-types" />
-                <import name="de.julielab.jcore.types.jcore-document-structure-types" />
-                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types" />
-                <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-wikipedia-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types" />
-                <import name="de.julielab.jcore.types.jcore-semantics-mention-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-medical-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-dta-types" />
-                <import name="de.julielab.jcore.types.jcore-discourse-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-ace-types" />
-                <import name="de.julielab.jcore.types.jcore-morpho-syntax-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-stemnet-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-bootstrep-types" />
-                <import name="de.julielab.jcore.types.jcore-basic-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types" />
-                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-evaluation-types" />
-            </imports>
-        </typeSystemDescription>
-        <fsIndexCollection />
-        <capabilities />
-        <operationalProperties>
-            <modifiesCas>true</modifiesCas>
-            <multipleDeploymentAllowed>true</multipleDeploymentAllowed>
-            <outputsNewCASes>false</outputsNewCASes>
-        </operationalProperties>
-    </analysisEngineMetaData>
-</analysisEngineDescription>
\ No newline at end of file
diff --git a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/reader/testreader/desc/de.julielab.jcore.reader.testreader.TestReader.xml b/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/reader/testreader/desc/de.julielab.jcore.reader.testreader.TestReader.xml
deleted file mode 100644
index baf0587c5..000000000
--- a/jcore-descriptor-creator/src/test/resources/de/julielab/jcore/reader/testreader/desc/de.julielab.jcore.reader.testreader.TestReader.xml
+++ /dev/null
@@ -1,49 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<collectionReaderDescription xmlns="http://uima.apache.org/resourceSpecifier">
-    <frameworkImplementation>org.apache.uima.java</frameworkImplementation>
-    <implementationName>de.julielab.jcore.reader.testreader.TestReader</implementationName>
-    <processingResourceMetaData>
-        <name>de.julielab.jcore.reader.testreader.TestReader</name>
-        <description>Descriptor automatically generated by uimaFIT</description>
-        <version>2.6.0</version>
-        <vendor>de.julielab.jcore.reader.testreader</vendor>
-        <configurationParameters />
-        <configurationParameterSettings />
-        <typeSystemDescription>
-            <imports>
-                <import name="de.julielab.jcore.types.extensions.jcore-mantra-types" />
-                <import name="de.julielab.jcore.types.jcore-document-meta-types" />
-                <import name="de.julielab.jcore.types.jcore-semantics-biology-types" />
-                <import name="de.julielab.jcore.types.jcore-semantics-concept-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-mmax-types" />
-                <import name="de.julielab.jcore.types.casmultiplier.jcore-dbtable-multiplier-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-muc7-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-ace-types" />
-                <import name="de.julielab.jcore.types.jcore-document-structure-types" />
-                <import name="de.julielab.jcore.types.jcore-document-structure-pubmed-types" />
-                <import name="de.julielab.jcore.types.casmultiplier.jcore-uri-multiplier-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-wikipedia-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-document-meta-extension-types" />
-                <import name="de.julielab.jcore.types.jcore-semantics-mention-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-medical-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-dta-types" />
-                <import name="de.julielab.jcore.types.jcore-discourse-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-ace-types" />
-                <import name="de.julielab.jcore.types.jcore-morpho-syntax-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-stemnet-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-bootstrep-types" />
-                <import name="de.julielab.jcore.types.jcore-basic-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-semantics-mention-extension-types" />
-                <import name="de.julielab.jcore.types.jcore-document-meta-pubmed-types" />
-                <import name="de.julielab.jcore.types.extensions.jcore-evaluation-types" />
-            </imports>
-        </typeSystemDescription>
-        <fsIndexCollection />
-        <capabilities />
-        <operationalProperties>
-            <modifiesCas>true</modifiesCas>
-            <multipleDeploymentAllowed>false</multipleDeploymentAllowed>
-            <outputsNewCASes>true</outputsNewCASes>
-        </operationalProperties>
-    </processingResourceMetaData>
-</collectionReaderDescription>
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index d76a2e989..16312b2db 100644
--- a/pom.xml
+++ b/pom.xml
@@ -29,7 +29,7 @@
     
     
-    <version>2.5.2-SNAPSHOT</version>
+    <version>2.5.2</version>