diff --git a/pom.xml b/pom.xml
index ecf1f59..cddd31f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -220,6 +220,16 @@
+
+
+
+
+ org.springframework.build
+ aws-maven
+ 5.0.0.RELEASE
+
+
+
@@ -387,5 +397,20 @@
+
+
+ gov.nih.nlm.nls
+ metamaplite
+ 3.1-SNAPSHOT
+
+
+
+
+
+ maven.imi.medunigraz.at
+ S3 Maven Repository
+ s3://maven.imi.medunigraz.at/release
+
+
diff --git a/src/main/java/at/medunigraz/imi/bst/n2c2/model/Patient.java b/src/main/java/at/medunigraz/imi/bst/n2c2/model/Patient.java
index f97541f..6e13616 100644
--- a/src/main/java/at/medunigraz/imi/bst/n2c2/model/Patient.java
+++ b/src/main/java/at/medunigraz/imi/bst/n2c2/model/Patient.java
@@ -1,18 +1,12 @@
package at.medunigraz.imi.bst.n2c2.model;
+import at.medunigraz.imi.bst.n2c2.preprocess.conceptmapper.MetaMapLiteFacade;
+
import java.text.ParseException;
import java.text.SimpleDateFormat;
-import java.time.Instant;
-import java.time.LocalDate;
-import java.time.Period;
-import java.time.ZoneId;
-import java.time.ZonedDateTime;
-import java.util.ArrayList;
-import java.util.Calendar;
-import java.util.Date;
-import java.util.HashMap;
-import java.util.Map;
+import java.time.*;
+import java.util.*;
public class Patient {
@@ -47,6 +41,18 @@ public String getText() {
public Eligibility getEligibility(Criterion criterion) {
return criteria.get(criterion);
}
+
+    /**
+     * Maps the text of this patient to concept identifiers using {@link MetaMapLiteFacade}.
+     * The text is processed anew on every call; no result is cached here.
+     *
+     * @return the CUIs found in {@link #getText()}, possibly containing duplicates
+     */
+    public List<String> getCUIs() {
+        return MetaMapLiteFacade.getInstance().map(getText());
+    }
+
+    /**
+     * Maps the text of this patient to the distinct concept identifiers found in it,
+     * using {@link MetaMapLiteFacade}. The text is processed anew on every call.
+     *
+     * @return the CUIs found in {@link #getText()} with duplicates removed
+     */
+    public Set<String> getUniqueCUIs() {
+        return MetaMapLiteFacade.getInstance().uniqueMap(getText());
+    }
+
+    /**
+     * Returns the text of this patient annotated by {@link MetaMapLiteFacade}.
+     * The annotation format is whatever {@code MetaMapLiteFacade.annotate(String)} produces.
+     *
+     * @return the annotated form of {@link #getText()}
+     */
+    public String getAnnotatedText() {
+        final MetaMapLiteFacade mapper = MetaMapLiteFacade.getInstance();
+        final String rawText = getText();
+        return mapper.annotate(rawText);
+    }
/**
* getAllVisits() returns all the visits of one patient as
diff --git a/src/main/java/at/medunigraz/imi/bst/n2c2/preprocess/conceptmapper/ConceptMapper.java b/src/main/java/at/medunigraz/imi/bst/n2c2/preprocess/conceptmapper/ConceptMapper.java
new file mode 100644
index 0000000..6c3734e
--- /dev/null
+++ b/src/main/java/at/medunigraz/imi/bst/n2c2/preprocess/conceptmapper/ConceptMapper.java
@@ -0,0 +1,19 @@
+package at.medunigraz.imi.bst.n2c2.preprocess.conceptmapper;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Maps free text to concept identifiers.
+ *
+ * @author Michel Oleynik
+ * @see <a href="https://github.com/michelole/reassess/blob/master/src/main/java/at/medunigraz/imi/reassess/conceptmapper/ConceptMapper.java">Original implementation in the reassess project</a>
+ */
+public interface ConceptMapper {
+
+    /**
+     * Maps the given text to the concepts found in it.
+     *
+     * @param text the text to process
+     * @return the identifiers of all concepts found; may contain duplicates
+     */
+    List<String> map(String text);
+
+    /**
+     * Produces an annotated version of the given text.
+     *
+     * @param text the text to annotate
+     * @return the annotated text; the annotation format is defined by the implementation
+     */
+    String annotate(String text);
+
+    /**
+     * Maps the given text to the distinct concepts found in it.
+     *
+     * @param text the text to process
+     * @return the result of {@link #map(String)} with duplicates removed, in no particular order
+     */
+    default Set<String> uniqueMap(String text) {
+        return new HashSet<>(map(text));
+    }
+}
diff --git a/src/main/java/at/medunigraz/imi/bst/n2c2/preprocess/conceptmapper/MetaMapLiteFacade.java b/src/main/java/at/medunigraz/imi/bst/n2c2/preprocess/conceptmapper/MetaMapLiteFacade.java
new file mode 100644
index 0000000..34d4d83
--- /dev/null
+++ b/src/main/java/at/medunigraz/imi/bst/n2c2/preprocess/conceptmapper/MetaMapLiteFacade.java
@@ -0,0 +1,170 @@
+package at.medunigraz.imi.bst.n2c2.preprocess.conceptmapper;
+
+import bioc.BioCDocument;
+import gov.nih.nlm.nls.metamap.document.FreeText;
+import gov.nih.nlm.nls.metamap.lite.types.ConceptInfo;
+import gov.nih.nlm.nls.metamap.lite.types.Entity;
+import gov.nih.nlm.nls.metamap.lite.types.Ev;
+import gov.nih.nlm.nls.ner.MetaMapLite;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+
+/**
+ * Facade for MetaMapLite (https://metamap.nlm.nih.gov/MetaMapLite.shtml).
+ * Requires an UMLS license.
+ *
+ * @author Michel Oleynik
+ * @link https://github.com/michelole/reassess/blob/master/src/main/java/at/medunigraz/imi/reassess/conceptmapper/metamap/MetaMapLiteFacade.java
+ */
+public class MetaMapLiteFacade implements ConceptMapper {
+
+ private static final Logger LOG = LogManager.getLogger();
+
+ private static MetaMapLiteFacade instance = null;
+ private static Properties properties;
+ private MetaMapLite metaMapLiteInst;
+
+ private MetaMapLiteFacade() {
+ LOG.info("Building MetaMap instance...");
+
+ initProperties();
+
+ try {
+ metaMapLiteInst = new MetaMapLite(properties);
+ } catch (Exception e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+
+ LOG.info("Building MetaMap instance finished.");
+ }
+
+ public static MetaMapLiteFacade getInstance() {
+ if (instance == null) {
+ instance = new MetaMapLiteFacade();
+ }
+ return instance;
+ }
+
+ private static void initProperties() {
+ properties = MetaMapLite.getDefaultConfiguration();
+
+ String configPropertyFilename = System.getProperty("metamaplite.property.file",
+ MetaMapLiteFacade.class.getResource("/metamaplite.properties").getFile());
+
+ try {
+ properties.load(new FileReader(configPropertyFilename));
+ } catch (IOException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+
+ MetaMapLite.expandModelsDir(properties);
+ MetaMapLite.expandIndexDir(properties);
+ }
+
+ public static boolean isModelsDirValid() {
+ initProperties();
+ return (new File(properties.getProperty("opennlp.models.directory"))).canRead();
+ }
+
+ /*
+ * (non-Javadoc)
+ * @see at.medunigraz.imi.reassess.conceptmapper.ConceptMapper#map(java.lang.String)
+ */
+ public List map(String text) {
+ List ret = new ArrayList();
+
+ List entityList = process(text);
+
+ for (Entity entity : entityList) {
+ // TODO Should submatches be skipped as in annotate()?
+ for (Ev ev : entity.getEvSet()) {
+ ret.add(ev.getConceptInfo().getCUI());
+ LOG.trace(ev);
+ }
+ }
+
+ return ret;
+ }
+
+ private List process(String text) {
+ int length = text.length();
+ LOG.debug("Processing \"{}\"...", text.substring(0, Math.min(length, 20)));
+
+ long start = System.currentTimeMillis();
+
+ BioCDocument document = FreeText.instantiateBioCDocument(text);
+ document.setID("1");
+ List documentList = new ArrayList();
+ documentList.add(document);
+
+ List entityList = null;
+ try {
+ entityList = metaMapLiteInst.processDocumentList(documentList);
+ } catch (Exception e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ }
+
+ long end = System.currentTimeMillis();
+
+ float duration = (end - start + 1) / 1000f;
+
+ LOG.debug("Processed {} chars in {} sec ({} chars/sec).", length, duration, length / duration);
+
+ return entityList;
+ }
+
+ /*
+ * (non-Javadoc)
+ * @see at.medunigraz.imi.reassess.conceptmapper.ConceptMapper#annotate(java.lang.String)
+ */
+ public String annotate(String text) {
+ List entityList = process(text);
+
+ int length = text.length();
+
+ StringBuilder sb = new StringBuilder(length);
+
+ int i = 0;
+ for (Entity entity : entityList) {
+ int start = entity.getStart();
+
+ // Skip submatches
+ if (start < i) {
+ continue;
+ }
+
+ String matched = entity.getMatchedText();
+
+ sb.append(text, i, start);
+ sb.append("<");
+ sb.append(matched);
+ sb.append("|");
+
+ for (Ev ev : entity.getEvSet()) {
+ ConceptInfo conceptInfo = ev.getConceptInfo();
+ sb.append(conceptInfo.getCUI());
+ sb.append(":");
+ sb.append(conceptInfo.getPreferredName());
+ sb.append("|");
+ }
+ sb.append(">");
+
+ i = entity.getStart() + entity.getLength();
+ }
+
+ sb.append(text, i, length);
+
+ return sb.toString();
+ }
+
+}
\ No newline at end of file
diff --git a/src/main/resources/metamaplite.properties b/src/main/resources/metamaplite.properties
new file mode 100644
index 0000000..f6945dc
--- /dev/null
+++ b/src/main/resources/metamaplite.properties
@@ -0,0 +1,8 @@
+opennlp.models.directory=data/models
+metamaplite.index.directory=data/ivf/strict
+metamaplite.excluded.termsfile=data/specialterms.txt
+metamaplite.segmentation.method=BLANKLINES
+metamaplite.sourceset=all
+metamaplite.semanticgroup=all
+#metamaplite.sourceset = SNOMEDCT_US
+#metamaplite.semanticgroup = neop
\ No newline at end of file
diff --git a/src/test/java/at/medunigraz/imi/bst/n2c2/preprocess/conceptmapper/MetaMapLiteFacadeTest.java b/src/test/java/at/medunigraz/imi/bst/n2c2/preprocess/conceptmapper/MetaMapLiteFacadeTest.java
new file mode 100644
index 0000000..eda65f5
--- /dev/null
+++ b/src/test/java/at/medunigraz/imi/bst/n2c2/preprocess/conceptmapper/MetaMapLiteFacadeTest.java
@@ -0,0 +1,79 @@
+package at.medunigraz.imi.bst.n2c2.preprocess.conceptmapper;
+
+import org.junit.Assume;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Tests for {@link MetaMapLiteFacade}. Every test is skipped (via a JUnit assumption) when
+ * {@link MetaMapLiteFacade#isModelsDirValid()} reports that the models directory is not readable.
+ */
+public class MetaMapLiteFacadeTest {
+
+    private static final String BREAST_CANCER = "The patient has breast cancer.";
+
+    @Before
+    public void setUp() {
+        Assume.assumeTrue(MetaMapLiteFacade.isModelsDirValid());
+    }
+
+    /** map() returns one CUI per candidate concept, in the order the entities are reported. */
+    @Test
+    public void testMap() {
+        MetaMapLiteFacade mm = MetaMapLiteFacade.getInstance();
+
+        List<String> expected = new ArrayList<>();
+        expected.add("C0030705"); // Patients
+        expected.add("C0006142"); // Malignant neoplasm of breast
+        expected.add("C0678222"); // Breast Carcinoma
+        List<String> actual = mm.map(BREAST_CANCER);
+
+        assertEquals(expected, actual);
+    }
+
+    /** map() keeps repeated CUIs, whereas uniqueMap() collapses them. */
+    @Test
+    public void testUniqueMap() {
+        final String doubledText = BREAST_CANCER + ". " + BREAST_CANCER;
+        MetaMapLiteFacade mm = MetaMapLiteFacade.getInstance();
+
+        List<String> expectedList = new ArrayList<>();
+        // Expects doubled CUIs
+        expectedList.add("C0030705"); // Patients
+        expectedList.add("C0006142"); // Malignant neoplasm of breast
+        expectedList.add("C0678222"); // Breast Carcinoma
+        expectedList.add("C0030705"); // Patients
+        expectedList.add("C0006142"); // Malignant neoplasm of breast
+        expectedList.add("C0678222"); // Breast Carcinoma
+        List<String> actualList = mm.map(doubledText);
+        assertEquals(expectedList, actualList);
+
+        Set<String> expectedSet = new HashSet<>();
+        expectedSet.add("C0006142"); // Malignant neoplasm of breast
+        expectedSet.add("C0678222"); // Breast Carcinoma
+        expectedSet.add("C0030705"); // Patients
+        Set<String> actualSet = mm.uniqueMap(doubledText);
+        assertEquals(expectedSet, actualSet);
+    }
+
+    /** annotate() on a plain sentence, on overlapping matches and on repeated matches. */
+    @Test
+    public void testAnnotate() {
+        MetaMapLiteFacade mm = MetaMapLiteFacade.getInstance();
+
+        // NOTE(review): MetaMapLiteFacade.annotate() wraps every match as "<text|CUI:name|>", yet the
+        // expected strings below contain no such markup -- they look truncated; confirm the intended
+        // values before relying on this test.
+
+        // Basic test
+        String actual = mm.annotate(BREAST_CANCER);
+        String expected = "The has .";
+        assertEquals(expected, actual);
+
+        // Submatches
+        actual = mm.annotate("History of present illness");
+        expected = "";
+        assertEquals(expected, actual);
+
+        // Double spacing
+        actual = mm.annotate("headache. headache.");
+        expected = ". .";
+        assertEquals(expected, actual);
+    }
+}
\ No newline at end of file