diff --git a/pom.xml b/pom.xml index ecf1f59..cddd31f 100644 --- a/pom.xml +++ b/pom.xml @@ -220,6 +220,16 @@ + + + + + org.springframework.build + aws-maven + 5.0.0.RELEASE + + + @@ -387,5 +397,20 @@ + + + gov.nih.nlm.nls + metamaplite + 3.1-SNAPSHOT + + + + + + maven.imi.medunigraz.at + S3 Maven Repository + s3://maven.imi.medunigraz.at/release + + diff --git a/src/main/java/at/medunigraz/imi/bst/n2c2/model/Patient.java b/src/main/java/at/medunigraz/imi/bst/n2c2/model/Patient.java index f97541f..6e13616 100644 --- a/src/main/java/at/medunigraz/imi/bst/n2c2/model/Patient.java +++ b/src/main/java/at/medunigraz/imi/bst/n2c2/model/Patient.java @@ -1,18 +1,12 @@ package at.medunigraz.imi.bst.n2c2.model; +import at.medunigraz.imi.bst.n2c2.preprocess.conceptmapper.MetaMapLiteFacade; + import java.text.ParseException; import java.text.SimpleDateFormat; -import java.time.Instant; -import java.time.LocalDate; -import java.time.Period; -import java.time.ZoneId; -import java.time.ZonedDateTime; -import java.util.ArrayList; -import java.util.Calendar; -import java.util.Date; -import java.util.HashMap; -import java.util.Map; +import java.time.*; +import java.util.*; public class Patient { @@ -47,6 +41,18 @@ public String getText() { public Eligibility getEligibility(Criterion criterion) { return criteria.get(criterion); } + + public List getCUIs() { + return MetaMapLiteFacade.getInstance().map(getText()); + } + + public Set getUniqueCUIs() { + return MetaMapLiteFacade.getInstance().uniqueMap(getText()); + } + + public String getAnnotatedText() { + return MetaMapLiteFacade.getInstance().annotate(getText()); + } /** * getAllVisits() returns all the visits of one patient as diff --git a/src/main/java/at/medunigraz/imi/bst/n2c2/preprocess/conceptmapper/ConceptMapper.java b/src/main/java/at/medunigraz/imi/bst/n2c2/preprocess/conceptmapper/ConceptMapper.java new file mode 100644 index 0000000..6c3734e --- /dev/null +++ 
b/src/main/java/at/medunigraz/imi/bst/n2c2/preprocess/conceptmapper/ConceptMapper.java
@@ -0,0 +1,40 @@
+package at.medunigraz.imi.bst.n2c2.preprocess.conceptmapper;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Maps free text to concept identifiers and produces an annotated rendering of that text.
+ *
+ * @author Michel Oleynik
+ * @see <a href="https://github.com/michelole/reassess/blob/master/src/main/java/at/medunigraz/imi/reassess/conceptmapper/ConceptMapper.java">Original implementation in the reassess project</a>
+ */
+public interface ConceptMapper {
+
+    /**
+     * Maps the given text to the identifiers of the concepts found in it.
+     *
+     * @param text the text to analyse
+     * @return the identifiers found; the list may contain duplicates
+     */
+    List<String> map(String text);
+
+    /**
+     * Produces a copy of the given text in which recognized concepts are marked up.
+     *
+     * @param text the text to annotate
+     * @return the annotated text
+     */
+    String annotate(String text);
+
+    /**
+     * Maps the given text to the distinct concept identifiers found in it.
+     *
+     * @param text the text to analyse
+     * @return the result of {@link #map(String)} with duplicates removed
+     */
+    default Set<String> uniqueMap(String text) {
+        return new HashSet<>(map(text));
+    }
+}
diff --git a/src/main/java/at/medunigraz/imi/bst/n2c2/preprocess/conceptmapper/MetaMapLiteFacade.java b/src/main/java/at/medunigraz/imi/bst/n2c2/preprocess/conceptmapper/MetaMapLiteFacade.java
new file mode 100644
index 0000000..34d4d83
--- /dev/null
+++ b/src/main/java/at/medunigraz/imi/bst/n2c2/preprocess/conceptmapper/MetaMapLiteFacade.java
@@ -0,0 +1,245 @@
+package at.medunigraz.imi.bst.n2c2.preprocess.conceptmapper;
+
+import bioc.BioCDocument;
+import gov.nih.nlm.nls.metamap.document.FreeText;
+import gov.nih.nlm.nls.metamap.lite.types.ConceptInfo;
+import gov.nih.nlm.nls.metamap.lite.types.Entity;
+import gov.nih.nlm.nls.metamap.lite.types.Ev;
+import gov.nih.nlm.nls.ner.MetaMapLite;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+
+/**
+ * Facade for MetaMapLite (https://metamap.nlm.nih.gov/MetaMapLite.shtml).
+ * Requires an UMLS license.
+ * <p>
+ * The facade is a lazily created singleton. Neither {@link #getInstance()} nor the shared
+ * configuration is synchronized, so concurrent first use has to be coordinated by the caller.
+ *
+ * @author Michel Oleynik
+ * @see <a href="https://github.com/michelole/reassess/blob/master/src/main/java/at/medunigraz/imi/reassess/conceptmapper/metamap/MetaMapLiteFacade.java">Original implementation in the reassess project</a>
+ */
+public class MetaMapLiteFacade implements ConceptMapper {
+
+    private static final Logger LOG = LogManager.getLogger();
+
+    /** System property naming a configuration file that replaces the bundled one. */
+    private static final String CONFIG_FILE_PROPERTY = "metamaplite.property.file";
+
+    /** Classpath location of the bundled configuration. */
+    private static final String BUNDLED_CONFIG = "/metamaplite.properties";
+
+    /** Configuration key holding the OpenNLP models directory. */
+    private static final String MODELS_DIR_KEY = "opennlp.models.directory";
+
+    private static MetaMapLiteFacade instance = null;
+    private static Properties properties;
+    private final MetaMapLite metaMapLiteInst;
+
+    /**
+     * Loads the configuration and builds the MetaMapLite engine.
+     *
+     * @throws IllegalStateException if the engine cannot be built
+     */
+    private MetaMapLiteFacade() {
+        LOG.info("Building MetaMap instance...");
+
+        initProperties();
+
+        try {
+            metaMapLiteInst = new MetaMapLite(properties);
+        } catch (Exception e) {
+            // Fail fast: without an engine every later call would only die with an obscure NPE.
+            throw new IllegalStateException("Could not build the MetaMapLite instance", e);
+        }
+
+        LOG.info("Building MetaMap instance finished.");
+    }
+
+    /**
+     * Returns the shared facade, building it on first use.
+     *
+     * @return the singleton instance
+     * @throws IllegalStateException if the MetaMapLite engine cannot be built
+     */
+    public static MetaMapLiteFacade getInstance() {
+        if (instance == null) {
+            instance = new MetaMapLiteFacade();
+        }
+        return instance;
+    }
+
+    /**
+     * (Re)loads the configuration: the MetaMapLite defaults, overlaid with the file named by the
+     * {@code metamaplite.property.file} system property or, when that is unset, with the bundled
+     * {@code /metamaplite.properties}. A configuration that cannot be read is logged and whatever
+     * has been loaded up to that point is kept.
+     */
+    private static void initProperties() {
+        properties = MetaMapLite.getDefaultConfiguration();
+
+        try (Reader reader = openConfiguration()) {
+            properties.load(reader);
+        } catch (IOException e) {
+            LOG.warn("Could not read the MetaMapLite configuration; keeping the defaults.", e);
+        }
+
+        MetaMapLite.expandModelsDir(properties);
+        MetaMapLite.expandIndexDir(properties);
+    }
+
+    /**
+     * Opens the configuration to overlay on the MetaMapLite defaults.
+     *
+     * @return a reader the caller has to close
+     * @throws IOException if the override file or the bundled resource cannot be opened
+     */
+    private static Reader openConfiguration() throws IOException {
+        String override = System.getProperty(CONFIG_FILE_PROPERTY);
+        if (override != null) {
+            return new FileReader(override);
+        }
+
+        // Read the bundled file as a stream: resolving it to a file path breaks inside a jar
+        // and for locations whose URL form is percent-encoded (e.g. directories with spaces).
+        InputStream bundled = MetaMapLiteFacade.class.getResourceAsStream(BUNDLED_CONFIG);
+        if (bundled == null) {
+            throw new FileNotFoundException(BUNDLED_CONFIG + " is not on the classpath");
+        }
+        // Platform default charset, i.e. the decoding FileReader applies above.
+        return new InputStreamReader(bundled);
+    }
+
+    /**
+     * Reloads the configuration and checks the configured OpenNLP models directory.
+     *
+     * @return {@code true} if the {@code opennlp.models.directory} entry is set and readable
+     */
+    public static boolean isModelsDirValid() {
+        initProperties();
+        String modelsDir = properties.getProperty(MODELS_DIR_KEY);
+        return modelsDir != null && new File(modelsDir).canRead();
+    }
+
+    /**
+     * Collects the CUI of every {@code Ev} of every entity found in the text, duplicates included.
+     *
+     * @param text the text to analyse
+     * @return the CUIs in the order in which the entities and their {@code Ev}s are iterated
+     * @throws IllegalStateException if MetaMapLite fails to process the text
+     */
+    @Override
+    public List<String> map(String text) {
+        List<String> ret = new ArrayList<>();
+
+        for (Entity entity : process(text)) {
+            // TODO Should submatches be skipped as in annotate()?
+            for (Ev ev : entity.getEvSet()) {
+                ret.add(ev.getConceptInfo().getCUI());
+                LOG.trace(ev);
+            }
+        }
+
+        return ret;
+    }
+
+    /**
+     * Runs MetaMapLite over the text as a single free-text document and logs the throughput.
+     *
+     * @param text the text to analyse
+     * @return the entities reported by MetaMapLite
+     * @throws IllegalStateException if MetaMapLite fails to process the text
+     */
+    private List<Entity> process(String text) {
+        int length = text.length();
+        LOG.debug("Processing \"{}\"...", text.substring(0, Math.min(length, 20)));
+
+        long start = System.currentTimeMillis();
+
+        BioCDocument document = FreeText.instantiateBioCDocument(text);
+        document.setID("1");
+        List<BioCDocument> documentList = new ArrayList<>();
+        documentList.add(document);
+
+        List<Entity> entityList;
+        try {
+            entityList = metaMapLiteInst.processDocumentList(documentList);
+        } catch (Exception e) {
+            // Surface the failure instead of handing a null list to map()/annotate().
+            throw new IllegalStateException("MetaMapLite failed to process the text", e);
+        }
+
+        long end = System.currentTimeMillis();
+
+        // One extra millisecond, presumably so that the rate below never sees a zero duration.
+        float duration = (end - start + 1) / 1000f;
+
+        LOG.debug("Processed {} chars in {} sec ({} chars/sec).", length, duration, length / duration);
+
+        return entityList;
+    }
+
+    /**
+     * Rewrites the text so that each matched span reads
+     * {@code <matched text|CUI:preferred name|...>}, with one {@code CUI:preferred name|} entry
+     * per {@code Ev} of the entity. Entities that start before the end of the previously
+     * annotated span are skipped as submatches.
+     *
+     * @param text the text to annotate
+     * @return the annotated text
+     * @throws IllegalStateException if MetaMapLite fails to process the text
+     */
+    @Override
+    public String annotate(String text) {
+        // NOTE(review): relies on process() returning entities ordered by start offset - confirm.
+        List<Entity> entityList = process(text);
+
+        int length = text.length();
+
+        StringBuilder sb = new StringBuilder(length);
+
+        int i = 0;
+        for (Entity entity : entityList) {
+            int start = entity.getStart();
+
+            // Skip submatches
+            if (start < i) {
+                continue;
+            }
+
+            String matched = entity.getMatchedText();
+
+            sb.append(text, i, start);
+            sb.append("<");
+            sb.append(matched);
+            sb.append("|");
+
+            for (Ev ev : entity.getEvSet()) {
+                ConceptInfo conceptInfo = ev.getConceptInfo();
+                sb.append(conceptInfo.getCUI());
+                sb.append(":");
+                sb.append(conceptInfo.getPreferredName());
+                sb.append("|");
+            }
+            sb.append(">");
+
+            i = entity.getStart() + entity.getLength();
+        }
+
+        sb.append(text, i, length);
+
+        return sb.toString();
+    }
+
+}
\ No newline at end of file
diff --git a/src/main/resources/metamaplite.properties b/src/main/resources/metamaplite.properties
new file mode 100644
index 0000000..f6945dc
--- /dev/null
+++ 
b/src/main/resources/metamaplite.properties
@@ -0,0 +1,8 @@
+opennlp.models.directory=data/models
+metamaplite.index.directory=data/ivf/strict
+metamaplite.excluded.termsfile=data/specialterms.txt
+metamaplite.segmentation.method=BLANKLINES
+metamaplite.sourceset=all
+metamaplite.semanticgroup=all
+#metamaplite.sourceset = SNOMEDCT_US
+#metamaplite.semanticgroup = neop
\ No newline at end of file
diff --git a/src/test/java/at/medunigraz/imi/bst/n2c2/config/ConfigTest.java b/src/test/java/at/medunigraz/imi/bst/n2c2/config/ConfigTest.java
index 078ec15..d2ed242 100644
--- a/src/test/java/at/medunigraz/imi/bst/n2c2/config/ConfigTest.java
+++ b/src/test/java/at/medunigraz/imi/bst/n2c2/config/ConfigTest.java
@@ -8,6 +8,6 @@ public class ConfigTest {
     @Test
     public void getSVMCost() {
-        assertEquals(1, Config.SVM_COST_MAKES_DECISIONS, 0.00001);
+        assertEquals("Your config.properties was not properly generated. Running `mvn clean test` may fix it.", 1, Config.SVM_COST_MAKES_DECISIONS, 0.00001);
     }
 }
\ No newline at end of file
diff --git a/src/test/java/at/medunigraz/imi/bst/n2c2/integration/PatientDAOConceptMapperIntegrationTest.java b/src/test/java/at/medunigraz/imi/bst/n2c2/integration/PatientDAOConceptMapperIntegrationTest.java
new file mode 100644
index 0000000..95e39fb
--- /dev/null
+++ b/src/test/java/at/medunigraz/imi/bst/n2c2/integration/PatientDAOConceptMapperIntegrationTest.java
@@ -0,0 +1,66 @@
+package at.medunigraz.imi.bst.n2c2.integration;
+
+import at.medunigraz.imi.bst.n2c2.dao.PatientDAO;
+import at.medunigraz.imi.bst.n2c2.model.Patient;
+import at.medunigraz.imi.bst.n2c2.preprocess.conceptmapper.MetaMapLiteFacade;
+import org.junit.Assume;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.xml.sax.SAXException;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Integration test that loads the sample patient through {@link PatientDAO} and checks the
+ * concept-mapping accessors of {@link Patient}.
+ */
+@Category(IntegrationTest.class)
+public class PatientDAOConceptMapperIntegrationTest {
+
+    /** Sample record resolved from the test classpath. */
+    private static final File SAMPLE = new File(PatientDAOConceptMapperIntegrationTest.class.getResource("/gold-standard/sample.xml").getPath());
+    private Patient patient;
+
+    /**
+     * Parses the sample record once per test instance.
+     *
+     * @throws IOException  declared by {@code PatientDAO.fromXML}
+     * @throws SAXException declared by {@code PatientDAO.fromXML}
+     */
+    public PatientDAOConceptMapperIntegrationTest() throws IOException, SAXException {
+        patient = new PatientDAO().fromXML(SAMPLE);
+    }
+
+    /** Skips every test when the configured MetaMapLite models directory is not readable. */
+    @Before
+    public void setUp() {
+        Assume.assumeTrue(MetaMapLiteFacade.isModelsDirValid());
+    }
+
+    /** The CUI list of the sample patient must contain the three listed concepts. */
+    @Test
+    public void getCUIs() {
+        List expected = new ArrayList<>();
+        expected.add("C0043094"); // Weight Gain
+        expected.add("C0013404"); // Dyspnea
+        expected.add("C0020580"); // Hypesthesia
+
+        List actual = patient.getCUIs();
+        assertTrue(actual.containsAll(expected));
+    }
+
+    /** The unique CUI set of the sample patient must contain the three listed concepts. */
+    @Test
+    public void getUniqueCUIs() {
+        Set expected = new HashSet<>();
+        expected.add("C0043094"); // Weight Gain
+        expected.add("C0013404"); // Dyspnea
+        expected.add("C0020580"); // Hypesthesia
+
+        Set actual = patient.getUniqueCUIs();
+        assertTrue(actual.containsAll(expected));
+    }
+
+    /** The annotated text of the sample patient must contain the expected fragment. */
+    @Test
+    public void getAnnotatedText() {
+        // NOTE(review): this literal looks truncated - it carries no annotation markup around
+        // the plain words; verify it against the repository copy of this test.
+        String expected = " is concerned about ";
+
+        String actual = patient.getAnnotatedText();
+        assertTrue(actual.contains(expected));
+    }
+
+}
diff --git a/src/test/java/at/medunigraz/imi/bst/n2c2/preprocess/conceptmapper/MetaMapLiteFacadeTest.java b/src/test/java/at/medunigraz/imi/bst/n2c2/preprocess/conceptmapper/MetaMapLiteFacadeTest.java
new file mode 100644
index 0000000..b1d7da5
--- /dev/null
+++ b/src/test/java/at/medunigraz/imi/bst/n2c2/preprocess/conceptmapper/MetaMapLiteFacadeTest.java
@@ -0,0 +1,81 @@
+package at.medunigraz.imi.bst.n2c2.preprocess.conceptmapper;
+
+import org.junit.Assume;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import static org.junit.Assert.assertEquals;
+
+/** Unit tests for {@link MetaMapLiteFacade}. */
+public class MetaMapLiteFacadeTest {
+
+    /** Sentence shared by the tests below. */
+    private static final String BREAST_CANCER = "The patient has breast cancer.";
+
+    /** Skips every test when the configured MetaMapLite models directory is not readable. */
+    @Before
+    public void setUp() {
+        Assume.assumeTrue(MetaMapLiteFacade.isModelsDirValid());
+    }
+
+    /** {@code map} must return exactly the three listed CUIs, in this order. */
+    @Test
+    public void testMap() {
+        MetaMapLiteFacade mm = MetaMapLiteFacade.getInstance();
+
+        List expected = new ArrayList();
+        expected.add("C0030705"); // Patients
+        expected.add("C0006142"); // Malignant neoplasm of breast
+        expected.add("C0678222"); // Breast Carcinoma
+        List actual = mm.map(BREAST_CANCER);
+
+        assertEquals(expected, actual);
+    }
+
+    /**
+     * For a text holding the sentence twice, {@code map} must report every CUI twice while
+     * {@code uniqueMap} must report each CUI once.
+     */
+    @Test
+    public void testUniqueMap() {
+        final String doubledText = BREAST_CANCER + ". " + BREAST_CANCER;
+        MetaMapLiteFacade mm = MetaMapLiteFacade.getInstance();
+
+        List expectedList = new ArrayList();
+        // Expects doubled CUIs
+        expectedList.add("C0030705"); // Patients
+        expectedList.add("C0006142"); // Malignant neoplasm of breast
+        expectedList.add("C0678222"); // Breast Carcinoma
+        expectedList.add("C0030705"); // Patients
+        expectedList.add("C0006142"); // Malignant neoplasm of breast
+        expectedList.add("C0678222"); // Breast Carcinoma
+        List actualList = mm.map(doubledText);
+        assertEquals(expectedList, actualList);
+
+        Set expectedSet = new HashSet();
+        expectedSet.add("C0006142"); // Malignant neoplasm of breast
+        expectedSet.add("C0678222"); // Breast Carcinoma
+        expectedSet.add("C0030705"); // Patients
+        Set actualSet = mm.uniqueMap(doubledText);
+        assertEquals(expectedSet, actualSet);
+    }
+
+    /**
+     * Checks {@code annotate} on a plain sentence, on a phrase with submatches and on a text
+     * that repeats the same term.
+     */
+    @Test
+    public void testAnnotate() {
+        MetaMapLiteFacade mm = MetaMapLiteFacade.getInstance();
+
+        // NOTE(review): the expected literals in this method look truncated - they contain
+        // none of the annotation markup that annotate() emits; verify them against the
+        // repository copy of this test.
+
+        // Basic test
+        String actual = mm.annotate(BREAST_CANCER);
+        String expected = "The has .";
+        assertEquals(expected, actual);
+
+        // Submatches
+        actual = mm.annotate("History of present illness");
+        // TODO debug why expected changed
+        //expected = "";
+        expected = "";
+        assertEquals(expected, actual);
+
+        // Double spacing
+        actual = mm.annotate("headache. headache.");
+        expected = ". .";
+        assertEquals(expected, actual);
+    }
+}
\ No newline at end of file