Skip to content

Commit

Permalink
Add MetaMapLite support (fixes bst-mug#2)
Browse files Browse the repository at this point in the history
MetaMapLite (https://metamap.nlm.nih.gov/MetaMapLite.shtml) is not FOSS, but a free UMLS license can be obtained on the website https://utslogin.nlm.nih.gov/cas/login.

The current pom.xml points to a private Maven repository with pre-compiled JARs, but you can instead install the JARs into your own local repository by following the MetaMapLite instructions for "Using Maven" (https://metamap.nlm.nih.gov/Docs/README_MetaMapLite_3.1.html).

The OpenNLP models, the index directory and the terms file required by MetaMapLite are also included in the MetaMapLite distribution; their paths should be set in this project's metamaplite.properties.
  • Loading branch information
michelole committed Apr 19, 2018
1 parent 2b71284 commit ed07e87
Show file tree
Hide file tree
Showing 6 changed files with 317 additions and 10 deletions.
25 changes: 25 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,16 @@
<!-- END added for deeplearning4j -->
<!-- ............................ -->
</plugins>

<extensions>
<!-- Required for metamaplite, non-FOSS, deployed on a private Maven repository -->
<extension>
<groupId>org.springframework.build</groupId>
<artifactId>aws-maven</artifactId>
<version>5.0.0.RELEASE</version>
</extension>
</extensions>

</build>

<!-- BEGIN added for deeplearning4j -->
Expand Down Expand Up @@ -387,5 +397,20 @@
<!--<artifactId>snowball-stemmer</artifactId>-->
<!--<version>1.3.0.581.1</version>-->
<!--</dependency>-->
<dependency>
<!-- MetaMapLite is not FOSS -->
<groupId>gov.nih.nlm.nls</groupId>
<artifactId>metamaplite</artifactId>
<version>3.1-SNAPSHOT</version>
</dependency>
</dependencies>

<repositories>
<!-- Private Maven repository used to host MetaMapLite -->
<repository>
<id>maven.imi.medunigraz.at</id>
<name>S3 Maven Repository</name>
<url>s3://maven.imi.medunigraz.at/release</url>
</repository>
</repositories>
</project>
26 changes: 16 additions & 10 deletions src/main/java/at/medunigraz/imi/bst/n2c2/model/Patient.java
Original file line number Diff line number Diff line change
@@ -1,18 +1,12 @@
package at.medunigraz.imi.bst.n2c2.model;


import at.medunigraz.imi.bst.n2c2.preprocess.conceptmapper.MetaMapLiteFacade;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.Instant;
import java.time.LocalDate;
import java.time.Period;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.time.*;
import java.util.*;


public class Patient {
Expand Down Expand Up @@ -47,6 +41,18 @@ public String getText() {
/**
 * Looks up the eligibility stored for a criterion.
 *
 * @param criterion the criterion to look up
 * @return whatever {@code criteria} holds for {@code criterion}
 */
public Eligibility getEligibility(Criterion criterion) {
    final Eligibility eligibility = criteria.get(criterion);
    return eligibility;
}

/**
 * Maps this patient's text to CUIs using the shared {@link MetaMapLiteFacade}.
 *
 * @return the list produced by {@link MetaMapLiteFacade#map(String)} for {@link #getText()}
 */
public List<String> getCUIs() {
    final MetaMapLiteFacade mapper = MetaMapLiteFacade.getInstance();
    return mapper.map(getText());
}

/**
 * Maps this patient's text to a duplicate-free set of CUIs using the shared
 * {@link MetaMapLiteFacade}.
 *
 * @return the set produced by {@code uniqueMap} for {@link #getText()}
 */
public Set<String> getUniqueCUIs() {
    final MetaMapLiteFacade mapper = MetaMapLiteFacade.getInstance();
    return mapper.uniqueMap(getText());
}

/**
 * Annotates this patient's text using the shared {@link MetaMapLiteFacade}.
 *
 * @return the string produced by {@link MetaMapLiteFacade#annotate(String)} for {@link #getText()}
 */
public String getAnnotatedText() {
    final MetaMapLiteFacade mapper = MetaMapLiteFacade.getInstance();
    return mapper.annotate(getText());
}

/**
* getAllVisits() returns all the visits of one patient as
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package at.medunigraz.imi.bst.n2c2.preprocess.conceptmapper;

import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * Contract for components that map free text to concept identifiers and
 * produce an annotated version of that text.
 *
 * @author Michel Oleynik <[email protected]>
 * @link https://github.com/michelole/reassess/blob/master/src/main/java/at/medunigraz/imi/reassess/conceptmapper/ConceptMapper.java
 */
public interface ConceptMapper {

    /**
     * Maps the given text to a list of concept identifiers.
     *
     * @param text the text to map
     * @return the identifiers found; duplicates are allowed
     */
    List<String> map(String text);

    /**
     * Produces an annotated version of the given text.
     *
     * @param text the text to annotate
     * @return the annotated text
     */
    String annotate(String text);

    /**
     * Maps the given text like {@link #map(String)} and collapses duplicates.
     *
     * @param text the text to map
     * @return a new {@link HashSet} holding every distinct element of {@code map(text)}
     */
    default Set<String> uniqueMap(String text) {
        final List<String> mapped = map(text);
        final Set<String> distinct = new HashSet<>(mapped);
        return distinct;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
package at.medunigraz.imi.bst.n2c2.preprocess.conceptmapper;

import bioc.BioCDocument;
import gov.nih.nlm.nls.metamap.document.FreeText;
import gov.nih.nlm.nls.metamap.lite.types.ConceptInfo;
import gov.nih.nlm.nls.metamap.lite.types.Entity;
import gov.nih.nlm.nls.metamap.lite.types.Ev;
import gov.nih.nlm.nls.ner.MetaMapLite;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

/**
* Facade for MetaMapLite (https://metamap.nlm.nih.gov/MetaMapLite.shtml).
* Requires an UMLS license.
*
* @author Michel Oleynik <[email protected]>
* @link https://github.com/michelole/reassess/blob/master/src/main/java/at/medunigraz/imi/reassess/conceptmapper/metamap/MetaMapLiteFacade.java
*/
public class MetaMapLiteFacade implements ConceptMapper {

private static final Logger LOG = LogManager.getLogger();

private static MetaMapLiteFacade instance = null;
private static Properties properties;
private MetaMapLite metaMapLiteInst;

private MetaMapLiteFacade() {
LOG.info("Building MetaMap instance...");

initProperties();

try {
metaMapLiteInst = new MetaMapLite(properties);
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}

LOG.info("Building MetaMap instance finished.");
}

public static MetaMapLiteFacade getInstance() {
if (instance == null) {
instance = new MetaMapLiteFacade();
}
return instance;
}

private static void initProperties() {
properties = MetaMapLite.getDefaultConfiguration();

String configPropertyFilename = System.getProperty("metamaplite.property.file",
MetaMapLiteFacade.class.getResource("/metamaplite.properties").getFile());

try {
properties.load(new FileReader(configPropertyFilename));
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}

MetaMapLite.expandModelsDir(properties);
MetaMapLite.expandIndexDir(properties);
}

public static boolean isModelsDirValid() {
initProperties();
return (new File(properties.getProperty("opennlp.models.directory"))).canRead();
}

/*
* (non-Javadoc)
* @see at.medunigraz.imi.reassess.conceptmapper.ConceptMapper#map(java.lang.String)
*/
public List<String> map(String text) {
List<String> ret = new ArrayList<String>();

List<Entity> entityList = process(text);

for (Entity entity : entityList) {
// TODO Should submatches be skipped as in annotate()?
for (Ev ev : entity.getEvSet()) {
ret.add(ev.getConceptInfo().getCUI());
LOG.trace(ev);
}
}

return ret;
}

private List<Entity> process(String text) {
int length = text.length();
LOG.debug("Processing \"{}\"...", text.substring(0, Math.min(length, 20)));

long start = System.currentTimeMillis();

BioCDocument document = FreeText.instantiateBioCDocument(text);
document.setID("1");
List<BioCDocument> documentList = new ArrayList<BioCDocument>();
documentList.add(document);

List<Entity> entityList = null;
try {
entityList = metaMapLiteInst.processDocumentList(documentList);
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}

long end = System.currentTimeMillis();

float duration = (end - start + 1) / 1000f;

LOG.debug("Processed {} chars in {} sec ({} chars/sec).", length, duration, length / duration);

return entityList;
}

/*
* (non-Javadoc)
* @see at.medunigraz.imi.reassess.conceptmapper.ConceptMapper#annotate(java.lang.String)
*/
public String annotate(String text) {
List<Entity> entityList = process(text);

int length = text.length();

StringBuilder sb = new StringBuilder(length);

int i = 0;
for (Entity entity : entityList) {
int start = entity.getStart();

// Skip submatches
if (start < i) {
continue;
}

String matched = entity.getMatchedText();

sb.append(text, i, start);
sb.append("<");
sb.append(matched);
sb.append("|");

for (Ev ev : entity.getEvSet()) {
ConceptInfo conceptInfo = ev.getConceptInfo();
sb.append(conceptInfo.getCUI());
sb.append(":");
sb.append(conceptInfo.getPreferredName());
sb.append("|");
}
sb.append(">");

i = entity.getStart() + entity.getLength();
}

sb.append(text, i, length);

return sb.toString();
}

}
8 changes: 8 additions & 0 deletions src/main/resources/metamaplite.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
opennlp.models.directory=data/models
metamaplite.index.directory=data/ivf/strict
metamaplite.excluded.termsfile=data/specialterms.txt
metamaplite.segmentation.method=BLANKLINES
metamaplite.sourceset=all
metamaplite.semanticgroup=all
#metamaplite.sourceset = SNOMEDCT_US
#metamaplite.semanticgroup = neop
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package at.medunigraz.imi.bst.n2c2.preprocess.conceptmapper;

import org.junit.Assume;
import org.junit.Before;
import org.junit.Test;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import static org.junit.Assert.assertEquals;

public class MetaMapLiteFacadeTest {

    private static final String BREAST_CANCER = "The patient has breast cancer.";

    /**
     * Skips the tests of this class unless the models directory check succeeds.
     */
    @Before
    public void setUp() {
        boolean modelsAvailable = MetaMapLiteFacade.isModelsDirValid();
        Assume.assumeTrue(modelsAvailable);
    }

    /**
     * Appends the CUIs expected for one occurrence of {@link #BREAST_CANCER}, in order.
     */
    private static void addBreastCancerCuis(List<String> target) {
        target.add("C0030705"); // Patients
        target.add("C0006142"); // Malignant neoplasm of breast
        target.add("C0678222"); // Breast Carcinoma
    }

    /** A single sentence maps to its CUIs in order. */
    @Test
    public void testMap() {
        MetaMapLiteFacade facade = MetaMapLiteFacade.getInstance();

        List<String> wanted = new ArrayList<String>();
        addBreastCancerCuis(wanted);

        List<String> got = facade.map(BREAST_CANCER);
        assertEquals(wanted, got);
    }

    /** Repeated text yields repeated CUIs from map() but each CUI once from uniqueMap(). */
    @Test
    public void testUniqueMap() {
        final String doubledText = BREAST_CANCER + ". " + BREAST_CANCER;
        MetaMapLiteFacade facade = MetaMapLiteFacade.getInstance();

        // Expects doubled CUIs
        List<String> wantedList = new ArrayList<String>();
        addBreastCancerCuis(wantedList);
        addBreastCancerCuis(wantedList);
        assertEquals(wantedList, facade.map(doubledText));

        List<String> once = new ArrayList<String>();
        addBreastCancerCuis(once);
        Set<String> wantedSet = new HashSet<String>(once);
        assertEquals(wantedSet, facade.uniqueMap(doubledText));
    }

    /** Checks the annotation format for a basic sentence, submatches and repeated terms. */
    @Test
    public void testAnnotate() {
        MetaMapLiteFacade facade = MetaMapLiteFacade.getInstance();

        // Basic test
        assertEquals(
                "The <patient|C0030705:Patients|> has <breast cancer|C0006142:Malignant neoplasm of breast|C0678222:Breast Carcinoma|>.",
                facade.annotate(BREAST_CANCER));

        // Submatches
        assertEquals(
                "<History of present illness|C0262512:History of present illness|C0488508:History of present illness:Finding:Point in time:^Patient:Nominal:Reported|>",
                facade.annotate("History of present illness"));

        // Double spacing
        assertEquals(
                "<headache|C0018681:Headache|C2096315:ENT surgical result nose headache|>. <headache|C2096315:ENT surgical result nose headache|C0018681:Headache|>.",
                facade.annotate("headache. headache."));
    }
}

0 comments on commit ed07e87

Please sign in to comment.