Skip to content

Commit

Permalink
Merge branch 'master' of github.com:bst-mug/trec-pm
Browse files Browse the repository at this point in the history
  • Loading branch information
michelole committed Jan 25, 2019
2 parents 9ffe854 + 7a17655 commit ae81d97
Show file tree
Hide file tree
Showing 16 changed files with 402 additions and 79 deletions.
8 changes: 4 additions & 4 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
language: java
jdk:
- oraclejdk8
- oraclejdk9
- oraclejdk10
- oraclejdk11
- openjdk8
- openjdk9
- openjdk10
- openjdk11
script: mvn test -Delastic.hostname=$ELASTIC_HOSTNAME -Dlexigram.apikey=$LEXIGRAM_APIKEY -B
after_success:
- mvn -Delastic.hostname=$ELASTIC_HOSTNAME -Dlexigram.apikey=$LEXIGRAM_APIKEY clean test jacoco:report coveralls:report
4 changes: 2 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,11 @@
<move file="${project.build.directory}/lib/trec_eval-8c56e931127d53cff52945ca7d07c5270c190da1/trec_eval"
tofile="${project.build.directory}/lib/trec_eval"/>

<get src="https://trec.nist.gov/data/clinical/sample_eval.pl"
<get src="https://web.archive.org/web/20170514010604if_/http://trec.nist.gov:80/data/clinical/sample_eval.pl"
dest="${project.build.directory}/lib/sample_eval.pl"
verbose="false" usetimestamp="true"/>

<get src="ftp://ftp.ncbi.nlm.nih.gov/gene/DATA/GENE_INFO/Mammalia/Homo_sapiens.gene_info.gz"
<get src="http://mirrors.vbi.vt.edu/mirrors/ftp.ncbi.nih.gov/gene/DATA/GENE_INFO/Mammalia/Homo_sapiens.gene_info.gz"
dest="${basedir}/src/main/resources/genes/Homo_sapiens.gene_info.gz"
verbose="false" usetimestamp="true"/>
<gunzip src="${basedir}/src/main/resources/genes/Homo_sapiens.gene_info.gz"/>
Expand Down
68 changes: 4 additions & 64 deletions src/main/java/at/medunigraz/imi/bst/lexigram/Lexigram.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package at.medunigraz.imi.bst.lexigram;

import at.medunigraz.imi.bst.config.TrecConfig;
import at.medunigraz.imi.bst.trec.expansion.CachedWebRequester;
import at.medunigraz.imi.bst.trec.model.Topic;
import at.medunigraz.imi.bst.trec.model.TopicSet;
import at.medunigraz.imi.bst.trec.stats.CSVStatsWriter;
Expand Down Expand Up @@ -28,6 +29,8 @@ public class Lexigram {

private static final String ENDPOINT = "https://api.lexigram.io/v1/lexigraph/";

private static final CachedWebRequester REQUESTER = new CachedWebRequester("cache/lexigramV2.ser");

private static final List<String> NOISE = new ArrayList<>();
static {
NOISE.add("classification");
Expand All @@ -39,37 +42,6 @@ public class Lexigram {
NOISE.add("ca - ");
}

private static class Cache {
private static final String FILENAME = "cache/lexigram.ser";
private static HashMap<String, String> CALLS = new HashMap<>();
static {
if (Files.exists(Paths.get(FILENAME))) {
load();
}
}

private static void load() {
try {
ObjectInputStream ois = new ObjectInputStream(new FileInputStream(FILENAME));
CALLS = (HashMap) ois.readObject();
ois.close();
} catch (Exception e) {
throw new RuntimeException(e);
}
}

private static void save() {
try
{
ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(FILENAME));
oos.writeObject(CALLS);
oos.close();
} catch(IOException e) {
throw new RuntimeException(e);
}
}
}

public static boolean isAPIKeyLoaded() {
final int MIN_API_KEY_LENGTH = 20;
return TrecConfig.LEXIGRAM_APIKEY.length() > MIN_API_KEY_LENGTH;
Expand Down Expand Up @@ -231,39 +203,7 @@ private static String cleanUpString(String label) {
}

private static JSONObject get(String url) {
if (!Cache.CALLS.containsKey(url)) {
HttpResponse<JsonNode> response = null;
try {
response = Unirest.get(url)
.header("authorization", "Bearer " + TrecConfig.LEXIGRAM_APIKEY)
.asJson();
} catch (UnirestException e) {
throw new RuntimeException(e);
}

if (response.getStatus() == 401) {
throw new RuntimeException("Unauthorized access to Lexigram API. Place your key in the file trec-pm.properties.");
}

if (response.getStatus() != 200) {
throw new RuntimeException("Got status code " + response.getStatus() + " from Lexigram API with body " + response.getBody());
}

JSONObject body = new JSONObject(response.getBody());

String firstArrayObject = "";
try {
firstArrayObject = body.getJSONObject("object").toString();
} catch (JSONException e) {
LOG.error("Unexpected response from Lexigram API: " + body);
throw e;
}

Cache.CALLS.put(url, firstArrayObject);
Cache.save();
}

return new JSONObject(Cache.CALLS.get(url));
return new JSONObject(REQUESTER.get(url, TrecConfig.LEXIGRAM_APIKEY));
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ public static void main(String[] args) {
.withWordRemoval();
}

TreeMap<Double, String> resultsCombinationKeywords = runExperiments(builder.build());
//TreeMap<Double, String> resultsCombinationKeywords = runExperiments(builder.build());
}

private static TreeMap<Double, String> runExperiments(Set<Experiment> experiments) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
package at.medunigraz.imi.bst.trec.expansion;

import com.mashape.unirest.http.HttpResponse;
import com.mashape.unirest.http.JsonNode;
import com.mashape.unirest.http.Unirest;
import com.mashape.unirest.http.exceptions.UnirestException;

import java.io.*;
import java.util.HashMap;

/**
 * Issues HTTP GET requests through Unirest and memoizes the response bodies by URL.
 *
 * <p>The cache is a {@code HashMap} from request URL to response body (as a string). It is
 * loaded from a Java-serialized file when the instance is created (if that file exists) and
 * the whole map is re-serialized to the same file after every newly cached response.
 *
 * <p>Only responses with HTTP status 200 are cached; any other status raises a
 * {@link RuntimeException} before anything is stored. No synchronization is performed here.
 */
public class CachedWebRequester {

    /** File the cache map is loaded from and persisted to. */
    private final File cacheFile;

    /** In-memory cache: request URL -> response body. */
    private HashMap<String, String> cache = new HashMap<>();

    /**
     * Creates a requester backed by the given cache file, loading previously cached
     * responses if the file already exists.
     *
     * @param filename Path of the serialized cache file.
     * @throws RuntimeException if the file exists but cannot be read or deserialized.
     */
    public CachedWebRequester(String filename) {
        this.cacheFile = new File(filename);

        if (cacheFile.exists()) {
            try {
                cache = load(cacheFile);
            } catch (IOException | ClassNotFoundException e) {
                throw new RuntimeException(e);
            }
        }
    }

    /**
     * Make a web request to a given url and cache results.
     *
     * @param url The url to request; also used as the cache key.
     * @return The response body, either freshly fetched or taken from the cache.
     * @throws RuntimeException if the request fails, the status code is not 200,
     *                          or the cache cannot be written to disk.
     */
    public String get(String url) {
        if (!cache.containsKey(url)) {
            put(url, getResource(url));
        }
        return cache.get(url);
    }

    /**
     * Make a web request to a given restricted url and cache results.
     *
     * <p>The cache key is the url alone; the bearer token is not part of the key.
     *
     * @param url    The url to request; also used as the cache key.
     * @param bearer The bearer token.
     * @return The response body, either freshly fetched or taken from the cache.
     * @throws RuntimeException if the request fails, the status code is not 200,
     *                          or the cache cannot be written to disk.
     */
    public String get(String url, String bearer) {
        if (!cache.containsKey(url)) {
            put(url, getRestrictedResource(url, bearer));
        }
        return cache.get(url);
    }

    /**
     * Deserializes a cache map from disk.
     *
     * @param file The serialized cache file.
     * @return The deserialized map.
     */
    private HashMap<String, String> load(File file) throws IOException, ClassNotFoundException {
        // Both streams are declared as resources so the file handle is released even when
        // the ObjectInputStream constructor or readObject() throws.
        try (FileInputStream fis = new FileInputStream(file);
             ObjectInputStream ois = new ObjectInputStream(fis)) {
            // The file is only ever written by save(), which stores a HashMap<String, String>.
            @SuppressWarnings("unchecked")
            HashMap<String, String> ret = (HashMap<String, String>) ois.readObject();
            return ret;
        }
    }

    /**
     * Serializes an object to disk, creating the parent directory first if it is missing.
     *
     * @param object The object to serialize.
     * @param file   The destination file.
     */
    private void save(Object object, File file) throws IOException {
        File parent = file.getParentFile();
        // mkdirs() returns false both on failure and when the directory already exists
        // (e.g. created concurrently), hence the isDirectory() re-check.
        if (parent != null && !parent.mkdirs() && !parent.isDirectory()) {
            throw new IOException("Could not create cache directory " + parent);
        }

        try (FileOutputStream fos = new FileOutputStream(file);
             ObjectOutputStream oos = new ObjectOutputStream(fos)) {
            oos.writeObject(object);
        }
    }

    /**
     * Stores a response in the in-memory cache and persists the whole cache to disk.
     */
    private void put(String url, String data) {
        cache.put(url, data);

        // Persist cache on disk; a failure here is propagated to the caller.
        try {
            save(cache, cacheFile);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Fetches a url sending an {@code authorization: Bearer <token>} header.
     */
    private String getRestrictedResource(String url, String bearer) {
        HttpResponse<JsonNode> response;
        try {
            response = Unirest.get(url)
                    .header("authorization", String.format("Bearer %s", bearer))
                    .asJson();
        } catch (UnirestException e) {
            throw new RuntimeException(e);
        }

        return parseResponse(response);
    }

    /**
     * Fetches a url without any authentication header.
     */
    private String getResource(String url) {
        HttpResponse<JsonNode> response;
        try {
            response = Unirest.get(url).asJson();
        } catch (UnirestException e) {
            throw new RuntimeException(e);
        }

        return parseResponse(response);
    }

    /**
     * Validates the status code of a response and returns its body as a string.
     *
     * @throws RuntimeException on status 401 (with a hint about API keys) or any other
     *                          status different from 200.
     */
    private String parseResponse(HttpResponse<JsonNode> response) {
        if (response.getStatus() == 401) {
            throw new RuntimeException("Unauthorized access to API. Check your keys in the file trec-pm.properties.");
        }

        if (response.getStatus() != 200) {
            throw new RuntimeException("Got status code " + response.getStatus() + " from API with body " + response.getBody());
        }

        return response.getBody().toString();
    }
}
122 changes: 122 additions & 0 deletions src/main/java/at/medunigraz/imi/bst/trec/expansion/DGIdb.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
package at.medunigraz.imi.bst.trec.expansion;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.json.JSONArray;
import org.json.JSONObject;

import java.util.*;

/**
 * Client for the DGIdb interactions endpoint that extracts, for a gene, the interacting drug
 * names and the PubMed IDs attached to those interactions.
 *
 * <p>Responses are obtained through a shared {@link CachedWebRequester} persisted at
 * {@code cache/dgidb.ser}.
 */
public class DGIdb {

    private static final Logger LOG = LogManager.getLogger();

    private static final String ENDPOINT = "http://dgidb.org/api/v2/interactions.json";

    private static final int DEFAULT_MINIMAL_SCORE = 0;

    private static final boolean DEFAULT_EXPERT_CURATED_ONLY = false;

    private static final CachedWebRequester REQUESTER = new CachedWebRequester("cache/dgidb.ser");

    /**
     * Get drug interactions for a gene using the default trust level and minimal score.
     *
     * @param gene The gene to query.
     * @return A set of drug names, sorted by decreasing score.
     */
    public Set<String> getDrugInteractions(String gene) {
        return getDrugInteractions(gene, DEFAULT_EXPERT_CURATED_ONLY, DEFAULT_MINIMAL_SCORE);
    }

    /**
     * Get drug interactions for a gene using the default minimal score.
     *
     * @param gene              The gene to query.
     * @param expertCuratedOnly Whether the results should be restricted by `source_trust_levels`.
     * @return A set of drug names, sorted by decreasing score.
     */
    public Set<String> getDrugInteractions(String gene, boolean expertCuratedOnly) {
        return getDrugInteractions(gene, expertCuratedOnly, DEFAULT_MINIMAL_SCORE);
    }

    /**
     * Get a list of known drug interactions for a given gene.
     *
     * @param gene              The gene to query.
     * @param expertCuratedOnly Whether the results should be restricted by `source_trust_levels`.
     * @param minimalScore      A minimal score, as given by the DGIdb.
     * @return A set of lower-cased drug names, sorted by decreasing score and, within the
     * same score, alphabetically.
     */
    public Set<String> getDrugInteractions(String gene, boolean expertCuratedOnly, int minimalScore) {
        Set<String> ret = new LinkedHashSet<>();
        for (Map<String, Set<String>> drugs : getRankedData(gene, expertCuratedOnly, minimalScore)) {
            ret.addAll(drugs.keySet());
        }
        return ret;
    }

    /**
     * Get PubMed IDs for a gene using the default trust level and minimal score.
     *
     * @param gene The gene to query.
     * @return A set of PubMed IDs, sorted by decreasing score.
     */
    public Set<String> getPublications(String gene) {
        return getPublications(gene, DEFAULT_EXPERT_CURATED_ONLY, DEFAULT_MINIMAL_SCORE);
    }

    /**
     * Get PubMed IDs for a gene using the default minimal score.
     *
     * @param gene              The gene to query.
     * @param expertCuratedOnly Whether the results should be restricted by `source_trust_levels`.
     * @return A set of PubMed IDs, sorted by decreasing score.
     */
    public Set<String> getPublications(String gene, boolean expertCuratedOnly) {
        return getPublications(gene, expertCuratedOnly, DEFAULT_MINIMAL_SCORE);
    }

    /**
     * Get a list of PubMed IDs backing drug interaction claims.
     *
     * @param gene              The gene to query.
     * @param expertCuratedOnly Whether the results should be restricted by `source_trust_levels`.
     * @param minimalScore      A minimal score, as given by the DGIdb.
     * @return A set of PubMed IDs, sorted by decreasing score of the interaction they belong to
     * (an ID is kept at the position of its highest-scored occurrence).
     */
    public Set<String> getPublications(String gene, boolean expertCuratedOnly, int minimalScore) {
        Set<String> ret = new LinkedHashSet<>();
        for (Map<String, Set<String>> drugs : getRankedData(gene, expertCuratedOnly, minimalScore)) {
            drugs.values().forEach(ret::addAll);
        }
        return ret;
    }

    /**
     * Fetches the interaction data and returns the per-score maps whose score is at least
     * {@code minimalScore}, ordered by decreasing score.
     *
     * @param gene              The gene to query.
     * @param expertCuratedOnly Whether the results should be restricted by `source_trust_levels`.
     * @param minimalScore      Scores below this value are dropped.
     * @return A list of `drugName -> pmids` maps, highest score first.
     */
    private List<Map<String, Set<String>>> getRankedData(String gene, boolean expertCuratedOnly, int minimalScore) {
        List<Map<String, Set<String>>> ranked = new ArrayList<>();
        getData(gene, expertCuratedOnly).entrySet().stream()
                .filter(e -> e.getKey() >= minimalScore)
                .sorted(Map.Entry.comparingByKey(Comparator.reverseOrder()))
                .forEach(e -> ranked.add(e.getValue()));
        return ranked;
    }

    /**
     * Queries the DGIdb (through the cache) and groups the returned interactions by score.
     *
     * @param gene              The gene to query.
     * @param expertCuratedOnly Whether the results should be restricted by `source_trust_levels`.
     * @return A two-level map `score -> (drugName -> pmids)`
     */
    private Map<Integer, Map<String, Set<String>>> getData(String gene, boolean expertCuratedOnly) {
        // TODO EML4-ALK must split
        // TODO check any unwanted gene (e.g. coming from prepositions)

        // NOTE(review): the gene is interpolated into the query string without URL-encoding.
        String url = String.format(ENDPOINT + "?genes=%s", gene);
        url = expertCuratedOnly ? url + "&source_trust_levels=Expert%20curated" : url;

        JSONObject data = new JSONObject(REQUESTER.get(url));

        Map<Integer, Map<String, Set<String>>> ret = new TreeMap<>();

        JSONArray matchedTerms = data.getJSONArray("matchedTerms");
        for (Object term : matchedTerms) {
            JSONArray interactions = ((JSONObject) term).getJSONArray("interactions");
            for (int i = 0; i < interactions.length(); i++) {
                JSONObject interaction = (JSONObject) interactions.get(i);

                int score = interaction.getInt("score");
                // Locale.ROOT keeps the normalization independent of the JVM default locale
                // (e.g. the Turkish dotless i).
                String drugName = interaction.getString("drugName").toLowerCase(Locale.ROOT);

                Set<String> pmids = new LinkedHashSet<>();
                interaction.getJSONArray("pmids").forEach(e -> pmids.add(e.toString()));

                // The map might already contain an interaction for a given gene if there are multiple matched terms,
                // so PubMed IDs are merged into any existing entry instead of replacing it.
                ret.computeIfAbsent(score, k -> new TreeMap<>())
                        .computeIfAbsent(drugName, k -> new LinkedHashSet<>())
                        .addAll(pmids);
            }
        }

        return ret;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,12 @@ public ExperimentsBuilder withGeneFamily() {
return this;
}

/**
 * Wraps the decorator currently set on the experiment being built in a
 * {@code DrugInteractionQueryDecorator}.
 *
 * @return This builder, to allow call chaining.
 */
public ExperimentsBuilder withDrugInteraction() {
    // The current decorator becomes the delegate of the new drug interaction decorator.
    buildingExp.setDecorator(new DrugInteractionQueryDecorator(buildingExp.getDecorator()));
    return this;
}

public ExperimentsBuilder withGoldStandard(Experiment.GoldStandard gold) {
buildingExp.setGoldStandard(gold);
return this;
Expand Down
Loading

0 comments on commit ae81d97

Please sign in to comment.