From 764ec9c6b8bd243faaa084f35ac22170988cd944 Mon Sep 17 00:00:00 2001 From: Michel Oleynik Date: Thu, 13 Jul 2017 15:45:33 +0200 Subject: [PATCH] Introduces Experiment class to run experiments This refs #53. --- .../medunigraz/imi/bst/trec/RunnerDemo.java | 79 ++++-------------- .../imi/bst/trec/experiment/Experiment.java | 82 +++++++++++++++++++ 2 files changed, 99 insertions(+), 62 deletions(-) create mode 100644 src/main/java/at/medunigraz/imi/bst/trec/experiment/Experiment.java diff --git a/src/main/java/at/medunigraz/imi/bst/trec/RunnerDemo.java b/src/main/java/at/medunigraz/imi/bst/trec/RunnerDemo.java index c8df826..e1c46a7 100644 --- a/src/main/java/at/medunigraz/imi/bst/trec/RunnerDemo.java +++ b/src/main/java/at/medunigraz/imi/bst/trec/RunnerDemo.java @@ -2,93 +2,48 @@ import java.io.File; import java.util.HashSet; -import java.util.List; import java.util.Set; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -import at.medunigraz.imi.bst.trec.evaluator.TrecEval; -import at.medunigraz.imi.bst.trec.evaluator.TrecWriter; +import at.medunigraz.imi.bst.trec.experiment.Experiment; import at.medunigraz.imi.bst.trec.model.Gene; -import at.medunigraz.imi.bst.trec.model.Result; -import at.medunigraz.imi.bst.trec.model.ResultList; -import at.medunigraz.imi.bst.trec.model.Topic; -import at.medunigraz.imi.bst.trec.model.TopicSet; import at.medunigraz.imi.bst.trec.query.ElasticSearchQuery; -import at.medunigraz.imi.bst.trec.query.GeneExpanderQueryDecorator; import at.medunigraz.imi.bst.trec.query.Query; import at.medunigraz.imi.bst.trec.query.TemplateQueryDecorator; import at.medunigraz.imi.bst.trec.query.WordRemovalQueryDecorator; -import at.medunigraz.imi.bst.trec.stats.CSVStatsWriter; -import at.medunigraz.imi.bst.trec.stats.XMLStatsWriter; public class RunnerDemo { - private static final Logger LOG = LogManager.getLogger(); + public static void main(String[] args) { + Set bestExperiments = new HashSet<>(); - public static void main(String[] args) { - String[] pmRuns = { "example-pmid", "extra-pmid", "topics2017-pmid" }; final File pmTemplate = new File(RunnerDemo.class.getResource("/templates/boost-extra.json").getFile()); Gene.Field[] expandTo = { Gene.Field.SYMBOL, Gene.Field.DESCRIPTION }; Query pmDecorator = new WordRemovalQueryDecorator( new TemplateQueryDecorator(pmTemplate, new ElasticSearchQuery("trec"))); - - for (String id : pmRuns) { - runExperiment(id, pmDecorator); - } + bestExperiments.add(new Experiment().withId("example-pmid").withDecorator(pmDecorator)); + bestExperiments.add(new Experiment().withId("extra-pmid").withDecorator(pmDecorator)); + bestExperiments.add(new Experiment().withId("topics2017-pmid").withDecorator(pmDecorator)); - // TODO DRY Issue #53 - String[] ctRuns = { "extra-ct" }; final File ctTemplate = new File(RunnerDemo.class.getResource("/templates/baseline-ct.json").getFile()); Query ctDecorator = new TemplateQueryDecorator(ctTemplate, new ElasticSearchQuery("clinicaltrials")); - for (String id : ctRuns) { - runExperiment(id, ctDecorator); - } - } - - private static void runExperiment(String id, Query decorator) { - final String collection = id.substring(0, id.indexOf('-')); + bestExperiments.add(new Experiment().withId("extra-ct").withDecorator(ctDecorator)); - LOG.info("Running collection '" + id + "'..."); - File example = new File(CSVStatsWriter.class.getResource("/topics/" + collection + ".xml").getPath()); - TopicSet topicSet = new TopicSet(example); - - File output = new File("results/" + id + ".trec_results"); - TrecWriter tw = new TrecWriter(output); - - // TODO DRY Issue #53 - Set resultListSet = new HashSet<>(); - for (Topic topic : topicSet.getTopics()) { - List results = decorator.query(topic); - - ResultList resultList = new ResultList(topic); - resultList.setResults(results); - resultListSet.add(resultList); + for (Experiment exp : bestExperiments) { + exp.start(); + try { + exp.join(); + } catch (InterruptedException e) { + e.printStackTrace(); + } } - tw.write(resultListSet); - tw.close(); - - File goldStandard = new File( - CSVStatsWriter.class.getResource("/gold-standard/" + id + ".qrels").getPath()); - TrecEval te = new TrecEval(goldStandard, output); - - LOG.debug("NDCG: " + te.getNDCG()); - LOG.trace(te.getMetricsByTopic("all")); - - XMLStatsWriter xsw = new XMLStatsWriter(new File("stats/" + id + ".xml")); - xsw.write(te.getMetrics()); - xsw.close(); - - CSVStatsWriter csw = new CSVStatsWriter(new File("stats/" + id + ".csv")); - csw.write(te.getMetrics()); - csw.close(); - } + for (Experiment exp : bestExperiments) { + } + } } diff --git a/src/main/java/at/medunigraz/imi/bst/trec/experiment/Experiment.java b/src/main/java/at/medunigraz/imi/bst/trec/experiment/Experiment.java new file mode 100644 index 0000000..c8f37f9 --- /dev/null +++ b/src/main/java/at/medunigraz/imi/bst/trec/experiment/Experiment.java @@ -0,0 +1,82 @@ +package at.medunigraz.imi.bst.trec.experiment; + +import java.io.File; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import at.medunigraz.imi.bst.trec.evaluator.TrecEval; +import at.medunigraz.imi.bst.trec.evaluator.TrecWriter; +import at.medunigraz.imi.bst.trec.model.Result; +import at.medunigraz.imi.bst.trec.model.ResultList; +import at.medunigraz.imi.bst.trec.model.Topic; +import at.medunigraz.imi.bst.trec.model.TopicSet; +import at.medunigraz.imi.bst.trec.query.Query; +import at.medunigraz.imi.bst.trec.stats.CSVStatsWriter; +import at.medunigraz.imi.bst.trec.stats.XMLStatsWriter; + +public class Experiment extends Thread { + + private static final Logger LOG = LogManager.getLogger(); + + private String id; + private Query decorator; + + public Experiment() { + } + + public Experiment withId(String id) { + this.id = id; + return this; + } + + public Experiment withDecorator(Query decorator) { + this.decorator = decorator; + return this; + } + + @Override + public void run() { + final String collection = id.substring(0, id.indexOf('-')); + + LOG.info("Running collection '" + id + "'..."); + + File example = new File(CSVStatsWriter.class.getResource("/topics/" + collection + ".xml").getPath()); + TopicSet topicSet = new TopicSet(example); + + File output = new File("results/" + id + ".trec_results"); + TrecWriter tw = new TrecWriter(output); + + // TODO DRY Issue #53 + Set resultListSet = new HashSet<>(); + for (Topic topic : topicSet.getTopics()) { + List results = decorator.query(topic); + + ResultList resultList = new ResultList(topic); + resultList.setResults(results); + resultListSet.add(resultList); + } + + tw.write(resultListSet); + tw.close(); + + File goldStandard = new File(CSVStatsWriter.class.getResource("/gold-standard/" + id + ".qrels").getPath()); + TrecEval te = new TrecEval(goldStandard, output); + + LOG.debug("NDCG: " + te.getNDCG()); + LOG.trace(te.getMetricsByTopic("all")); + + XMLStatsWriter xsw = new XMLStatsWriter(new File("stats/" + id + ".xml")); + xsw.write(te.getMetrics()); + xsw.close(); + + CSVStatsWriter csw = new CSVStatsWriter(new File("stats/" + id + ".csv")); + csw.write(te.getMetrics()); + csw.close(); + + LOG.info("Collection '" + id + "' finished."); + } +}