Skip to content

Commit

Permalink
Adding scripts and code for experiments.
Browse files Browse the repository at this point in the history
  • Loading branch information
khituras committed Jan 28, 2019
1 parent 273703d commit b8469f0
Show file tree
Hide file tree
Showing 65 changed files with 999 additions and 3,288 deletions.
421 changes: 91 additions & 330 deletions notebooks/sigir19/RelevantDocsFoundAnalysis.ipynb

Large diffs are not rendered by default.

9 changes: 9 additions & 0 deletions scripts/runAllPmClassExperimentsLiterature.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash
# Submits one Slurm batch job per PM classifier experiment configuration.
# Every job gets two positional arguments: <multi match mode> <match default operator>.
# All jobs are submitted with --exclude=h5,h6.

# Build first and stop if compilation fails, so that no jobs are queued
# against a broken or outdated build.
mvn compile || exit 1

# best_fields is run with both boolean operators. No slop argument is passed.
sbatch --exclude=h5,h6 scripts/runPmClassExperimentsLiterature.sh best_fields OR
sbatch --exclude=h5,h6 scripts/runPmClassExperimentsLiterature.sh best_fields AND

# Here, the boolean operator has no effect
sbatch --exclude=h5,h6 scripts/runPmClassExperimentsLiterature.sh phrase OR
18 changes: 6 additions & 12 deletions scripts/runAllRecallExperimentsLiterature.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,11 @@

mvn compile
# For the best_fields, the slop does not do anything, but an argument value is expected
sbatch scripts/runRecallExperimentsLiterature.sh best_fields OR false 10
sbatch scripts/runRecallExperimentsLiterature.sh best_fields OR true 10
sbatch scripts/runRecallExperimentsLiterature.sh best_fields AND false 10
sbatch scripts/runRecallExperimentsLiterature.sh best_fields AND true 10
sbatch --exclude=h5,h6 scripts/runRecallExperimentsLiterature.sh best_fields OR 10
sbatch --exclude=h5,h6 scripts/runRecallExperimentsLiterature.sh best_fields AND 10

# Here, the boolean operator has no effect
sbatch scripts/runRecallExperimentsLiterature.sh phrase OR false 10
sbatch scripts/runRecallExperimentsLiterature.sh phrase OR true 10
sbatch scripts/runRecallExperimentsLiterature.sh phrase OR false 5
sbatch scripts/runRecallExperimentsLiterature.sh phrase OR true 5
sbatch scripts/runRecallExperimentsLiterature.sh phrase OR false 3
sbatch scripts/runRecallExperimentsLiterature.sh phrase OR true 3
sbatch scripts/runRecallExperimentsLiterature.sh phrase OR false 2
sbatch scripts/runRecallExperimentsLiterature.sh phrase OR true 2
sbatch --exclude=h5,h6 scripts/runRecallExperimentsLiterature.sh phrase OR 10
#sbatch --exclude=h5,h6 scripts/runRecallExperimentsLiterature.sh phrase OR 5
#sbatch --exclude=h5,h6 scripts/runRecallExperimentsLiterature.sh phrase OR 3
#sbatch --exclude=h5,h6 scripts/runRecallExperimentsLiterature.sh phrase OR 2
9 changes: 9 additions & 0 deletions scripts/runAllTermBoostExperimentsLiterature.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash
# Submits one Slurm batch job per term boost experiment configuration.
# Every job gets two positional arguments: a multi match mode and a boolean operator.
# All jobs are submitted with --exclude=h5,h6.

# Build first and stop if compilation fails, so that no jobs are queued
# against a broken or outdated build.
mvn compile || exit 1

# best_fields is run with both boolean operators. No slop argument is passed.
sbatch --exclude=h5,h6 scripts/runTermBoostExperimentsLiterature.sh best_fields OR
sbatch --exclude=h5,h6 scripts/runTermBoostExperimentsLiterature.sh best_fields AND

# Here, the boolean operator has no effect
sbatch --exclude=h5,h6 scripts/runTermBoostExperimentsLiterature.sh phrase OR
6 changes: 6 additions & 0 deletions scripts/runPmClassExperimentsLiterature.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
# Slurm batch job that runs the PM classifier experiments through Maven.
# Arguments: $1 = multi match mode, $2 = match default operator.
#SBATCH --cpus-per-task 2
#SBATCH --mem 10G
# Own job name so these jobs can be told apart from the term boost jobs in the queue.
#SBATCH -J pmclassexp

mvn exec:java -Dexec.mainClass=at.medunigraz.imi.bst.trec.SigirPubmedExperimenterPmClass -Dexec.args="$1 $2"
2 changes: 1 addition & 1 deletion scripts/runRecallExperimentsLiterature.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
#SBATCH --mem 10G
#SBATCH -J recallexp

mvn exec:java -Dexec.mainClass=at.medunigraz.imi.bst.trec.SigirPubmedRecallExperimenterDefaultBoosting -Dexec.args="$1 $2 $3 $4"
mvn exec:java -Dexec.mainClass=at.medunigraz.imi.bst.trec.SigirPubmedRecallExperimenterDefaultBoosting -Dexec.args="$1 $2 $3"
6 changes: 6 additions & 0 deletions scripts/runTermBoostExperimentsLiterature.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
# Slurm batch job that runs the term boost experiments through Maven.
# The first two positional arguments are forwarded unchanged to the Java main class.
#SBATCH --job-name=termboostexp
#SBATCH --mem=10G
#SBATCH --cpus-per-task=5

mvn exec:java \
    -Dexec.mainClass=at.medunigraz.imi.bst.trec.SigirPubmedTermBoostExperimenterDefaultBoosting \
    -Dexec.args="$1 $2"
2 changes: 2 additions & 0 deletions src/main/java/at/medunigraz/imi/bst/trec/SigirParameters.java
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ public class SigirParameters {
TREC_2018_HPIPUBNONE.put("non_mel_boost", "1");
TREC_2018_HPIPUBNONE.put("pm_gs_boost", "1");
TREC_2018_HPIPUBNONE.put("dgi_boost", "0");
TREC_2018_HPIPUBNONE.put("mut_boost", "1");

TREC_2018_HPIPUBNONE.put("dis_multi_match_type", BEST_FIELDS);
TREC_2018_HPIPUBNONE.put("dis_prefterm_multi_match_type", BEST_FIELDS);
Expand Down Expand Up @@ -92,6 +93,7 @@ public class SigirParameters {
LITERATURE_ES_DEFAULTS.put("non_mel_boost", "1");
LITERATURE_ES_DEFAULTS.put("pm_gs_boost", "1");
LITERATURE_ES_DEFAULTS.put("dgi_boost", "1");
LITERATURE_ES_DEFAULTS.put("mut_boost", "1");

LITERATURE_ES_DEFAULTS.put("dis_multi_match_type", BEST_FIELDS);
LITERATURE_ES_DEFAULTS.put("dis_prefterm_multi_match_type", BEST_FIELDS);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,21 @@
import java.util.*;
import java.util.stream.IntStream;

public class SigirPubmedRecallExperimenterBoostOptimizer extends SuperSigirPubmedRecallExperimenter {
public class SigirPubmedExperimenterBoostOptimizer extends SuperSigirPubmedRecallExperimenter {
public static void main(String[] args) {

Set<String> validParams = new LinkedHashSet<>();
validParams.add("disease");
validParams.add("gene");
validParams.add("genedis");
validParams.add("fields");
validParams.add("posneg");
validParams.add("additional");
validParams.add("extra");
validParams.add("pmgs");
validParams.add("pmclass");
validParams.add("mutation");
validParams.add("drug");

if (args.length != 1 || !validParams.contains(args[0])) {
System.err.println("Usage: " + SigirPubmedRecallExperimenterBoostOptimizer.class.getSimpleName() + " <what>");
System.err.println("Usage: " + SigirPubmedExperimenterBoostOptimizer.class.getSimpleName() + " <what>");
System.err.println("Where <what> is one of " + validParams);
System.exit(1);
}
Expand All @@ -36,48 +36,37 @@ public static void main(String[] args) {


DecimalFormat df = new DecimalFormat("0.0");
if (what.equals("disease")) {
if (what.equals("genedis")) {
List<Map<String, String>> parameters = new ArrayList<>();
List<String> suffixes = new ArrayList<>();
for (double disb = .4; disb < 2.4; disb += .4) {
for (double ptb = .2; ptb < 1.2; ptb += .4) {
for (double synb = .2; synb < 2; synb += .4) {
Map<String, String> paramcombination = new HashMap<>(templateProperties);
paramcombination.put("dis_boost", String.valueOf(disb));
paramcombination.put("dis_prefterm_boost", String.valueOf(ptb));
paramcombination.put("dis_syn_boost", String.valueOf(synb));
String suffix = "--dis" + df.format(disb) + "-pt" + df.format(ptb) + "-syn" + df.format(synb);
parameters.add(paramcombination);
suffixes.add(suffix);
}
}
}
runExperimentsWithParameters(parameters, suffixes, year, what, goldStandard, target);
} else if (what.equals("gene")) {
List<Map<String, String>> parameters = new ArrayList<>();
List<String> suffixes = new ArrayList<>();
for (double genb = .4; genb < 2.4; genb += .4) {
for (double descb = .2; descb < 1.8; descb += .4) {
for (double synb = .2; synb < 1.8; synb += .4) {
Map<String, String> paramcombination = new HashMap<>(templateProperties);
paramcombination.put("gene_boost", String.valueOf(genb));
paramcombination.put("gene_desc_boost", String.valueOf(descb));
paramcombination.put("gene_syn_boost", String.valueOf(synb));
String suffix = "--gen" + df.format(genb) + "-desc" + df.format(descb) + "-syn" + df.format(synb);
parameters.add(paramcombination);
suffixes.add(suffix);
// Grid over five boosts (gene, gene description, gene synonyms, disease, disease synonyms).
// Each boost takes the values 1.0, 1.5, 2.0 and 2.5; one parameter map and one
// matching suffix are appended per combination.
for (double genb = 1; genb < 3; genb += .5) {
    for (double descb = 1; descb < 3; descb += .5) {
        for (double gsynb = 1; gsynb < 3; gsynb += .5) {
            for (double disb = 1; disb < 3; disb += .5) {
                // The loop must advance its own variable dsynb. Advancing gsynb here
                // would leave the condition dsynb < 3 true forever.
                for (double dsynb = 1; dsynb < 3; dsynb += .5) {
                    // Start every combination from a fresh copy of the template.
                    Map<String, String> paramcombination = new HashMap<>(templateProperties);
                    paramcombination.put("gene_boost", String.valueOf(genb));
                    paramcombination.put("gene_desc_boost", String.valueOf(descb));
                    paramcombination.put("gene_syn_boost", String.valueOf(gsynb));
                    paramcombination.put("dis_boost", String.valueOf(disb));
                    paramcombination.put("dis_syn_boost", String.valueOf(dsynb));
                    // The suffix encodes the combination; it is stored at the same index as its parameter map.
                    String suffix = "--gen" + df.format(genb) + "-gdes" + df.format(descb) + "-gsyn" + df.format(gsynb) + "--dis" + df.format(disb) + "-dsyn" + df.format(dsynb);
                    parameters.add(paramcombination);
                    suffixes.add(suffix);
                }
            }
        }
    }
}
runExperimentsWithParameters(parameters, suffixes, year, what, goldStandard, target);
} else if (what.equals("fields")) {
List<Map<String, String>> parameters = new ArrayList<>();
List<String> suffixes = new ArrayList<>();
for (double titb = 1; titb < 1.6; titb += .2) {
for (double abstrb = 1; abstrb < 1.6; abstrb += .2) {
for (double kwb = 1; kwb < 1.6; kwb += .2) {
for (double meshb = 1; meshb < 1.6; meshb += .2) {
for (double genesb = 1; genesb < 1.1; genesb += .4) {
for (double titb = 1; titb < 3; titb += .5) {
for (double abstrb = 1; abstrb < 3; abstrb += .5) {
for (double kwb = 1; kwb < 3; kwb += .5) {
for (double meshb = 1; meshb < 3; meshb += .5) {
for (double genesb = 1; genesb < 3; genesb += .5) {
Map<String, String> paramcombination = new HashMap<>(templateProperties);
paramcombination.put("title_boost", "^" + titb);
paramcombination.put("abstract_boost", "^" + abstrb);
Expand All @@ -96,8 +85,8 @@ public static void main(String[] args) {
} else if (what.equals("posneg")) {
List<Map<String, String>> parameters = new ArrayList<>();
List<String> suffixes = new ArrayList<>();
for (double posb = .7; posb < 1.2; posb += .1) {
for (double negb = -3; negb < .2; negb += .4) {
for (double posb = .5; posb < 3; posb += .5) {
for (double negb = -3; negb <= .5; negb += .5) {
Map<String, String> paramcombination = new HashMap<>(templateProperties);
paramcombination.put("pos_words_boost", String.valueOf(posb));
paramcombination.put("neg_words_boost", String.valueOf(negb));
Expand All @@ -110,47 +99,43 @@ public static void main(String[] args) {
} else if (what.equals("additional")) {
List<Map<String, String>> parameters = new ArrayList<>();
List<String> suffixes = new ArrayList<>();
for (double cancerb = .4; cancerb < 2; cancerb += .4) {
for (double chemob = .4; chemob < 2; chemob += .4) {
for (double dnab = .4; dnab < 2; dnab += .4) {
for (double nonmelb = -1; nonmelb < .8; nonmelb += .4) {
Map<String, String> paramcombination = new HashMap<>(templateProperties);
paramcombination.put("cancer_boost", String.valueOf(cancerb));
paramcombination.put("chemo_boost", String.valueOf(chemob));
paramcombination.put("dna_boost", String.valueOf(dnab));
paramcombination.put("non_mel_boost", String.valueOf(dnab));
String suffix = "--canc" + df.format(cancerb) + "-chem" + df.format(chemob) + "-dna" + df.format(dnab) + "-nonmel" + df.format(nonmelb);
parameters.add(paramcombination);
suffixes.add(suffix);
}
for (double cancerb = .5; cancerb < 3; cancerb += .5) {
for (double chemob = .5; chemob < 3; chemob += .5) {
for (double dnab = .5; dnab < 3; dnab += .5) {
Map<String, String> paramcombination = new HashMap<>(templateProperties);
paramcombination.put("cancer_boost", String.valueOf(cancerb));
paramcombination.put("chemo_boost", String.valueOf(chemob));
paramcombination.put("dna_boost", String.valueOf(dnab));
String suffix = "--canc" + df.format(cancerb) + "-chem" + df.format(chemob) + "-dna" + df.format(dnab);
parameters.add(paramcombination);
suffixes.add(suffix);
}
}
}
runExperimentsWithParameters(parameters, suffixes, year, what, goldStandard, target);
} else if (what.equals("extra")) {
List<Map<String, String>> parameters = new ArrayList<>();
List<String> suffixes = new ArrayList<>();
for (double extrab = .4; extrab < 2; extrab += .4) {
for (double extrab = .5; extrab <= 3; extrab += .5) {
Map<String, String> paramcombination = new HashMap<>(templateProperties);
paramcombination.put("extra_boost", String.valueOf(extrab));
String suffix = "--extra" + df.format(extrab);
parameters.add(paramcombination);
suffixes.add(suffix);
}
runExperimentsWithParameters(parameters, suffixes, year, what, goldStandard, target);
} else if (what.equals("pmgs")) {
} else if (what.equals("mutation")) {
List<Map<String, String>> parameters = new ArrayList<>();
List<String> suffixes = new ArrayList<>();
for (double pmgsb = -1; pmgsb < .8; pmgsb += .4) {
for (double extrab = .5; extrab <= 3; extrab += .5) {
Map<String, String> paramcombination = new HashMap<>(templateProperties);
paramcombination.put("pm_gs_boost", String.valueOf(pmgsb));
String suffix = "--pmgs" + df.format(pmgsb);
paramcombination.put("mut_boost", String.valueOf(extrab));
String suffix = "--mut" + df.format(extrab);
parameters.add(paramcombination);
suffixes.add(suffix);
}
runExperimentsWithParameters(parameters, suffixes, year, what, goldStandard, target);
}
if (what.equals("pmclass")) {
} else if (what.equals("pmclass")) {
final List<String> pmfields = Arrays.asList("pmclass2017lstm.keyword",
"pmclass2017lstmatt.keyword",
"pmclass2017lstmgru.keyword",
Expand All @@ -163,7 +148,7 @@ public static void main(String[] args) {
pmfields.parallelStream().forEach(pmfield -> {
Map<String, String> parameters = new HashMap<>(templateProperties);
parameters.put("pm_class_field", pmfield);
runExperiments(parameters, false, goldStandard, target, year, what, "-" + pmfield);
// runExperiments(parameters, false, goldStandard, target, year, what, "-" + pmfield);
});
} else throw new IllegalStateException("Unknown mode " + what);

Expand All @@ -174,7 +159,7 @@ private static void runExperimentsWithParameters(List<Map<String, String>> param
IntStream.range(0, parameters.size()).parallel().forEach(i -> {
Map<String, String> parameterset = parameters.get(i);
String suffix = suffixes.get(i);
runExperiments(parameterset, false, goldStandard, target, year, what, suffix);
// runExperiments(parameterset, false, goldStandard, target, year, what, suffix);
});
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package at.medunigraz.imi.bst.trec;

import at.medunigraz.imi.bst.trec.experiment.Experiment;

import java.text.DecimalFormat;
import java.util.*;
import java.util.stream.IntStream;

import static at.medunigraz.imi.bst.trec.SigirParameters.BEST_FIELDS;

/**
 * Command line entry point that runs the PM classifier experiments once for each
 * candidate {@code pm_class_field}.
 * <p>
 * The query parameters start from {@link SigirParameters#LITERATURE_ES_DEFAULTS}; the multi match
 * types and boolean operators are then overridden from the command line arguments.
 */
public class SigirPubmedExperimenterPmClass extends SuperSigirPubmedRecallExperimenter {
    /**
     * Builds the parameter template and runs one experiment per PM classifier field.
     *
     * @param args exactly two values: the multi match mode and the match default operator.
     *             If the count is wrong, a usage message is printed and the JVM exits with status 1.
     */
    public static void main(String[] args) {

        if (args.length != 2) {
            System.err.println("Parameters: <multi match mode> <match default operator>");
            // Terminate instead of continuing: with fewer than two arguments the accesses to
            // args[0] / args[1] below would throw an ArrayIndexOutOfBoundsException.
            System.exit(1);
        }

        String what = "pmclass";

        final Experiment.GoldStandard goldStandard = Experiment.GoldStandard.OFFICIAL;
        final Experiment.Task target = Experiment.Task.PUBMED;
        final int year = 2017;

        // Work on a copy so that the shared defaults are not modified.
        Map<String, String> templateProperties = new HashMap<>(SigirParameters.LITERATURE_ES_DEFAULTS);

        String defaultMultiMatch = args[0];
        templateProperties.put("dis_multi_match_type", defaultMultiMatch);
        templateProperties.put("dis_syn_multi_match_type", defaultMultiMatch);
        templateProperties.put("dis_hyper_multi_match_type", defaultMultiMatch);
        // The plain gene match is never set to "phrase"; it uses BEST_FIELDS in that case.
        templateProperties.put("gene_multi_match_type", defaultMultiMatch.equals("phrase") ? BEST_FIELDS : defaultMultiMatch);
        templateProperties.put("gene_syn_multi_match_type", defaultMultiMatch);
        templateProperties.put("gene_desc_multi_match_type", defaultMultiMatch);
        templateProperties.put("gene_hyper_multi_match_type", defaultMultiMatch);
        templateProperties.put("cancer_multi_match_type", defaultMultiMatch);
        templateProperties.put("dna_multi_match_type", defaultMultiMatch);
        templateProperties.put("neg_boost_multi_match_type", defaultMultiMatch);
        templateProperties.put("pos_boost_multi_match_type", defaultMultiMatch);
        templateProperties.put("dis_prefterm_multi_match_type", defaultMultiMatch);
        templateProperties.put("dgi_multi_match_type", defaultMultiMatch);

        String defaultOperator = args[1];
        templateProperties.put("dis_operator", defaultOperator);
        templateProperties.put("dis_prefterm_operator", defaultOperator);
        templateProperties.put("dis_syn_operator", defaultOperator);
        templateProperties.put("dis_hyper_operator", defaultOperator);
        // The following operators are fixed to OR regardless of the command line argument.
        templateProperties.put("gene_operator", "OR");
        templateProperties.put("gene_syn_operator", defaultOperator);
        templateProperties.put("gene_hyper_operator", defaultOperator);
        templateProperties.put("gene_desc_operator", "OR");
        templateProperties.put("cancer_operator", "OR");
        templateProperties.put("dna_operator", "OR");

        // The phrase slop is fixed; it is not taken from the command line.
        templateProperties.put("phrase_slop", "10");

        final List<String> pmfields = Arrays.asList("pmclass2017lstm.keyword",
                "pmclass2017lstmatt.keyword",
                "pmclass2017lstmgru.keyword",
                "pmclass2018lstm.keyword",
                "pmclass2018lstmatt.keyword",
                "pmclass2018lstmgru.keyword",
                "pmclass2017.keyword",
                "pmclass2018.keyword");
        // One experiment per field, run in parallel; each run gets its own copy of the template.
        pmfields.parallelStream().forEach(pmfield -> {
            Map<String, String> parameters = new HashMap<>(templateProperties);
            parameters.put("pm_class_field", pmfield);
            // NOTE(review): the meaning of the first argument (null) is defined by the superclass - confirm.
            runPmClassifierExperiments(null, parameters, goldStandard, target, year, what, "--mmm:" + defaultMultiMatch + "-op:" + defaultOperator + "-pmf:" + pmfield);
        });

    }

}
Loading

0 comments on commit b8469f0

Please sign in to comment.