Skip to content

Commit

Permalink
Negative keyword boosting (bst-mug#130)
Browse files Browse the repository at this point in the history
- Adds an experiment with negative keyword boosting.
- Updates KeywordExperimenter to test strings from a manually generated
file.
- Enables negative keyword boosting by default, as it improves NDCG from 0.7728 to 0.79.
- Fixes bst-mug#130.
  • Loading branch information
Michel Oleynik committed Jul 31, 2017
1 parent 9ebf19e commit df3e9ba
Show file tree
Hide file tree
Showing 5 changed files with 125 additions and 3 deletions.
5 changes: 5 additions & 0 deletions src/main/java/at/medunigraz/imi/bst/trec/Experimenter.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ public static void main(String[] args) {
final File synonymTemplate = new File(RunnerDemo.class.getResource("/templates/synonym.json").getFile());
final File regexpDrugsTemplate = new File(
RunnerDemo.class.getResource("/templates/regexp-drugs.json").getFile());
final File negativeBoostKeywordsTemplate = new File(
RunnerDemo.class.getResource("/templates/negative-boost-keywords.json").getFile());
final Gene.Field[] expandTo = { Gene.Field.SYMBOL, Gene.Field.DESCRIPTION };

ExperimentsBuilder builder = new ExperimentsBuilder();
Expand Down Expand Up @@ -55,6 +57,9 @@ public static void main(String[] args) {

builder.newExperiment().withGoldStandard(Experiment.GoldStandard.FINAL).withTarget(Experiment.Task.PUBMED)
.withTemplate(regexpDrugsTemplate).withWordRemoval();

builder.newExperiment().withGoldStandard(Experiment.GoldStandard.FINAL).withTarget(Experiment.Task.PUBMED)
.withTemplate(negativeBoostKeywordsTemplate).withWordRemoval();

Set<Experiment> experiments = builder.build();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ public class KeywordExperimenter {
public static void main(String[] args) {
final File keywordTemplate = new File(
KeywordExperimenter.class.getResource("/templates/keyword.json").getFile());
final File keywordsSource = new File(KeywordExperimenter.class.getResource("/keywords/").getFile());
final File keywordsSource = new File(KeywordExperimenter.class.getResource("/negative-keywords/").getFile());

ExperimentsBuilder builder = new ExperimentsBuilder();

Expand Down
2 changes: 1 addition & 1 deletion src/main/java/at/medunigraz/imi/bst/trec/RunnerDemo.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

public class RunnerDemo {
public static void main(String[] args) {
final File pmTemplate = new File(RunnerDemo.class.getResource("/templates/regexp-drugs.json").getFile());
final File pmTemplate = new File(RunnerDemo.class.getResource("/templates/negative-boost-keywords.json").getFile());

final File ctTemplate = new File(RunnerDemo.class.getResource("/templates/improved-ct.json").getFile());

Expand Down
3 changes: 2 additions & 1 deletion src/main/resources/templates/keyword.json
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@
"abstract"
],
"tie_breaker": 0.3,
"type": "best_fields"
"type": "best_fields",
"boost": -1
}
}
}
Expand Down
116 changes: 116 additions & 0 deletions src/main/resources/templates/negative-boost-keywords.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
{
"bool": {
"must": [
{
"multi_match": {
"query": "{{disease}}",
"fields": [
"title^2",
"abstract",
"keyword",
"meshTags"
],
"tie_breaker": 0.3,
"type": "best_fields",
"boost": 1
}
},
{
"multi_match": {
"query": "{{gene}}",
"fields": [
"title^2",
"abstract",
"keyword",
"meshTags"
],
"tie_breaker": 0.3,
"type": "best_fields"
}
}
],
"should": [
{
"match": {
"_type": "extra"
}
},
{
"query_string": {
"query": "*mab *nib *cin *one *ate *mus *lin",
"fields": [
"title^2",
"abstract",
"keyword",
"meshTags"
]
}
},
{
"bool": {
"should": {
"multi_match": {
"query": "cancer carcinoma tumor",
"fields": [
"title^2",
"abstract",
"keyword",
"meshTags"
],
"tie_breaker": 0.3,
"type": "best_fields"
}
}
}
},
{
"bool": {
"should": {
"multi_match": {
"query": "gene genotype DNA base",
"fields": [
"title^2",
"abstract",
"keyword",
"meshTags"
],
"tie_breaker": 0.3,
"type": "best_fields"
}
}
}
},
{
"bool": {
"should": {
"multi_match": {
"query": "therapy treatment prognosis prognostic survival patient resistance recurrence targets malignancy study therapeutical outcome",
"fields": [
"title^2",
"abstract"
],
"tie_breaker": 0.3,
"type": "best_fields"
}
}
}
},
{
"bool": {
"should": {
"multi_match": {
"query": "transcript paraffin tumorigenesis embedded formalin fish tissue probes detection screening",
"fields": [
"title^2",
"abstract"
],
"tie_breaker": 0.3,
"type": "best_fields",
"boost": -1
}
}
}
}
]
}
}

0 comments on commit df3e9ba

Please sign in to comment.