Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions ir_datasets/datasets/trec_tot.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,20 @@ def default_text(self):
return self.title + ' ' + self.text


class TipOfTheTongueTestQuery(NamedTuple):
    """Query record used for the test queries.

    Carries the query identifier, domain, title, text and sentence
    annotations; unlike the fields listed in ``QUERY_MAP``, it has no
    ``url`` field.
    """
    query_id: str
    domain: str
    title: str
    text: str
    sentence_annotations: List[Dict[str, str]]

    def default_text(self):
        """Return the title and the text joined by a single space."""
        return self.title + ' ' + self.text


# Mapping handed to the JSONL query reader; presumably query field name ->
# key in the source record (confirm against JsonlQueries).
QUERY_MAP = {
    'query_id': 'id',
    'url': 'url',
    'domain': 'domain',
    'title': 'title',
    'text': 'text',
    'sentence_annotations': 'sentence_annotations',
}

# Variant for the test queries: same entries as QUERY_MAP minus 'url'.
QUERY_MAP_TEST = {
    'query_id': 'id',
    'domain': 'domain',
    'title': 'title',
    'text': 'text',
    'sentence_annotations': 'sentence_annotations',
}

def _init():
documentation = YamlDocumentation(f'docs/{NAME}.yaml')
Expand Down Expand Up @@ -64,6 +76,16 @@ def _init():
documentation(f'2023/{s}'),
)
ir_datasets.registry.register(f'{NAME}/2023/{s}', subsets[f'2023/{s}'])

# The test dataset is registered outside of the loop above because no qrels are available at the moment and its queries come from a different file
s = 'test'
test_dlc = dlc['test-queries']
subsets[f'2023/{s}'] = Dataset(
docs_2023_handler,
JsonlQueries(Cache(ZipExtract(test_dlc, f'test/queries.jsonl'), base_path/f'2023/{s}/queries.jsonl'), query_cls=TipOfTheTongueTestQuery, mapping=QUERY_MAP_TEST, lang='en'),
documentation(f'2023/{s}'),
)
ir_datasets.registry.register(f'{NAME}/2023/{s}', subsets[f'2023/{s}'])

return base, subsets

Expand Down
5 changes: 5 additions & 0 deletions ir_datasets/etc/downloads.json
Original file line number Diff line number Diff line change
Expand Up @@ -5812,6 +5812,11 @@
"url": "https://surfdrive.surf.nl/files/index.php/s/FaEK4xc6Xp2JcAJ/download",
"expected_md5": "f84fe82cb80e3ee1072576c8d6c4a417",
"cache_path": "trec-tot.zip"
},
"test-queries": {
"url": "https://surfdrive.surf.nl/files/index.php/s/qQCQ1iRZvj712kv/download",
"expected_md5": "756b77a2725f0960defb3a9b12d95334",
"cache_path": "trec-tot-test-queries.zip"
}
},

Expand Down
Loading