|
8 | 8 | from ir_datasets.util import DownloadConfig, Download, RequestsDownload, TarExtractAll, GzipExtract |
9 | 9 | from ir_datasets.formats import BaseDocs, TrecXmlQueries, DocSourceSeekableIter, DocSource, SourceDocIter |
10 | 10 | from ir_datasets.datasets.base import Dataset, YamlDocumentation |
11 | | -from ir_datasets.indices import Docstore |
| 11 | +from ir_datasets.indices import Docstore, DEFAULT_DOCSTORE_OPTIONS |
12 | 12 |
|
13 | 13 | _logger = ir_datasets.log.easy() |
14 | 14 |
|
@@ -112,8 +112,8 @@ def seek(self, idx): |
112 | 112 |
|
113 | 113 |
|
114 | 114 | class C4Docstore(Docstore): |
115 | | - def __init__(self, docs): |
116 | | - super().__init__(docs.docs_cls(), 'doc_id') |
| 115 | + def __init__(self, docs, options=DEFAULT_DOCSTORE_OPTIONS): |
| 116 | + super().__init__(docs.docs_cls(), 'doc_id', options=options) |
117 | 117 | self.docs = docs |
118 | 118 |
|
119 | 119 | def get_many_iter(self, doc_ids): |
@@ -157,9 +157,9 @@ def docs_iter(self): |
157 | 157 | def docs_cls(self): |
158 | 158 | return C4Doc |
159 | 159 |
|
160 | | - def docs_store(self, field='doc_id'): |
| 160 | + def docs_store(self, field='doc_id', options=DEFAULT_DOCSTORE_OPTIONS): |
161 | 161 | assert field == 'doc_id' |
162 | | - return C4Docstore(self) |
| 162 | + return C4Docstore(self, options=options) |
163 | 163 |
|
164 | 164 | def docs_count(self, force=False): |
165 | 165 | if force or self._sources is not None: |
|
0 commit comments