-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSAR_Indexer.py
47 lines (31 loc) · 1.58 KB
/
SAR_Indexer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import argparse
import pickle
import time
from SAR_lib import SAR_Project
def build_parser():
    """Build the command-line parser for the indexer script.

    Returns:
        An ``argparse.ArgumentParser`` with two positional arguments
        (``newsdir`` and ``index``) and five boolean flags (``stem``,
        ``permuterm``, ``multifield``, ``positional``, ``suggestion``),
        all of which default to ``False``.
    """
    parser = argparse.ArgumentParser(description='Index a directory with news in json format.')
    parser.add_argument('newsdir', metavar='newsdir', type=str, help='directory with the news.')
    parser.add_argument('index', metavar='index', type=str, help='name of the file to save the project object.')
    parser.add_argument('-S', '--stem', dest='stem', action='store_true', default=False, help='compute stem index.')
    parser.add_argument('-P', '--permuterm', dest='permuterm', action='store_true', default=False,
                        help='compute permuterm index.')
    parser.add_argument('-M', '--multifield', dest='multifield', action='store_true', default=False,
                        help='compute index for all the fields.')
    parser.add_argument('-O', '--positional', dest='positional', action='store_true', default=False,
                        help='compute positional index.')
    parser.add_argument('-U', '--suggest', dest='suggestion', action='store_true', default=False,
                        help='makes suggestions on posible words')
    return parser


def main(argv=None):
    """Index a news directory, pickle the resulting project and report timings.

    Args:
        argv: Optional list of command-line tokens. When ``None`` (the
            default), argparse reads ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)
    news_dir = args.newsdir
    index_file = args.index

    indexer = SAR_Project()

    # perf_counter() is monotonic, so the reported durations cannot be
    # distorted by system clock adjustments the way time.time() deltas can.
    t0 = time.perf_counter()
    # Every parsed option (including 'newsdir' and 'index') is forwarded as a
    # keyword argument, exactly as the parser produced it.
    indexer.index_dir(news_dir, **vars(args))
    t1 = time.perf_counter()

    with open(index_file, 'wb') as fh:
        pickle.dump(indexer, fh)
    # Taken after the file is closed so the saving time includes the flush.
    t2 = time.perf_counter()

    indexer.show_stats()
    print("Time indexing: %2.2fs." % (t1 - t0))
    print("Time saving: %2.2fs." % (t2 - t1))
    print()


if __name__ == "__main__":
    main()