-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdoi_to_cites.py
72 lines (57 loc) · 1.88 KB
/
doi_to_cites.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import arrow
import codeswitch
import json
import threading
import time
from bz2 import BZ2File as bzopen
from edit_queue import EditQueue
from citation_grapher import CitationGrapher
print('Setting up globals')

# Thread budget. main() pauses while threading.active_count() has reached
# THREAD_LIMIT. The trailing 1 presumably accounts for the main thread and
# WRITE_THREAD_COUNT for threads owned by the EditQueue -- TODO confirm.
WRITE_THREAD_COUNT = 2
READ_THREAD_COUNT = 100
THREAD_LIMIT = WRITE_THREAD_COUNT + READ_THREAD_COUNT + 1

# Go from newest Wikidata QID to oldest?
# NOTE(review): this flag is not referenced in the visible part of the file.
DESCENDING_ORDER = True

# Shared edit queue; update_graph() passes it to every CitationGrapher.
# NOTE(review): 'P2860' looks like Wikidata's "cites work" property and the
# good_refs keys like the usual reference properties -- verify against the
# EditQueue documentation before changing any of these values.
eq = EditQueue(
    source='Q5188229',
    url_pattern='https://api.crossref.org/works/',
    write_thread_count=WRITE_THREAD_COUNT,
    append_value=['P2860'],
    good_refs=[
        {
            'P248': None,
            'P813': None,
            'P854': None,
        },
    ],
    edit_summary='Updating citation graph',
    alt_account=True,
)

print('Done setting up globals')
def update_graph(doi, wd_item, cites):
    """Submit one item's outgoing citations through a new CitationGrapher.

    A CitationGrapher is built around the module-level edit queue ``eq`` and
    given a single-entry manifest: ``wd_item`` maps to the tuple
    ``(doi, tuple(cites), '+2018-01-21T00:00:00Z')``.

    Args:
        doi: DOI string associated with ``wd_item``.
        wd_item: Manifest key; the item whose citations are being recorded.
        cites: Iterable of cited items; it is frozen into a tuple.
    """
    grapher = CitationGrapher(eq)

    # NOTE(review): the timestamp is hard-coded; presumably it is the
    # retrieval date of the source dataset -- confirm before reusing.
    manifest_entry = (doi, tuple(cites), '+2018-01-21T00:00:00Z')
    manifest = {wd_item: manifest_entry}

    grapher.process_manifest(manifest)
def process_bundle(lookup, doi_x):
    """Resolve a bundle of DOIs and start a graph update for the first one.

    ``lookup`` is passed to ``codeswitch.doi_to_wikidata``; the first element
    of the result is treated as the citing item and the remaining elements as
    the items it cites. Nothing happens when the citing item is ``None`` or
    when no cited item survives filtering.

    Args:
        lookup: Sequence of DOIs, citing DOI first.
        doi_x: The citing DOI, forwarded unchanged to ``update_graph``.
    """
    # Assumes the result is indexable and uses None for unresolved DOIs --
    # TODO confirm against codeswitch.doi_to_wikidata.
    resolved = codeswitch.doi_to_wikidata(lookup)
    citing_item = resolved[0]
    if citing_item is None:
        return

    # Drop unresolved entries and anything equal to the citing item itself
    # (this also discards the citing item's own slot at index 0).
    cited_items = [
        item for item in resolved
        if item is not None and item != citing_item
    ]
    if not cited_items:
        return

    worker = threading.Thread(
        target=update_graph,
        args=(doi_x, citing_item, cited_items),
        daemon=True,
    )
    worker.start()
def main():
    """Stream the reference dump and start one worker thread per record.

    Each line of ``assets/crossref_references.jsonl.bz2`` is parsed as a JSON
    object. Its first key is taken as the citing DOI and that key's value as
    the iterable of cited DOIs. A ``process_bundle`` thread is started for
    every record, and the loop pauses while ``threading.active_count()`` has
    reached ``THREAD_LIMIT``.

    Blank lines and empty JSON objects are skipped instead of raising.
    Before returning, the function waits for the ``process_bundle`` threads
    it started, so the final batch is not cut off when the interpreter exits.
    """
    workers = []

    with bzopen('assets/crossref_references.jsonl.bz2', 'r') as f:
        for line in f:
            print('. ', end='', flush=True)

            # A blank line (e.g. a trailing newline) is not a JSON document.
            if not line.strip():
                continue

            # Throttle: do not start more work until a thread slot is free.
            while threading.active_count() >= THREAD_LIMIT:
                time.sleep(0.25)

            mapping = json.loads(line)
            if not mapping:
                # An empty object carries no citing DOI to look up.
                continue

            doi_x = next(iter(mapping))
            lookup = [doi_x, *mapping[doi_x]]

            t = threading.Thread(target=process_bundle, args=(lookup, doi_x))
            t.daemon = True
            t.start()

            # Keep only live workers so the list stays bounded by the
            # thread limit, however long the input file is.
            workers = [w for w in workers if w.is_alive()]
            workers.append(t)

    # Daemon threads are stopped abruptly at interpreter shutdown, so wait
    # for the lookups still in flight before returning.
    # NOTE(review): threads that process_bundle starts itself are not
    # tracked here and may still be running when this returns.
    for w in workers:
        w.join()
# Start the processing run only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()