| 
6 | 6 | from datetime import datetime  | 
7 | 7 | import math  | 
8 | 8 | 
 
  | 
 | 9 | +from constants import VIRUSES, GEOS, REGIONS, NATION, LAST_WEEK_OF_YEAR   | 
 | 10 | + | 
def abbreviate_virus(full_name):
    """Replace every virus name found in ``full_name`` with its abbreviation.

    The input is lowercased, then each whole-word occurrence of a key of
    ``VIRUSES`` is replaced by the value mapped to that key. Text that
    matches no key is returned lowercased but otherwise unchanged.

    Args:
        full_name: Text that may contain one or more virus names.

    Returns:
        The lowercased text with all matched names substituted.
    """
    lowercase = full_name.lower()

    # With no keys the alternation would be empty, match the empty string at
    # every word boundary, and raise KeyError on VIRUSES[""].
    if not VIRUSES:
        return lowercase

    # Regex alternation accepts the first branch that matches, not the
    # longest one, so longer keys are listed first. Otherwise a key that is a
    # word-bounded prefix of another key could win depending on dict order.
    keys = sorted(VIRUSES, key=len, reverse=True)
    pattern = re.compile(r'\b(' + '|'.join(re.escape(k) for k in keys) + r')\b')
    return pattern.sub(lambda match: VIRUSES[match.group()], lowercase)
34 | 17 | 
 
  | 
def abbreviate_geo(full_name):
    """Replace every geography name found in ``full_name`` with its code.

    The input is lowercased, then each whole-word occurrence of a key of
    ``GEOS`` is replaced by the value mapped to that key. Text that matches
    no key is returned lowercased but otherwise unchanged.

    Args:
        full_name: Text that may contain one or more geography names.

    Returns:
        The lowercased text with all matched names substituted.
    """
    lowercase = full_name.lower()

    # With no keys the alternation would be empty, match the empty string at
    # every word boundary, and raise KeyError on GEOS[""].
    if not GEOS:
        return lowercase

    # Regex alternation accepts the first branch that matches, not the
    # longest one, so longer keys are listed first. Otherwise a key that is a
    # word-bounded prefix of another key could win depending on dict order.
    keys = sorted(GEOS, key=len, reverse=True)
    pattern = re.compile(r'\b(' + '|'.join(re.escape(k) for k in keys) + r')\b')
    return pattern.sub(lambda match: GEOS[match.group()], lowercase)
77 | 25 | 
 
  | 
78 | 26 | def create_geo_types(geo,default_geo):  | 
79 |  | -    regions = ['atlantic','atl','province of québec','québec','qc','province of ontario','ontario','on',  | 
80 |  | -               'prairies', 'pr', "british columbia", 'bc',"territories",'terr']  | 
81 |  | -    nation = ["canada","can",'ca']  | 
82 |  | -                      | 
83 |  | -    if geo in nation:  | 
 | 27 | +    if geo in NATION:  | 
84 | 28 |         geo_type="nation"  | 
85 |  | -    elif geo in regions:  | 
 | 29 | +    elif geo in REGIONS:  | 
86 | 30 |         geo_type="region"  | 
87 | 31 |     else:  | 
88 | 32 |         geo_type = default_geo  | 
@@ -163,7 +107,7 @@ def get_weekly_data(base_url,start_year):  | 
163 | 107 |     week_string = week_df.iloc[0]['Text'].lower()  | 
164 | 108 |     current_week = int(re.search("week (.+?) ", week_string).group(1))  | 
165 | 109 | 
 
  | 
166 |  | -    if current_week < 34:  | 
 | 110 | +    if current_week < LAST_WEEK_OF_YEAR:  | 
167 | 111 |         current_year = start_year+1  | 
168 | 112 |     else:  | 
169 | 113 |         current_year = start_year  | 
 | 
0 commit comments