-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathreformat.py
More file actions
74 lines (63 loc) · 3.13 KB
/
Copy pathreformat.py
File metadata and controls
74 lines (63 loc) · 3.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import re
from datetime import datetime
named_zip = re.compile(r'(?P<trash>[A-Z]*)(?P<zip>\d{5})-?(?P<zip4>\d{4})?')
x=0
debug = False
def nowtime():
d = datetime.now()
return d.strftime('%Y-%m-%dT%H:%M:%S'),
def address_seq():
global x
x+=1
return str(x),
def zip_parse(z):
m = named_zip.match(z)
return m.groupdict()['zip'] if m.groupdict()['zip'] else '', m.groupdict()['zip4'] if m.groupdict()['zip4'] else ''
def create_vf_address(street_num, pre_dir, street_name, street_suf, post_dir, unit_des, apt_num):
primary = [k for k in [street_num, pre_dir, street_name, street_suf, post_dir] if k and len(k)>0]
secondary = [k for k in [unit_des, apt_num] if k and len(k)>0]
return ' '.join(primary), ' '.join(secondary)
def concat_us(*args, **kwargs):
return '_'.join(args + tuple(kwargs.values())),
def _saintrep(m):
return m.groupdict()['prefix'] + 'st' + m.groupdict()['suffix']
def ed_concat(*args, **kwargs):
name = concat_us(*args, **kwargs)[0]
name = re.sub(r'(?P<prefix>[_\s]|^)s(?:ain)?te?\.?(?P<suffix>[_\s]|$)', _saintrep, name.lower().strip())
name = name.replace("'",'')
return name,
def referendum_id(state, referendum_title):
return '{state}_{referendum_title}'.format(state=state.strip(), referendum_title=referendum_title.strip()).lower(),
def contest_id(state, district, office_name):
return '{state}_{district}_{office_name}'.format(state=state.strip(), district=district.strip(), office_name=office_name.strip()).lower(),
#An issue here is that the electoral district map made for each state is
#created from the district names in the voterfile, so if these district
#names do not contain enough information to replicate the candidate file
#district names, a problem arises. The candidate file names need to be
#transformed to simpler forms that can be matched from the VF
#So we'll introduce a transform function
def get_edmap(ed_map):
class passdict(dict):
def __missing__(self,key):
return {'name':key,'type':''}
ed_map = passdict(ed_map)
patt = re.compile(r'(?P<name>\D+)(?P<number>\d+)(?P<extra>\D?)$')
def edmap(electoral_district):
electoral_district = ' '.join(re.split(r'\s+',electoral_district.strip()))
print electoral_district
m = patt.match(electoral_district)
if m:
electoral_district = '{name}{number}{extra}'.format(name=m.groupdict()['name'], number=int(m.groupdict()['number']), extra=m.groupdict()['extra'])
#TODO this regex has problems with 2 saints next to each other
electoral_district = re.sub(r'(?P<prefix>[_\s]|^)s(?:ain)?te?\.?(?P<suffix>[_\s]|$)', _saintrep, electoral_district.lower().strip())
electoral_district = electoral_district.replace("'",'')
print electoral_district
if not ed_map.has_key(electoral_district):
#import pdb;pdb.set_trace()
pass
t = ed_map[electoral_district]
print t
if debug and not ed_map.has_key(electoral_district):
import pdb;pdb.set_trace()
return t['name'],t['type'], ed_concat(t['name'],t['type'])[0], ed_map.has_key(electoral_district)
return edmap