-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathload_statute_special.py
50 lines (40 loc) · 2.22 KB
/
load_statute_special.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#! C:\python27
import re, urllib, pickle
def _fetch_page(theURL):
    """Download theURL and return the page source as a string."""
    # Resolved here so the function works with the Python 2 ``urllib`` module
    # (what this file imports) as well as Python 3's ``urllib.request``.
    try:
        from urllib import urlopen
    except ImportError:
        from urllib.request import urlopen
    thePage = urlopen(theURL)
    try:
        theRaw = thePage.read()
    finally:
        # The handle was previously left open.
        thePage.close()
    if not isinstance(theRaw, str):
        # Python 3 hands back bytes.  NOTE(review): the page encoding is not
        # visible from this file; latin-1 keeps a one-to-one byte/character
        # mapping like the Python 2 byte string -- confirm the real charset.
        theRaw = theRaw.decode('latin-1')
    return theRaw


def _clean_statute_body(theBody):
    """Strip markup from the page body and normalise its whitespace."""
    # NOTE(review): as written this replaces a space with a space (a no-op);
    # presumably the pattern was an HTML entity such as a non-breaking space
    # before the file was copied -- confirm against the original source.
    theText = re.sub(" ", " ", theBody)
    # Put a space after every statute citation (e.g. 35-4-2.10, 34A-3A-6).
    # re.DOTALL used to be passed here as the fourth positional argument,
    # which re.sub treats as ``count``: only the first 16 citations were
    # padded.  The pattern contains no "." wildcard, so no flag is needed.
    theText = re.sub(r'(\d*[A-Z]?-\d*[A-Z]?-\d[\.\d]*)', r"\1 ", theText)
    theText = re.sub('<!-- WP Style Open: IN --> <!-- WP Style End: IN -->', " ", theText)  # account for indents
    # NOTE(review): ".*" is greedy, so on a line holding several comments (or
    # several attributes after <Div) everything between the first opener and
    # the last closer is dropped.  Left unchanged -- confirm this is intended.
    theText = re.sub(r'\<!--.*-->', "", theText)
    theText = re.sub(r'<Div.*\">', "", theText)  # clear the div
    theText = re.sub(r'\n|<b>|</b>|\r', " ", theText)  # remove the newlines
    # Only explicit <BR>/<br>/<p> tags produce line breaks in the result.
    theText = re.sub(r'<BR>|<br>', r'\n', theText)
    theText = re.sub("<p>", r'\n', theText)
    return re.sub(r" +", " ", theText)  # Turn all multiple spaces into a single space


def _status_title(theText):
    """Return the canned title for a statute flagged in theText, else None."""
    # Checked in order, first hit wins.  "Repealed by" outranks the other
    # markers while a bare "Repealed" ranks below "Obsolete", as before.
    theMarkers = (
        ("Repealed by", "Repealed."),
        ("Superseded", "Superseded."),
        # The original only looked for this misspelling; it is still accepted
        # but now yields the correctly spelled title.
        ("Supereseded", "Superseded."),
        ("Obsolete", "Obsolete."),
        ("Repealed", "Repealed."),
        ("Omitted", "Omitted."),
    )
    for theMarker, theStatus in theMarkers:
        if theMarker in theText:
            return theStatus
    return None


def read_statute_special(theURL):
    """Fetch one statute page and return its number, cleaned text and title.

    theURL must contain ``&Statute=<number>``; whatever follows that marker
    is used as the statute number.

    Returns a tuple ``(theStatuteNumber, theText, theTitle)``:
      * theStatuteNumber -- the number taken from the URL;
      * theText -- the contents of the page's <BODY> element with comments,
        <Div> and <b> tags removed, <BR>/<br>/<p> turned into newlines and
        runs of spaces collapsed;
      * theTitle -- "Repealed.", "Superseded.", "Obsolete." or "Omitted."
        when the text contains the matching word, otherwise the page <title>
        with the leading statute number and the space after it removed.

    Raises IndexError when the URL has no ``&Statute=`` part, when the page
    has no <BODY>...</BODY> section, or when a title is needed but the page
    has no <title> ending in a period.
    """
    theStatuteNumber = theURL.split(u'&Statute=')[1]
    thePage = _fetch_page(theURL)
    theTitles = re.findall(r'<title>(.*\.)\s*<\/title>', thePage)
    theBodies = re.findall(r'<BODY>(.*)<\/BODY>', thePage, re.DOTALL)
    if not theBodies:
        # Same exception type as the old ``theBodyText[0]``, with context.
        raise IndexError("no <BODY>...</BODY> section found at %s" % theURL)
    theText = _clean_statute_body(theBodies[0])
    theTitle = _status_title(theText)
    if theTitle is None:
        if not theTitles:
            raise IndexError("no <title> ending in a period found at %s" % theURL)
        # Get rid of the leading statute number and the space that follows it.
        theTitle = theTitles[0][(len(theStatuteNumber) + 1):]
    return theStatuteNumber, theText, theTitle
# Container for scraped statutes.  Only the commented-out lines in this
# section fill it (keyed by statute number) or save it.
theWebStatute = dict()
# Earlier test calls, kept for reference.  The first one passes a second
# argument, which read_statute_special(theURL) does not accept.
#theWebStatute['35-4-2.10'] = read_statute_special('http://legis.sd.gov/Statutes/Codified_Laws/DisplayStatute.aspx?Type=Statute&Statute=35-4-2.10', '35-4-2.10')
#theWebStatute['34A-3A-6'] = read_statute_special('http://legis.sd.gov/Statutes/Codified_Laws/DisplayStatute.aspx?Type=Statute&Statute=34A-3A-6')
# Fetch a single statute and show the three values that come back.
theStatNum, theStatText, theStatTitle = read_statute_special(
    'http://legis.sd.gov/Statutes/Codified_Laws/DisplayStatute.aspx?Type=Statute&Statute=35-5-3.1'
)
print(theStatNum)
print(theStatText)
print(theStatTitle)
# Writing the container to disk is currently switched off.
#pickle.dump( theWebStatute, open("webstatutes.p", "wb"))