Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 97 additions & 39 deletions convictions_data/management/commands/chrgdesc2category.py
Original file line number Diff line number Diff line change
@@ -1,66 +1,124 @@
import logging
from __future__ import division

from django.core.management.base import BaseCommand
from django.db import transaction

from convictions_data.models import Disposition
from convictions_data.models import Conviction
from convictions_data.statute import (get_iucr, IUCRLookupError,
ILCSLookupError, StatuteFormatError)

from pprint import pprint
import json

# can't find a handler?
#logger = logging.getLogger(__name__)

# Module-level flag consumed by true_once(); flipped to False on first use.
FIRST_TIME = True


def append_or_create(dict, chrgdesc, category):
    """Record ``category`` under ``chrgdesc`` in the mapping ``dict``.

    ``dict`` maps a charge description to a list of categories. The
    category is appended only if it is not already listed for that
    charge description; a missing key is created on demand.

    Raises:
        AssertionError: if ``category`` is empty/falsy, i.e. there is no
            IUCR category to record. Callers catch this exception type.
    """
    if not category:
        # Raised explicitly (rather than via ``assert False``) so the
        # check is not stripped when Python runs with -O.
        raise AssertionError(
            'no IUCR category for charge description {!r}'.format(chrgdesc))

    categories = dict.setdefault(chrgdesc, [])
    if category not in categories:
        categories.append(category)


def true_once():
    """Return True on the first call and False on every later call.

    State is kept in the module-level ``FIRST_TIME`` flag.
    """
    global FIRST_TIME
    if FIRST_TIME:
        FIRST_TIME = False
        return True
    return False

class Command(BaseCommand):
help = "Map charge descriptions to iucr categories."

def handle(self, *args, **options):
def div(x, y):
    """Return ``x / y`` as a percentage rounded to two decimal places.

    Returns 0 when ``y`` is zero instead of raising.
    """
    try:
        # Scale before rounding so the result carries no float artefacts
        # (round(0.07, 4) * 100 == 7.000000000000001), and multiply by a
        # float so the division is a true division on Python 2 even
        # without ``from __future__ import division``.
        return round(100.0 * x / y, 2)
    except ZeroDivisionError:
        return 0

chrgdesc_to_category = {}

for disposition in Disposition.objects.all():
def update_or_create(dict, chrgdesc, new_categories):
    """Merge ``new_categories`` into the list stored under ``chrgdesc``.

    ``dict`` maps a charge description to a list of categories. Each
    category in ``new_categories`` is appended unless it is already
    present; a missing key is created on demand.

    The mapping always owns its own list: the caller's ``new_categories``
    object is never stored directly, and duplicates inside
    ``new_categories`` are dropped for new keys as well as existing ones.
    """
    present_categories = dict.setdefault(chrgdesc, [])
    for category in new_categories:
        if category not in present_categories:
            present_categories.append(category)

chrgdesc = disposition.ammndchrgdescr if \
disposition.ammndchrgdescr else disposition.chrgdesc
category = disposition.iucr_category

case_number = disposition.case_number
statute = disposition.final_statute if \
disposition.final_statute else disposition.statute
chrgdisp = disposition.chrgdisp
chrgdispdate = disposition.chrgdispdate
class Command(BaseCommand):
    help = \
    """
    Try to generate, as close as possible,
    a one-to-one mapping between charge
    descriptions and iucr categories.
    """

    def handle(self, *args, **options):
        """Map each conviction's charge description to IUCR categories.

        For every Conviction, the categories that ``get_iucr`` returns for
        its final statute are looked up. A conviction counts as a "hit"
        when it resolves to exactly one category: either its stored
        ``iucr_category`` is confirmed by the lookup, or every offense
        returned for the statute shares a single category. Otherwise all
        candidate categories are recorded for the charge description.

        Writes two JSON files to the current directory:
        ``chrgdesc_to_category__all.json`` (the full mapping) and
        ``chrgdesc_to_category__multiples.json`` (only the charge
        descriptions with more than one category).
        """
        print('inside the command')

        chrgdesc_to_category = {}
        hit = 0

        convictions = Conviction.objects.all()
        total = convictions.count()

        for conviction in convictions:

            if true_once():
                print('inside the iteration')

            chrgdesc = conviction.final_chrgdesc
            category = conviction.iucr_category
            statute = conviction.final_statute

            # A single guarded lookup serves both the validation of the
            # stored category and the fallback below. Convictions whose
            # statute cannot be looked up are skipped.
            try:
                offenses = get_iucr(statute)
            except (IUCRLookupError, ILCSLookupError, StatuteFormatError):
                continue

            possible_categories = set(o.offense_category for o in offenses)

            # make sure that the category can be found in the crosswalk's
            # list of possible categories and is not just somehow in the
            # database
            if category and category not in possible_categories:
                category = ''

            if category:
                # exactly one confirmed category for this conviction
                update_or_create(chrgdesc_to_category, chrgdesc, [category])
                hit += 1
            elif len(possible_categories) == 1:
                # several offenses may match the statute, but they all
                # share one category
                category = offenses[0].offense_category
                update_or_create(chrgdesc_to_category, chrgdesc, [category])
                hit += 1
            elif possible_categories:
                # ambiguous: record every candidate category
                update_or_create(chrgdesc_to_category, chrgdesc,
                                 list(possible_categories))

        # Reported once over the full count; the loop index would be off
        # by one and undefined when there are no convictions.
        print("{}% one-to-one mapping".format(div(hit, total)))

        # The doubled space reproduces the output of the original
        # two-argument print statements.
        print('num total:  {}'.format(len(chrgdesc_to_category)))
        print('writing file with all')
        with open('chrgdesc_to_category__all.json', 'w') as f:
            json.dump(chrgdesc_to_category, f)

        chrgdesc_to_category_multiples = {
            desc: categories
            for desc, categories in chrgdesc_to_category.items()
            if len(categories) > 1
        }

        print('num chrgdesc that map to multiple possible IUCR categories:  {}'
              .format(len(chrgdesc_to_category_multiples)))
        print('writing multiples file')
        with open('chrgdesc_to_category__multiples.json', 'w') as f:
            json.dump(chrgdesc_to_category_multiples, f)

        print(('num convictions whose chrgdesc maps to multiple possible '
               'IUCR categories: {}').format(total - hit))

        print('done')



Expand Down
120 changes: 120 additions & 0 deletions convictions_data/management/commands/disambiguate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
from __future__ import division

from django.core.management.base import BaseCommand
from django.db.models import Count

from convictions_data.models import Conviction
from convictions_data.statute import get_iucr, IUCRLookupError, \
ILCSLookupError, StatuteFormatError

import json, sys


def prefix_fmt(num_break):
    """Return a string of ``num_break`` space characters."""
    indent = num_break * ' '
    return indent

def suffix_fmt(num_break):
    """Return a string of ``num_break`` newline characters."""
    trailing = num_break * '\n'
    return trailing

def fmt(msg, prefix, suffix):
    """Print ``msg`` followed by `` ...``, with leading and trailing padding.

    Args:
        msg: text to print.
        prefix: number of spaces written before the message.
        suffix: number of newlines written after the `` ...`` marker (in
            addition to the newline that print itself emits).
    """
    line = "{0}{1} ...{2}".format(prefix_fmt(prefix), msg, suffix_fmt(suffix))
    # A single parenthesised argument prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print(line)

def fmt_item(name, item, prefix, suffix):
    """Print a ``name: item`` line through ``fmt``.

    A falsy ``item`` is shown as the placeholder text
    'This value is empty.' instead of its own value.
    """
    shown = item if item else 'This value is empty.'
    fmt('{}: {}'.format(name, shown), prefix, suffix)

def set_formatting(loop_level):
    """Return the ``(prefix, suffix)`` pair used at a given loop level.

    Level 0: no indent, two newlines.
    Level 1: one indent, two newlines.
    Level 2: two indents, one newline.
    Any other level yields None.
    """
    layouts = (
        (0, (0, 2)),
        (1, (1, 2)),
        (2, (2, 1)),
    )
    for level, layout in layouts:
        if loop_level == level:
            return layout
    return None


class Command(BaseCommand):
    help = \
    """
    Do some work...
    """

    def handle(self, *args, **options):
        """Interactively review charge descriptions with several categories.

        Loads ``chrgdesc_to_category__multiples.json`` from the current
        directory. For every charge description in it, lists the distinct
        ``final_statute`` values of the matching convictions and, for each
        statute, prints the IUCR codes and offense categories returned by
        ``get_iucr`` (or the name of the lookup error that occurred).

        After each statute the user is prompted with ``>> ``: entering
        ``n`` skips the remaining statutes of the current charge
        description, any other input continues. Ctrl-C or end-of-input at
        the prompt prints ``done!`` and exits with status 0.
        """
        # raw_input only exists on Python 2; input is its Python 3 name.
        try:
            read_line = raw_input
        except NameError:
            read_line = input

        # Only the load needs the file; close it before the interactive
        # session starts.
        with open('chrgdesc_to_category__multiples.json') as f:
            multiples = json.load(f)

        print('\n')
        print('Total num of multiples: {}'.format(len(multiples)))
        print('\n\n')

        for chrgdesc in multiples:

            prefix, suffix = set_formatting(0)

            fmt_item('chrgdesc', chrgdesc, prefix, 0)

            # one row per distinct final_statute for this charge description
            statute_rows = (Conviction.objects
                            .filter(final_chrgdesc=chrgdesc)
                            .values('final_statute')
                            .annotate(Count('id'))
                            .order_by())
            num_statutes = statute_rows.count()
            fmt('Num of statutes: {}'.format(num_statutes), prefix, suffix)

            for i, row in enumerate(statute_rows):

                statute = row['final_statute']

                prefix, suffix = set_formatting(1)

                fmt_item('statute', statute, prefix, 0)
                fmt('Num statutes left: {}'.format(num_statutes - (i + 1)),
                    prefix, suffix)

                prefix, suffix = set_formatting(2)

                try:
                    o_tuples = [(o.code, o.offense_category)
                                for o in get_iucr(statute)]
                except IUCRLookupError:
                    fmt('IUCRLookupError occurred', prefix, suffix)
                except ILCSLookupError:
                    fmt('ILCSLookupError occurred', prefix, suffix)
                except StatuteFormatError:
                    fmt('StatuteFormatError occurred', prefix, suffix)
                else:
                    fmt_item('codes', [o[0] for o in o_tuples],
                             prefix, suffix)
                    fmt_item('categories', [o[1] for o in o_tuples],
                             prefix, suffix)

                # The prompt runs after the try statement rather than in a
                # ``finally`` clause: a ``break`` inside ``finally`` would
                # silently discard any unexpected in-flight exception.
                try:
                    cmd = read_line('>> ')
                except (KeyboardInterrupt, EOFError):
                    print('\ndone!')
                    sys.exit(0)

                if cmd == 'n':
                    break
                print('\n')

        print('done!')