Skip to content

Commit

Permalink
Added function to read and parse the CityScope categories and their r…
Browse files Browse the repository at this point in the history
…elation to NAICS
  • Loading branch information
crisjf committed Apr 7, 2020
1 parent 440d71a commit 746865e
Showing 1 changed file with 21 additions and 1 deletion.
22 changes: 21 additions & 1 deletion indicator_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
@author: doorleyr
"""
import math
import pandas as pd

def shannon_equitability_score(species_counts):
diversity=0
Expand All @@ -18,4 +19,23 @@ def shannon_equitability_score(species_counts):
equitability=diversity/math.log(len(species_counts))
return equitability
else:
return None
return None

def parse_CityScopeCategories(fpath,CS_column='CS Amenities ',NAICS_column='Unnamed: 5'):
    '''
    Parse the CityScope categories table and its relation to NAICS codes.

    The table is the excel located at:
    fpath = tables/200405_CityScope.categories.xlsx

    Parameters
    ----------
    fpath : path / file-like accepted by pandas.read_excel, or a DataFrame
        that already holds the loaded sheet (used as-is and never modified).
    CS_column : name of the column holding the CityScope category. Blank
        cells inherit the closest non-blank value above them.
    NAICS_column : name of the column holding strings that start with the
        NAICS code followed by a space, e.g. "7225 - Restaurants".

    Returns
    -------
    DataFrame with columns [CS_column, 'NAICS', 'NAICS_name'], one row per
    unique (category, NAICS entry) pair. Rows with no NAICS entry, or with
    no category above them to inherit, are dropped.
    '''
    raw = fpath if isinstance(fpath, pd.DataFrame) else pd.read_excel(fpath)

    # The first data row is skipped (presumably a sub-header row in the
    # sheet - confirm against the file). The copy keeps the assignments
    # below from writing into a slice of the loaded frame.
    CS_cats = raw.iloc[1:][[CS_column, NAICS_column]].copy()

    # Forward-fill the category: each blank cell takes the nearest non-blank
    # value above it. Unlike a manual shift-until-full loop this terminates
    # even when the first row is blank (that row stays NaN and is dropped).
    CS_cats[CS_column] = CS_cats[CS_column].ffill()

    CS_cats = CS_cats.dropna().drop_duplicates()

    # The code is the first space-delimited token of the trimmed entry.
    CS_cats['NAICS'] = CS_cats[NAICS_column].str.strip().str.split(' ').str[0]

    # The name is what remains once the code and hyphens are removed.
    # NOTE: every '-' is removed, including hyphens inside the name itself.
    CS_cats['NAICS_name'] = [
        entry.replace(code, '').replace('-', '').strip()
        for entry, code in zip(CS_cats[NAICS_column], CS_cats['NAICS'])
    ]

    # Keyword form: the positional axis argument of drop() is not accepted
    # by recent pandas releases.
    CS_cats = CS_cats.drop(columns=NAICS_column)
    return CS_cats

0 comments on commit 746865e

Please sign in to comment.