# marginal_effect_innovation_indicator.py
import pandas as pd
import numpy as np
import joblib
import os
from innovation_indicator import InnoIndicator
from indicator_tools import DataLoader
from APICalls import CBPCall
def industry_to_skills_knowledge(Xdiff, I):
    '''
    Map every row of industry shares to its skill and knowledge compositions.

    Each row of ``Xdiff`` is converted to a ``{column: value}`` record, passed
    through ``I.industries_to_occupations`` and the resulting occupation
    composition is then fed to ``I.occupations_to_skills`` and
    ``I.occupations_to_knowledge``.

    Returns a tuple ``(skill_compositions, knowledge_compositions)``: two lists
    holding one entry per row of ``Xdiff``, in row order.
    '''
    skills_per_row, knowledge_per_row = [], []
    for shares in Xdiff.to_dict(orient='records'):
        # The occupation mix is computed once and shared by both mappings.
        occupations = I.industries_to_occupations(shares)
        skills_per_row.append(I.occupations_to_skills(occupations))
        knowledge_per_row.append(I.occupations_to_knowledge(occupations))
    return skills_per_row, knowledge_per_row
def AME_industry(I, col, X):
    '''
    Average marginal effect of one industry share on the skill and knowledge indicators.

    Numbers should be interpreted as the change in the average indicator (over all X)
    as a result of a 0.1 increase in the given NAICS code.

    Parameters
    ----------
    I : object exposing ``sks_model``, ``kno_model``, ``normalize``,
        ``normalize_value``, ``sks_bounds`` and ``kno_bounds``, plus the
        mapping methods used by ``industry_to_skills_knowledge``.
    col : label of the column of ``X`` to perturb.
    X : DataFrame with one row per observation and one column per industry.

    Returns
    -------
    (sks_ame, kno_ame) : mean finite-difference effect on the skill and on the
        knowledge prediction, each rescaled to a 0.1 step in ``col``.

    Notes
    -----
    The step ``dx`` is the median gap between consecutive sorted nonzero values
    of ``col``. With fewer than two nonzero values ``dx`` is NaN, and with many
    tied values it can be 0; in both cases the returned effects are not finite.
    '''
    nonzero_values = X.loc[X[col] != 0, col].values
    dx = np.median(np.diff(np.sort(nonzero_values)))

    # Work on a 0..n-1 index so that base and perturbed rows pair up by label,
    # whatever index X arrived with.
    base = X.reset_index(drop=True)
    bumped = base.copy()
    bumped[col] = base[col] + dx

    # concat puts all base rows before all perturbed rows; a *stable* sort on
    # the duplicated index interleaves them as base_0, bumped_0, base_1, ...
    # (the default quicksort does not guarantee the order inside each pair).
    Xdiff = pd.concat([base, bumped]).sort_index(kind='mergesort')

    skill_compositions, knowledge_compositions = industry_to_skills_knowledge(Xdiff, I)

    # assumes predict() returns one value per row, in row order - TODO confirm
    Ypred = I.sks_model.predict(pd.DataFrame(skill_compositions))
    if I.normalize:
        Ypred = I.normalize_value(Ypred, I.sks_bounds)
    # Every second difference is (perturbed_i - base_i); the ones in between
    # straddle two different observations and are discarded.
    dsks = np.diff(Ypred)[::2]
    dsksdx = (dsks / dx) * 0.1

    Ypred = I.kno_model.predict(pd.DataFrame(knowledge_compositions))
    if I.normalize:
        Ypred = I.normalize_value(Ypred, I.kno_bounds)
    dkno = np.diff(Ypred)[::2]
    dknodx = (dkno / dx) * 0.1

    return np.mean(dsksdx), np.mean(dknodx)
def main():
    '''
    Compute the average marginal effect of every NAICS industry share on the
    skill and knowledge indicators and store them as a CSV.

    Nothing is recomputed when the output file already exists; delete it to
    force a new run. The result has one row per NAICS column with the columns
    ``NAICS``, ``SKS_AME`` and ``KNO_AME``.
    '''
    outPath = 'tables/innovation_data'
    outfpath = os.path.join(outPath, 'innovation_marginal_effect.csv')
    if os.path.isfile(outfpath):
        print('Marginal effects already stored. To recompute, delete the current results located at: {}'.format(outfpath))
        return

    # Create the target directory up front so a missing folder is reported
    # before the long computation instead of when saving.
    os.makedirs(outPath, exist_ok=True)

    print('Loading indicator and employment by industry for each MSA')
    I = InnoIndicator()
    data = DataLoader()
    data.load_MSA_emp_byInd()

    # One row per MSA, one column per NAICS code, missing cells counted as 0.
    X = pd.pivot_table(data.emp_msa_ind, values='EMP', index='MSA', columns='NAICS2017').fillna(0)
    # Turn employment counts into within-MSA shares. Dividing row-wise only
    # touches columns that exist in the pivot, so codes the pivot dropped
    # cannot trigger a KeyError.
    X = X.div(X.sum(axis=1), axis=0)
    # The MSA identifier is not a feature; keep a plain 0..n-1 index.
    X = X.reset_index(drop=True)

    kno_ames = {}
    sks_ames = {}
    for col in X.columns:
        print('\tDerivating with respect to NAICS: {}'.format(col))
        sks_ames[col], kno_ames[col] = AME_industry(I, col, X)

    print('Combining and saving results')
    sks_table = pd.DataFrame(list(sks_ames.items()), columns=['NAICS', 'SKS_AME'])
    kno_table = pd.DataFrame(list(kno_ames.items()), columns=['NAICS', 'KNO_AME'])
    ames = pd.merge(sks_table, kno_table, on='NAICS')
    ames.to_csv(outfpath, index=False)
# Run the computation only when executed as a script, not on import.
if __name__ == '__main__':
    main()