Skip to content

Commit 7fc5dc2

Browse files
committed
add R replication
1 parent b32a786 commit 7fc5dc2

File tree

6 files changed

+1136
-167
lines changed

6 files changed

+1136
-167
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
/data
2+
/tables

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,4 @@
11
Code for my Economics undergraduate dissertation project, which uses a difference-in-differences design to estimate the effect of the 2013 Linda Mama policy on facility delivery and nutrition outcomes using DHS data. The identification strategy relies on competition among hospitals, measured with the Herfindahl–Hirschman Index (HHI).
2+
3+
I first ran the cleaning and analysis in Stata.
4+
I then replicated the cleaning and analysis in R.

dhs_cleaning.do

Lines changed: 97 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -8,38 +8,72 @@
88

99
cd /*set directory */
1010

11-
use dhs/births.dta
11+
use data/births.dta
1212

1313
*Current relevant variables
14-
keep caseid - v007 v012 v025 v106 v130 v131 v133 v138 v140 v190 v191 v201 v440 v444a v481 v501 v701 v715 v730 bord b1 b2 b4 b5 b8 m15 hw70 - hw72 idxml
14+
keep caseid - v007 v012 v025 v106 v130 v131 v133 v138 v140 v190 v191 v201 v440 v444a v445 v481 v501 v701 v715 v730 bord b1 b2 b4 b5 b8 m14 m15 hw70 - hw72 idxml
15+
16+
** get other variables
17+
*create mean wealth index by cluster
18+
egen meanwindex = mean(v191), by(v001)
19+
la var meanwindex "cluster mean wealth factor score"
20+
* rural-urban
21+
gen rural = cond(v025 == 2, 1, 0, .)
22+
*male
23+
gen male = cond(b4 == 1, 1, 0, .)
24+
*education
25+
ren v133 education
26+
* parity
27+
ren v201 parity
28+
* wealth index
29+
ren v191 windex
30+
* child alive
31+
ren b5 childalive
32+
* no. of anc visits
33+
ren m14 ancn
1534

1635

1736
*get only relevant years
1837
gen monthyearbirth = ym( b2, b1)
1938
la var monthyearbirth "birthdate in stata format"
20-
gen policy = cond(monthyear >= ym(2013, 06), 1, 0, .)
39+
gen policy = cond(monthyearbirth >= ym(2013, 06), 1, 0, .) /* 1 if born in or after June 2013 */
2140
la var policy "time effect"
22-
drop if monthyear < ym(2011, 1)
41+
drop if monthyearbirth < ym(2011, 1)
2342

2443
*gen age of child in months
2544
gen monthyearage = ym(v007, v006)
2645
la var monthyearage "interview date in stata format"
27-
gen age_months = monthyearage - monthyearbirth
28-
la var age_months "age in months"
29-
30-
*gen mother stunting/ wasted
31-
gen stunted = cond(v440 < -200, 1, 0, .)
32-
la var stunted "mother stunted"
33-
label define stuntedlbl 0 "not stunted" 1 "stunted"
34-
label values stunted stuntedlbl
35-
gen wasted = cond(v444a < -200, 1, 0, .)
36-
la var wasted "mother wasted"
37-
label define wastedlbl 0 "not wasted" 1 "wasted"
38-
label values wasted wastedlbl
39-
gen malnourished = cond(stunted == 1, 1, cond(wasted == 1, 1, 0,.))
40-
label define malnourishlbl 0 "not malnourished" 1 "malnourished"
41-
label values malnourished malnourishlbl
42-
la var malnourished "mother malnourished"
46+
gen agemonths = monthyearage - monthyearbirth
47+
la var agemonths "age in months"
48+
49+
* gen age of mother
50+
gen motherage = v012 - (agemonths/12)
51+
la var motherage "mother's age"
52+
53+
*gen mother malnutrition
54+
replace v440 = cond(inrange(v440, 9996, 9998), ., v440/100) /* special codes 9996-9998 become missing; otherwise rescale by 100 */
55+
gen mstunted = cond(missing(v440), ., v440 < -2) /* guard missing explicitly: (. < -2) evaluates to 0, not missing */
56+
la var mstunted "mother stunted"
57+
label define mstuntedlbl 0 "not stunted" 1 "stunted"
58+
label values mstunted mstuntedlbl
59+
replace v444a = cond(inrange(v444a, 9996, 9998), ., v444a/100) /* special codes 9996-9998 become missing; otherwise rescale by 100 */
60+
gen mwasted = cond(missing(v444a), ., v444a < -2) /* guard missing explicitly: (. < -2) evaluates to 0, not missing */
61+
la var mwasted "mother wasted"
62+
label define mwastedlbl 0 "not wasted" 1 "wasted"
63+
label values mwasted mwastedlbl
64+
replace v445 = cond(v445 == 9998, ., v445/100, .)
65+
gen munderweight = cond(missing(v445), ., v445 <= 18.5) /* guard missing explicitly: (. <= 18.5) evaluates to 0, not missing */
66+
label var munderweight "mother underweight"
67+
label define munderweightlbl 0 "not underweight" 1 "underweight"
68+
label values munderweight munderweightlbl
69+
gen moverweight = cond(missing(v445), ., v445 >= 25) /* missing sorts above every number, so (. >= 25) would be true without this guard */
70+
label var moverweight "mother overweight"
71+
label define moverweightlbl 0 "not overweight" 1 "overweight"
72+
label values moverweight moverweightlbl
73+
gen mmalnourished = cond(mstunted == 1, 1, cond(mwasted == 1, 1, cond(munderweight == 1, 1, 0, .)))
74+
label define mmalnourishlbl 0 "not malnourished" 1 "malnourished"
75+
label values mmalnourished mmalnourishlbl
76+
la var mmalnourished "mother malnourished"
4377

4478

4579
*Delivery into dummy
@@ -49,31 +83,50 @@ replace facilitydelivery = 0 if m15 == 11 | m15 == 12 | m15 == 96
4983
la var facilitydelivery "delivery in facility"
5084

5185
*Type of hospital
52-
gen hosp_type = cond(m15 == 20 | m15 == 21| m15 == 22 | m15 == 23 | m15 == 26, 1, cond(m15 == 31 | m15 == 32 | m15 == 33 | m15 == 36, 0, .))
53-
la var hosp_type "type of hospital"
86+
gen hosptype = cond(m15 == 20 | m15 == 21| m15 == 22 | m15 == 23 | m15 == 26, 1, cond(m15 == 31 | m15 == 32 | m15 == 33 | m15 == 36, 0, .))
87+
la var hosptype "type of hospital"
5488

5589
* label variables
5690
label define policylbl 0 "before 2013" 1 "after 2013"
5791
label values policy policylbl
5892
label define facilitylbl 0 "home" 1 "facility"
5993
label values facilitydelivery facilitylbl
6094
label define typelbl 0 "public" 1 "private"
61-
label values hosp_type typelbl
95+
label values hosptype typelbl
6296

6397
*Drop if missing height/ weight/ implausible
6498
*drop if hw13 != 0 /*decided not to drop dead children */
6599
foreach var of varlist hw70 - hw72 v440 v444a {
66100
replace `var' = . if `var' == 9996 | `var' == 9997 | `var' == 9998
67101
}
68-
replace v133 = . if v133 == 97
102+
replace education = . if education == 97
69103
replace v715 = . if v715 == 98
70104

71105
/*make height/weight/age into std deviations
72106
foreach var of varlist hw70 - hw72 {
73107
replace `var' = `var'/100
74108
}
109+
110+
* generate child malnutrition
111+
gen cstunted = cond(hw70 < -2, 1, 0, .)
112+
la var cstunted "child stunted"
113+
label values cstunted mstuntedlbl
114+
gen cwasted = cond(hw72 < -2, 1, 0, .)
115+
la var cwasted "child wasted"
116+
label values cwasted mwastedlbl
117+
gen cunderweight = cond(hw71 < -2, 1, 0, .)
118+
label var cunderweight "child underweight"
119+
label values cunderweight munderweightlbl
120+
gen coverweight = cond(hw71 > 2, 1, 0, .)
121+
label var coverweight "child overweight"
122+
label values coverweight moverweightlbl
123+
gen cmalnourished = cond(cstunted == 1, 1, cond(cwasted == 1, 1, cond(cunderweight == 1, 1, 0, .)))
124+
label values cmalnourished malnourishlbl
125+
la var cmalnourished "child malnourished"
126+
127+
75128
*/
76-
save dhs/dhs_births.dta, replace
129+
save dhs/dhsbirths.dta, replace
77130
clear
78131

79132

@@ -82,55 +135,53 @@ clear
82135
****
83136

84137
* Shp to dta for gps location
85-
shp2dta using gps.shp, data(gps_data) coord(gps_coord) genid(gps_id) replace
138+
shp2dta using gps.shp, data(gpsdata) coord(gpscoord) genid(gpsid) replace
86139

87-
use gps_data.dta
88-
ren _all, lower
140+
use gpsdata.dta
141+
ren _all, lower /* lowercase every variable name */
89142
ren dhsclust v001
90143
ren dhsregna province
91144

92-
save gps_data.dta, replace
145+
save gpsdata.dta, replace
93146
clear
94147

95148
****
96149
***Identify subcounty
97150
****
98151

99-
use .data/dhs_births.dta
152+
use .data/dhsbirths.dta
100153

101-
merge m:1 v001 using .data/gps_data.dta
102-
drop if _merge != 3 /*Noone in this regions was eligible */
103-
drop _merge
104-
ren _all, lower
154+
merge m:1 v001 using .data/gpsdata.dta
155+
drop if _merge != 3 /* no one in these regions was eligible */
156+
drop _merge /* indicator created by merge; remove it so the next merge can recreate it */
157+
ren _all, lower /* lowercase every variable name */
105158
ren adm1name county
106159

107160
foreach var of varlist county province source {
108161
replace `var'=ustrlower(`var')
109162
}
110163
drop if source == "mis" /*lat and long are 0,0*/
111-
drop dhsid - adm1dhs dhsregco source alt_gps - datum
164+
drop dhsid - adm1dhs dhsregco source alt_gps - datum /* variable names were only lowercased, so alt_gps keeps its underscore */
112165

113166
*** Put cluster in subcounty
114-
ren gps_id clusterid
167+
ren gpsid clusterid
115168
la var clusterid "cluster id"
116-
geoinpoly latnum longnum using ./06_gis/subcounty/subcounty/sub_county_coord.dta
117-
drop if missing(_ID)
118-
ren _ID sub_id
169+
geoinpoly latnum longnum using ./06gis/subcounty/subcounty/subcountycoord.dta
170+
drop if missing(_ID) /* _ID is the polygon id added by geoinpoly */
171+
ren _ID subid /* polygon id from geoinpoly becomes the subcounty id */
119172

120173

121174
*** Merge with concentration data
122-
merge m:1 sub_id using .data/subcountymap.dta
123-
drop if _merge != 3 /*this subcounty has no person */
124-
drop _merge
125-
la var sub_id "subcounty id"
175+
merge m:1 subid using .data/subcountymap.dta
176+
drop if _merge != 3 /*this subcounty has no person */
177+
drop _merge /* indicator created by merge; no longer needed */
178+
la var subid "subcounty id"
126179
encode county, gen (dcounty)
127180
la var dcounty "factor variable"
128181
encode subcounty, gen(dsubcounty)
129182
la var dsubcounty "factor variable"
130183
encode province, gen(dprovince)
131184
la var dprovince "factor variable"
132-
save .data/dhs.dta, replace
133-
134-
135185

136186

187+
save .data/dhs.dta, replace

0 commit comments

Comments
 (0)