-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathregression_analysis.do
154 lines (80 loc) · 3.86 KB
/
regression_analysis.do
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
clear
set more off

// COVID-19 prison release policies and county population rate of infection.
// The working directory is resolved from the current OS user name, so every
// relative file path below is read from / written to this Box folder.
cd "/Users/`c(username)'/Box/Rachel's Personal Folder/covid19/MIT_COVID-19_Datathon"

// import prison data by county
import delimited "prison_county_df.csv", encoding(ISO-8859-1)

*** (1) Clean prison data by county for merge with county demographics
// v1 is presumably the unnamed row-index column written by the CSV exporter -- confirm
drop v1

// Split "county, state" into its two parts.
// comma_pos is the position of the comma in the ", " separator.
generate comma_pos = strpos(countystate, ", ")
generate county = substr(countystate, 1, comma_pos - 1)
// Everything after the comma starts with the separator's blank, so trim it;
// otherwise state is stored as " Alabama" and never matches "Alabama".
generate state = strtrim(substr(countystate, comma_pos + 1, .))
drop comma_pos

// create a daily Stata date from the "YMD" string and display it as a date
gen stata_date = date(date, "YMD")
format stata_date %td

// deal with missing values: "NA" release fractions are recoded to 0 before conversion
replace frac_released = "0" if frac_released == "NA"
destring frac_released, replace // 77066 missing values

// clean population data; force turns non-numeric entries into missing
destring census2010pop, force replace // 6217 missing values

sort county state
save "prison_county_df.dta", replace
*** (2) Clean county demographic data for merge
// Replace whatever is in memory with the county demographics CSV.
import delimited "county_demographics_df.csv", encoding(ISO-8859-1) clear

// v1 is presumably the exporter's row-index column -- confirm
drop v1

// Convert both area-based measures to numeric in one pass; with force,
// entries that are not numbers become missing (author recorded 302 missing each).
destring land_area population_density, force replace

// Lower-case the county name in place so it can serve as a merge key.
replace county = strlower(county)

sort county state
save "county_demographics_df.dta", replace
*** (3) Merge prison data by county and county demographic data
// Master: cleaned prison data (many rows per county).
use "prison_county_df.dta", clear

// Make sure the state key carries no stray blanks before matching on it.
replace state = strtrim(state)

// Match on county AND state: county names repeat across states, so a
// county-only key pairs rows from unrelated counties. m:1 (many prison rows
// to one demographics row) replaces m:m, which pairs rows by sort order
// within key and is discouraged by the Stata manual. m:1 stops with an error
// if the demographics file is not unique on county-state, which is the
// desired signal that the key needs fixing.
// NOTE(review): assumes state is spelled/cased the same way in both files;
// check the _merge tabulation below for an unexpectedly low match rate.
merge m:1 county state using "county_demographics_df.dta"
tab _merge
rename _merge county_merge
save "prison_county_demographics_df.dta", replace
*** (4) General linear models
// Work on the merged prison + demographics file.
use "prison_county_demographics_df.dta", clear
tabmiss

// Natural-log transforms; non-positive inputs become missing and drop out
// of the estimation samples below.
generate log_diff_cases    = ln(diff_cases)
generate log_frac_released = ln(frac_released)

// Covariate sets shared by the models below.
local demog    female_perc black_pop_perc asian_pop_perc hisp_pop_perc population_density
local age_land highrisk_agegroup_perc land_area age_55to64_perc age_65to74_perc age_75to84_perc

// Model 1: bivariate log-log, robust SEs
// n = 13,720; r-sq = 0.0050; B1 = -.07821 (p<0.001)
regress log_diff_cases log_frac_released, vce(robust)

// Model 2: adds demographic shares and population density
// n = 13,319; r-sq = 0.2080; B1 = -.1205477 (p<0.001)
regress log_diff_cases log_frac_released `demog', vce(robust)

// Model 3: adds age structure and land area
// n = 13,319; r-sq = 0.2080; B1 = -.1205477 (p<0.001)
// NOTE(review): recorded figures are identical to Model 2 -- likely a copy-paste; re-run to confirm.
regress log_diff_cases log_frac_released `demog' `age_land', vce(robust)
**** (5). Output for reporting
// Requires the user-written outreg2 command (ssc install outreg2).

// --- (5a) Level models on the raw prison/county file ---------------------
// Relative path: the cd at the top of the script already points at the
// project folder, so no user-specific absolute path is needed. Same encoding
// as the first import of this file.
import delimited "prison_county_df.csv", encoding(ISO-8859-1) clear

// Coerce to numeric; with force, non-numeric entries (e.g. "NA") become missing.
destring frac_released, force replace
destring population_density, force replace
destring tot_pop, force replace
destring prison_capacity, force replace
destring prison_pop, force replace
destring highrisk_agegroup_perc, force replace

// glm with its default Gaussian family / identity link, robust SEs
glm diff_cases prison_county population_density tot_pop, vce(robust)
outreg2 using "Correlation_counties.xls", dec(3) replace excel

// Prison counties only: release fraction with progressively more controls.
glm diff_cases frac_released population_density tot_pop if prison_county==1, vce(robust)
outreg2 using "Output.xls", dec(3) replace excel
glm diff_cases frac_released population_density tot_pop prison_pop if prison_county==1, vce(robust)
outreg2 using "Output.xls", dec(3) append excel
glm diff_cases frac_released population_density tot_pop prison_pop highrisk_agegroup_perc if prison_county==1, vce(robust)
outreg2 using "Output.xls", dec(3) append excel

// --- (5b) Log-log models on the merged file ------------------------------
// The raw CSV loaded above contains neither the log variables nor the merged
// demographic covariates, so reload the merged dataset and rebuild the logs
// before estimating; otherwise reg stops with "variable not found".
use "prison_county_demographics_df.dta", clear
generate log_diff_cases    = log(diff_cases)
generate log_frac_released = log(frac_released)

quietly reg log_diff_cases log_frac_released, vce(robust)
outreg2 using "Output_log.xls", dec(3) replace excel
quietly reg log_diff_cases log_frac_released female_perc black_pop_perc asian_pop_perc hisp_pop_perc population_density, vce(robust)
outreg2 using "Output_log.xls", dec(3) append excel
quietly reg log_diff_cases log_frac_released female_perc black_pop_perc asian_pop_perc hisp_pop_perc population_density highrisk_agegroup_perc land_area age_55to64_perc age_65to74_perc age_75to84_perc, vce(robust)
outreg2 using "Output_log.xls", dec(3) append excel