-
Notifications
You must be signed in to change notification settings - Fork 3
/
qualityChecks.do
199 lines (149 loc) · 5.67 KB
/
qualityChecks.do
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
************************************************************************************************************************
* Program qualityChecks.do
* Create control totals and other tabulations to assist with ETL and variable preparation quality control
************************************************************************************************************************
************************************************************************************************************************
* Checks for etlCz.do
* Loads czAssembled and prints three control checks: rows per year, (czone, year) groups per year, and any
* (year, cz_mergevar) group whose summed afactor is not within 0.01 of 1.
************************************************************************************************************************
use czAssembled, clear

* Check IPUMS geo entity counts by year (row count of czAssembled per year)
tabulate year

* Check cz counts by year: collapse to one row per (czone, year), then count those rows per year.
* preserve/restore brackets only this collapse so the full data is available again for the afactor check.
preserve
collapse (count) cz_mergevar, by(czone year)
tabulate year
restore

* Check that afactor sums to ~1 for every IPUMS entity (results should be blank).
* The tolerance is applied on both sides of 1: a sum above 1.01 is as much an allocation error as one
* below .99, and the previous one-sided test (afactor < .99) let those pass silently.
collapse (sum) afactor, by(year cz_mergevar)
list if !inrange(afactor, .99, 1.01)
************************************************************************************************************************
* Checks for etlOcc.do
* Loads occ1990ddAssembled and prints, per year, the number of distinct (year, occ) groups and then the number of
* distinct (year, occ1990dd) groups.
************************************************************************************************************************
use "occ1990ddAssembled", clear

* Occupation count by year: one row per (year, occ) after the collapse, so tabulate counts groups per year.
* The collapse is wrapped in preserve/restore because the second check needs the uncollapsed data.
preserve
collapse (count) occ1990dd, by(year occ)
tabulate year
restore

* Balanced occupation count by year: same approach, grouping on occ1990dd instead of occ
collapse (count) occ, by(year occ1990dd)
tabulate year
************************************************************************************************************************
* Checks for etlIpumsCzOcc.do
* Loads master and prints record counts, a duplicate report on serial, crosstabs against year, and histograms.
* Commands run in a fixed order; each histogram replaces the previous graph.
************************************************************************************************************************
use "master", clear

* Individual record count by year
tabulate year

* Make sure serial numbers aren't ever duplicated more than ~20 times
duplicates report serial

* Group quarters type and sex distributions, each crossed with year (year on the rows)
foreach colvar in gqtype sex {
    tabulate year `colvar'
}

* State distribution
histogram statefip

* Grade / education distributions, each crossed with year (year on the columns)
foreach rowvar in higrade educ college {
    tabulate `rowvar' year
}

* Employment distributions: one-way tables first, then histograms
foreach catvar in empstat classwkr {
    tabulate `catvar'
}
foreach codevar in occ ind1990 {
    histogram `codevar'
}

* Labor effort distributions, followed by the income distribution (incwage)
foreach numvar in wkswork1 wkswork2 uhrswork hrswork2 incwage {
    histogram `numvar'
}
************************************************************************************************************************
* Checks for etlImputeClean.do
* Loads masterImputedFiltered and repeats the record-level checks (counts, duplicate report on serial, crosstabs
* against year), then draws histograms of the imputed and prepared variables.
************************************************************************************************************************
use "masterImputedFiltered", clear

* Individual record count by year
tabulate year

* Make sure serial numbers aren't ever duplicated more than ~20 times
duplicates report serial

* Group quarters type and sex distributions, each crossed with year (year on the rows)
foreach colvar in gqtype sex {
    tabulate year `colvar'
}

* State distribution
histogram statefip

* Grade / education distributions, each crossed with year (year on the columns)
foreach rowvar in higrade educ college {
    tabulate `rowvar' year
}

* Employment distributions: one-way tables first, then histograms
foreach catvar in empstat classwkr {
    tabulate `catvar'
}
foreach codevar in occ ind1990 {
    histogram `codevar'
}

* Imputed labor distributions
foreach imputedvar in hourlyWage hrswork {
    histogram `imputedvar'
}

* Prepared variable distributions
foreach preparedvar in task_abstract task_routine task_manual task_offshorability d_rpc {
    histogram `preparedvar'
}
************************************************************************************************************************
* Checks for occPercentiles.do
* For each wage-percentile dataset, draw histograms of percentile and percentileFraction.
************************************************************************************************************************
foreach pctData in occ1990ddWagePercentiles occWagePercentiles {
    use `pctData', clear
    foreach pctVar in percentile percentileFraction {
        histogram `pctVar'
    }
}
************************************************************************************************************************
* Checks for occRti.do
* For each RTI dataset, tabulate rtiIntensive and draw histograms of rti, the three task measures, and
* weightedLabor. The same sequence runs on occ1990ddRti and then on occRti.
************************************************************************************************************************
foreach rtiData in occ1990ddRti occRti {
    use `rtiData', clear
    tabulate rtiIntensive
    foreach rtiVar in rti task_routine task_manual task_abstract weightedLabor {
        histogram `rtiVar'
    }
}
************************************************************************************************************************
* Checks for czPopulation.do
* Draw a histogram of population for each czPopulation_* dataset, in the order all, college, noCollege.
************************************************************************************************************************
foreach popGroup in all college noCollege {
    use czPopulation_`popGroup', clear
    histogram population
}
************************************************************************************************************************
* Checks for the czState dataset
* NOTE(review): this section was labelled "czPopulation.do", the same label as the section before it, yet it loads
* czState -- confirm which do-file builds czState and name it here.
************************************************************************************************************************
use "czState", clear
* One-way frequency table of statefip
tabulate statefip
************************************************************************************************************************
* Checks for czRtiShares.do
* For each czRti_* dataset (all, college, noCollege, in that order), draw a histogram of rtiShare and tabulate
* highRtiShare.
************************************************************************************************************************
foreach shareGroup in all college noCollege {
    use czRti_`shareGroup', clear
    histogram rtiShare
    tabulate highRtiShare
}
************************************************************************************************************************
* Checks for czIV.do
* Draw a histogram of rtiShareIV from the czRtiIV dataset.
************************************************************************************************************************
use "czRtiIV", clear
histogram rtiShareIV