-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhom_eda_2.py
117 lines (90 loc) · 4.93 KB
/
hom_eda_2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Jan 1 15:31:30 2023
@author: polinarozhkova
"""
import pandas as pd
import os
import seaborn as sns
import matplotlib.pyplot as plt
path = r'/Users/polinarozhkova/Desktop/GitHub/cr_eda_chicago/'
fname = 'inputs/FOIA_2019_to_2021_Clearance_Rates_Shooting_Homicides.xlsx'
foia_hom = pd.read_excel((os.path.join(path, fname)), sheet_name=2)
final_merge_df = pd.read_csv(os.path.join(path, 'clean_data/merge_all.csv'))
cr_reports = pd.read_excel(os.path.join(path, 'inputs/CR_from_CPD_Annual_Reports_copy.xlsx'))
def load_foia_hom(df):
df_clean = df.rename(columns={'RD': 'case_number', 'HOMICIDE ID': 'id',
'INJURY DATE': 'date', 'INJURY DESCRIPTION': 'injury_type',
'DATE CLEARED': 'date_clear'})
df_clean.columns = df_clean.columns.str.lower()
df_clean = df_clean[df_clean['date'].dt.year < 2022]
df_clean.columns = df_clean.columns.str.lower()
df_clean['time_to_clear'] = df_clean['date_clear'] - df_clean['date']
df_clean['year'] = df_clean['date'].dt.year
df_clean['year_cleared'] = df_clean['date_clear'].dt.year
return df_clean
def formal_clearance(df):
df_new = df[(df['date'].dt.year >= 2001) & (df['date'].dt.year < 2022)]
df_new['time_to_clear'] = df_new['date_clear'] - df_new['date']
return df_new
foia_df = load_foia_hom(foia_hom)
# exceptional clearances 2019 - 2021
hom_year_df = pd.DataFrame(
foia_df.groupby(['year_cleared', 'year', 'cleared'])['case_number'].count()).reset_index()
clear_year_df = pd.DataFrame(
foia_df.groupby(['year_cleared', 'cleared'])['case_number'].count()).reset_index()
except_clear_year = pd.DataFrame(
foia_df.groupby(['year_cleared', 'cleared', 'cleared exceptionally'])
['case_number'].count()).reset_index()
# Cases and their clearance status
# This dataset does not include incidents that counted towards clearance rates prior to 2019
all_cleared_df = final_merge_df[final_merge_df['cleared'] == 'Y']
# race by yr
race_grouped = pd.DataFrame(final_merge_df.groupby(['year', 'race'])
['case_number'].count()).reset_index()
race_grouped
# race and sex by year
sex_grouped = pd.DataFrame(final_merge_df.groupby(['year', 'sex', 'race'])
['case_number'].count()).reset_index()
sex_grouped
sex_grouped = sex_grouped.rename(columns={'case_number': 'num_occur'})
black_fem = final_merge_df[(final_merge_df['race'] == 'BLK')
& (final_merge_df['sex'] == 'F')]
black_fem_counts = pd.DataFrame(black_fem.groupby(['cleared'])
['case_number'].count()).reset_index()
white_fem = final_merge_df[(final_merge_df['race'] == 'WHI')
& (final_merge_df['sex'] == 'F')]
white_fem_counts = pd.DataFrame(white_fem.groupby(['cleared'])
['case_number'].count()).reset_index()
white_male = final_merge_df[(final_merge_df['race'] == 'WHI')
& (final_merge_df['sex'] == 'M')]
white_male_counts = pd.DataFrame(white_male.groupby(['cleared', 'year_cleared'])
['case_number'].count()).reset_index()
black_male = final_merge_df[(final_merge_df['race'] == 'BLK')
& (final_merge_df['sex'] == 'M')]
blk_male_counts = pd.DataFrame(black_male.groupby(['cleared', 'year_cleared'])
['case_number'].count()).reset_index()
wwh_male = final_merge_df[(final_merge_df['race'] == 'WWH')
& (final_merge_df['sex'] == 'M')]
wwh_male_counts = pd.DataFrame(wwh_male.groupby(['cleared', 'year_cleared'])
['case_number'].count()).reset_index()
wwh_fem = final_merge_df[(final_merge_df['race'] == 'WWH')
& (final_merge_df['sex'] == 'F')]
wwh_fem_counts = pd.DataFrame(wwh_fem.groupby(['cleared', 'year_cleared'])
['case_number'].count()).reset_index()
api_fem = final_merge_df[(final_merge_df['race'] == 'API')
& (final_merge_df['sex'] == 'F')]
api_fem_counts = pd.DataFrame(api_fem.groupby(['cleared', 'year_cleared'])
['case_number'].count()).reset_index()
api_male = final_merge_df[(final_merge_df['race'] == 'API')
& (final_merge_df['sex'] == 'M')]
api_male_counts = pd.DataFrame(api_male.groupby(['cleared', 'year_cleared'])
['case_number'].count()).reset_index()
# race cleared by year -- incidents occurring between 2001 and 2021
race_cleared_grouped = pd.DataFrame(all_cleared_df.groupby(['year_cleared', 'year', 'race'])
['case_number'].count()).reset_index()
race_cleared_grouped
sex_cleared_grouped = pd.DataFrame(all_cleared_df.groupby(['year_cleared', 'year', 'sex', 'race'])
['case_number'].count()).reset_index()
sex_cleared_grouped