-
Notifications
You must be signed in to change notification settings - Fork 25
/
Copy pathgenerate_import_shares.py
161 lines (135 loc) · 6.51 KB
/
generate_import_shares.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
"""
Generates import shares (fractions of imports by commodity and country)
"""
from pathlib import Path
import pandas as pd
from download_imports_data import get_imports_data
conPath = Path(__file__).parent / 'concordances'
out_Path = Path(__file__).parent / 'output'
single_country_regions = ('CA', 'MX', 'JP', 'CN')
def generate_import_shares(year, schema):
useeio_corr = get_detail_to_summary_useeio_concordance(schema=schema)
# Country imports by detail sector
imports = get_imports_data(year=year, schema=schema)
if len(imports.query('`Import Quantity` <0')) > 0:
print('WARNING: negative import values...')
imports = imports.query('`Import Quantity` >= 0').reset_index(drop=True)
if sum(imports.duplicated(['Country', 'BEA Detail'])) > 0:
print('Error calculating country coefficients by detail sector')
elec = get_electricity_imports(year)
imports = pd.concat([imports, elec],ignore_index=True)
imports = imports.merge(useeio_corr, how='left', on='BEA Detail')
imports = map_countries_to_regions(imports)
imports = calc_contribution_coefficients(imports, schema=schema)
## ^^ Country contribution coefficients by sector
imports.to_csv(out_Path / f'import_shares_{year}.csv', index=False)
def get_detail_to_summary_useeio_concordance(schema=2012):
'''
Opens crosswalk between BEA (summary & detail) and USEEIO (with and
without waste disaggregation) sectors. USEEIO Detail with Waste Disagg
and corresponding summary-level codes.
'''
path = conPath / 'useeio_internal_concordance.csv'
u_cc = (pd.read_csv(path, dtype=str)
.rename(columns={f'USEEIO_Detail_{schema}': 'BEA Detail',
'BEA_Summary': 'BEA Summary'})
)
u_c = u_cc[['BEA Detail','BEA Summary']]
u_c = u_c.drop_duplicates()
return u_c
def get_electricity_imports(year):
url = 'https://www.eia.gov/electricity/annual/xls/epa_02_14.xlsx'
sheet = 'epa_02_14'
c_map = {'Mexico':'MX','Canada':'CA'}
df = pd.read_excel(url, sheet_name=sheet,usecols=[0,1,3],
skiprows=[0,1,2,], skipfooter=1)
df.columns.values[1] = 'Canada'
df.columns.values[2] = 'Mexico'
df['Year'] = df['Year'].astype(int)
df_y = df.loc[df['Year']==year]
df_y = pd.melt(df_y, id_vars=['Year'],value_vars=['Mexico','Canada'])
df_y = df_y.rename(columns={'variable':'Country',
'value':'Import Quantity'})
df_y = (df_y.assign(Unit='MWh')
.assign(BEADetail='221100')
.assign(Source='EIA')
.rename(columns={'BEADetail':'BEA Detail'}))
df_y['CountryCode'] = df_y['Country'].map(c_map)
elec = df_y.filter(['BEA Detail', 'Year', 'Import Quantity', 'Unit',
'Source', 'Country'])
elec['Year']=elec['Year'].astype(str)
return elec
def map_countries_to_regions(df):
path = conPath / 'country_to_region_concordance.csv'
regions = (pd.read_csv(path, dtype=str,
usecols=['Country', 'Region'])
)
df = df.merge(regions, on='Country', how='left', validate='m:1')
missing = (set(df[df.isnull().any(axis=1)]['Country'])
- set(regions['Country']))
if len(missing) > 0:
print(f'WARNING: missing countries in correspondence: {missing}')
return df.dropna(subset='Region').reset_index(drop=True)
def calc_contribution_coefficients(df, schema=2012):
'''
Appends contribution coefficients to prepared dataframe.
'''
df = calc_coefficients_bea_detail(df)
df = calc_coefficients_bea_summary(df)
if not(df['cntry_cntrb_to_region_summary'].between(0,1).all() &
df['cntry_cntrb_to_region_detail'].between(0,1).all() &
df['cntry_cntrb_to_national_summary'].between(0,1).all() &
df['cntry_cntrb_to_national_detail'].between(0,1).all()):
print('ERROR: Check contribution values outside of [0-1]')
return df
def calc_coefficients_bea_summary(df):
'''
Calculate the fractional contributions of each country
to total imports by summary sector
'''
df['cntry_cntrb_to_national_summary'] =(df['Import Quantity']/
df.groupby(['BEA Summary'])
['Import Quantity']
.transform('sum'))
df['cntry_cntrb_to_region_summary'] = (df['Import Quantity']/
df.groupby(['Region',
'BEA Summary'])
['Import Quantity']
.transform('sum'))
## If no imports identified for summary code,
## where the country == region, set contribution to 1
df.loc[(df['cntry_cntrb_to_region_summary'].isna() &
(df['Region'].isin(single_country_regions))),
'cntry_cntrb_to_region_summary'] = 1
if (df['cntry_cntrb_to_region_summary'].isnull().sum()) > 0:
print('WARNING: some summary sectors missing contributions')
return df
def calc_coefficients_bea_detail(df):
'''
Calculate the fractional contributions of each country
to total imports by detail sector
'''
df['cntry_cntrb_to_national_detail'] = (df['Import Quantity']/
df.groupby(['BEA Detail'])
['Import Quantity']
.transform('sum'))
df['cntry_cntrb_to_region_detail'] = (df['Import Quantity']/
df.groupby(['Region',
'BEA Detail'])
['Import Quantity']
.transform('sum'))
## If no imports identified for detail code,
## where the country == region, set contribution to 1
## where country != region, set contribution to detail equal for all countries
df.loc[(df['cntry_cntrb_to_region_detail'].isna() &
(df['Region'].isin(single_country_regions))),
'cntry_cntrb_to_region_detail'] = 1
df.loc[(df['cntry_cntrb_to_region_detail'].isna() &
(~df['Region'].isin(single_country_regions))),
'cntry_cntrb_to_region_detail'] = (
1 / df.groupby(['Region', 'BEA Detail'])
['Country'].transform('count'))
return df
#%%
if __name__ == '__main__':
generate_import_shares(year = 2019, schema = 2017)