-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathread_ixi_years.py
211 lines (175 loc) · 9.21 KB
/
read_ixi_years.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 10 16:28:17 2018
@author: DanielAvila, based on Arjan de Koning's code.
README:
1. To run this code, you should have the script in a folder. Within this folder create two new folders > raw & clean.
2. In the "raw" folder, you should unzip EXIOBASE v3.4 years (folder per year), which you can download from https://exiobase.eu/.
3. Run the code.
This code process Exiobase v3.4 (industry by industry/sector by sector) as time series (1995 - 2011).
It saves, if specified, the files in a pickle format.
You will get then as result:
direct_satelliteYEAR.pkl
iotYEAR.pkl
meYEAR.pkl
mmYEAR.pkl
mmb_finalYEAR.pkl
mrYEAR.pkl
mwYEAR.pkl
myYEAR.pkl
satelliteYEAR.pkl
wYEAR.pkl
x_outSYEAR.pkl
YYEAR.pkl
"""
import os
import os.path
import csv
import pickle
import numpy as np
import pandas as pd
import gc
os.getcwd()
years = [1995, 2012]
raw_data_dir = os.path.join('..', 'data', 'raw')
clean_data_dir = os.path.join('..', 'data', 'clean')
variables = ['x_outS', 'mm'] # These variables were specifically named for the Circularity Gap Report for Circular Economy
# the second element in the list can be any of the available extensions in Exiobase, mm = materials
ind_cnt = 163 # Here you can change industry by industry, or sector by sector.
cntr_cnt = 49
def read_file(filename, separated, header, index_col):
with open(filename) as f:
reader = pd.read_csv(f, sep=separated, header=header, index_col=index_col, encoding='utf-8')
reader.astype(float)
return reader
def read_pickle(directory, filename):
""" """
reader = pd.read_pickle(directory + '/' + str(filename) + '.pkl')
return reader
def save_pickle(directory, filename, year, data):
""" """
data.to_pickle(directory + '/' + filename + str(year) + '.pkl')
def process_exiobase(raw_data_dir, clean_data_dir, year, condition=True):
""" """
iot_filename = 'A.txt'
final_demand_filename = 'Y.txt'
satellite_filename = 'F.txt'
direct_satellite_filename = 'F_hh.txt'
ind_cnt = 163 # Here you can change industry by industry, or sector by sector.
cntr_cnt = 49
factor_inputs_index = 23
emissions_index = 446
resources_index = 466
energy_index = 470
materials_index = 910
water_index = 1104
# create canonical filename
full_raw_io_data_dir = os.path.join(raw_data_dir, 'IOT_' + str(year) + '_ixi')
full_raw_ext_data_dir = os.path.join(full_raw_io_data_dir, 'satellite')
full_iot_fn = os.path.join(full_raw_io_data_dir, iot_filename)
full_final_demand_fn = os.path.join(full_raw_io_data_dir, final_demand_filename)
full_satellite_fn = os.path.join(full_raw_ext_data_dir, satellite_filename)
full_direct_satellite_fn = os.path.join(full_raw_ext_data_dir, direct_satellite_filename)
# read files
iot = read_file(full_iot_fn, '\t', [0,1], [0,1]) # A in Exiobase v3.4
final_demands = read_file(full_final_demand_fn, '\t', [0,1], [0,1])
satellite = read_file(full_satellite_fn, '\t', [0,1], [0])
direct_satellite = read_file(full_direct_satellite_fn, '\t', [0,1], [0])
# total output
y_tot = final_demands.sum(1).rename('Ytot', inplace=True)
i = np.eye(ind_cnt * cntr_cnt)
leontief = np.linalg.inv(i - iot)
x = np.dot(leontief, y_tot) # array
x_outS = pd.Series(x, index=iot.index.tolist()).rename('Xtot', inplace=True)
x_out = pd.DataFrame(x)
x_out['region'] = list(iot.index.get_level_values(0))
x_out['sector'] = list(iot.index.get_level_values(1))
# stressors
m = satellite
w = m.iloc[:factor_inputs_index, :] # value added
me = m.iloc[factor_inputs_index:emissions_index, :] # emissions
mr = m.iloc[emissions_index:resources_index, :] # resources (land)
my = m.iloc[resources_index:energy_index, :] # resources (energy)
mm = m.iloc[energy_index:materials_index, :] # resources (materials)
mw = m.iloc[materials_index:water_index, :] # resources (water)
if condition == True:
return iot, final_demands, x_out, x_outS, satellite, direct_satellite, w, me, mr, my, mm, mw
if condition == False:
save_pickle(clean_data_dir, 'iot', year, iot)
save_pickle(clean_data_dir, 'Y', year, final_demands)
save_pickle(clean_data_dir, 'x_outS', year, x_outS)
save_pickle(clean_data_dir, 'satellite', year, satellite)
save_pickle(clean_data_dir, 'direct_satellite', year, direct_satellite)
save_pickle(clean_data_dir, 'w', year, w)
save_pickle(clean_data_dir, 'me', year, me)
save_pickle(clean_data_dir, 'mr', year, mr)
save_pickle(clean_data_dir, 'my', year, my)
save_pickle(clean_data_dir, 'mm', year, mm)
save_pickle(clean_data_dir, 'mw', year, mw)
def resources_impact(data_output, data_resources, cntr_cnt, ind_cnt, clean_data_dir, year, condition=True):
""" This function reads for a given year:
- the total output (data_output)
- the environmental extension (material_resources)
- the country and sector/industry count (cntr_cnt & ind_cnt)
It returns the:
- the environmental coefficient (mmb)
- the environmental requirement per industry/sector (mmb_final)"""
inv_x = 1/data_output # Inverse matrix to calculate A, A = technical coefficient matrix
inv_x[inv_x == np.inf] =0 # It cleans all "inf" values and convert its to "0"
inv_x = np.diag(inv_x) # Diagonal of the previous matrix
# Total (grouped) resources, based on intermediate resources (kilo tonnes)
mmt = pd.DataFrame(data_resources.sum(0)).T # Sums all rows and transpose it to a column
mmi = data_resources.div(mmt.iloc[0], axis='columns') # It divide each row by the total
mmi.fillna(0, inplace=True)
mmt = np.reshape(np.array(mmt), (cntr_cnt*ind_cnt, 1)) # It reshapes the array
# Material coefficient (kt/M EUR), based on intermediate resources
mmb= pd.DataFrame(np.transpose(np.dot(np.transpose(mmt), inv_x)), index=None, columns=['Value'])
mmb_list = mmb['Value'].tolist()
mmb_final = mmi.mul(mmb_list, axis=1)
if condition == True:
return mmb, mmb_final
if condition == False:
save_pickle(clean_data_dir, 'mmb', year, mmb)
save_pickle(clean_data_dir, 'mmb_final', year, mmb_final)
def extension_impact(data_output, data_resources, cntr_cnt, ind_cnt, clean_data_dir, year, lab='', lab2='',
condition=True):
""" This function reads for a given year:
- the total output (data_output)
- the environmental extension (any)
- the country and sector/industry count (cntr_cnt & ind_cnt)
It returns the:
- the environmental coefficient vector (mxb)
- the environmental requirement per industry/sector matrix (mxb_final)"""
inv_x = 1/data_output # Inverse matrix to calculate A, A = technical coefficient matrix
inv_x[inv_x == np.inf] =0 # It cleans all "inf" values and convert its to "0"
inv_x = np.diag(inv_x) # Diagonal of the previous matrix
# Total (grouped) resources, based on intermediate resources (units)
met = pd.DataFrame(data_resources.sum(0)).T # Sums all rows and transpose it to a column
mei = data_resources.div(met.iloc[0], axis='columns') # It divide each row by the total
mei.fillna(0, inplace=True)
met = np.reshape(np.array(met), (cntr_cnt*ind_cnt, 1)) # It reshapes the array
# Extension coefficient vector (unit/M EUR), based on intermediate resources
meb= pd.DataFrame(np.transpose(np.dot(np.transpose(met), inv_x)), index=None, columns=['Value'])
meb_list = meb['Value'].tolist()
meb_final = mei.mul(meb_list, axis=1) # Extension coefficient matrix (unit/ M EUR), based on intermediate resources
if condition == True:
return meb, meb_final
if condition == False:
save_pickle(clean_data_dir, lab, year, meb)
save_pickle(clean_data_dir, lab2, year, meb_final)
def file_names(yearOne, yearTwo, variables):
""" This function creates the filenames to use for the processed files (as output)."""
filenames = []
for i in np.arange(yearOne, yearTwo):
file_x, file_mm = variables[0] + str(i) , variables[1] + str(i)
filenames.append([file_x, file_mm])
return filenames
def time_Series(yearOne, yearTwo, raw_data_dir, clean_data_dir, filenames, cntr_cnt, ind_cnt):
""" This function process all EXIOBASE as time series (from any given year to any other given year)."""
for i, j in enumerate(np.arange(yearOne, yearTwo)):
process_exiobase(raw_data_dir, clean_data_dir, j, condition=False)
x_out = read_pickle(clean_data_dir,filenames[i][0])
mm = read_pickle(clean_data_dir,filenames[i][1])
resources_impact(x_out, mm, cntr_cnt, ind_cnt, clean_data_dir, j, condition=False)
filenames = file_names(years[0], years[1], variables)
time_Series(1995, 2012, raw_data_dir, clean_data_dir, filenames, cntr_cnt, ind_cnt)