diff --git a/dataprocessing/python_scripts/household_prognosis_to_zensus_grid.py b/dataprocessing/python_scripts/household_prognosis_to_zensus_grid.py
new file mode 100644
index 00000000..97a2c79d
--- /dev/null
+++ b/dataprocessing/python_scripts/household_prognosis_to_zensus_grid.py
@@ -0,0 +1,51 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 7 13:09:54 2020

@author: Gyde
"""

import os

import numpy as np
import pandas as pd

path = os.path.join('C:\\', 'Users', 'Gyde', 'Documents', 'HS_Flensburg', 'DemandRegio')  # local storage directory


# Input: Zensus household dataset including the NUTS3 code
zensus_district_households = pd.read_csv(
    os.path.join(path, 'Haushalte100m_inkl_nuts.csv'),
    engine='python')

# Input: dataset on the household prognosis for a specific year on district level (NUTS3)
prognosis_households = pd.read_csv(os.path.join(path, 'Haushalte2050.csv'))  # source: DemandRegio

# sum up to the total number of households per district
prognosis_households['total'] = prognosis_households.sum(axis=1, numeric_only=True)

# Calculate the future households on zensus level: distribute each district's
# prognosis proportionally to the current household shares of its grid cells
zensus_households_prognosis = pd.DataFrame()
for nuts in zensus_district_households.nuts.unique():
    subset = zensus_district_households[zensus_district_households.nuts == nuts].copy()
    subset['share_of_households'] = subset['number'] / subset['number'].sum()
    total_prognosis = prognosis_households.loc[prognosis_households.nuts3 == nuts,
                                               'total'].values[0]
    subset['household_prognosis'] = subset['share_of_households'] * total_prognosis
    subset['household_prognosis_rounded'] = subset['household_prognosis'].astype(int)
    subset['rest'] = subset['household_prognosis'] - subset['household_prognosis_rounded']
    # rounding process: add the households lost by truncation one by one to the
    # cells with the largest remainder until the district total is reached
    while total_prognosis > subset['household_prognosis_rounded'].sum():
        index = np.random.choice(subset.index.values[subset.rest == max(subset.rest)])
        subset.at[index, 'household_prognosis_rounded'] += 1
        subset.at[index, 'rest'] = 0
    zensus_households_prognosis = pd.concat([zensus_households_prognosis, subset])

zensus_households_prognosis = zensus_households_prognosis[['grid_id',
                                                           'attribute',
                                                           'nuts',
                                                           'household_prognosis_rounded']]

# Export dataset

diff --git a/dataprocessing/python_scripts/nuts_code_to_zensus_household_data_set.py b/dataprocessing/python_scripts/nuts_code_to_zensus_household_data_set.py
new file mode 100644
index 00000000..07afabf4
--- /dev/null
+++ b/dataprocessing/python_scripts/nuts_code_to_zensus_household_data_set.py
@@ -0,0 +1,85 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 7 12:19:28 2020

@author: Gyde
"""

import os

import pandas as pd


def set_nuts_id(df_zensus, df_zensus_nuts):
    """
    Return the NUTS code belonging to the grid id of a zensus square.

    Parameters
    ----------
    df_zensus : pandas.Series
        Row of the zensus household dataset including the id of the
        zensus square
    df_zensus_nuts : pandas.DataFrame
        DataFrame containing the zensus grid id and the corresponding
        NUTS code of the district

    Returns
    -------
    nuts_code : str
        NUTS code of the district
    """
    grid_id = df_zensus['grid_id']
    nuts_code = df_zensus_nuts['nuts'][df_zensus_nuts['grid_id'] == grid_id].iloc[0]
    return nuts_code


path = os.path.join('C:\\', 'Users', 'Gyde', 'Documents', 'HS_Flensburg',
                    'DemandRegio')  # local storage directory

# Zensus 2011 population data including the NUTS code per grid cell (created via SQL)
zensus_district_population = pd.read_csv(
    os.path.join(path, 'Zensus_2011_Nuts_01_to_03.csv'))  # example dataset

# import the original household dataset from Zensus2011 as csv
# Available here: https://www.zensus2011.de/DE/Home/Aktuelles/DemografischeGrunddaten.html
households_zensus = pd.read_csv(os.path.join(path, 'Haushalte100m.csv'),
                                engine='python')


# rename the columns to English
households_zensus.rename(columns={'Gitter_ID_100m': 'grid_id',
                                  'Gitter_ID_100m_neu': 'grid_id_new',
                                  'Merkmal': 'attribute',
                                  'Auspraegung_Code': 'domain_code',
                                  'Auspraegung_Text': 'domain_text',
                                  'Anzahl': 'number',
                                  'Anzahl_q': 'number_q'}, inplace=True)


# Filter the household data for all raster cells that are contained in the
# zensus population dataset in order to assign the NUTS code; keep only the
# total number of households per cell (attribute 'INSGESAMT')
zensus_district_households = households_zensus[['grid_id', 'number', 'attribute']][
    (households_zensus['grid_id'].isin(zensus_district_population['grid_id']))
    & (households_zensus['attribute'] == 'INSGESAMT')].copy()


# add a column to the household dataset that contains the NUTS id of the district
zensus_district_households['nuts'] = zensus_district_households.apply(
    set_nuts_id,
    df_zensus_nuts=zensus_district_population,
    axis=1)

# export dataset
zensus_district_households.to_csv(os.path.join(path, 'Haushalte100m_inkl_nuts.csv'),
                                  index=False)

diff --git a/dataprocessing/python_scripts/population_prognosis_to_zensus_grid.py b/dataprocessing/python_scripts/population_prognosis_to_zensus_grid.py
new file mode 100644
index 00000000..c9e57edf
--- /dev/null
+++ b/dataprocessing/python_scripts/population_prognosis_to_zensus_grid.py
@@ -0,0 +1,63 @@
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 30 16:34:04 2020

@author: gthomsen
"""

import os

import pandas as pd


path = os.path.join('C:\\', 'Users', 'Gyde', 'Documents', 'HS_Flensburg', 'DemandRegio')  # local storage directory


# 1. Input: dataset on the population prognosis for a specific year on district level (NUTS3)
pop_prognosis = pd.read_csv(os.path.join(path, 'Bevölkerung2050.csv'))  # source: DemandRegio

# 2. Input: Zensus2011 population data including the NUTS3 code for the district
# (created via SQL)
zensus_district = pd.read_csv(os.path.join(path, 'Zensus_2011_Nuts_01_to_03.csv'))  # example dataset


# create two DataFrames with inhabited and empty zensus grid cells
zensus_inhabited = zensus_district[zensus_district.population != -1].copy()
zensus_empty = zensus_district[zensus_district.population == -1].copy()

# Calculate the future population on zensus level: distribute each district's
# prognosis proportionally to the current population shares of its grid cells
zensus_inhabited_prognosis = pd.DataFrame()
for nuts in zensus_inhabited.nuts.unique():
    subset = zensus_inhabited[zensus_inhabited.nuts == nuts].copy()
    subset['share_of_population'] = subset['population'] / subset['population'].sum()
    total_prognosis = pop_prognosis.loc[pop_prognosis.nuts3 == nuts, 'value'].values[0]
    subset['population_prognosis'] = subset['share_of_population'] * total_prognosis
    zensus_inhabited_prognosis = pd.concat([zensus_inhabited_prognosis, subset])

# Join the DataFrames with inhabited and empty zensus grid cells
zensus_prognosis = pd.concat([zensus_inhabited_prognosis, zensus_empty])
del zensus_prognosis['population']
del zensus_prognosis['share_of_population']
zensus_prognosis['population_prognosis'] = zensus_prognosis['population_prognosis'].fillna(-1)

# export data
\ No newline at end of file
diff --git a/dataprocessing/sql_snippets/nuts_code_to_zensus_grid.sql b/dataprocessing/sql_snippets/nuts_code_to_zensus_grid.sql
new file mode 100644
index 00000000..9d4aebcb
--- /dev/null
+++ b/dataprocessing/sql_snippets/nuts_code_to_zensus_grid.sql
@@ -0,0 +1,10 @@
-- Assign the NUTS code of the municipality (gemeinde) to every zensus grid
-- cell whose centre point lies within the municipality polygon
WITH gemeinden AS (
    SELECT ags, nuts, ST_Transform(geom, 4326) AS geom
    FROM boundaries.bkg_vg250_6_gem
),
zensus AS (
    SELECT grid_id, x_mp, y_mp, population,
           ST_Transform(geom_point, 4326) AS zensus_geom, gid
    FROM society.destatis_zensus_population_per_ha
)
SELECT gemeinden.*, zensus.*
FROM zensus, gemeinden
WHERE ST_Contains(gemeinden.geom, zensus.zensus_geom);
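
Note on the disaggregation logic: household_prognosis_to_zensus_grid.py distributes a district total over the zensus grid cells according to their current shares, truncates the results to integers and then tops up the cells with the largest remainders until the district total is reached again. The following is a minimal, self-contained sketch of that rounding step on invented toy data (the three cells and the district total of 50 are made up for illustration); it can be used to check that the rounded cell values still sum up to the district total.

import numpy as np
import pandas as pd

# toy example: current number of households in three zensus cells of one district
subset = pd.DataFrame({'grid_id': ['cell_a', 'cell_b', 'cell_c'],
                       'number': [10, 20, 30]})
total_prognosis = 50  # assumed district total for the prognosis year

subset['share_of_households'] = subset['number'] / subset['number'].sum()
subset['household_prognosis'] = subset['share_of_households'] * total_prognosis
subset['household_prognosis_rounded'] = subset['household_prognosis'].astype(int)
subset['rest'] = subset['household_prognosis'] - subset['household_prognosis_rounded']

# top up the cells with the largest remainder until the district total is reached
while total_prognosis > subset['household_prognosis_rounded'].sum():
    index = np.random.choice(subset.index.values[subset.rest == max(subset.rest)])
    subset.at[index, 'household_prognosis_rounded'] += 1
    subset.at[index, 'rest'] = 0

# the rounded values now sum up to the district total (here: 8 + 17 + 25 = 50)
assert subset['household_prognosis_rounded'].sum() == total_prognosis
print(subset[['grid_id', 'household_prognosis_rounded']])

This mirrors a largest-remainder apportionment, so truncation errors do not accumulate within a district.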