added dataseeder and run shell script
Amanda Eames authored and committed Apr 29, 2024
1 parent df48b95 commit 3bc97d1
Showing 4 changed files with 113 additions and 30 deletions.
73 changes: 73 additions & 0 deletions global-api/import_argentiniandatasets.sh
@@ -0,0 +1,73 @@
#!/bin/bash

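# Import the Argentinian datasets (currently the CAMMESA data) and the datasource seeder into the Global API database.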
if command -v python3 &>/dev/null; then
python_cmd=python3
else
python_cmd=python
fi

export PGPASSWORD=$CC_GLOBAL_API_DB_PASSWORD
export DB_URI="postgresql://$CC_GLOBAL_API_DB_USER:$CC_GLOBAL_API_DB_PASSWORD@$CC_GLOBAL_API_DB_HOST/$CC_GLOBAL_API_DB_NAME"

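# Local development defaults (uncomment to run against a local Postgres instance):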
# export DB_URI="postgresql://ccglobal:@localhost/ccglobal"
# export CC_GLOBAL_API_DB_HOST="localhost"
# export CC_GLOBAL_API_DB_USER="ccglobal"
# export CC_GLOBAL_API_DB_NAME="ccglobal"

# Argentinian

# load cammesa

pushd importer/argentinian_datasets/cammesa/

$python_cmd ./transformation_cammesa.py --filepath ./ --database_uri $DB_URI

psql -h $CC_GLOBAL_API_DB_HOST \
-U $CC_GLOBAL_API_DB_USER \
-d $CC_GLOBAL_API_DB_NAME \
-f load_cammesa.sql

popd

# Import datasources

pushd importer/datasource_seeder
psql -h $CC_GLOBAL_API_DB_HOST \
-U $CC_GLOBAL_API_DB_USER \
-d $CC_GLOBAL_API_DB_NAME \
-f ./import_datasource_seeder.sql
popd
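A hedged note (not part of the commit): a minimal way to run the script above, assuming it is executed from the global-api directory against a Postgres database that already contains the regionwide_emissions and datasource tables. The variable values below are illustrative; only the names come from the script.

export CC_GLOBAL_API_DB_HOST="localhost"
export CC_GLOBAL_API_DB_USER="ccglobal"
export CC_GLOBAL_API_DB_NAME="ccglobal"
export CC_GLOBAL_API_DB_PASSWORD=""
bash import_argentiniandatasets.sh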
55 changes: 26 additions & 29 deletions global-api/importer/argentinian_datasets/cammesa/load_cammesa.sql
@@ -1,33 +1,30 @@
-- Create a staging table
CREATE TEMP TABLE IF NOT EXISTS region_code_staging (LIKE regionwide_emissions INCLUDING ALL);

-- Clear the staging table
TRUNCATE region_code_staging;

-- Load the staging table from the downloaded file
\copy region_code_staging (id,source_name,"GPC_refno",region_name,region_code,temporal_granularity,year,activity_name,activity_value,activity_units,gas_name,emission_factor_value,emission_factor_units,emissions_value,emissions_units) FROM 'processed_cammesa_AR.csv' WITH (FORMAT CSV, HEADER);
-- The ID column is not unique based on the processed records,
-- we have multiple activity records for a single region_code, year, gas_name, GPC_refno
-- rather than upsert, we will just delete existing source data and insert fresh with a generated id to make each record unique
-- the route for regions will need to be aggregated over region_code, year, gas_name, GPC_refno to get accurate emissions values
DELETE FROM regionwide_emissions WHERE source_name = 'cammesa';

-- Update the main table with the staging table
INSERT INTO regionwide_emissions (id,source_name,"GPC_refno",region_name,region_code,temporal_granularity,year,activity_name,activity_value,activity_units,gas_name,emission_factor_value,emission_factor_units,emissions_value,emissions_units)
SELECT id,source_name,"GPC_refno",region_name,region_code,temporal_granularity,year,activity_name,activity_value,activity_units,gas_name,emission_factor_value,emission_factor_units,emissions_value,emissions_units
FROM region_code_staging
ON CONFLICT ON CONSTRAINT regionwide_emissions_pkey
DO UPDATE SET
id = excluded.id,
source_name = excluded.source_name,
"GPC_refno" = excluded."GPC_refno",
region_name = excluded.region_name,
region_code = excluded.region_code,
temporal_granularity = excluded.temporal_granularity,
year = excluded.year,
activity_name = excluded.activity_name,
activity_value = excluded.activity_value,
activity_units = excluded.activity_units,
gas_name = excluded.gas_name,
emission_factor_value = excluded.emission_factor_value,
emission_factor_units = excluded.emission_factor_units,
emissions_value = excluded.emissions_value,
emissions_units = excluded.emissions_units;
INSERT INTO regionwide_emissions (
id,source_name,"GPC_refno",region_name,region_code,temporal_granularity,year,activity_name,activity_value,
activity_units,gas_name,emission_factor_value,emission_factor_units,emissions_value,emissions_units
)
SELECT gen_random_uuid() as id,
source_name,
"GPC_refno",
region_name,
region_code,
temporal_granularity,
year,
activity_name,
activity_value,
activity_units,
gas_name,
emission_factor_value,
emission_factor_units,
emissions_value,
emissions_units
FROM cammesa_region_emissions_staging;

-- Drop the staging table
DROP TABLE region_code_staging;
DROP TABLE cammesa_region_emissions_staging;
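A hedged sketch (not part of the commit) of the aggregation the comment above describes: because several activity rows can now share a region_code, year, gas_name and GPC_refno, the regions route would need to sum emissions over those keys. Table and column names are taken from the INSERT above; the psql flags mirror import_argentiniandatasets.sh, and the assumption that units are uniform per group is mine.

psql -h "$CC_GLOBAL_API_DB_HOST" -U "$CC_GLOBAL_API_DB_USER" -d "$CC_GLOBAL_API_DB_NAME" <<'SQL'
-- Aggregate cammesa emissions per region, year, gas and GPC reference number
SELECT region_code,
       year,
       gas_name,
       "GPC_refno",
       SUM(emissions_value) AS emissions_value,
       MAX(emissions_units) AS emissions_units  -- assumes one unit per group
FROM regionwide_emissions
WHERE source_name = 'cammesa'
GROUP BY region_code, year, gas_name, "GPC_refno";
SQL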
14 changes: 13 additions & 1 deletion global-api/importer/argentinian_datasets/cammesa/transformation_cammesa.py
@@ -2,6 +2,7 @@
import argparse
import uuid
import os
from sqlalchemy import create_engine

def uuid_generate_v3(name, namespace=uuid.NAMESPACE_OID):
"""generate a version 3 UUID from namespace and name"""
@@ -12,6 +13,11 @@ def uuid_generate_v3(name, namespace=uuid.NAMESPACE_OID):
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--filepath", help="path to the files location", required=True)
parser.add_argument(
"--database_uri",
help="database URI (e.g. postgresql://ccglobal:@localhost/ccglobal)",
default=os.environ.get("DB_URI"),
)
args = parser.parse_args()
absolute_path = os.path.abspath(args.filepath)

@@ -123,4 +129,10 @@ def generate_uuid(row):
'activity_units', 'gas_name', 'emission_factor_value', 'emission_factor_units', 'emissions_value', 'emissions_units']
df = df.reindex(columns=col_order)

df.to_csv(f'{absolute_path}/processed_cammesa_AR.csv', sep=",", decimal=".", index=False)
#df.to_csv(f'{absolute_path}/processed_cammesa_AR.csv', sep=",", decimal=".", index=False)

# Create a SQLAlchemy engine
engine = create_engine(args.database_uri)

# Write the DataFrame to the database table
df.to_sql('cammesa_region_emissions_staging', engine, if_exists='replace', index=False)
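A hedged example (not part of the commit) of running the transformation step on its own, assuming the raw CAMMESA file sits in the target directory and DB_URI points at a reachable Postgres instance; --database_uri falls back to the DB_URI environment variable when omitted.

export DB_URI="postgresql://ccglobal:@localhost/ccglobal"
python3 transformation_cammesa.py --filepath ./ --database_uri "$DB_URI"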
1 change: 1 addition & 0 deletions global-api/importer/datasource_seeder/datasource_seeder.csv
@@ -48,3 +48,4 @@ de8dc6b3-6c78-4fc7-9b4a-df24a2326634,Google EIE,Google Environmental Insights Ex
fdf77b4a-5fb8-4b33-92b5-07b92f839c9b,Carbon Monitor,Carbon Monitor Cities,Carbon Monitor Cities Residential Energy,Estimation of residential energy emissions from Carbon Monitor. Carbon Monitor Cities is a global initiative to provide real-time and historical data on CO2 emissions from cities around the world.,third_party,public,https://carbonmonitor.org/,EARTH,2019,2021,2022,annual,city,en,,medium,,kg,The data is based on satellite observations of CO2 concentrations and a data-driven model to estimate emissions.,https://carbonmonitor.org/,Emissions data are matched to cities by name and ISO code for the region.,global_api,https://ccglobal.openearth.dev/api/v0/source/Carbon Monitor Cities/city/:locode/:year/:gpcReferenceNumber,I.1.1,1
e2143a90-0e5f-48fa-9a1d-85505f90b95f,Carbon Monitor,Carbon Monitor Cities,Carbon Monitor Cities On-Road Transportation,Estimation of on-road transportation emissions from Carbon Monitor. Carbon Monitor Cities is a global initiative to provide real-time and historical data on CO2 emissions from cities around the world.,third_party,public,https://carbonmonitor.org/,EARTH,2019,2021,2022,annual,city,en,,medium,,kg,The data is based on satellite observations of CO2 concentrations and a data-driven model to estimate emissions.,https://carbonmonitor.org/,Emissions data are matched to cities by name and ISO code for the region.,global_api,https://ccglobal.openearth.dev/api/v0/source/Carbon Monitor Cities/city/:locode/:year/:gpcReferenceNumber,II.1.1,1
1007a979-3c3c-4115-b61a-c85e3e39b165,Carbon Monitor,Carbon Monitor Cities,Carbon Monitor Cities Aviation,Estimation of aviation emissions from Carbon Monitor. Carbon Monitor Cities is a global initiative to provide real-time and historical data on CO2 emissions from cities around the world.,third_party,public,https://carbonmonitor.org/,EARTH,2019,2021,2022,annual,city,en,,medium,,kg,The data is based on satellite observations of CO2 concentrations and a data-driven model to estimate emissions.,https://carbonmonitor.org/,Emissions data are matched to cities by name and ISO code for the region.,global_api,https://ccglobal.openearth.dev/api/v0/source/Carbon Monitor Cities/city/:locode/:year/:gpcReferenceNumber,II.4.1,1
e81bb333-d0a0-4621-b15f-f6f0012c2a5e,cammesa,CAMMESA,Annual electricity generation in power plants by province,Local data of energy generation by power plants in Argentina,Third-party,Public,https://cammesaweb.cammesa.com/download/factor-de-emision/,AR,2020,2023,2023,annual,region,es,,high,,kg,"The report contains the behavior of the main physical and economic variables of the MEM throughout the month of analysis and its comparison with previous months; among the variables, electricity demand, energy supply, installed power, generation, fuel consumption, energy costs and prices stand out.",https://cammesaweb.cammesa.com/informes-y-estadisticas/,"The raw data was adapted to our Global API database schema and we use population as a scaling method whenever needed. Depending on the source availability and documentation, this information can be more or less disaggregated.",global_api_downscaled_by_population,https://ccglobal.openearth.dev/api/v0/source/CAMMESA/region/:region/:year/:gpcReferenceNumber,I.4.4,1
