From 3bc97d1c5b0903be59cda4f113a248bf70c7d0e5 Mon Sep 17 00:00:00 2001 From: Amanda Eames Date: Tue, 30 Apr 2024 09:56:27 +1000 Subject: [PATCH] added dataseeder and run shell script --- global-api/import_argentiniandatasets.sh | 73 +++++++++++++++++++ .../cammesa/load_cammesa.sql | 55 +++++++------- .../cammesa/transformation_cammesa.py | 14 +++- .../datasource_seeder/datasource_seeder.csv | 1 + 4 files changed, 113 insertions(+), 30 deletions(-) create mode 100755 global-api/import_argentiniandatasets.sh diff --git a/global-api/import_argentiniandatasets.sh b/global-api/import_argentiniandatasets.sh new file mode 100755 index 000000000..ffbac4b4e --- /dev/null +++ b/global-api/import_argentiniandatasets.sh @@ -0,0 +1,73 @@ +#!/bin/bash + +if command -v python3 &>/dev/null; then + python_cmd=python3 +else + python_cmd=python +fi + +export PGPASSWORD=$CC_GLOBAL_API_DB_PASSWORD +export DB_URI="postgresql://$CC_GLOBAL_API_DB_USER:$CC_GLOBAL_API_DB_PASSWORD@$CC_GLOBAL_API_DB_HOST/$CC_GLOBAL_API_DB_NAME" + +# export DB_URI="postgresql://ccglobal:@localhost/ccglobal" +# export CC_GLOBAL_API_DB_HOST="localhost" +# export CC_GLOBAL_API_DB_USER="ccglobal" +# export CC_GLOBAL_API_DB_NAME="ccglobal" + +# Argentinian + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +# load cammesa + +pushd importer/argentinian_datasets/cammesa/ + +$python_cmd ./transformation_cammesa.py --filepath ./ --database_uri $DB_URI + +psql -h $CC_GLOBAL_API_DB_HOST \ + -U $CC_GLOBAL_API_DB_USER \ + -d $CC_GLOBAL_API_DB_NAME \ + -f load_cammesa.sql + +popd + +# Import datasources + +pushd importer/datasource_seeder +psql -h $CC_GLOBAL_API_DB_HOST \ + -U $CC_GLOBAL_API_DB_USER \ + -d $CC_GLOBAL_API_DB_NAME \ + -f ./import_datasource_seeder.sql +popd \ No newline at end of file diff --git a/global-api/importer/argentinian_datasets/cammesa/load_cammesa.sql b/global-api/importer/argentinian_datasets/cammesa/load_cammesa.sql index d673b844d..0b005d109 100644 --- 
a/global-api/importer/argentinian_datasets/cammesa/load_cammesa.sql +++ b/global-api/importer/argentinian_datasets/cammesa/load_cammesa.sql @@ -1,33 +1,30 @@ --- Create a staging table -CREATE TEMP TABLE IF NOT EXISTS region_code_staging (LIKE regionwide_emissions INCLUDING ALL); - --- Clear the staging table -TRUNCATE region_code_staging; - --- Load the staging table from the downloaded file -\copy region_code_staging (id,source_name,"GPC_refno",region_name,region_code,temporal_granularity,year,activity_name,activity_value,activity_units,gas_name,emission_factor_value,emission_factor_units,emissions_value,emissions_units) FROM 'processed_cammesa_AR.csv' WITH (FORMAT CSV, HEADER); +-- The ID column is not unique based on the processed records, +-- we have multiple activity records for single region_code, year, gas_name, GPC_refno +-- rather than upsert we will just delete existing source data and insert fresh with generated id to make record unique +-- the route for regions will need to be aggregated over region_code, year, gas_name, GPC_refno to get accurate emissions values +DELETE FROM regionwide_emissions WHERE source_name = 'cammesa'; -- Update the main table with the staging table -INSERT INTO regionwide_emissions (id,source_name,"GPC_refno",region_name,region_code,temporal_granularity,year,activity_name,activity_value,activity_units,gas_name,emission_factor_value,emission_factor_units,emissions_value,emissions_units) - SELECT id,source_name,"GPC_refno",region_name,region_code,temporal_granularity,year,activity_name,activity_value,activity_units,gas_name,emission_factor_value,emission_factor_units,emissions_value,emissions_units - FROM region_code_staging - ON CONFLICT ON CONSTRAINT regionwide_emissions_pkey - DO UPDATE SET - id = excluded.id, - source_name = excluded.source_name, - "GPC_refno" = excluded."GPC_refno", - region_name = excluded.region_name, - region_code = excluded.region_code, - temporal_granularity = excluded.temporal_granularity, - year = 
excluded.year, - activity_name = excluded.activity_name, - activity_value = excluded.activity_value, - activity_units = excluded.activity_units, - gas_name = excluded.gas_name, - emission_factor_value = excluded.emission_factor_value, - emission_factor_units = excluded.emission_factor_units, - emissions_value = excluded.emissions_value, - emissions_units = excluded.emissions_units; +INSERT INTO regionwide_emissions ( + id,source_name,"GPC_refno",region_name,region_code,temporal_granularity,year,activity_name,activity_value, + activity_units,gas_name,emission_factor_value,emission_factor_units,emissions_value,emissions_units + ) +SELECT gen_random_uuid() as id, + source_name, + "GPC_refno", + region_name, + region_code, + temporal_granularity, + year, + activity_name, + activity_value, + activity_units, + gas_name, + emission_factor_value, + emission_factor_units, + emissions_value, + emissions_units +FROM cammesa_region_emissions_staging; -- Drop the staging table -DROP TABLE region_code_staging; \ No newline at end of file +DROP TABLE cammesa_region_emissions_staging; \ No newline at end of file diff --git a/global-api/importer/argentinian_datasets/cammesa/transformation_cammesa.py b/global-api/importer/argentinian_datasets/cammesa/transformation_cammesa.py index 3e3ef6a75..315f86fdd 100644 --- a/global-api/importer/argentinian_datasets/cammesa/transformation_cammesa.py +++ b/global-api/importer/argentinian_datasets/cammesa/transformation_cammesa.py @@ -2,6 +2,7 @@ import argparse import uuid import os +from sqlalchemy import create_engine def uuid_generate_v3(name, namespace=uuid.NAMESPACE_OID): """generate a version 3 UUID from namespace and name""" @@ -12,6 +13,11 @@ def uuid_generate_v3(name, namespace=uuid.NAMESPACE_OID): if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--filepath", help="path to the files location", required=True) + parser.add_argument( + "--database_uri", + help="database URI (e.g. 
postgresql://ccglobal:@localhost/ccglobal)", + default=os.environ.get("DB_URI"), + ) args = parser.parse_args() absolute_path = os.path.abspath(args.filepath) @@ -123,4 +129,10 @@ def generate_uuid(row): 'activity_units', 'gas_name', 'emission_factor_value', 'emission_factor_units', 'emissions_value', 'emissions_units'] df = df.reindex(columns=col_order) - df.to_csv(f'{absolute_path}/processed_cammesa_AR.csv', sep=",", decimal=".", index=False) \ No newline at end of file + #df.to_csv(f'{absolute_path}/processed_cammesa_AR.csv', sep=",", decimal=".", index=False) + + # Create a SQLAlchemy engine + engine = create_engine(args.database_uri) + + # Write the DataFrame to the database table + df.to_sql('cammesa_region_emissions_staging', engine, if_exists='replace', index=False) diff --git a/global-api/importer/datasource_seeder/datasource_seeder.csv b/global-api/importer/datasource_seeder/datasource_seeder.csv index 0b24302a7..a197a08aa 100644 --- a/global-api/importer/datasource_seeder/datasource_seeder.csv +++ b/global-api/importer/datasource_seeder/datasource_seeder.csv @@ -48,3 +48,4 @@ de8dc6b3-6c78-4fc7-9b4a-df24a2326634,Google EIE,Google Environmental Insights Ex fdf77b4a-5fb8-4b33-92b5-07b92f839c9b,Carbon Monitor,Carbon Monitor Cities,Carbon Monitor Cities Residential Energy,Estimation of residential energy emissions from Carbon Monitor. 
Carbon Monitor Cities is a global initiative to provide real-time and historical data on CO2 emissions from cities around the world.,third_party,public,https://carbonmonitor.org/,EARTH,2019,2021,2022,annual,city,en,,medium,,kg,The data is based on satellite observations of CO2 concentrations and a data-driven model to estimate emissions.,https://carbonmonitor.org/,Emissions data are matched to cities by name and ISO code for the region.,global_api,https://ccglobal.openearth.dev/api/v0/source/Carbon Monitor Cities/city/:locode/:year/:gpcReferenceNumber,I.1.1,1 e2143a90-0e5f-48fa-9a1d-85505f90b95f,Carbon Monitor,Carbon Monitor Cities,Carbon Monitor Cities On-Road Transportation,Estimation of on-road transportation emissions from Carbon Monitor. Carbon Monitor Cities is a global initiative to provide real-time and historical data on CO2 emissions from cities around the world.,third_party,public,https://carbonmonitor.org/,EARTH,2019,2021,2022,annual,city,en,,medium,,kg,The data is based on satellite observations of CO2 concentrations and a data-driven model to estimate emissions.,https://carbonmonitor.org/,Emissions data are matched to cities by name and ISO code for the region.,global_api,https://ccglobal.openearth.dev/api/v0/source/Carbon Monitor Cities/city/:locode/:year/:gpcReferenceNumber,II.1.1,1 1007a979-3c3c-4115-b61a-c85e3e39b165,Carbon Monitor,Carbon Monitor Cities,Carbon Monitor Cities Aviation,Estimation of aviation emissions from Carbon Monitor. 
Carbon Monitor Cities is a global initiative to provide real-time and historical data on CO2 emissions from cities around the world.,third_party,public,https://carbonmonitor.org/,EARTH,2019,2021,2022,annual,city,en,,medium,,kg,The data is based on satellite observations of CO2 concentrations and a data-driven model to estimate emissions.,https://carbonmonitor.org/,Emissions data are matched to cities by name and ISO code for the region.,global_api,https://ccglobal.openearth.dev/api/v0/source/Carbon Monitor Cities/city/:locode/:year/:gpcReferenceNumber,II.4.1,1 +e81bb333-d0a0-4621-b15f-f6f0012c2a5e,cammesa,CAMMESA,Annual electricity generation in power plants by province,Local data of energy generation by power plants in Argentina,Third-party,Public,https://cammesaweb.cammesa.com/download/factor-de-emision/,AR,2020,2023,2023,annual,region,es,,high,,kg,"The report contains the behavior of the main physical and economic variables of the MEM throughout the month of analysis and its comparison with previous months; Among the variables, electricity demand, energy supply, installed power, generation, fuel consumption, energy costs and prices stand out.",https://cammesaweb.cammesa.com/informes-y-estadisticas/,"The raw data was adapted to our Global API database schema and we use population as scaling method whenever it is needed. Depending on the source availability and documentation, this information can be more or less disaggregated.",global_api_downscaled_by_population,https://ccglobal.openearth.dev/api/v0/source/CAMMESA/region/:region/:year/:gpcReferenceNumber,I.4.4,1 \ No newline at end of file