diff --git a/setup.py b/setup.py
index 4b7c5e275..ce9e4b177 100755
--- a/setup.py
+++ b/setup.py
@@ -89,7 +89,8 @@ def read(*names, **kwargs):
         "psycopg2",
         "sqlalchemy",
         "geopandas",
-        "disaggregator @ git+https://github.com/openego/disaggregator.git@features/pip_install"
+        "disaggregator @ git+https://github.com/openego/disaggregator.git@features/pip_install",
+        "importlib-resources"
         # eg: 'aspectlib==1.1.1', 'six>=1.7',
     ],
     extras_require={
diff --git a/src/egon/data/airflow/dags/pipeline.py b/src/egon/data/airflow/dags/pipeline.py
index 953f70b16..a478720e0 100644
--- a/src/egon/data/airflow/dags/pipeline.py
+++ b/src/egon/data/airflow/dags/pipeline.py
@@ -4,6 +4,8 @@
 import airflow
 import os
 
+from importlib_resources import files
+
 from egon.data.airflow.tasks import initdb
 from egon.data.db import airflow_db_connection
 import egon.data.importing.openstreetmap as import_osm
@@ -12,6 +14,7 @@
 import egon.data.processing.openstreetmap as process_osm
 import egon.data.importing.zensus as import_zs
+
 
 # Prepare connection to db for operators
 airflow_db_connection()
 
@@ -20,8 +23,7 @@
     description="The eGo^N data processing DAG.",
     default_args={"start_date": days_ago(1)},
     template_searchpath=[
-        os.path.abspath(os.path.join(os.path.dirname(
-            __file__ ), '..', '..', 'processing', 'vg250'))
+        files('egon.data.processing').joinpath('vg250')
     ],
     is_paused_upon_creation=False,
     schedule_interval=None,
diff --git a/src/egon/data/airflow/tasks.py b/src/egon/data/airflow/tasks.py
index c65a9b9d1..050303f2d 100644
--- a/src/egon/data/airflow/tasks.py
+++ b/src/egon/data/airflow/tasks.py
@@ -1,10 +1,11 @@
 import os.path
 import socket
 
+from importlib_resources import files
+
 from egon.data.db import credentials
 import egon.data.subprocess as subprocess
-
 def initdb():
     """ Initialize the local database used for data processing.
     """
     db = credentials()
@@ -13,5 +14,5 @@ def initdb():
     if code != 0:
         subprocess.run(
             ["docker-compose", "up", "-d", "--build"],
-            cwd=os.path.dirname(__file__),
+            cwd=files('egon.data.airflow'),
         )
diff --git a/src/egon/data/cli.py b/src/egon/data/cli.py
index 0677d51e3..8d534700c 100644
--- a/src/egon/data/cli.py
+++ b/src/egon/data/cli.py
@@ -19,13 +19,13 @@
 import os.path
 import subprocess
 
+from importlib_resources import files
 import click
 import yaml
 
 import egon.data
 import egon.data.airflow
-
 
 @click.command(
     add_help_option=False,
     context_settings=dict(allow_extra_args=True, ignore_unknown_options=True),
@@ -85,7 +85,7 @@ def serve(context):
 @click.version_option(version=egon.data.__version__)
 @click.pass_context
 def main(context, **kwargs):
-    os.environ["AIRFLOW_HOME"] = os.path.dirname(egon.data.airflow.__file__)
+    os.environ["AIRFLOW_HOME"] = str(files(egon.data.airflow))
     translations = {
         "database": "POSTGRES_DB",
         "database_password": "POSTGRES_PASSWORD",
diff --git a/src/egon/data/config.py b/src/egon/data/config.py
index 248c19384..ffe60785f 100644
--- a/src/egon/data/config.py
+++ b/src/egon/data/config.py
@@ -1,6 +1,7 @@
 import os
 
 import yaml
+from importlib_resources import files
 
 import egon
 
@@ -24,7 +25,7 @@ def datasets(config_file=None):
     """
 
     if not config_file:
-        package_path = egon.data.__path__[0]
+        package_path = files('egon.data')
         config_file = os.path.join(package_path, "datasets.yml")
 
     return yaml.load(open(config_file), Loader=yaml.SafeLoader)
diff --git a/src/egon/data/db.py b/src/egon/data/db.py
index cb3b66e93..684354e93 100644
--- a/src/egon/data/db.py
+++ b/src/egon/data/db.py
@@ -3,6 +3,7 @@
 
 from sqlalchemy import create_engine, text
 import yaml
+from importlib_resources import files
 
 import egon
 
@@ -16,7 +17,7 @@ def credentials():
         Complete DB connection information
     """
     # Read database configuration from docker-compose.yml
-    package_path = egon.data.__path__[0]
+    package_path = files(egon.data)
     docker_compose_file = os.path.join(
         package_path, "airflow", "docker-compose.yml"
     )
diff --git a/src/egon/data/importing/__init__.py b/src/egon/data/importing/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/egon/data/importing/openstreetmap/__init__.py b/src/egon/data/importing/openstreetmap/__init__.py
index 7af8cb427..1df08ed30 100644
--- a/src/egon/data/importing/openstreetmap/__init__.py
+++ b/src/egon/data/importing/openstreetmap/__init__.py
@@ -14,9 +14,13 @@
 import os
 import time
 
+from importlib_resources import files
+
 from egon.data import db
 import egon.data.config
 import egon.data.subprocess as subprocess
+import egon.data.importing.openstreetmap as import_openstreetmap
+
 
 
 def download_pbf_file():
@@ -25,7 +29,7 @@
     osm_config = data_config["openstreetmap"]["original_data"]
 
     target_file = os.path.join(
-        os.path.dirname(__file__), osm_config["target"]["path"]
+        files(import_openstreetmap), osm_config["target"]["path"]
     )
 
     if not os.path.isfile(target_file):
@@ -50,7 +54,7 @@
     data_config = egon.data.config.datasets()
     osm_config = data_config["openstreetmap"]["original_data"]
     input_file = os.path.join(
-        os.path.dirname(__file__), osm_config["target"]["path"]
+        files(import_openstreetmap), osm_config["target"]["path"]
     )
 
     # Prepare osm2pgsql command
@@ -74,7 +78,7 @@
         " ".join(cmd),
         shell=True,
         env={"PGPASSWORD": docker_db_config["POSTGRES_PASSWORD"]},
-        cwd=os.path.dirname(__file__),
+        cwd=files(import_openstreetmap),
     )
 
 
diff --git a/src/egon/data/importing/vg250.py b/src/egon/data/importing/vg250.py
index 842300c7b..da84d707e 100644
--- a/src/egon/data/importing/vg250.py
+++ b/src/egon/data/importing/vg250.py
@@ -15,18 +15,20 @@
 
 from geoalchemy2 import Geometry
 import geopandas as gpd
+from importlib_resources import files
 
 from egon.data import db
 import egon.data.config
+
 
 def download_vg250_files():
     """Download VG250 (Verwaltungsgebiete) shape
     files."""
     data_config = egon.data.config.datasets()
     vg250_config = data_config["vg250"]["original_data"]
 
     target_file = os.path.join(
-        os.path.dirname(__file__), vg250_config["target"]["path"]
+        files(egon.data.importing), vg250_config["target"]["path"]
     )
 
     if not os.path.isfile(target_file):
@@ -44,7 +46,7 @@
     db.execute_sql(f"CREATE SCHEMA IF NOT EXISTS {vg250_processed['schema']};")
 
     zip_file = os.path.join(
-        os.path.dirname(__file__), vg250_orig["target"]["path"]
+        files(egon.data.importing), vg250_orig["target"]["path"]
     )
 
     engine_local_db = db.engine()
diff --git a/src/egon/data/importing/zensus/__init__.py b/src/egon/data/importing/zensus/__init__.py
index cf85d0337..2f3888df3 100755
--- a/src/egon/data/importing/zensus/__init__.py
+++ b/src/egon/data/importing/zensus/__init__.py
@@ -5,8 +5,11 @@
 import os
 import zipfile
 
+from importlib_resources import files
+
 from egon.data import db, subprocess
 import egon.data.config
+import egon.data.importing.zensus as import_zensus
 
 
 def download_zensus_pop():
@@ -17,7 +20,7 @@
     ]
 
     target_file = os.path.join(
-        os.path.dirname(__file__), zensus_population_config["target"]["path"]
+        files(import_zensus), zensus_population_config["target"]["path"]
     )
 
     if not os.path.isfile(target_file):
@@ -39,7 +42,7 @@
     url_path_map = list(zip(zensus_url, zensus_path))
 
     for url, path in url_path_map:
-        target_file_misc = os.path.join(os.path.dirname(__file__), path)
+        target_file_misc = os.path.join(files(import_zensus), path)
         if not os.path.isfile(target_file_misc):
             urlretrieve(url, target_file_misc)
 
@@ -110,7 +113,7 @@
     zensus_population_orig = data_config["zensus_population"]["original_data"]
     zensus_population_processed = data_config["zensus_population"]["processed"]
     input_file = os.path.join(
-        os.path.dirname(__file__), zensus_population_orig["target"]["path"]
+        files(import_zensus), zensus_population_orig["target"]["path"]
    )
 
     # Read database configuration from docker-compose.yml
@@ -184,7 +187,7 @@
 
     for input_file, table in zensus_misc_processed["path_table_map"].items():
         with zipfile.ZipFile(os.path.join(
-            os.path.dirname(__file__), input_file)) as zf:
+            files(import_zensus), input_file)) as zf:
             csvfiles = [n for n in zf.namelist() if n.lower()[-3:] == "csv"]
             for filename in csvfiles:
                 zf.extract(filename)