From 3033cf896c282d4cb4754259bce704be134950de Mon Sep 17 00:00:00 2001 From: "thomas.grandjean" Date: Fri, 27 Oct 2023 00:46:02 +0200 Subject: [PATCH] Fix preprocessing with new paths --- cartiflette/download/download.py | 2 +- cartiflette/public/client.py | 9 ++++++++- cartiflette/public/output.py | 5 +++-- cartiflette/s3/preprocess.py | 13 ++++++++++--- cartiflette/utils/create_path_bucket.py | 1 + 5 files changed, 23 insertions(+), 7 deletions(-) diff --git a/cartiflette/download/download.py b/cartiflette/download/download.py index 41a1aade..5e6b052d 100644 --- a/cartiflette/download/download.py +++ b/cartiflette/download/download.py @@ -98,7 +98,7 @@ def _upload_raw_dataset_to_s3( "dataset_family": layer.dataset_family, "source": layer.source, "territory": layer.territory, - "simplification": None, + "simplification": 0, "filename": rename_basename, } ) diff --git a/cartiflette/public/client.py b/cartiflette/public/client.py index df4969f5..cb3cc46e 100644 --- a/cartiflette/public/client.py +++ b/cartiflette/public/client.py @@ -175,7 +175,9 @@ def get_cog_year( "fs": fs, "type_download": type_download, } - dict_cog[level] = download_file_single(**kwargs_cartiflette, **kwargs_requests) + dict_cog[level] = download_file_single( + **kwargs_cartiflette, **kwargs_requests + ) return dict_cog @@ -188,6 +190,7 @@ def get_vectorfile_ign( crs: str = "*", filter_by: str = "origin", value: str = "raw", + simplification: int = 0, bucket=cartiflette.BUCKET, path_within_bucket=cartiflette.PATH_WITHIN_BUCKET, type_download: str = "https", @@ -244,6 +247,7 @@ def get_vectorfile_ign( "file_format": "GPKG", "fs": fs, "type_download": type_download, + "simplification": 0, } gdf = download_file_single(**kwargs, **kwargs_requests) return gdf @@ -330,6 +334,7 @@ def get_vectorfile_communes_arrondissement( "filename": "COMMUNE_ARRONDISSEMENTS_MUNICIPAUX.gpkg", "fs": fs, "type_download": type_download, + "simplification": 0, } gdf = download_file_single(**kwargs, **kwargs_requests) return gdf @@ -361,6 +366,7 @@ def get_living_area_commune( "territory": "france_entiere", "filename": "bassins_vie.gpkg", "fs": fs, + "simplification": 0, } gdf = download_file_single(**kwargs, **kwargs_requests) return gdf @@ -391,6 +397,7 @@ def get_living_area( "territory": "france_entiere", "filename": "bassins_vie.gpkg", "fs": fs, + "simplification": 0, } gdf = download_file_single(**kwargs, **kwargs_requests) return gdf diff --git a/cartiflette/public/output.py b/cartiflette/public/output.py index 4c52f3bf..4c1ce936 100644 --- a/cartiflette/public/output.py +++ b/cartiflette/public/output.py @@ -16,6 +16,7 @@ logger = logging.getLogger(__name__) + def download_file_single( year: typing.Union[str, int], borders: str, @@ -25,9 +26,9 @@ def download_file_single( file_format: str, provider: str, source: str, - simplication: typing.Union[str, int, float] = 0, dataset_family: str, territory: str, + simplication: typing.Union[str, int, float] = 0, filename: str = "*", type_download: str = "https", fs: s3fs.S3FileSystem = cartiflette.FS, @@ -130,7 +131,7 @@ def download_file_single( "dataset_family": dataset_family, "source": source, "year": year, - "administrative_level": borders, + "borders": borders, "crs": crs, "filter_by": filter_by, "value": value, diff --git a/cartiflette/s3/preprocess.py b/cartiflette/s3/preprocess.py index e0e3bfd5..b81c1f4c 100644 --- a/cartiflette/s3/preprocess.py +++ b/cartiflette/s3/preprocess.py @@ -11,17 +11,18 @@ from pebble import ThreadPool import s3fs import tempfile -from typing import TypedDict from cartiflette import BUCKET, PATH_WITHIN_BUCKET, FS, THREADS_DOWNLOAD from cartiflette.utils import magic_csv_reader, create_path_bucket -from cartiflette.public import get_vectorfile_ign, download_file_single +from cartiflette.public import download_file_single logger = logging.getLogger(__name__) # TODO : docstrings +logger.error("preprocessing des bassins de vie pose pb, manque Paris") + def store_cog_year( year: int = None, @@ -103,6 +104,7 @@ def store_cog_year( "provider": "cartiflette", "dataset_family": "COG", "source": source, + "simplification": 0, "territory": "france_entiere", "filename": f"{source}.{ext}", } @@ -221,6 +223,7 @@ def store_cog_ign( "source": source, "territory": territory if territory != "*" else "france_entiere", "filename": f"{borders}.gpkg", + "simplification": 0, } path = create_path_bucket(config=config_dict) with fs.open(path, "wb") as f: @@ -341,6 +344,7 @@ def store_vectorfile_communes_arrondissement( "source": "COMMUNE_ARRONDISSEMENTS_MUNICIPAUX", "territory": "france_entiere", "filename": "COMMUNE_ARRONDISSEMENTS_MUNICIPAUX.gpkg", + "simplification": 0, } path = create_path_bucket(config=config_dict) with fs.open(path, "wb") as f: @@ -481,6 +485,7 @@ def store_living_area( "dataset_family": f"bassins-vie-{bv_source.split('_')[-1]}", "source": "BV", "filename": "bassins_vie.gpkg", + "simplification": 0, } path = create_path_bucket(config=config_dict) with fs.open(path, "wb") as f: @@ -506,6 +511,7 @@ def store_living_area( "source": "BV", "territory": "france_entiere", "filename": "bassins_vie.gpkg", + "simplification": 0, } path = create_path_bucket(config=config_dict) with fs.open(path, "wb") as f: @@ -611,4 +617,5 @@ def preprocess_pipeline( if __name__ == "__main__": logging.basicConfig(level=logging.INFO) - preprocess_pipeline() + # preprocess_pipeline() + store_living_area(year=2022) diff --git a/cartiflette/utils/create_path_bucket.py b/cartiflette/utils/create_path_bucket.py index 37c27abf..985bb8cc 100644 --- a/cartiflette/utils/create_path_bucket.py +++ b/cartiflette/utils/create_path_bucket.py @@ -23,6 +23,7 @@ class ConfigDict(TypedDict): dataset_family: str territory: str filename: str + simplification: int def create_path_bucket(config: ConfigDict) -> str: