Skip to content

Commit

Permalink
Fix preprocessing with new paths
Browse files (browse the repository at this point in the history)
  • Loading branch information
tgrandje committed Oct 26, 2023
1 parent 13534f3 commit 3033cf8
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 7 deletions.
2 changes: 1 addition & 1 deletion cartiflette/download/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def _upload_raw_dataset_to_s3(
"dataset_family": layer.dataset_family,
"source": layer.source,
"territory": layer.territory,
"simplification": None,
"simplification": 0,
"filename": rename_basename,
}
)
Expand Down
9 changes: 8 additions & 1 deletion cartiflette/public/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,9 @@ def get_cog_year(
"fs": fs,
"type_download": type_download,
}
dict_cog[level] = download_file_single(**kwargs_cartiflette, **kwargs_requests)
dict_cog[level] = download_file_single(
**kwargs_cartiflette, **kwargs_requests
)
return dict_cog


Expand All @@ -188,6 +190,7 @@ def get_vectorfile_ign(
crs: str = "*",
filter_by: str = "origin",
value: str = "raw",
simplification: int = 0,
bucket=cartiflette.BUCKET,
path_within_bucket=cartiflette.PATH_WITHIN_BUCKET,
type_download: str = "https",
Expand Down Expand Up @@ -244,6 +247,7 @@ def get_vectorfile_ign(
"file_format": "GPKG",
"fs": fs,
"type_download": type_download,
"simplification": 0,
}
gdf = download_file_single(**kwargs, **kwargs_requests)
return gdf
Expand Down Expand Up @@ -330,6 +334,7 @@ def get_vectorfile_communes_arrondissement(
"filename": "COMMUNE_ARRONDISSEMENTS_MUNICIPAUX.gpkg",
"fs": fs,
"type_download": type_download,
"simplification": 0,
}
gdf = download_file_single(**kwargs, **kwargs_requests)
return gdf
Expand Down Expand Up @@ -361,6 +366,7 @@ def get_living_area_commune(
"territory": "france_entiere",
"filename": "bassins_vie.gpkg",
"fs": fs,
"simplification": 0,
}
gdf = download_file_single(**kwargs, **kwargs_requests)
return gdf
Expand Down Expand Up @@ -391,6 +397,7 @@ def get_living_area(
"territory": "france_entiere",
"filename": "bassins_vie.gpkg",
"fs": fs,
"simplification": 0,
}
gdf = download_file_single(**kwargs, **kwargs_requests)
return gdf
Expand Down
5 changes: 3 additions & 2 deletions cartiflette/public/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

logger = logging.getLogger(__name__)


def download_file_single(
year: typing.Union[str, int],
borders: str,
Expand All @@ -25,9 +26,9 @@ def download_file_single(
file_format: str,
provider: str,
source: str,
simplication: typing.Union[str, int, float] = 0,
dataset_family: str,
territory: str,
simplication: typing.Union[str, int, float] = 0,
filename: str = "*",
type_download: str = "https",
fs: s3fs.S3FileSystem = cartiflette.FS,
Expand Down Expand Up @@ -130,7 +131,7 @@ def download_file_single(
"dataset_family": dataset_family,
"source": source,
"year": year,
"administrative_level": borders,
"borders": borders,
"crs": crs,
"filter_by": filter_by,
"value": value,
Expand Down
13 changes: 10 additions & 3 deletions cartiflette/s3/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,18 @@
from pebble import ThreadPool
import s3fs
import tempfile
from typing import TypedDict


from cartiflette import BUCKET, PATH_WITHIN_BUCKET, FS, THREADS_DOWNLOAD
from cartiflette.utils import magic_csv_reader, create_path_bucket
from cartiflette.public import get_vectorfile_ign, download_file_single
from cartiflette.public import download_file_single

logger = logging.getLogger(__name__)

# TODO : docstrings

logger.error("preprocessing des bassins de vie pose pb, manque Paris")


def store_cog_year(
year: int = None,
Expand Down Expand Up @@ -103,6 +104,7 @@ def store_cog_year(
"provider": "cartiflette",
"dataset_family": "COG",
"source": source,
"simplification": 0,
"territory": "france_entiere",
"filename": f"{source}.{ext}",
}
Expand Down Expand Up @@ -221,6 +223,7 @@ def store_cog_ign(
"source": source,
"territory": territory if territory != "*" else "france_entiere",
"filename": f"{borders}.gpkg",
"simplification": 0,
}
path = create_path_bucket(config=config_dict)
with fs.open(path, "wb") as f:
Expand Down Expand Up @@ -341,6 +344,7 @@ def store_vectorfile_communes_arrondissement(
"source": "COMMUNE_ARRONDISSEMENTS_MUNICIPAUX",
"territory": "france_entiere",
"filename": "COMMUNE_ARRONDISSEMENTS_MUNICIPAUX.gpkg",
"simplification": 0,
}
path = create_path_bucket(config=config_dict)
with fs.open(path, "wb") as f:
Expand Down Expand Up @@ -481,6 +485,7 @@ def store_living_area(
"dataset_family": f"bassins-vie-{bv_source.split('_')[-1]}",
"source": "BV",
"filename": "bassins_vie.gpkg",
"simplification": 0,
}
path = create_path_bucket(config=config_dict)
with fs.open(path, "wb") as f:
Expand All @@ -506,6 +511,7 @@ def store_living_area(
"source": "BV",
"territory": "france_entiere",
"filename": "bassins_vie.gpkg",
"simplification": 0,
}
path = create_path_bucket(config=config_dict)
with fs.open(path, "wb") as f:
Expand Down Expand Up @@ -611,4 +617,5 @@ def preprocess_pipeline(

if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
preprocess_pipeline()
# preprocess_pipeline()
store_living_area(year=2022)
1 change: 1 addition & 0 deletions cartiflette/utils/create_path_bucket.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class ConfigDict(TypedDict):
dataset_family: str
territory: str
filename: str
simplification: int


def create_path_bucket(config: ConfigDict) -> str:
Expand Down

0 comments on commit 3033cf8

Please sign in to comment.