# Patch against misc/prototype_mapshaper.py, stored with its line breaks
# collapsed (hunk bodies run together). Three hunks are visible:
#   1. @@ -1,9 +1,10 @@    adds a leading "# IGN DATASET" line and changes
#      path_within_bucket from "test-download11" to "test-download16".
#   2. @@ -11,7 +12,7 @@   rewrites `year = 2022` as `year=2022` in the
#      path_bucket_adminexpress upload_s3_raw(...) call.
#   3. @@ -76,8 +77,95 @@  adds a second copy of the four cartiflette imports,
#      assigns path_within_bucket = "test-download11", and adds upload_s3_raw(...)
#      calls for adminexpress, COG COMMUNE / DEPARTEMENT / REGION and
#      TAGC APPARTENANCE / PASSAGE (the adminexpress and COG commune calls also
#      appear as context in hunk 2). It then takes the first entry of
#      fs.ls(...) for the TAGC appartenance and COG departement locations and
#      loads them with pd.read_excel(..., skiprows=5) and pd.read_csv(...).
# NOTE(review): hunk 3 re-assigns path_within_bucket to "test-download11" after
# hunk 1 set it to "test-download16" -- looks like a re-paste of the old file
# header; confirm this is intended before applying.
# NOTE(review): `fs` and `pd` are used in hunk 3, but no import or assignment
# for either appears in the visible lines -- verify they are defined elsewhere.
# NOTE(review): hunk 3 declares 8 old lines while fewer context lines are
# visible here; blank context lines may have been lost or the patch truncated.
diff --git a/misc/prototype_mapshaper.py b/misc/prototype_mapshaper.py index 49360a81..f8cd2e3f 100644 --- a/misc/prototype_mapshaper.py +++ b/misc/prototype_mapshaper.py @@ -1,9 +1,10 @@ +# IGN DATASET from cartiflette.s3 import upload_s3_raw from cartiflette.pipeline import crossproduct_parameters_production from cartiflette.pipeline import mapshaperize_split_from_s3, mapshaperize_merge_split_from_s3 from cartiflette.download.download import _download_sources -path_within_bucket = "test-download11" +path_within_bucket = "test-download16" # DATA RETRIEVING STEP ========================= @@ -11,7 +12,7 @@ # IGN DATASET path_bucket_adminexpress = upload_s3_raw( path_within_bucket=path_within_bucket, - year = 2022 + year=2022 ) path_bucket_cog_commune = upload_s3_raw( @@ -76,8 +77,95 @@ vectorfile_format="csv", path_within_bucket=path_within_bucket ) +from cartiflette.s3 import upload_s3_raw +from cartiflette.pipeline import crossproduct_parameters_production +from cartiflette.pipeline import mapshaperize_split_from_s3, mapshaperize_merge_split_from_s3 +from cartiflette.download.download import _download_sources + +path_within_bucket = "test-download11" + + +# DATA RETRIEVING STEP ========================= + +# IGN DATASET +path_bucket_adminexpress = upload_s3_raw( + path_within_bucket=path_within_bucket, + year = 2022 +) + +path_bucket_cog_commune = upload_s3_raw( + provider='Insee', + dataset_family='COG', + source="COMMUNE", + territory="france_entiere", + borders="DATASET_INSEE_COG_COMMUNE_FRANCE_ENTIERE_2022", + year=2022, + crs=None, + vectorfile_format="csv", + path_within_bucket=path_within_bucket + ) + +# DEPARTEMENT (FOR COMMON NAMES) +path_bucket_cog_departement = upload_s3_raw( + provider='Insee', + dataset_family='COG', + source="DEPARTEMENT", + territory="france_entiere", + borders="DATASET_INSEE_COG_DEPARTEMENT_FRANCE_ENTIERE_2022", + year=2022, + crs=None, + vectorfile_format="csv", + path_within_bucket=path_within_bucket + ) + +# REGIONS (FOR COMMON 
NAMES) +path_bucket_cog_region = upload_s3_raw( + provider='Insee', + dataset_family='COG', + source="REGION", + territory="france_entiere", + borders="DATASET_INSEE_COG_REGION_FRANCE_ENTIERE_2022", + year=2022, + crs=None, + vectorfile_format="csv", + path_within_bucket=path_within_bucket + ) + +# TABLE PASSAGE COMMUNES, DEP, REGIONS +path_bucket_tagc_appartenance = upload_s3_raw( + provider='Insee', + dataset_family='TAGC', + source="APPARTENANCE", + territory="france_entiere", + borders="table-appartenance-geo-communes-22", + year=2022, + crs=None, + vectorfile_format="xlsx", + path_within_bucket=path_within_bucket + ) + +path_bucket_tagc_passage = upload_s3_raw( + provider='Insee', + dataset_family='TAGC', + source="PASSAGE", + territory="france_entiere", + borders="table_passage_geo2003_geo2023", + year=2023, + crs=None, + vectorfile_format="xlsx", + path_within_bucket=path_within_bucket + ) + # PUTTING ALL METADATA TOGETHER +path_tagc = fs.ls(path_bucket_tagc_appartenance)[0] +path_bucket_cog_departement = fs.ls(path_bucket_cog_departement)[0] + +with fs.open(path_tagc, mode = "rb") as remote_file: + tagc = pd.read_excel(remote_file, skiprows=5, dtype_backend="pyarrow") + +with fs.open(path_bucket_cog_departement, mode = "rb") as remote_file: + cog_dep = pd.read_csv(remote_file, dtype_backend="pyarrow")