Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/mapshaper' into demoday
Browse files Browse the repository at this point in the history
  • Loading branch information
tgrandje committed Nov 16, 2023
2 parents 17c7ba0 + 172f401 commit e9a4b2a
Show file tree
Hide file tree
Showing 15 changed files with 823 additions and 175 deletions.
2 changes: 2 additions & 0 deletions cartiflette/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,5 @@
from cartiflette.utils import *
from cartiflette.download import *
from cartiflette.s3 import *
from cartiflette.pipeline import *
from cartiflette.mapshaper import *
1 change: 1 addition & 0 deletions cartiflette/mapshaper/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .mapshaper_split import *
168 changes: 168 additions & 0 deletions cartiflette/mapshaper/mapshaper_split.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
import subprocess



# Lookup from an aggregation level name (the ``niveau_agreg`` argument of the
# mapshaper helpers in this module) to the attribute field name that is handed
# to mapshaper's ``-split`` command.
DICT_CORRESP_IGN = {
    "REGION": "INSEE_REG",
    "DEPARTEMENT": "INSEE_DEP",
}


def mapshaperize_split(
    local_dir="temp",
    filename_initial="COMMUNE",
    extension_initial="shp",
    format_output="topojson",
    niveau_agreg="DEPARTEMENT",
    provider="IGN",
    source="EXPRESS-COG-CARTO-TERRITOIRE",
    year=2022,
    dataset_family="ADMINEXPRESS",
    territory="metropole",
    crs=4326,
    simplification=0,
    dict_corresp=DICT_CORRESP_IGN
):
    """
    Reproject, optionally simplify, tag and split a layer using Mapshaper.

    A single ``mapshaper`` command line is built and executed through the
    shell. It reads ``{local_dir}/{filename_initial}.{extension_initial}``,
    projects it to ``EPSG:{crs}``, optionally applies ``-simplify``, adds a
    ``SOURCE`` field set to ``"{provider}:{source}"``, splits the layer on the
    field ``dict_corresp[niveau_agreg]`` and writes the pieces below the
    returned output path.

    Parameters
    ----------
    local_dir : str, optional
        The local directory for file storage, by default "temp".
    filename_initial : str, optional
        The initial filename (without extension), by default "COMMUNE".
    extension_initial : str, optional
        The initial file extension, by default "shp".
    format_output : str, optional
        The output format, by default "topojson".
    niveau_agreg : str, optional
        The level of aggregation for the split, by default "DEPARTEMENT".
        Must be a key of ``dict_corresp``.
    provider : str, optional
        The data provider, by default "IGN".
    source : str, optional
        The data source, by default "EXPRESS-COG-CARTO-TERRITOIRE".
    year : int, optional
        The year of the data, by default 2022. Accepted for signature
        compatibility; not used by this function.
    dataset_family : str, optional
        The dataset family, by default "ADMINEXPRESS". Accepted for
        signature compatibility; not used by this function.
    territory : str, optional
        The territory of the data, by default "metropole". Accepted for
        signature compatibility; not used by this function.
    crs : int, optional
        The coordinate reference system (EPSG) code, by default 4326.
    simplification : int, optional
        The degree of simplification, in percent, by default 0. ``None`` or
        ``0`` disables the ``-simplify`` step.
    dict_corresp : dict
        A dictionary giving the correspondence between the ``niveau_agreg``
        argument and the field name used for the split.

    Returns
    -------
    str
        The output path handed to mapshaper's ``-o`` option.

    Raises
    ------
    KeyError
        If ``niveau_agreg`` is not a key of ``dict_corresp``.
    subprocess.CalledProcessError
        If the mapshaper command exits with a non-zero status.
    """

    simplification_percent = simplification if simplification is not None else 0

    # NOTE: the path embeds the *raw* ``simplification`` argument through the
    # f-string ``=`` specifier, so ``None`` yields ".../simplification=None"
    # even though it is processed as 0 below. Kept as-is because callers may
    # rebuild this path on their side.
    output_path = f"{local_dir}/{niveau_agreg}/{format_output}/{simplification=}"

    if simplification_percent != 0:
        option_simplify = f"-simplify {simplification_percent}% "
    else:
        option_simplify = ""

    cmd = (
        f"mapshaper {local_dir}/{filename_initial}.{extension_initial} name='' -proj EPSG:{crs} "
        f"{option_simplify}"
        f"-each \"SOURCE='{provider}:{source}'\" "
        f"-split {dict_corresp[niveau_agreg]} "
        f"-o {output_path} format={format_output} extension=\".{format_output}\" singles"
    )

    # check=True: a failing (or missing) mapshaper must not be reported as a
    # success by silently returning the output path.
    subprocess.run(
        cmd,
        shell=True,
        check=True
    )

    return output_path



def mapshaperize_split_merge(
    local_dir="temp",
    extension_initial="shp",
    format_output="topojson",
    niveau_agreg="DEPARTEMENT",
    provider="IGN",
    source="EXPRESS-COG-CARTO-TERRITOIRE",
    year=2022,
    dataset_family="ADMINEXPRESS",
    territory="metropole",
    crs=4326,
    simplification=0,
    dict_corresp=DICT_CORRESP_IGN
):
    """
    Merge the COMMUNE and ARRONDISSEMENT_MUNICIPAL layers, then split them.

    Four ``mapshaper`` commands are run in sequence through the shell:

    1. ``{local_dir}/COMMUNE.{extension_initial}`` is reprojected, the
       features whose ``INSEE_COM`` is 69123, 13055 or 75056 are removed
       (presumably the communes subdivided into municipal arrondissements,
       TODO confirm) and ``INSEE_COM`` is copied into ``INSEE_COG``.
    2. ``{local_dir}/ARRONDISSEMENT_MUNICIPAL.{extension_initial}`` is
       reprojected, ``INSEE_ARM`` is renamed to ``INSEE_COG``, and
       ``INSEE_DEP`` / ``STATUT`` fields are computed.
    3. Both intermediate GeoJSON files are merged into a single layer
       written to ``{output_path}/raw.geojson``.
    4. The merged layer is optionally simplified, reprojected, tagged with a
       ``SOURCE`` field and split on ``dict_corresp[niveau_agreg]``.

    Intermediate and final files are all written below the same output path.

    Parameters
    ----------
    local_dir : str, optional
        The local directory holding the input files, by default "temp".
    extension_initial : str, optional
        The extension of the input files, by default "shp".
    format_output : str, optional
        The output format of the final split files, by default "topojson".
    niveau_agreg : str, optional
        The level of aggregation for the split, by default "DEPARTEMENT".
        Must be a key of ``dict_corresp``.
    provider : str, optional
        The data provider, by default "IGN".
    source : str, optional
        The data source, by default "EXPRESS-COG-CARTO-TERRITOIRE".
    year : int, optional
        The year of the data, by default 2022. Accepted for signature
        compatibility; not used by this function.
    dataset_family : str, optional
        The dataset family, by default "ADMINEXPRESS". Accepted for
        signature compatibility; not used by this function.
    territory : str, optional
        The territory of the data, by default "metropole". Accepted for
        signature compatibility; not used by this function.
    crs : int, optional
        The coordinate reference system (EPSG) code, by default 4326.
    simplification : int, optional
        The degree of simplification, in percent, by default 0. ``None`` or
        ``0`` disables the ``-simplify`` step.
    dict_corresp : dict
        A dictionary giving the correspondence between the ``niveau_agreg``
        argument and the field name used for the split.

    Returns
    -------
    str
        The output path handed to mapshaper's ``-o`` option.

    Raises
    ------
    KeyError
        If ``niveau_agreg`` is not a key of ``dict_corresp``.
    subprocess.CalledProcessError
        If any of the mapshaper commands exits with a non-zero status; the
        remaining steps are then skipped.
    """

    simplification_percent = simplification if simplification is not None else 0

    # NOTE: the path embeds the *raw* ``simplification`` argument through the
    # f-string ``=`` specifier, so ``None`` yields ".../simplification=None"
    # even though it is processed as 0 below.
    output_path = f"{local_dir}/{niveau_agreg}/{format_output}/{simplification=}"

    if simplification_percent != 0:
        option_simplify = f"-simplify {simplification_percent}% "
    else:
        option_simplify = ""

    format_intermediate = "geojson"

    # Every step uses check=True: each one consumes the files written by the
    # previous one, so continuing after a failure would work on missing or
    # stale intermediates.

    # PREPROCESS CITIES
    subprocess.run(
        (
            f"mapshaper {local_dir}/COMMUNE.{extension_initial} name='COMMUNE' "
            f"-proj EPSG:{crs} "
            f"-filter '\"69123,13055,75056\".indexOf(INSEE_COM) > -1' invert "
            f"-each \"INSEE_COG=INSEE_COM\" "
            f"-o {output_path}/communes_simples.{format_intermediate} format={format_intermediate} extension=\".{format_intermediate}\" singles"
        ),
        shell=True,
        check=True
    )

    # PREPROCESS ARRONDISSEMENT
    subprocess.run(
        (
            f"mapshaper {local_dir}/ARRONDISSEMENT_MUNICIPAL.{extension_initial} name='ARRONDISSEMENT_MUNICIPAL' "
            f"-proj EPSG:{crs} "
            f"-rename-fields INSEE_COG=INSEE_ARM "
            f"-each 'INSEE_DEP=INSEE_COG.substr(0,2), STATUT=\"Arrondissement municipal\" ' "
            f"-o {output_path}/arrondissements.{format_intermediate} format={format_intermediate} extension=\".{format_intermediate}\""
        ),
        shell=True,
        check=True
    )

    # MERGE CITIES AND ARRONDISSEMENT
    subprocess.run(
        (
            f"mapshaper {output_path}/communes_simples.{format_intermediate} {output_path}/arrondissements.{format_intermediate} snap combine-files "
            f"-proj EPSG:{crs} "
            f"-rename-layers COMMUNE,ARRONDISSEMENT_MUNICIPAL "
            f"-merge-layers target=COMMUNE,ARRONDISSEMENT_MUNICIPAL force "
            f"-rename-layers COMMUNE_ARRONDISSEMENT "
            f"-o {output_path}/raw.{format_intermediate} format={format_intermediate} extension=\".{format_intermediate}\""
        ),
        shell=True,
        check=True
    )

    # TRANSFORM AS NEEDED
    cmd = (
        f"mapshaper {output_path}/raw.{format_intermediate} "
        f"{option_simplify}"
        f"-proj EPSG:{crs} "
        f"-each \"SOURCE='{provider}:{source}'\" "
        f"-split {dict_corresp[niveau_agreg]} "
        f"-o {output_path} format={format_output} extension=\".{format_output}\" singles"
    )

    subprocess.run(
        cmd,
        shell=True,
        check=True
    )

    # Returned for consistency with mapshaperize_split.
    return output_path
7 changes: 7 additions & 0 deletions cartiflette/pipeline/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from .cross_product_parameters import (
restructure_nested_dict_borders,
crossproduct_parameters_production
)

from .prepare_mapshaper import prepare_local_directory_mapshaper
from .mapshaper_split_from_s3 import mapshaperize_split_from_s3, mapshaperize_merge_split_from_s3
111 changes: 111 additions & 0 deletions cartiflette/pipeline/cross_product_parameters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import itertools
import pandas as pd


def restructure_nested_dict_borders(dict_with_list: dict):
    """
    Flatten a dictionary of iterables into a list of ``[key, item]`` pairs.

    Parameters:
    -----------
    dict_with_list : dict
        A dictionary whose values are lists (or any iterable).

    Returns:
    --------
    list
        One two-element list ``[key, item]`` per item of every value, in the
        dictionary's iteration order.

    Example:
    --------
    >>> restructure_nested_dict_borders({'a': [1, 2, 3], 'b': [4, 5]})
    [['a', 1], ['a', 2], ['a', 3], ['b', 4], ['b', 5]]
    """
    flat_pairs = []
    for key, items in dict_with_list.items():
        # One [key, item] pair per element of the current value.
        flat_pairs.extend([key, item] for item in items)
    return flat_pairs

import itertools
import pandas as pd

def crossproduct_parameters_production(
    croisement_filter_by_borders: dict,
    list_format: list,
    years: list,
    crs_list: list,
    sources: list,
    simplifications: list
) -> pd.DataFrame:
    """
    Build the table of every combination of the given production parameters.

    The ``croisement_filter_by_borders`` dictionary is first flattened into
    ``[key, item]`` pairs with ``restructure_nested_dict_borders``; those
    pairs are then crossed with all the other parameter lists.

    Parameters:
    -----------
    croisement_filter_by_borders : dict
        A dictionary of lists; each key ends up in the ``borders`` column
        and each item of its list in the ``filter_by`` column.
    list_format : list
        Values for the ``format`` column.
    years : list
        Values for the ``year`` column.
    crs_list : list
        Values for the ``crs`` column (Coordinate Reference Systems).
    sources : list
        Values for the ``source`` column.
    simplifications : list
        Values for the ``simplification`` column.

    Returns:
    --------
    pd.DataFrame
        One row per combination, with columns ``format``, ``year``, ``crs``,
        ``source``, ``simplification``, ``borders`` and ``filter_by``.

    Example:
    --------
    Example usage:
        sample_dict = {'a': [1, 2, 3], 'b': [4, 5]}
        formats = ['geojson', 'gpkg']
        years = [2022, 2022]
        crs_list = [4326, 2154]
        sources = ['source1', 'source2']
        simplifications = [0, 40]
        result = crossproduct_parameters_production(
            sample_dict, formats, years, crs_list, sources, simplifications
        )
        print(result)
    This will output:
        A pandas DataFrame with the cross-product of the provided parameters.
    """
    border_filter_pairs = restructure_nested_dict_borders(
        croisement_filter_by_borders
    )

    # The [key, item] pair travels through the product as a single "nested"
    # element and is unpacked into two columns afterwards.
    rows = list(
        itertools.product(
            list_format,
            border_filter_pairs,
            years,
            crs_list,
            sources,
            simplifications
        )
    )
    grid = pd.DataFrame(
        rows,
        columns=["format", "nested", "year", "crs", "source", "simplification"]
    )

    nested_pairs = grid["nested"]
    grid["borders"] = nested_pairs.apply(lambda pair: pair[0])
    grid["filter_by"] = nested_pairs.apply(lambda pair: pair[1])

    return grid.drop(columns="nested")
Loading

0 comments on commit e9a4b2a

Please sign in to comment.