-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge remote-tracking branch 'origin/mapshaper' into demoday
- Loading branch information
Showing
15 changed files
with
823 additions
and
175 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from .mapshaper_split import * |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
import subprocess | ||
|
||
|
||
|
||
# Maps an aggregation level (``niveau_agreg``) to the attribute field that is
# handed to Mapshaper's ``-split`` option.
DICT_CORRESP_IGN = {"REGION": "INSEE_REG", "DEPARTEMENT": "INSEE_DEP"}


def mapshaperize_split(
    local_dir="temp",
    filename_initial="COMMUNE",
    extension_initial="shp",
    format_output="topojson",
    niveau_agreg="DEPARTEMENT",
    provider="IGN",
    source="EXPRESS-COG-CARTO-TERRITOIRE",
    year=2022,
    dataset_family="ADMINEXPRESS",
    territory="metropole",
    crs=4326,
    simplification=0,
    dict_corresp=DICT_CORRESP_IGN
):
    """
    Reproject, optionally simplify, tag and split a local file with Mapshaper.

    A single ``mapshaper`` command line is built and run through the shell.
    It reads ``{local_dir}/{filename_initial}.{extension_initial}``, applies
    ``-proj EPSG:{crs}``, an optional ``-simplify``, sets a ``SOURCE`` field
    to ``'{provider}:{source}'``, splits on ``dict_corresp[niveau_agreg]``
    and writes to the output directory in ``format_output``.

    Parameters
    ----------
    local_dir : str, optional
        Directory holding the input file and root of the output path,
        by default "temp".
    filename_initial : str, optional
        Input file name without extension, by default "COMMUNE".
    extension_initial : str, optional
        Input file extension, by default "shp".
    format_output : str, optional
        Output format, also used as output file extension,
        by default "topojson".
    niveau_agreg : str, optional
        Aggregation level; must be a key of ``dict_corresp``,
        by default "DEPARTEMENT".
    provider : str, optional
        Data provider written into the ``SOURCE`` field, by default "IGN".
    source : str, optional
        Data source written into the ``SOURCE`` field,
        by default "EXPRESS-COG-CARTO-TERRITOIRE".
    year : int, optional
        Accepted for signature compatibility; not used by this function.
    dataset_family : str, optional
        Accepted for signature compatibility; not used by this function.
    territory : str, optional
        Accepted for signature compatibility; not used by this function.
    crs : int, optional
        EPSG code passed to ``-proj``, by default 4326.
    simplification : int or None, optional
        Percentage passed to ``-simplify``. ``0`` and ``None`` both mean
        "no simplification", by default 0.
    dict_corresp : dict
        Correspondence between ``niveau_agreg`` values and the field name
        used for splitting.

    Returns
    -------
    str
        The output directory given to Mapshaper:
        ``{local_dir}/{niveau_agreg}/{format_output}/simplification={n}``
        where ``n`` is the normalised simplification (``None`` becomes 0).

    Raises
    ------
    KeyError
        If ``niveau_agreg`` is not a key of ``dict_corresp``.

    Notes
    -----
    The exit status of the Mapshaper process is not checked: the path is
    returned even when the command fails.
    """
    # Normalise once so the command line and the output directory agree:
    # None and 0 run the same command and must target the same directory.
    simplification_percent = simplification if simplification is not None else 0

    output_path = (
        f"{local_dir}/{niveau_agreg}/{format_output}/"
        f"simplification={simplification_percent}"
    )

    if simplification_percent != 0:
        option_simplify = f"-simplify {simplification_percent}% "
    else:
        option_simplify = ""

    cmd = (
        f"mapshaper {local_dir}/{filename_initial}.{extension_initial} name='' -proj EPSG:{crs} "
        f"{option_simplify}"
        f"-each \"SOURCE='{provider}:{source}'\" "
        f"-split {dict_corresp[niveau_agreg]} "
        f"-o {output_path} format={format_output} extension=\".{format_output}\" singles"
    )

    # NOTE(review): the command is a shell string built from the arguments;
    # callers must only pass trusted values (no quoting/escaping is applied).
    subprocess.run(
        cmd,
        shell=True
    )

    return output_path
|
||
|
||
|
||
def mapshaperize_split_merge(
    local_dir="temp",
    extension_initial="shp",
    format_output="topojson",
    niveau_agreg="DEPARTEMENT",
    provider="IGN",
    source="EXPRESS-COG-CARTO-TERRITOIRE",
    year=2022,
    dataset_family="ADMINEXPRESS",
    territory="metropole",
    crs=4326,
    simplification=0,
    dict_corresp=DICT_CORRESP_IGN
):
    """
    Merge communes with municipal arrondissements, then split with Mapshaper.

    Four ``mapshaper`` command lines are run through the shell, in order:

    1. ``{local_dir}/COMMUNE.{extension_initial}`` is reprojected, the
       communes with ``INSEE_COM`` 69123, 13055 and 75056 are filtered out
       (``-filter ... invert``), ``INSEE_COG`` is set to ``INSEE_COM`` and
       the result is written as ``communes_simples.geojson``.
    2. ``{local_dir}/ARRONDISSEMENT_MUNICIPAL.{extension_initial}`` is
       reprojected, ``INSEE_ARM`` is renamed to ``INSEE_COG``, ``INSEE_DEP``
       is set to the first two characters of ``INSEE_COG`` and ``STATUT``
       to "Arrondissement municipal"; the result is written as
       ``arrondissements.geojson``.
    3. Both intermediate files are combined and merged into a single layer
       written as ``raw.geojson``.
    4. ``raw.geojson`` is optionally simplified, reprojected, tagged with a
       ``SOURCE`` field (``'{provider}:{source}'``), split on
       ``dict_corresp[niveau_agreg]`` and written in ``format_output``.

    All intermediate and final files are written under the returned
    output directory.

    Parameters
    ----------
    local_dir : str, optional
        Directory holding the input files and root of the output path,
        by default "temp".
    extension_initial : str, optional
        Input file extension, by default "shp".
    format_output : str, optional
        Output format, also used as output file extension,
        by default "topojson".
    niveau_agreg : str, optional
        Aggregation level; must be a key of ``dict_corresp``,
        by default "DEPARTEMENT".
    provider : str, optional
        Data provider written into the ``SOURCE`` field, by default "IGN".
    source : str, optional
        Data source written into the ``SOURCE`` field,
        by default "EXPRESS-COG-CARTO-TERRITOIRE".
    year : int, optional
        Accepted for signature compatibility; not used by this function.
    dataset_family : str, optional
        Accepted for signature compatibility; not used by this function.
    territory : str, optional
        Accepted for signature compatibility; not used by this function.
    crs : int, optional
        EPSG code passed to ``-proj``, by default 4326.
    simplification : int or None, optional
        Percentage passed to ``-simplify`` in the last step. ``0`` and
        ``None`` both mean "no simplification", by default 0.
    dict_corresp : dict
        Correspondence between ``niveau_agreg`` values and the field name
        used for splitting.

    Returns
    -------
    str
        The output directory given to Mapshaper:
        ``{local_dir}/{niveau_agreg}/{format_output}/simplification={n}``
        where ``n`` is the normalised simplification (``None`` becomes 0).

    Raises
    ------
    KeyError
        If ``niveau_agreg`` is not a key of ``dict_corresp``.

    Notes
    -----
    The exit status of the Mapshaper processes is not checked: each step
    runs even if the previous one failed.
    """
    # Normalise once so the command line and the output directory agree:
    # None and 0 run the same commands and must target the same directory.
    simplification_percent = simplification if simplification is not None else 0

    output_path = (
        f"{local_dir}/{niveau_agreg}/{format_output}/"
        f"simplification={simplification_percent}"
    )

    if simplification_percent != 0:
        option_simplify = f"-simplify {simplification_percent}% "
    else:
        option_simplify = ""

    # Intermediate files are always GeoJSON, whatever the final format.
    format_intermediate = "geojson"

    # NOTE(review): every command below is a shell string built from the
    # arguments; callers must only pass trusted values.

    # PREPROCESS CITIES
    # `invert` keeps the features that do NOT match the filter expression.
    # NOTE(review): 69123, 13055, 75056 are presumably Lyon, Marseille and
    # Paris, which are replaced by their arrondissements - confirm.
    subprocess.run(
        (
            f"mapshaper {local_dir}/COMMUNE.{extension_initial} name='COMMUNE' "
            f"-proj EPSG:{crs} "
            f"-filter '\"69123,13055,75056\".indexOf(INSEE_COM) > -1' invert "
            f"-each \"INSEE_COG=INSEE_COM\" "
            f"-o {output_path}/communes_simples.{format_intermediate} format={format_intermediate} extension=\".{format_intermediate}\" singles"
        ),
        shell=True
    )

    # PREPROCESS ARRONDISSEMENT
    # Align field names with the communes layer so both can be merged.
    subprocess.run(
        (
            f"mapshaper {local_dir}/ARRONDISSEMENT_MUNICIPAL.{extension_initial} name='ARRONDISSEMENT_MUNICIPAL' "
            f"-proj EPSG:{crs} "
            f"-rename-fields INSEE_COG=INSEE_ARM "
            f"-each 'INSEE_DEP=INSEE_COG.substr(0,2), STATUT=\"Arrondissement municipal\" ' "
            f"-o {output_path}/arrondissements.{format_intermediate} format={format_intermediate} extension=\".{format_intermediate}\""
        ),
        shell=True
    )

    # MERGE CITIES AND ARRONDISSEMENT
    subprocess.run(
        (
            f"mapshaper {output_path}/communes_simples.{format_intermediate} {output_path}/arrondissements.{format_intermediate} snap combine-files "
            f"-proj EPSG:{crs} "
            f"-rename-layers COMMUNE,ARRONDISSEMENT_MUNICIPAL "
            f"-merge-layers target=COMMUNE,ARRONDISSEMENT_MUNICIPAL force "
            f"-rename-layers COMMUNE_ARRONDISSEMENT "
            f"-o {output_path}/raw.{format_intermediate} format={format_intermediate} extension=\".{format_intermediate}\""
        ),
        shell=True
    )

    # TRANSFORM AS NEEDED
    cmd = (
        f"mapshaper {output_path}/raw.{format_intermediate} "
        f"{option_simplify}"
        f"-proj EPSG:{crs} "
        f"-each \"SOURCE='{provider}:{source}'\" "
        f"-split {dict_corresp[niveau_agreg]} "
        f"-o {output_path} format={format_output} extension=\".{format_output}\" singles"
    )

    subprocess.run(
        cmd,
        shell=True
    )

    # Returned for parity with mapshaperize_split so callers can locate
    # the generated files.
    return output_path
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
from .cross_product_parameters import ( | ||
restructure_nested_dict_borders, | ||
crossproduct_parameters_production | ||
) | ||
|
||
from .prepare_mapshaper import prepare_local_directory_mapshaper | ||
from .mapshaper_split_from_s3 import mapshaperize_split_from_s3, mapshaperize_merge_split_from_s3 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
import itertools | ||
import pandas as pd | ||
|
||
|
||
def restructure_nested_dict_borders(dict_with_list: dict):
    """
    Flatten a dictionary of iterables into a list of ``[key, value]`` pairs.

    Parameters:
    -----------
    dict_with_list : dict
        A dictionary whose values are iterables (typically lists).

    Returns:
    --------
    list
        One two-element list ``[key, value]`` per value found under each
        key, in dictionary order and then in the order of each iterable.

    Example:
    --------
    >>> restructure_nested_dict_borders({'a': [1, 2, 3], 'b': [4, 5]})
    [['a', 1], ['a', 2], ['a', 3], ['b', 4], ['b', 5]]
    """
    flat_pairs = []
    for key in dict_with_list:
        # Pair the current key with each of its values, preserving order.
        flat_pairs.extend([key, value] for value in dict_with_list[key])

    return flat_pairs
|
||
import itertools | ||
import pandas as pd | ||
|
||
def crossproduct_parameters_production(
    croisement_filter_by_borders: dict,
    list_format: list,
    years: list,
    crs_list: list,
    sources: list,
    simplifications: list
) -> pd.DataFrame:
    """
    Build the table of every parameter combination to produce.

    The dictionary is first flattened into ``[key, value]`` pairs with
    ``restructure_nested_dict_borders``; the cartesian product of those
    pairs with all the other lists then gives one row per combination.

    Parameters:
    -----------
    croisement_filter_by_borders : dict
        Dictionary of lists; each key is paired with each of its values.
    list_format : list
        Values for the ``format`` column.
    years : list
        Values for the ``year`` column.
    crs_list : list
        Values for the ``crs`` column.
    sources : list
        Values for the ``source`` column.
    simplifications : list
        Values for the ``simplification`` column.

    Returns:
    --------
    pd.DataFrame
        A DataFrame with columns ``format``, ``year``, ``crs``, ``source``,
        ``simplification``, ``borders`` (the dictionary key) and
        ``filter_by`` (the dictionary value), one row per combination.

    Example:
    --------
    Example usage:
        sample_dict = {'a': [1, 2, 3], 'b': [4, 5]}
        formats = ['geojson', 'gpkg']
        years = [2022]
        crs_list = [4326, 2154]
        sources = ['source1', 'source2']
        simplifications = [0, 40]
        result = crossproduct_parameters_production(
            sample_dict, formats, years, crs_list, sources, simplifications
        )
    ``result`` then holds 2 * 5 * 1 * 2 * 2 * 2 = 80 rows.
    """

    def _key_of(pair):
        # First element of a [key, value] pair.
        return pair[0]

    def _value_of(pair):
        # Second element of a [key, value] pair.
        return pair[1]

    border_filter_pairs = restructure_nested_dict_borders(
        croisement_filter_by_borders
    )

    column_names = ["format", "nested", "year", "crs", "source", "simplification"]
    rows = list(
        itertools.product(
            list_format,
            border_filter_pairs,
            years,
            crs_list,
            sources,
            simplifications
        )
    )
    grid = pd.DataFrame(rows, columns=column_names)

    # Unpack the [key, value] pair into two flat columns, then discard it.
    grid["borders"] = grid["nested"].apply(_key_of)
    grid["filter_by"] = grid["nested"].apply(_value_of)

    return grid.drop(columns="nested")
Oops, something went wrong.