Skip to content

Commit

Permalink
add proxies and add test
Browse files Browse the repository at this point in the history
  • Loading branch information
B-Alica committed Nov 20, 2024
1 parent 53a0b2c commit 11d5761
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 62 deletions.
72 changes: 70 additions & 2 deletions python-package/cartiflette/cartiflette/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@
import typing
import geopandas as gpd
from datetime import date
import logging

from cartiflette.constants import DIR_CACHE, CACHE_NAME, BUCKET, PATH_WITHIN_BUCKET
from cartiflette.config import _config
from cartiflette.utils import download_cartiflette_single
from cartiflette.utils import create_path_bucket, standardize_inputs

logger = logging.getLogger(__name__)

session = CachedSession()

Expand All @@ -26,6 +29,71 @@ def __init__(
**kwargs,
)

for protocol in ["http", "https"]:
try:
proxy = {protocol: os.environ[f"{protocol}_proxy"]}
self.proxies.update(proxy)
except KeyError:
continue

def download_cartiflette_single(
    self,
    *args,
    bucket: str = BUCKET,
    path_within_bucket: str = PATH_WITHIN_BUCKET,
    provider: str = "IGN",
    dataset_family: str = "ADMINEXPRESS",
    source: str = "EXPRESS-COG-TERRITOIRE",
    vectorfile_format: str = "geojson",
    borders: str = "COMMUNE",
    filter_by: str = "region",
    territory: str = "metropole",
    year: typing.Union[str, int, float] = None,
    value: typing.Union[str, int, float] = "28",
    crs: typing.Union[list, str, int, float] = 2154,
    simplification: typing.Union[str, int, float] = None,
    filename: str = "raw",
    **kwargs,
):
    """
    Download a single vector file through this session and load it
    with geopandas.

    The remote path is built by ``create_path_bucket`` from the keyword
    arguments below and prefixed with ``https://minio.lab.sspcloud.fr/``.
    The file is fetched with ``self.get`` (so the proxies registered on
    the session are used) and the response body is parsed with
    ``gpd.read_file``.

    Parameters
    ----------
    *args, **kwargs :
        Accepted for call compatibility; not used by this method.
    bucket, path_within_bucket, provider, dataset_family, source,
    borders, filter_by, territory, value, crs, simplification, filename :
        Forwarded unchanged to ``create_path_bucket``.
    vectorfile_format : str
        Passed to ``standardize_inputs``; the read format it returns is
        the one forwarded to ``create_path_bucket``.
    year : str, int or float, optional
        When falsy, the current calendar year (as a string) is used.

    Returns
    -------
    The object returned by ``gpd.read_file`` on success, or ``None``
    when the download or the parsing fails (the error is logged, not
    raised).
    """
    if not year:
        year = str(date.today().year)

    # Only the normalised read format is needed to build the path.
    _, format_read, _ = standardize_inputs(vectorfile_format)

    url = create_path_bucket(
        {
            "bucket": bucket,
            "path_within_bucket": path_within_bucket,
            "vectorfile_format": format_read,
            "territory": territory,
            "borders": borders,
            "filter_by": filter_by,
            "year": year,
            "value": value,
            "crs": crs,
            "provider": provider,
            "dataset_family": dataset_family,
            "source": source,
            "simplification": simplification,
            "filename": filename,
        }
    )

    url = f"https://minio.lab.sspcloud.fr/{url}"

    try:
        r = self.get(url)
        # Fail on HTTP errors (404, 403, ...) before parsing: otherwise the
        # error page body is fed to geopandas and the logged message is a
        # misleading parsing failure instead of the real HTTP status.
        # NOTE(review): assumes self.get returns a requests-style Response
        # (the enclosing class is not visible here) - confirm.
        r.raise_for_status()
        gdf = gpd.read_file(r.content)
    except Exception as e:
        # Best-effort contract kept from the original: log and return None.
        logger.error(
            f"There was an error while reading the file from the URL: {url}"
        )
        logger.error(f"Error message: {str(e)}")
        return None

    return gdf

def get_dataset(
self,
values: typing.List[typing.Union[str, int, float]],
Expand Down Expand Up @@ -98,7 +166,7 @@ def get_dataset(

# Iterate over values
for value in values:
gdf_single = download_cartiflette_single(
gdf_single = self.download_cartiflette_single(
value=value,
bucket=bucket,
path_within_bucket=path_within_bucket,
Expand Down
57 changes: 0 additions & 57 deletions python-package/cartiflette/cartiflette/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import typing
import logging
from datetime import date
import geopandas as gpd

from cartiflette.constants import BUCKET, PATH_WITHIN_BUCKET

Expand Down Expand Up @@ -154,58 +152,3 @@ def create_path_bucket(config: ConfigDict) -> str:
write_path += f"/raw.{vectorfile_format}"

return write_path


def download_cartiflette_single(
    *args,
    bucket: str = BUCKET,
    path_within_bucket: str = PATH_WITHIN_BUCKET,
    provider: str = "IGN",
    dataset_family: str = "ADMINEXPRESS",
    source: str = "EXPRESS-COG-TERRITOIRE",
    vectorfile_format: str = "geojson",
    borders: str = "COMMUNE",
    filter_by: str = "region",
    territory: str = "metropole",
    year: typing.Union[str, int, float] = None,
    value: typing.Union[str, int, float] = "28",
    crs: typing.Union[list, str, int, float] = 2154,
    simplification: typing.Union[str, int, float] = None,
    filename: str = "raw",
    **kwargs,
):
    """
    Read a single vector file from its remote URL with geopandas.

    The path is produced by ``create_path_bucket`` from the keyword
    arguments and prefixed with ``https://minio.lab.sspcloud.fr/``; the
    resulting URL is handed directly to ``gpd.read_file``.

    ``*args`` and ``**kwargs`` are accepted but not used. A falsy
    ``year`` is replaced by the current calendar year as a string.

    Returns the object produced by ``gpd.read_file``, or ``None`` when
    reading fails (the error is logged, not raised).
    """
    # Fall back to the current year when none was supplied.
    year = year or str(date.today().year)

    # standardize_inputs yields three values; only the read format is used.
    _, read_format, _ = standardize_inputs(vectorfile_format)

    path_config = dict(
        bucket=bucket,
        path_within_bucket=path_within_bucket,
        vectorfile_format=read_format,
        territory=territory,
        borders=borders,
        filter_by=filter_by,
        year=year,
        value=value,
        crs=crs,
        provider=provider,
        dataset_family=dataset_family,
        source=source,
        simplification=simplification,
        filename=filename,
    )
    remote_path = create_path_bucket(path_config)
    url = f"https://minio.lab.sspcloud.fr/{remote_path}"

    try:
        return gpd.read_file(url)
    except Exception as e:
        logger.error(f"There was an error while reading the file from the URL: {url}")
        logger.error(f"Error message: {str(e)}")
        return None
2 changes: 1 addition & 1 deletion python-package/cartiflette/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "cartiflette"
version = "0.1.3"
version = "0.1.4"
description = "Tools to easily retrieve French borders for geographic data analytics"
authors = ["Lino Galiana <[email protected]>", "Thomas Grandjean <[email protected]>", "Alica Burlot <[email protected]>"]
license = "OPEN LICENCE 2.0/LICENCE OUVERTE 2.0"
Expand Down
15 changes: 13 additions & 2 deletions python-package/cartiflette/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@


def test_carti_download():
dataset = carti_download(
dataset_topojson = carti_download(
values=["France"],
crs=4326,
borders="DEPARTEMENT",
Expand All @@ -22,4 +22,15 @@ def test_carti_download():
source="EXPRESS-COG-CARTO-TERRITOIRE",
year=2022,
)
assert isinstance(dataset, gpd.GeoDataFrame)
dataset_geojson = carti_download(
values=["France"],
crs=4326,
borders="DEPARTEMENT",
vectorfile_format="geojson",
simplification=50,
filter_by="FRANCE_ENTIERE_DROM_RAPPROCHES",
source="EXPRESS-COG-CARTO-TERRITOIRE",
year=2022,
)
assert isinstance(dataset_topojson, gpd.GeoDataFrame)
assert isinstance(dataset_geojson, gpd.GeoDataFrame)

0 comments on commit 11d5761

Please sign in to comment.