Skip to content

Commit

Permalink
start pushing catalog to python client
Browse files Browse the repository at this point in the history
  • Loading branch information
tgrandje committed Dec 15, 2024
1 parent 289b386 commit 0e9be7f
Show file tree
Hide file tree
Showing 3 changed files with 121 additions and 6 deletions.
94 changes: 88 additions & 6 deletions python-package/cartiflette/cartiflette/client.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,25 @@
from requests_cache import CachedSession
import os
import typing
import geopandas as gpd
from datetime import date
import logging
import os
import typing

from cartiflette.constants import DIR_CACHE, CACHE_NAME, BUCKET, PATH_WITHIN_BUCKET
from requests_cache import CachedSession
import geopandas as gpd
import pandas as pd

from cartiflette.constants import (
DIR_CACHE,
CACHE_NAME,
BUCKET,
PATH_WITHIN_BUCKET,
CATALOG,
)
from cartiflette.config import _config
from cartiflette.utils import create_path_bucket, standardize_inputs
from cartiflette.utils import (
create_path_bucket,
standardize_inputs,
flatten_dict,
)

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -94,6 +106,76 @@ def download_cartiflette_single(
else:
return gdf

def get_catalog(self, **kwargs) -> pd.DataFrame:
    """
    Retrieve and load cartiflette's current datasets' inventory (as a
    dataframe).

    Inventory columns are [
        'source',
        'year',
        'administrative_level',
        'crs',
        'filter_by',
        'value',
        'vectorfile_format',
        'territory',
        'simplification'
    ]

    Each row corresponds to an available DataFrame.

    Parameters
    ----------
    **kwargs
        Currently ignored; kept for signature stability.

    Returns
    -------
    df : pd.DataFrame
        Inventory DataFrame, or None if the inventory could not be
        retrieved (the error is logged).
    """

    # CATALOG is already a complete URL (scheme + host + path) — see
    # constants.py. Do NOT prefix it with the endpoint again, which
    # would yield "https://minio.../https://minio.../..." and 404.
    url = CATALOG

    try:
        r = self.get(url)
        d = r.json()
    except Exception as e:
        # Best-effort: log and return None rather than crash the client.
        logger.error(
            "There was an error while reading the file from the URL: %s",
            url,
        )
        logger.error("Error message: %s", str(e))
        return

    # Flatten the nested inventory JSON into a single-level dict mapping
    # (source, year, ..., territory) tuples to the simplification value.
    d = flatten_dict(d)

    # Name the levels at construction time instead of mutating the index
    # after the DataFrame is built (safer: no reliance on the DataFrame
    # sharing the mutated index object).
    index = pd.MultiIndex.from_tuples(
        d.keys(),
        names=[
            "source",
            "year",
            "administrative_level",
            "crs",
            "filter_by",
            "value",
            "vectorfile_format",
            "territory",
        ],
    )
    df = pd.DataFrame(
        list(d.values()), index=index, columns=["simplification"]
    )

    # Expose the index levels as ordinary columns.
    df = df.reset_index(drop=False)
    return df

def get_dataset(
self,
values: typing.List[typing.Union[str, int, float]],
Expand Down
5 changes: 5 additions & 0 deletions python-package/cartiflette/cartiflette/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,8 @@
# Name of the sqlite file used for HTTP response caching.
CACHE_NAME = "cartiflette_http_cache.sqlite"
# S3/MinIO bucket hosting cartiflette's published datasets.
BUCKET = "projet-cartiflette"
# Path prefix used within the bucket.
PATH_WITHIN_BUCKET = "production"

# Complete URL of the datasets' inventory file. Note this is a full URL
# (endpoint included): consumers must not prepend the endpoint again.
# (Fixed: removed the stray chained `url =` assignment that leaked an
# accidental module-level `url` name.)
CATALOG = (
    "https://minio.lab.sspcloud.fr/"
    f"{BUCKET}/{PATH_WITHIN_BUCKET}/inventory.json"
)
28 changes: 28 additions & 0 deletions python-package/cartiflette/cartiflette/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,31 @@ def create_path_bucket(config: ConfigDict) -> str:
write_path += f"/raw.{vectorfile_format}"

return write_path


def flatten_dict(d: dict, parent_key: tuple = ()) -> dict:
    """
    Flatten a nested dictionary into a single-level dict keyed by tuples.

    Each leaf value is mapped to the tuple of keys leading to it, e.g.
    {"a": {"b": 1}} -> {("a", "b"): 1}.

    Parameters
    ----------
    d : dict
        Nested dictionary.
    parent_key : tuple, optional
        Key prefix accumulated during recursion. The default is ().

    Returns
    -------
    dict
        Flattened dictionary mapping key-path tuples to leaf values.
    """
    flat = {}
    for key, value in d.items():
        path = (*parent_key, key)
        if isinstance(value, dict):
            # Recurse into sub-dicts, extending the key path.
            flat.update(flatten_dict(value, path))
        else:
            flat[path] = value
    return flat

0 comments on commit 0e9be7f

Please sign in to comment.