diff --git a/pyproject.toml b/pyproject.toml
index 5079f27a80..4160a40766 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@
 authors = ["Sylvain Lesage <sylvain.lesage@huggingface.co>"]
 description = "API to extract rows of 🤗 datasets"
 name = "datasets-preview-backend"
-version = "0.16.4"
+version = "0.16.5"
 
 [tool.poetry.dependencies]
 Pillow = "^8.4.0"
diff --git a/src/datasets_preview_backend/models/dataset.py b/src/datasets_preview_backend/models/dataset.py
index 8494de64fd..f32073bc34 100644
--- a/src/datasets_preview_backend/models/dataset.py
+++ b/src/datasets_preview_backend/models/dataset.py
@@ -3,7 +3,7 @@
 
 from datasets import get_dataset_config_names, get_dataset_split_names
 
-from datasets_preview_backend.constants import DATASETS_BLOCKLIST, FORCE_REDOWNLOAD
+from datasets_preview_backend.constants import FORCE_REDOWNLOAD
 from datasets_preview_backend.exceptions import Status400Error
 
 logger = logging.getLogger(__name__)
@@ -17,8 +17,6 @@ class SplitFullName(TypedDict):
 
 def get_dataset_split_full_names(dataset_name: str, hf_token: Optional[str] = None) -> List[SplitFullName]:
     logger.info(f"get dataset '{dataset_name}' split full names")
-    if dataset_name in DATASETS_BLOCKLIST:
-        raise Status400Error("this dataset is not supported for now.")
     try:
         return [
             {"dataset_name": dataset_name, "config_name": config_name, "split_name": split_name}
diff --git a/src/datasets_preview_backend/routes/rows.py b/src/datasets_preview_backend/routes/rows.py
index ecff5ef48b..24b5caa90e 100644
--- a/src/datasets_preview_backend/routes/rows.py
+++ b/src/datasets_preview_backend/routes/rows.py
@@ -4,7 +4,8 @@
 from starlette.responses import Response
 
 from datasets_preview_backend.config import MAX_AGE_LONG_SECONDS
-from datasets_preview_backend.exceptions import StatusError
+from datasets_preview_backend.constants import DATASETS_BLOCKLIST
+from datasets_preview_backend.exceptions import Status400Error, StatusError
 from datasets_preview_backend.io.cache import get_rows_response
 from datasets_preview_backend.routes._utils import get_response
 
@@ -20,6 +21,8 @@ async def rows_endpoint(request: Request) -> Response:
     try:
         if not isinstance(dataset_name, str) or not isinstance(config_name, str) or not isinstance(split_name, str):
             raise StatusError("Parameters 'dataset', 'config' and 'split' are required", 400)
+        if dataset_name in DATASETS_BLOCKLIST:
+            raise Status400Error("this dataset is not supported for now.")
         rows_response, rows_error, status_code = get_rows_response(dataset_name, config_name, split_name)
         return get_response(rows_response or rows_error, status_code, MAX_AGE_LONG_SECONDS)
     except StatusError as err:
diff --git a/src/datasets_preview_backend/routes/splits.py b/src/datasets_preview_backend/routes/splits.py
index 3c99c42159..267ccb43a2 100644
--- a/src/datasets_preview_backend/routes/splits.py
+++ b/src/datasets_preview_backend/routes/splits.py
@@ -4,7 +4,8 @@
 from starlette.responses import Response
 
 from datasets_preview_backend.config import MAX_AGE_LONG_SECONDS
-from datasets_preview_backend.exceptions import StatusError
+from datasets_preview_backend.constants import DATASETS_BLOCKLIST
+from datasets_preview_backend.exceptions import Status400Error, StatusError
 from datasets_preview_backend.io.cache import get_splits_response
 from datasets_preview_backend.routes._utils import get_response
 
@@ -17,7 +18,9 @@ async def splits_endpoint(request: Request) -> Response:
 
     try:
         if not isinstance(dataset_name, str):
-            raise StatusError("Parameter 'dataset' is required", 400)
+            raise Status400Error("Parameter 'dataset' is required")
+        if dataset_name in DATASETS_BLOCKLIST:
+            raise Status400Error("this dataset is not supported for now.")
         splits_response, splits_error, status_code = get_splits_response(dataset_name)
         return get_response(splits_response or splits_error, status_code, MAX_AGE_LONG_SECONDS)
     except StatusError as err: