diff --git a/src/cdf/core/project.py b/src/cdf/core/project.py index fd9c7b5..8adfd29 100644 --- a/src/cdf/core/project.py +++ b/src/cdf/core/project.py @@ -551,17 +551,39 @@ def _hydrate_workspaces(cls, value: t.Any, info: pydantic.ValidationInfo): If the workspaces is a path, load the configuration from the path. """ if isinstance(value, str): - value = list(map(lambda s: s.strip(), value.split(","))) + # ws1; ws2; ws3 + value = list(map(lambda s: s.strip(), value.split(";"))) elif isinstance(value, dict): - value = [str(v["path"]) for v in value.values()] + # ws name : {ws config} + _buf = [] + for ws_name, ws_config in value.items(): + ws_config.setdefault("name", ws_name) + _buf.append(ws_config) + value = _buf if isinstance(value, list): - for i, maybe_path in enumerate(value): - if isinstance(maybe_path, str): - path = Path(info.data["path"]) / maybe_path + # [{ws1 config} | ws1 path, {ws2 config}] + for i, obj in enumerate(value): + if isinstance(obj, (str, Path)): + # ws1 path + # ensure ws path is absolute, if not, resolve it + # relative to the project path + obj_path = Path(obj) + if obj_path.is_absolute(): + path = obj_path + else: + path = Path(info.data["path"]) / obj + # load the configuration from the path config = _load_config(path) config["path"] = path value[i] = config - return value + elif isinstance(obj, dict): + # {ws1 config} + obj_path = Path(obj["path"]) + if not obj_path.is_absolute(): + obj["path"] = Path(info.data["path"]) / obj["path"] + if not (hasattr(value, "__iter__") and not isinstance(value, (str, bytes))): + raise ValueError("Invalid workspaces configuration, must be an iterable") + return tuple(value) @pydantic.model_validator(mode="after") def _validate_workspaces(self): diff --git a/src/cdf/core/specification/base.py b/src/cdf/core/specification/base.py index 498713c..18a6036 100644 --- a/src/cdf/core/specification/base.py +++ b/src/cdf/core/specification/base.py @@ -56,7 +56,7 @@ class BaseComponent( pydantic.Field( ..., 
default_factory=_gen_anon_name, - pattern=r"^[a-zA-Z0-9_-]+$", + pattern=r"^[a-zA-Z0-9_\-\/]+$", max_length=64, ), ] @@ -220,8 +220,8 @@ def path(self) -> Path: @pydantic.model_validator(mode="before") @classmethod - def _infer_leaf_path_validator(cls, values: t.Any) -> t.Any: - """Infer the leaf path from the name if component_path is not provided. + def _path_from_name_validator(cls, values: t.Any) -> t.Any: + """Infer the path from the name if component_path is not provided. Given a name, we apply certain heuristics to infer the path of the component if a path is not explicitly provided. The heuristics are as follows: @@ -242,27 +242,24 @@ def _infer_leaf_path_validator(cls, values: t.Any) -> t.Any: ext = getattr(cls._extension, "default") typ = getattr(cls._folder, "default")[:-1] if name.endswith(f"_{typ}"): - leaf_path = f"{name}.{ext}" + p = f"{name}.{ext}" else: - leaf_path = f"{name}_{typ}.{ext}" - values.setdefault("path", leaf_path) + p = f"{name}_{typ}.{ext}" + values.setdefault("path", p) return values @pydantic.field_validator("name", mode="before") @classmethod - def _physical_name_validator(cls, name: t.Any) -> t.Any: - """Canonicalizes names which are pathlike. - - So a name like `some/path/to/file.py` would become `some_path_to_file`. 
- """ + def _component_name_validator(cls, name: t.Any) -> t.Any: + """Strip the extension from the name.""" if isinstance(name, str): - return name.rsplit(".", 1)[0].replace(os.sep, "_") + return name.rsplit(".", 1)[0] return name @pydantic.field_validator("component_path", mode="before") @classmethod def _component_path_validator(cls, component_path: t.Any) -> Path: - """Ensure the component path is a Path and that is a child of the expected folder.""" + """Ensure the component path is a Path and that it is a child of the expected folder.""" path = Path(component_path) if path.is_absolute(): raise ValueError("Component path must be a relative path.") diff --git a/tests/core/test_project.py b/tests/core/test_project.py index 1f70315..a8d80e2 100644 --- a/tests/core/test_project.py +++ b/tests/core/test_project.py @@ -1,5 +1,7 @@ """Tests for the core.project module.""" +from pathlib import Path + import dlt import pytest @@ -83,3 +85,94 @@ def test_round_trip_serialization(project: Project): roundtrip = Project.model_validate(obj) assert roundtrip == project assert roundtrip.is_newer_than(project) + assert ( + project["workspaces.alex.scripts.nested/hello"] + == roundtrip["workspaces.alex.scripts.nested/hello"] + ) + + +@pytest.fixture +def python_project(): + city_spec = { + "path": Path("pipelines/us_cities_pipeline.py"), + "cron_string": "@daily", + "description": "Get US city data", + "metrics": { + "*": [ + { + "name": "cdf_builtin_metrics_count", + "description": "Counts the number of items in a dataset", + "entrypoint": "cdf.builtin.metrics:count", + }, + { + "name": "cdf_builtin_metrics_max_value", + "description": "Returns the maximum value of a key in a dataset", + "entrypoint": "cdf.builtin.metrics:max_value", + "options": {"key": "zip_code"}, + }, + ] + }, + "filters": {}, + "dataset_name": "test_city", + "options": { + "progress": None, + "full_refresh": False, + "loader_file_format": "insert_values", + "runtime": {"dlthub_telemetry": False}, + }, + } + 
dota_spec = { + "cron_string": "@daily", + "name": "dota2", + "description": "Dota2 is a Massive Online Battle Arena game based on Warcraft.", + "path": Path("pipelines/dota2_pipeline.py"), + } + local_spec = { + "name": "local", + "description": "No description provided.", + "path": Path("sinks/local_sink.py"), + } + httpbin_spec = { + "cron_string": "@daily", + "name": "httpbin", + "description": "A publisher that pushes data to httpbin.org", + "path": Path("publishers/httpbin_publisher.py"), + "depends_on": ["mart.zips"], + } + hello_spec = { + "cron_string": "@daily", + "name": "hello", + "description": "No description provided.", + "path": Path("scripts/hello_script.py"), + } + return Project.model_validate( + { + "path": Path("examples/sandbox").resolve(), + "name": "data-platform", + "version": "0.2.0", + "workspaces": { + "datateam": { + "path": Path("examples/sandbox/alex").resolve(), + "pipelines": {"cities": city_spec, "dota": dota_spec}, + "sinks": {"local": local_spec}, + "publishers": {"httpbin": httpbin_spec}, + "scripts": {"hello": hello_spec}, + } + }, + "filesystem": {"uri": "file://_storage", "options": {}}, + "feature_flags": { + "provider": "filesystem", + "filename": "@jinja dev_flags_{{ 1 + 1}}.json", + }, + } + ) + + +def test_custom_project(python_project: Project): + """Test creating a project programmatically. + + This project has a custom structure and is not loaded from a file. Components + are still ultimately based on python files, however the configuration wrapping + these components is done in code which offers more flexibility. + """ + assert python_project.name == "data-platform"