From 0e91c499780a888876f690f7160918573b3b5ce9 Mon Sep 17 00:00:00 2001
From: z3z1ma
Date: Mon, 19 Aug 2024 18:48:41 -0700
Subject: [PATCH] chore: clean up old code

---
 examples/README.md | 1 -
 examples/sandbox/alex/cdf.yml | 41 -
 examples/sandbox/alex/config.py | 38 -
 examples/sandbox/alex/models/zips.sql | 8 -
 .../alex/notebooks/hello_world_notebook.ipynb | 78 --
 .../sandbox/alex/pipelines/dota2_pipeline.py | 132 --
 .../alex/pipelines/hackernews_pipeline.py | 141 --
 .../sandbox/alex/pipelines/test1/__init__.py | 0
 .../sandbox/alex/pipelines/test1/chore.py | 2 -
 .../alex/pipelines/us_cities_pipeline.py | 35 -
 .../alex/publishers/httpbin_publisher.py | 19 -
 examples/sandbox/alex/schema.yaml | 12 -
 examples/sandbox/alex/scripts/hello_script.py | 1 -
 .../alex/scripts/nested/hello_script.py | 5 -
 examples/sandbox/alex/sinks/fs_sink.py | 13 -
 examples/sandbox/alex/sinks/local_sink.py | 32 -
 examples/sandbox/cdf.yml | 23 -
 src/cdf/cli.py | 1211 -----------------
 src/cdf/integrations/sqlmesh.py | 147 --
 src/cdf/legacy/__init__.py | 0
 src/cdf/legacy/config.py | 151 --
 src/cdf/legacy/constants.py | 13 -
 src/cdf/legacy/context.py | 47 -
 src/cdf/legacy/filesystem.py | 106 --
 src/cdf/legacy/logger.py | 169 ---
 src/cdf/legacy/project.py | 989 --------------
 src/cdf/legacy/runtime/__init__.py | 12 -
 src/cdf/legacy/runtime/common.py | 43 -
 src/cdf/legacy/runtime/notebook.py | 109 --
 src/cdf/legacy/runtime/pipeline.py | 488 -------
 src/cdf/legacy/runtime/publisher.py | 83 --
 src/cdf/legacy/runtime/script.py | 65 -
 src/cdf/legacy/specification/__init__.py | 24 -
 src/cdf/legacy/specification/base.py | 523 -------
 src/cdf/legacy/specification/notebook.py | 45 -
 src/cdf/legacy/specification/pipeline.py | 223 ---
 src/cdf/legacy/specification/publisher.py | 16 -
 src/cdf/legacy/specification/script.py | 11 -
 src/cdf/legacy/specification/sink.py | 70 -
 src/cdf/legacy/state.py | 407 ------
 src/cdf/legacy/utility/__init__.py | 25 -
 src/cdf/legacy/utility/file.py | 75 -
 tests/legacy/specification/test_notebook.py | 0
 tests/legacy/specification/test_pipeline.py | 0
 tests/legacy/specification/test_publisher.py | 0
 tests/legacy/specification/test_script.py | 0
 tests/legacy/specification/test_sink.py | 0
 tests/legacy/test_context.py | 0
 tests/legacy/test_filesystem.py | 0
 tests/legacy/test_packaging.py | 0
 tests/legacy/test_project.py | 268 ----
 tests/legacy/utility/test_file_.py | 0
 tests/test_cli.py | 12 -
 53 files changed, 5913 deletions(-)
 delete mode 100644 examples/README.md
 delete mode 100644 examples/sandbox/alex/cdf.yml
 delete mode 100644 examples/sandbox/alex/config.py
 delete mode 100644 examples/sandbox/alex/models/zips.sql
 delete mode 100644 examples/sandbox/alex/notebooks/hello_world_notebook.ipynb
 delete mode 100644 examples/sandbox/alex/pipelines/dota2_pipeline.py
 delete mode 100644 examples/sandbox/alex/pipelines/hackernews_pipeline.py
 delete mode 100644 examples/sandbox/alex/pipelines/test1/__init__.py
 delete mode 100644 examples/sandbox/alex/pipelines/test1/chore.py
 delete mode 100644 examples/sandbox/alex/pipelines/us_cities_pipeline.py
 delete mode 100644 examples/sandbox/alex/publishers/httpbin_publisher.py
 delete mode 100644 examples/sandbox/alex/schema.yaml
 delete mode 100644 examples/sandbox/alex/scripts/hello_script.py
 delete mode 100644 examples/sandbox/alex/scripts/nested/hello_script.py
 delete mode 100644 examples/sandbox/alex/sinks/fs_sink.py
 delete mode 100644 examples/sandbox/alex/sinks/local_sink.py
 delete mode 100644 examples/sandbox/cdf.yml
 delete mode 100644 src/cdf/cli.py
 delete mode 100644 src/cdf/integrations/sqlmesh.py
 delete mode 100644 src/cdf/legacy/__init__.py
 delete mode 100644 src/cdf/legacy/config.py
 delete mode 100644 src/cdf/legacy/constants.py
 delete mode 100644 src/cdf/legacy/context.py
 delete mode 100644 src/cdf/legacy/filesystem.py
 delete mode 100644 src/cdf/legacy/logger.py
 delete mode 100644 src/cdf/legacy/project.py
 delete mode 100644 src/cdf/legacy/runtime/__init__.py
 delete mode 100644 src/cdf/legacy/runtime/common.py
 delete mode 100644 src/cdf/legacy/runtime/notebook.py
 delete mode 100644 src/cdf/legacy/runtime/pipeline.py
 delete mode 100644 src/cdf/legacy/runtime/publisher.py
 delete mode 100644 src/cdf/legacy/runtime/script.py
 delete mode 100644 src/cdf/legacy/specification/__init__.py
 delete mode 100644 src/cdf/legacy/specification/base.py
 delete mode 100644 src/cdf/legacy/specification/notebook.py
 delete mode 100644 src/cdf/legacy/specification/pipeline.py
 delete mode 100644 src/cdf/legacy/specification/publisher.py
 delete mode 100644 src/cdf/legacy/specification/script.py
 delete mode 100644 src/cdf/legacy/specification/sink.py
 delete mode 100644 src/cdf/legacy/state.py
 delete mode 100644 src/cdf/legacy/utility/__init__.py
 delete mode 100644 src/cdf/legacy/utility/file.py
 delete mode 100644 tests/legacy/specification/test_notebook.py
 delete mode 100644 tests/legacy/specification/test_pipeline.py
 delete mode 100644 tests/legacy/specification/test_publisher.py
 delete mode 100644 tests/legacy/specification/test_script.py
 delete mode 100644 tests/legacy/specification/test_sink.py
 delete mode 100644 tests/legacy/test_context.py
 delete mode 100644 tests/legacy/test_filesystem.py
 delete mode 100644 tests/legacy/test_packaging.py
 delete mode 100644 tests/legacy/test_project.py
 delete mode 100644 tests/legacy/utility/test_file_.py
 delete mode 100644 tests/test_cli.py

diff --git a/examples/README.md b/examples/README.md
deleted file mode 100644
index 72c0fa4..0000000
--- a/examples/README.md
+++ /dev/null
@@ -1 +0,0 @@
-These are example projects that use cdf.
diff --git a/examples/sandbox/alex/cdf.yml b/examples/sandbox/alex/cdf.yml deleted file mode 100644 index a211548..0000000 --- a/examples/sandbox/alex/cdf.yml +++ /dev/null @@ -1,41 +0,0 @@ -default: - name: alex - destination: - replace_strategy: insert-from-staging - pipelines: - # The pipeline name is based on the dict key by default, metadata follows in the body - us_cities: - description: Get US city data - dataset_name: us_cities_v0_{version} - version: 1 - metrics: - "*": - - entrypoint: cdf.builtin.metrics:count - - entrypoint: cdf.builtin.metrics:max_value - options: - key: zip_code - options: - progress: ~ - full_refresh: false - loader_file_format: insert_values - load: - delete_completed_jobs: true - runtime: - dlthub_telemetry: false - # Heuristics can populate enough information such that the below is the minimum definition - dota2: {} - hackernews: {} - sinks: - local: {} - fs_sink.py: {} - publishers: - httpbin: - depends_on: - - mart.zips - scripts: - hello: {} - nested/hello: {} - notebooks: - hello_world: - storage_path: reports/tests1/{name}/{timestamp}{ext} - gc_duration: 0 diff --git a/examples/sandbox/alex/config.py b/examples/sandbox/alex/config.py deleted file mode 100644 index feac8f1..0000000 --- a/examples/sandbox/alex/config.py +++ /dev/null @@ -1,38 +0,0 @@ -import cdf -import sqlmesh -from cdf.integrations.sqlmesh import CDFNotificationTarget - -workspace = cdf.get_workspace(__file__).unwrap() - -config = sqlmesh.Config.model_validate( - dict( - gateways=dict(workspace.get_transform_gateways()), - project=workspace.name, - default_gateway="local", - model_defaults={ - "dialect": "duckdb", - "start": "2020-01-01", - }, - plan={ - "auto_categorize_changes": { - "sql": "full", - "seed": "semi", - "external": "semi", - } - }, - # username=getpass.getuser(), - physical_schema_override={}, - format={ - "normalize": True, - "pad": 4, - "indent": 4, - "normalize_functions": "lower", - "leading_comma": False, - "max_text_width": 120, - "append_newline": True, - }, - ui={"format_on_save": True}, - ) -) - -config.notification_targets = [CDFNotificationTarget(workspace=workspace)] diff --git a/examples/sandbox/alex/models/zips.sql b/examples/sandbox/alex/models/zips.sql deleted file mode 100644 index ea4727f..0000000 --- a/examples/sandbox/alex/models/zips.sql +++ /dev/null @@ -1,8 +0,0 @@ -/* This is a simple model that selects distinct zip codes from the cities table */ -MODEL ( - name mart.zips -); - -SELECT DISTINCT - zip_code -FROM us_cities_v0_1.cities diff --git a/examples/sandbox/alex/notebooks/hello_world_notebook.ipynb b/examples/sandbox/alex/notebooks/hello_world_notebook.ipynb deleted file mode 100644 index 6638e40..0000000 --- a/examples/sandbox/alex/notebooks/hello_world_notebook.ipynb +++ /dev/null @@ -1,78 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "4a6c5cd3-4334-47e7-b512-34dc238626d1", - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "sys.executable" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c3810128-bf25-4e8c-99a6-2034101cd5c9", - "metadata": {}, - "outputs": [], - "source": [ - "sys.path" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8012b40d-1bf1-49b7-88b1-b3a860bebfb2", - "metadata": {}, - "outputs": [], - "source": [ - "import cdf\n", - "import os" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5dc40c68-eaba-49df-8fe0-6370e7d5130e", - "metadata": {}, - "outputs": [], - "source": [ - "os.getcwd()" - ] - }, - 
{ - "cell_type": "code", - "execution_count": null, - "id": "c124f374-cb76-49aa-84aa-0e142e86d7df", - "metadata": {}, - "outputs": [], - "source": [ - "p = cdf.find_nearest().unwrap()\n", - "p.name" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/examples/sandbox/alex/pipelines/dota2_pipeline.py b/examples/sandbox/alex/pipelines/dota2_pipeline.py deleted file mode 100644 index 5922955..0000000 --- a/examples/sandbox/alex/pipelines/dota2_pipeline.py +++ /dev/null @@ -1,132 +0,0 @@ -"""Dota2 is a Massive Online Battle Arena game based on Warcraft.""" - -import dlt -import dlt.sources.helpers.requests as requests - -import cdf - - -@dlt.resource(write_disposition="merge", primary_key="account_id") -def pro_players(): - """Get list of pro players""" - yield requests.get("https://api.opendota.com/api/proPlayers").json() - - -@dlt.resource(write_disposition="merge", primary_key="match_id") -def pro_matches(): - """Get list of pro matches""" - yield requests.get("https://api.opendota.com/api/proMatches").json() - - -@dlt.resource(write_disposition="replace") -def distribution(): - """Distributions of MMR data by bracket and country""" - yield requests.get("https://api.opendota.com/api/distributions").json() - - -@dlt.resource(write_disposition="replace") -def rankings(): - """Top players by hero""" - yield requests.get("https://api.opendota.com/api/rankings").json() - - -@dlt.resource(write_disposition="replace") -def benchmarks(): - """Benchmarks of average stat values for a hero""" - yield requests.get("https://api.opendota.com/api/benchmarks").json() - - -@dlt.resource(write_disposition="replace") -def heroes(): - """Get hero data""" - yield requests.get("https://api.opendota.com/api/heroes").json() - - -@dlt.resource(write_disposition="replace") -def hero_stats(): - """Get stats about hero performance in recent matches""" - yield requests.get("https://api.opendota.com/api/heroStats").json() - - -@dlt.resource(write_disposition="replace") -def leagues(): - """Get league data""" - yield requests.get("https://api.opendota.com/api/leagues").json() - - -@dlt.resource(write_disposition="replace") -def teams(): - """Get team data""" - yield requests.get("https://api.opendota.com/api/teams").json() - - -@dlt.resource(write_disposition="replace") -def constants(): - """Download all constants from odota/dotaconstants""" - - for table in ( - "game_mode", - "item_colors", - "lobby_type", - "order_types", - "patch", - "permanent_buffs", - "player_colors", - "skillshots", - "xp_level", - ): - raw_data = requests.get( - f"https://raw.githubusercontent.com/odota/dotaconstants/master/json/{table}.json" - ).json() - - if table in ("game_mode", "lobby_type"): - data = list(raw_data.values()) - elif table in ( - "item_colors", - "order_types", - "permanent_buffs", - "player_colors", - "skillshots", - ): - data = [{"id": k, "value": v} for k, v in raw_data.items()] - elif table == "xp_level": - data = [{"level": i, "xp": v} for i, v in enumerate(raw_data)] - else: - data = raw_data - - yield dlt.mark.with_table_name(data, table) - - -@dlt.source -def dota2_stats(): - """This source contains Dota 2 data from OpenDota 
API and repository""" - return ( - pro_players(), - pro_matches(), - distribution(), - rankings(), - benchmarks(), - heroes(), - hero_stats(), - leagues(), - teams(), - constants(), - ) - - -if cdf.is_main(__name__): - # Define a pipeline - pipe = cdf.pipeline() - - # Instantiate the source - source = dota2_stats() - - # Run the pipeline - pipe.run( - source.with_resources( - "pro_players", - "pro_matches", - "teams", - "heroes", - ) - ) diff --git a/examples/sandbox/alex/pipelines/hackernews_pipeline.py b/examples/sandbox/alex/pipelines/hackernews_pipeline.py deleted file mode 100644 index 6e14685..0000000 --- a/examples/sandbox/alex/pipelines/hackernews_pipeline.py +++ /dev/null @@ -1,141 +0,0 @@ -import time -import typing as t -from datetime import datetime - -import dlt -from dlt.sources.helpers import requests - -import cdf - -URL = "https://hn.algolia.com/api/v1/search_by_date" - - -@dlt.source(name="hackernews") -def hn_search( - keywords: t.List[str] = dlt.config.value, - start_date: datetime = dlt.config.value, - end_date: datetime = datetime.today(), - text: str = "any", - daily_load: bool = False, -): - """Source method for the Algolia Hacker News Search API: https://hn.algolia.com/api - - Args: - keywords: list of keywords for which the data needs to be loaded - start_date: start date in datetime or "yyyy-mm-dd" format - end_date: end date in datetime or "yyyy-mm-dd" format - text: possible values: "story","comment". For any other value, everything is loaded. - daily_load: loads data in daily intervals when set to True (default: weekly) - """ - - # Read start date as string or datetime and convert it to UNIX timestamp - if isinstance(start_date, str): - start_timestamp = int( - time.mktime(datetime.strptime(start_date, "%Y-%m-%d").timetuple()) - ) - else: - start_timestamp = int(time.mktime(start_date.timetuple())) # type: ignore - - # Read end date as string or datetime and convert it to UNIX timestamp - if isinstance(end_date, str): - end_timestamp = int( - time.mktime(datetime.strptime(end_date, "%Y-%m-%d").timetuple()) - ) - else: - end_timestamp = int(time.mktime(end_date.timetuple())) - - today = int(time.mktime(datetime.today().timetuple())) - - # Don't load the data for dates after the current date - end_timestamp = min(today, end_timestamp) - - # Ensure that the input start date is smaller than the input end date - if start_timestamp > end_timestamp: - raise ValueError(f"{start_date=} is larger than {end_date=}") - - # Specify text = "comment" or text="story" when calling the function - # to load only comments or stories - if text in ["comment", "story"]: - tags = text - # Pass any other value to load everything (default behaviour) - else: - tags = "(story,comment)" - - return keyword_hits(keywords, start_timestamp, end_timestamp, tags, daily_load) - - -@dlt.resource(name="keyword_hits", write_disposition="append") -def keyword_hits( - keywords, - start_timestamp, - end_timestamp, - tags, - daily_load=False, -): - """This methods makes a call to the Algolia Hacker News and returns all the hits corresponding the the input keywords - - Since the API response is limited to 1000 hits, - a separate call is made for each keyword for each week between the start and end dates - - If daily_load=True, then a single call is made for each keyword for the previous day - - Args: - keywords: list of keywords for which the data needs to be loaded - start_timestamp: UNIX timestamp for the start date - end_timestamp: UNIX timestamp for the end date - tags: parameter for the API call to 
specify "story", "comment" or "(story,comment)" - daily_load: loads data in daily intervals when set to True (default: weekly) - """ - - def _generate_hits(keyword, batch_start_date, batch_end_date, tags): - """This function makes the API call and returns all the hits for the input parameters""" - params = { - "query": f'"{keyword}"', - "tags": f"{tags}", - "numericFilters": f"""created_at_i>={batch_start_date},created_at_i<{batch_end_date}""", - "hitsPerPage": 1000, - } - response = requests.get(URL, params=params) - response.raise_for_status() - - return response.json()["hits"] - - time_delta = ( - 86400 if daily_load else 604800 - ) # The length of a day/week in UNIX timestamp - - # Iterate across all keywords - for keyword in keywords: - batch_start_date = start_timestamp - batch_end_date = batch_start_date + time_delta - - # Iterate across each week between the start and end dates - while batch_end_date < end_timestamp + time_delta: - batch_end_date = min( - batch_end_date, end_timestamp - ) # Prevent loading data ahead of the end date - # The response json - data = _generate_hits(keyword, batch_start_date, batch_end_date, tags) - - for hits in data: - yield { - key: value - for (key, value) in hits.items() - if not key.startswith( - "_" - ) # Filtering down to relevant fields from the response json - } - - batch_start_date = batch_end_date - batch_end_date += time_delta - - -if cdf.is_main(__name__): - # Create a source - source = hn_search(keywords=["rust"]) - - # Create the externally managed pipeline - pipeline = cdf.pipeline() - - # Run the pipeline - pipeline.run(source) diff --git a/examples/sandbox/alex/pipelines/test1/__init__.py b/examples/sandbox/alex/pipelines/test1/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/examples/sandbox/alex/pipelines/test1/chore.py b/examples/sandbox/alex/pipelines/test1/chore.py deleted file mode 100644 index b0e9788..0000000 --- a/examples/sandbox/alex/pipelines/test1/chore.py +++ /dev/null @@ -1,2 +0,0 @@ -def foo(): - _ = 1 diff --git a/examples/sandbox/alex/pipelines/us_cities_pipeline.py b/examples/sandbox/alex/pipelines/us_cities_pipeline.py deleted file mode 100644 index c4c85d4..0000000 --- a/examples/sandbox/alex/pipelines/us_cities_pipeline.py +++ /dev/null @@ -1,35 +0,0 @@ -""" -PIPELINE ( - name us_cities, - description 'Load US cities', - cron '0 0 * * *', -); -""" - -import dlt -import requests - -import cdf - -# A relative import from the workspace -from .test1.chore import foo - - -@dlt.resource(write_disposition="append", standalone=True) -def us_cities(): - """Load US cities""" - foo() # Call a function from a relative import - yield requests.get( - "https://raw.githubusercontent.com/millbj92/US-Zip-Codes-JSON/master/USCities.json" - ).json() - - -if cdf.is_main(__name__): - # Define a pipeline - pipeline = cdf.pipeline() - - # Run the pipeline - load_info = pipeline.run(us_cities(), table_name="cities", destination="duckdb") - - # Print the load information - print(load_info) diff --git a/examples/sandbox/alex/publishers/httpbin_publisher.py b/examples/sandbox/alex/publishers/httpbin_publisher.py deleted file mode 100644 index 8af6e12..0000000 --- a/examples/sandbox/alex/publishers/httpbin_publisher.py +++ /dev/null @@ -1,19 +0,0 @@ -"""A publisher that pushes data to httpbin.org""" - -import requests - -import cdf - -w = cdf.get_workspace(__file__).unwrap() -context = w.get_transform_context("local") - -df = context.fetchdf("SELECT * FROM mart.zips") - -zip_ = df.iloc[0, 0] - -r = requests.post( - 
"https://httpbin.org/post", - data={"zip": zip_}, -) -r.raise_for_status() -print(r.json()) diff --git a/examples/sandbox/alex/schema.yaml b/examples/sandbox/alex/schema.yaml deleted file mode 100644 index 6368fc5..0000000 --- a/examples/sandbox/alex/schema.yaml +++ /dev/null @@ -1,12 +0,0 @@ -- name: '"cdf"."us_cities_v0_1"."cities"' - columns: - zip_code: BIGINT - latitude: DOUBLE - longitude: DOUBLE - city: TEXT - state: TEXT - county: TEXT - _dlt_load_id: TEXT - _dlt_id: TEXT - latitude__v_text: TEXT - longitude__v_text: TEXT diff --git a/examples/sandbox/alex/scripts/hello_script.py b/examples/sandbox/alex/scripts/hello_script.py deleted file mode 100644 index f7cf60e..0000000 --- a/examples/sandbox/alex/scripts/hello_script.py +++ /dev/null @@ -1 +0,0 @@ -print("Hello, world!") diff --git a/examples/sandbox/alex/scripts/nested/hello_script.py b/examples/sandbox/alex/scripts/nested/hello_script.py deleted file mode 100644 index a36964a..0000000 --- a/examples/sandbox/alex/scripts/nested/hello_script.py +++ /dev/null @@ -1,5 +0,0 @@ -import cdf - -w = cdf.get_workspace(__file__).unwrap() - -print(f"Hello, world from {w.name}!") diff --git a/examples/sandbox/alex/sinks/fs_sink.py b/examples/sandbox/alex/sinks/fs_sink.py deleted file mode 100644 index 7dd143a..0000000 --- a/examples/sandbox/alex/sinks/fs_sink.py +++ /dev/null @@ -1,13 +0,0 @@ -import dlt -from sqlmesh.core.config import GatewayConfig, parse_connection_config - -ingest = dlt.destinations.filesystem( - "file://_storage", layout="{table_name}/{load_id}.{file_id}.{ext}.gz" -) - -transform = GatewayConfig( - connection=parse_connection_config( - {"type": "duckdb", "database": "cdf.duckdb", "extensions": ["httpfs"]} - ), - state_schema="_cdf_state", -) diff --git a/examples/sandbox/alex/sinks/local_sink.py b/examples/sandbox/alex/sinks/local_sink.py deleted file mode 100644 index 65c38d2..0000000 --- a/examples/sandbox/alex/sinks/local_sink.py +++ /dev/null @@ -1,32 +0,0 @@ -import dlt -import cdf -import duckdb - - -p = ( - cdf.find_nearest(__file__) - .bind(lambda p: p.get_workspace("alex")) - .map(lambda w: w.path / "cdf.duckdb") - .unwrap() -) - -LOCALDB = str(p) - -conn = duckdb.connect(LOCALDB) -conn.install_extension("httpfs") -conn.load_extension("httpfs") -conn.close() - - -ingest = dlt.destinations.duckdb(LOCALDB) - -stage = dlt.destinations.filesystem( - "file://_storage", - layout="{table_name}/{load_id}.{file_id}.{ext}.gz", -) - -transform = dict( - connection=cdf.transform_connection( - "duckdb", database=LOCALDB, extensions=["httpfs"] - ) -) diff --git a/examples/sandbox/cdf.yml b/examples/sandbox/cdf.yml deleted file mode 100644 index dd6807c..0000000 --- a/examples/sandbox/cdf.yml +++ /dev/null @@ -1,23 +0,0 @@ -default: - name: cdf-example - version: 0.1.0 - workspaces: - - alex - filesystem: - uri: file://_storage - feature_flags: - provider: filesystem - filename: flags.json - state: - connection: - type: duckdb - database: cdf.duckdb - something: ok -prod: - filesystem: - provider: gcs - root: harness_analytics_staging/cdf_test_1 -dev: - feature_flags: - provider: filesystem - filename: "@jinja dev_flags_{{ 1 + 1}}.json" diff --git a/src/cdf/cli.py b/src/cdf/cli.py deleted file mode 100644 index 26cf1ca..0000000 --- a/src/cdf/cli.py +++ /dev/null @@ -1,1211 +0,0 @@ -"""CLI for cdf.""" - -import asyncio -import itertools -import json -import os -import subprocess -import sys -import tempfile -import typing as t -from enum import Enum -from pathlib import Path - -import dlt -import pydantic -import rich 
-import typer -from dlt.common.utils import update_dict_nested -from dlt.common.versioned_state import ( - generate_state_version_hash, - json_decode_state, - json_encode_state, -) - -import cdf.legacy.constants as c -import cdf.legacy.context as context -import cdf.legacy.logger as logger -from cdf.legacy.project import ( - FeatureFlagConfig, - FilesystemConfig, - Workspace, - load_project, -) -from cdf.legacy.runtime import ( - execute_notebook_specification, - execute_pipeline_specification, - execute_publisher_specification, - execute_script_specification, -) -from cdf.legacy.specification import ( - CoreSpecification, - NotebookSpecification, - PipelineSpecification, - PublisherSpecification, - ScriptSpecification, - SinkSpecification, -) -from cdf.proxy import run_mysql_proxy, run_plan_server -from cdf.types import M - -WorkspaceMonad = M.Result[Workspace, Exception] - -app = typer.Typer( - rich_markup_mode="rich", - epilog="Made with [red]♥[/red] by [bold]z3z1ma[/bold].", - add_completion=False, - no_args_is_help=True, -) - -console = rich.console.Console() - - -@app.callback() -def main( - ctx: typer.Context, - workspace: t.Annotated[ - t.Optional[str], - typer.Option( - ..., - "--workspace", - "-w", - help="The workspace to use.", - envvar="CDF_WORKSPACE", - ), - ] = None, - path: t.Annotated[ - Path, - typer.Option( - ..., "--path", "-p", help="Path to the project.", envvar="CDF_ROOT" - ), - ] = Path("."), - debug: t.Annotated[ - bool, typer.Option(..., "--debug", "-d", help="Enable debug mode.") - ] = False, - environment: t.Annotated[ - t.Optional[str], typer.Option(..., "--env", "-e", help="Environment to use.") - ] = None, - log_level: t.Annotated[ - t.Optional[str], - typer.Option( - ..., - "--log-level", - "-l", - help="The log level to use.", - envvar="LOG_LEVEL", # A common environment variable for log level - ), - ] = None, -) -> None: - """CDF (continuous data framework) is a framework for end to end data processing.""" - if environment: - os.environ[c.CDF_ENVIRONMENT] = environment - if log_level: - os.environ[c.CDF_LOG_LEVEL] = log_level.upper() - if debug: - context.debug_mode.set(True) - logger.configure(log_level.upper() if log_level else "INFO") - logger.apply_patches() - logger.warning( - "The CDF CLI command is DEPRECATED and will be removed in a future release. 
A local python file which imports cdf and exposes the Workspace.cli method is the way to interact with CDF" - ) - ctx.obj = load_project(path).bind(lambda p: p.get_workspace(workspace)) - - -@app.command(rich_help_panel="Project Management") -def init(ctx: typer.Context) -> None: - """:art: Initialize a new project.""" - typer.echo(ctx.obj) - - -@app.command(rich_help_panel="Project Management") -def index(ctx: typer.Context, hydrate: bool = False) -> None: - """:page_with_curl: Print an index of [b][blue]Pipelines[/blue], [red]Models[/red], [yellow]Publishers[/yellow][/b], and other components.""" - W = t.cast(WorkspaceMonad, ctx.obj).unwrap() - if not hydrate: - console.print("Pipelines", W.pipelines) - console.print("Sinks", W.sinks) - console.print("Publishers", W.publishers) - console.print("Scripts", W.scripts) - console.print("Notebooks", W.notebooks) - else: - console.print_json(W.model_dump_json()) - - -@app.command(rich_help_panel="Project Management") -def path(ctx: typer.Context) -> None: - """:office: Print the current workspace path.""" - typer.echo(ctx.obj.unwrap().path) - - -def _describe( - *displayables: t.Tuple[str, t.Tuple[CoreSpecification, ...]], diag: str = "" -): - for color, components in displayables: - for component in components: - doc = "\n".join( - map(lambda ln: "> " + ln, component.description.splitlines()) - ) - console.print( - f"[{color}]{type(component).__name__}[/{color}]: [b]{component.name}[/b]" - ) - console.print(f"[dim]{doc}[/dim]\n") - if diag: - console.print(f"[yellow]{diag}[/yellow]\n") - - -@app.command(rich_help_panel="Core") -def pipeline( - ctx: typer.Context, - pipeline_to_sink: t.Annotated[ - t.Optional[str], - typer.Argument(help="The pipeline and sink separated by a colon."), - ] = None, - select: t.List[str] = typer.Option( - ..., - "-s", - "--select", - default_factory=lambda: [], - help="Glob pattern for resources to run. Can be specified multiple times.", - ), - exclude: t.List[str] = typer.Option( - ..., - "-x", - "--exclude", - default_factory=lambda: [], - help="Glob pattern for resources to exclude. Can be specified multiple times.", - ), - force_replace: t.Annotated[ - bool, - typer.Option( - ..., - "-F", - "--force-replace", - help="Force the write disposition to replace ignoring state. Useful to force a reload of incremental resources.", - ), - ] = False, - no_stage: t.Annotated[ - bool, - typer.Option( - ..., - "--no-stage", - help="Do not stage the data in the staging destination of the sink even if defined.", - ), - ] = False, -) -> t.Any: - """:inbox_tray: Ingest data from a [b blue]Pipeline[/b blue] into a data store where it can be [b red]Transformed[/b red]. - - \f - Args: - ctx: The CLI context. - pipeline_to_sink: The pipeline and sink separated by a colon. - select: The resources to ingest as a sequence of glob patterns. - exclude: The resources to exclude as a sequence of glob patterns. - force_replace: Whether to force replace the write disposition. - no_stage: Allows selective disabling of intermediate staging even if configured in sink. 
- """ - W = t.cast(WorkspaceMonad, ctx.obj).unwrap() - if pipeline_to_sink is None: - return _describe( - ("blue", W.pipelines), - ("violet", W.sinks), - diag="To ingest data, use the `pipeline` command with the pipeline:sink combination.", - ) - source, destination = pipeline_to_sink.split(":", 1) - return ( - W.get_pipeline_spec(source) - .bind( - lambda pipe: execute_pipeline_specification( - pipe, - W.get_sink_spec(destination).unwrap_or((destination, None)), - select=select, - exclude=exclude, - force_replace=force_replace, - enable_stage=(not no_stage), - ) - ) - .unwrap() - ) - - -@app.command(rich_help_panel="Develop") -def discover( - ctx: typer.Context, - pipeline: t.Annotated[ - t.Optional[str], - typer.Argument(help="The pipeline in which to discover resources."), - ] = None, - no_quiet: t.Annotated[ - bool, - typer.Option( - help="Pipeline stdout is suppressed by default, this disables that." - ), - ] = False, -) -> None: - """:mag: Dry run a [b blue]Pipeline[/b blue] and enumerates the discovered resources. - - \f - Args: - ctx: The CLI context. - pipeline: The pipeline in which to discover resources. - no_quiet: Whether to suppress the pipeline stdout. - """ - W = t.cast(WorkspaceMonad, ctx.obj).unwrap() - if pipeline is None: - return _describe( - ("blue", W.pipelines), - diag="To discover resources, use the `discover` command with the pipeline name.", - ) - for i, source in enumerate( - M.ok(W) - .bind(lambda w: w.get_pipeline_spec(pipeline)) - .bind( - lambda spec: execute_pipeline_specification( - spec, "dummy", dry_run=True, quiet=not no_quiet - ) - ) - .map(lambda rv: rv.pipeline.tracked_sources) - .unwrap() - ): - console.print(f"{i}: {source.name}") - for j, resource in enumerate(source.resources.values(), 1): - console.print(f"{i}.{j}: {resource.name} (enabled: {resource.selected})") - - -@app.command(rich_help_panel="Develop") -def head( - ctx: typer.Context, - pipeline: t.Annotated[ - t.Optional[str], typer.Argument(help="The pipeline to inspect.") - ] = None, - resource: t.Annotated[ - t.Optional[str], typer.Argument(help="The resource to inspect.") - ] = None, - n: t.Annotated[int, typer.Option("-n", "--rows")] = 5, -) -> None: - """:wrench: Prints the first N rows of a [b green]Resource[/b green] within a [b blue]pipeline[/b blue]. Defaults to [cyan]5[/cyan]. - - This is useful for quickly inspecting data :detective: and verifying that it is coming over the wire correctly. - - \f - Args: - ctx: The CLI context. - pipeline: The pipeline to inspect. - resource: The resource to inspect. - n: The number of rows to print. - - Raises: - typer.BadParameter: If the resource is not found in the pipeline. 
- """ - W = t.cast(WorkspaceMonad, ctx.obj).unwrap() - if pipeline is None: - return _describe( - ("blue", W.pipelines), - diag="To inspect a data pipeline, use the `head` command with the pipeline name.", - ) - resource_iter = filter( - lambda r: r.name == resource or resource is None, - ( - resource - for source in M.ok(W) - .bind(lambda w: w.get_pipeline_spec(pipeline)) - .bind( - lambda spec: execute_pipeline_specification( - spec, "dummy", dry_run=True, quiet=True - ) - ) - .map(lambda rv: rv.pipeline.tracked_sources) - .unwrap() - for resource in source.resources.values() - ), - ) - if resource is None: - console.print("[b green]Resources[/b green]:") - for r in resource_iter: - console.print(f"- {r.name}") - console.print( - f"\n[yellow]To inspect a resource, use `cdf head {pipeline} [cyan][/cyan]`[/yellow].\n" - ) - return - target = next(resource_iter, None) - if target is None: - raise typer.BadParameter( - f"Resource {resource} not found in pipeline {pipeline}.", - param_hint="resource", - ) - list( - map( - lambda row: console.print(row[1]), - itertools.takewhile(lambda row: row[0] < n, enumerate(target)), - ) - ) - - -@app.command(rich_help_panel="Core") -def publish( - ctx: typer.Context, - sink_to_publisher: t.Annotated[ - t.Optional[str], - typer.Argument(help="The sink and publisher separated by a colon."), - ] = None, - skip_verification: t.Annotated[ - bool, - typer.Option( - help="Skip the verification of the publisher dependencies.", - ), - ] = False, -) -> t.Any: - """:outbox_tray: [b yellow]Publish[/b yellow] data from a data store to an [violet]External[/violet] system. - - \f - Args: - ctx: The CLI context. - sink_to_publisher: The sink and publisher separated by a colon. - skip_verification: Whether to skip the verification of the publisher dependencies. - """ - W = t.cast(WorkspaceMonad, ctx.obj).unwrap() - if sink_to_publisher is None: - return _describe( - ("violet", W.sinks), - ("yellow", W.publishers), - diag="To publish data, use the `publish` command with the sink:publisher combination.", - ) - source, publisher = sink_to_publisher.split(":", 1) - return ( - W.get_publisher_spec(publisher) - .bind( - lambda p: execute_publisher_specification( - p, W.get_transform_context(source), skip_verification - ) - ) - .unwrap() - ) - - -@app.command(rich_help_panel="Core") -def script( - ctx: typer.Context, - script: t.Annotated[ - t.Optional[str], typer.Argument(help="The script to execute.") - ] = None, - quiet: t.Annotated[bool, typer.Option(help="Suppress the script stdout.")] = False, -) -> t.Any: - """:hammer: Execute a [b yellow]Script[/b yellow] within the context of the current workspace. - - \f - Args: - ctx: The CLI context. - script: The script to execute. - quiet: Whether to suppress the script stdout. 
- """ - W = t.cast(WorkspaceMonad, ctx.obj).unwrap() - if script is None: - return _describe( - ("yellow", W.scripts), - diag="To execute a script, use the `script` command with the script name.", - ) - return ( - M.ok(W) - .bind(lambda w: w.get_script_spec(script)) - .bind(lambda s: execute_script_specification(s, capture_stdout=quiet)) - .unwrap() - ) - - -@app.command(rich_help_panel="Core") -def notebook( - ctx: typer.Context, - notebook: t.Annotated[ - t.Optional[str], typer.Argument(help="The notebook to execute.") - ] = None, - params: t.Annotated[ - str, - typer.Option( - ..., - help="The parameters to pass to the notebook as a json formatted string.", - ), - ] = "{}", -) -> t.Any: - """:notebook: Execute a [b yellow]Notebook[/b yellow] within the context of the current workspace. - - \f - Args: - ctx: The CLI context. - notebook: The notebook to execute. - params: The parameters to pass to the notebook as a json formatted string. - """ - W = t.cast(WorkspaceMonad, ctx.obj).unwrap() - if notebook is None: - return _describe( - ("yellow", W.notebooks), - diag="To execute a notebook, use the `notebook` command with the notebook name.", - ) - return ( - M.ok(W) - .bind(lambda w: w.get_notebook_spec(notebook)) - .bind(lambda s: execute_notebook_specification(s, **json.loads(params))) - .unwrap() - ) - - -@app.command( - rich_help_panel="Utilities", - context_settings={"allow_extra_args": True, "ignore_unknown_options": True}, -) -def jupyter_lab(ctx: typer.Context) -> None: - """:star2: Start a Jupyter Lab server in the context of a workspace.""" - t.cast(WorkspaceMonad, ctx.obj).map( - lambda w: subprocess.run( - ["jupyter", "lab", *ctx.args], - cwd=w.path, - check=False, - env={ - **os.environ, - "PYTHONPATH": ":".join( - (str(w.path.resolve()), *sys.path, str(w.path.parent.resolve())) - ), - }, - ) - ) - - -class _SpecType(str, Enum): - """An enum of specs which can be described via the `spec` command.""" - - pipeline = "pipeline" - publisher = "publisher" - script = "script" - notebook = "notebook" - sink = "sink" - feature_flags = "feature_flags" - filesystem = "filesystem" - - -@app.command(rich_help_panel="Develop") -def spec(name: _SpecType, json_schema: bool = False) -> None: - """:blue_book: Print the fields for a given spec type. - - \f - Args: - name: The name of the spec to print. - json_schema: Whether to print the JSON schema for the spec. - """ - - def _print_spec(spec: t.Type[pydantic.BaseModel]) -> None: - console.print(f"[bold]{spec.__name__}:[/bold]") - for name, info in spec.model_fields.items(): - typ = getattr(info.annotation, "__name__", info.annotation) - desc = info.description or "No description provided." 
- d = f"- [blue]{name}[/blue] ({typ!s}): {desc}" - if "Undefined" not in str(info.default): - d += f" Defaults to `{info.default}`)" - console.print(d) - console.print() - - def _print(s: t.Type[pydantic.BaseModel]) -> None: - console.print(s.model_json_schema()) if json_schema else _print_spec(s) - - if name == _SpecType.pipeline: - _print(PipelineSpecification) - elif name == _SpecType.publisher: - _print(PublisherSpecification) - elif name == _SpecType.script: - _print(ScriptSpecification) - elif name == _SpecType.notebook: - _print(NotebookSpecification) - elif name == _SpecType.sink: - _print(SinkSpecification) - elif name == _SpecType.feature_flags: - for spec in t.get_args(FeatureFlagConfig): - _print(spec) - elif name == _SpecType.filesystem: - _print(FilesystemConfig) - else: - raise ValueError(f"Invalid spec type {name}.") - - -class _ExportFormat(str, Enum): - """An enum of export formats which can be used with the `export` command.""" - - json = "json" - yaml = "yaml" - yml = "yml" - py = "py" - python = "python" - dict = "dict" - - -app.add_typer( - schema := typer.Typer( - rich_markup_mode="rich", - epilog="Made with [red]♥[/red] by [bold]z3z1ma[/bold].", - add_completion=False, - no_args_is_help=True, - ), - name="schema", - help=":construction: Schema management commands.", - rich_help_panel="Develop", -) - - -@schema.command("dump") -def schema_dump( - ctx: typer.Context, - pipeline_to_sink: t.Annotated[ - str, - typer.Argument( - help="The pipeline:sink combination from which to fetch the schema." - ), - ], - format: t.Annotated[ - _ExportFormat, typer.Option(help="The format to dump the schema in.") - ] = _ExportFormat.json, -) -> None: - """:computer: Dump the schema of a [b blue]pipeline[/b blue]:[violet]sink[/violet] combination. - - \f - Args: - ctx: The CLI context. - pipeline_to_sink: The pipeline:sink combination from which to fetch the schema. - format: The format to dump the schema in. - - Raises: - typer.BadParameter: If the pipeline or sink are not found. - """ - W = t.cast(WorkspaceMonad, ctx.obj).unwrap() - source, destination = pipeline_to_sink.split(":", 1) - spec = W.get_pipeline_spec(source).unwrap() - rv = execute_pipeline_specification( - spec, - W.get_sink_spec(destination).unwrap_or((destination, None)), - dry_run=True, - quiet=True, - ).unwrap() - if format == _ExportFormat.json: - console.print(rv.pipeline.default_schema.to_pretty_json()) - elif format in (_ExportFormat.py, _ExportFormat.python, _ExportFormat.dict): - console.print(rv.pipeline.default_schema.to_dict()) - elif format in (_ExportFormat.yaml, _ExportFormat.yml): - console.print(rv.pipeline.default_schema.to_pretty_yaml()) - else: - raise ValueError( - f"Invalid format {format}. Must be one of {list(_ExportFormat)}" - ) - - -@schema.command("edit") -def schema_edit( - ctx: typer.Context, - pipeline_to_sink: t.Annotated[ - str, - typer.Argument( - help="The pipeline:sink combination from which to fetch the schema." - ), - ], -) -> None: - """:pencil: Edit the schema of a [b blue]pipeline[/b blue]:[violet]sink[/violet] combination using the system editor. - - \f - Args: - ctx: The CLI context. - pipeline_to_sink: The pipeline:sink combination from which to fetch the schema. - - Raises: - typer.BadParameter: If the pipeline or sink are not found. 
- """ - W = t.cast(WorkspaceMonad, ctx.obj).unwrap() - source, destination = pipeline_to_sink.split(":", 1) - sink, _ = ( - W.get_sink_spec(destination) - .map(lambda s: s.get_ingest_config()) - .unwrap_or((destination, None)) - ) - spec = W.get_pipeline_spec(source).unwrap() - logger.info(f"Clearing local schema and state for {source}.") - pipe = spec.create_pipeline(dlt.Pipeline, destination=sink, staging=None) - pipe.drop() - logger.info(f"Syncing schema for {source}:{destination}.") - rv = execute_pipeline_specification(spec, sink, dry_run=True, quiet=True).unwrap() - schema = rv.pipeline.default_schema.clone() - with tempfile.TemporaryDirectory() as tmpdir: - fname = f"{schema.name}.schema.yaml" - with open(os.path.join(tmpdir, fname), "w") as f: - f.write(schema.to_pretty_yaml()) - logger.info(f"Editing schema {schema.name}.") - subprocess.run([os.environ.get("EDITOR", "vi"), f.name], check=True) - pipe_mut = spec.create_pipeline( - dlt.Pipeline, import_schema_path=tmpdir, destination=sink, staging=None - ) - schema_mut = pipe_mut.default_schema - if schema_mut.version > schema.version: - with pipe_mut.destination_client() as client: - logger.info( - f"Updating schema {schema.name} to version {schema_mut.version} in {destination}." - ) - client.update_stored_schema() - logger.info("Schema updated.") - else: - logger.info("Schema not updated.") - - -app.add_typer( - state := typer.Typer( - rich_markup_mode="rich", - epilog="Made with [red]♥[/red] by [bold]z3z1ma[/bold].", - add_completion=False, - no_args_is_help=True, - ), - name="state", - help=":construction: State management commands.", - rich_help_panel="Develop", -) - - -@state.command("dump") -def state_dump( - ctx: typer.Context, - pipeline_to_sink: t.Annotated[ - str, - typer.Argument( - help="The pipeline:sink combination from which to fetch the schema." - ), - ], -) -> None: - """:computer: Dump the state of a [b blue]pipeline[/b blue]:[violet]sink[/violet] combination. - - \f - Args: - ctx: The CLI context. - pipeline_to_sink: The pipeline:sink combination from which to fetch the state. - - Raises: - typer.BadParameter: If the pipeline or sink are not found. - """ - W = t.cast(WorkspaceMonad, ctx.obj).unwrap() - source, destination = pipeline_to_sink.split(":", 1) - W.get_pipeline_spec(source).bind( - lambda spec: execute_pipeline_specification( - spec, - W.get_sink_spec(destination).unwrap_or((destination, None)), - dry_run=True, - quiet=True, - ) - ).map(lambda rv: console.print(rv.pipeline.state)) - - -@state.command("edit") -def state_edit( - ctx: typer.Context, - pipeline_to_sink: t.Annotated[ - str, - typer.Argument( - help="The pipeline:sink combination from which to fetch the state." - ), - ], -) -> None: - """:pencil: Edit the state of a [b blue]pipeline[/b blue]:[violet]sink[/violet] combination using the system editor. - - \f - Args: - ctx: The CLI context. - pipeline_to_sink: The pipeline:sink combination from which to fetch the state. - - Raises: - typer.BadParameter: If the pipeline or sink are not found. 
- """ - W = t.cast(WorkspaceMonad, ctx.obj).unwrap() - source, destination = pipeline_to_sink.split(":", 1) - sink, _ = ( - W.get_sink_spec(destination) - .map(lambda s: s.get_ingest_config()) - .unwrap_or((destination, None)) - ) - spec = W.get_pipeline_spec(source).unwrap() - logger.info(f"Clearing local state and state for {source}.") - pipe = spec.create_pipeline(dlt.Pipeline, destination=sink, staging=None) - pipe.drop() - logger.info(f"Syncing state for {source}:{destination}.") - rv = execute_pipeline_specification(spec, sink, dry_run=True, quiet=True).unwrap() - with ( - tempfile.NamedTemporaryFile(suffix=".json") as tmp, - rv.pipeline.managed_state(extract_state=True) as state, - ): - pre_hash = generate_state_version_hash(state, exclude_attrs=["_local"]) - tmp.write(json.dumps(json.loads(json_encode_state(state)), indent=2).encode()) - tmp.flush() - logger.info(f"Editing state in {destination}.") - subprocess.run([os.environ.get("EDITOR", "vi"), tmp.name], check=True) - with open(tmp.name, "r") as f: - update_dict_nested(t.cast(dict, state), json_decode_state(f.read())) - post_hash = generate_state_version_hash(state, exclude_attrs=["_local"]) - if pre_hash != post_hash: - execute_pipeline_specification( - spec, sink, select=[], exclude=["*"], quiet=True - ).unwrap() - logger.info("State updated.") - else: - logger.info("State not updated.") - - -app.add_typer( - model := typer.Typer( - rich_markup_mode="rich", - epilog="Made with [red]♥[/red] by [bold]z3z1ma[/bold].", - add_completion=False, - no_args_is_help=True, - ), - name="model", - help=":construction: Model management commands.", - rich_help_panel="Core", -) - - -@model.command("evaluate") -def model_evaluate( - ctx: typer.Context, - model: t.Annotated[ - str, - typer.Argument(help="The model to evaluate. Can be prefixed with the gateway."), - ], - start: str = typer.Option( - "1 month ago", - help="The start time to evaluate the model from. Defaults to 1 month ago.", - ), - end: str = typer.Option( - "now", - help="The end time to evaluate the model to. Defaults to now.", - ), - limit: t.Optional[int] = typer.Option( - None, help="The number of rows to limit the evaluation to." - ), -) -> None: - """:bar_chart: Evaluate a [b red]Model[/b red] and print the results. A thin wrapper around `sqlmesh evaluate` - - \f - Args: - ctx: The CLI context. - model: The model to evaluate. Can be prefixed with the gateway. - limit: The number of rows to limit the evaluation to. - """ - if ":" in model: - gateway, model = model.split(":", 1) - else: - gateway = None - t.cast(WorkspaceMonad, ctx.obj).map( - lambda w: console.print( - w.get_transform_context(gateway).evaluate( - model, limit=limit, start=start, end=end, execution_time="now" - ) - ) - ) - - -@model.command("render") -def model_render( - ctx: typer.Context, - model: t.Annotated[ - str, - typer.Argument(help="The model to evaluate. Can be prefixed with the gateway."), - ], - start: str = typer.Option( - "1 month ago", - help="The start time to evaluate the model from. Defaults to 1 month ago.", - ), - end: str = typer.Option( - "now", - help="The end time to evaluate the model to. Defaults to now.", - ), - expand: t.List[str] = typer.Option([], help="The referenced models to expand."), - dialect: t.Optional[str] = typer.Option( - None, help="The SQL dialect to use for rendering." - ), -) -> None: - """:bar_chart: Render a [b red]Model[/b red] and print the query. A thin wrapper around `sqlmesh render` - - \f - Args: - ctx: The CLI context. - model: The model to evaluate. 
Can be prefixed with the gateway. - start: The start time to evaluate the model from. Defaults to 1 month ago. - end: The end time to evaluate the model to. Defaults to now. - expand: The referenced models to expand. - dialect: The SQL dialect to use for rendering. - """ - if ":" in model: - gateway, model = model.split(":", 1) - else: - gateway = None - t.cast(WorkspaceMonad, ctx.obj).map( - lambda w: w.get_transform_context(gateway), - ).map( - lambda sqlmesh_ctx: console.print( - sqlmesh_ctx.render( - model, start=start, end=end, execution_time="now", expand=expand - ).sql(dialect or sqlmesh_ctx.default_dialect, pretty=True) - ) - ) - - -@model.command("name") -def model_name( - ctx: typer.Context, - model: t.Annotated[ - str, - typer.Argument( - help="The model to convert the physical name. Can be prefixed with the gateway." - ), - ], -) -> None: - """:bar_chart: Get a [b red]Model[/b red]'s physical table name. A thin wrapper around `sqlmesh table_name` - - \f - Args: - ctx: The CLI context. - model: The model to evaluate. Can be prefixed with the gateway. - """ - if ":" in model: - gateway, model = model.split(":", 1) - else: - gateway = None - t.cast(WorkspaceMonad, ctx.obj).map( - lambda w: console.print( - w.get_transform_context(gateway).table_name(model, False) - ) - ) - - -@model.command("diff") -def model_diff( - ctx: typer.Context, - model: t.Annotated[ - str, - typer.Argument(help="The model to evaluate. Can be prefixed with the gateway."), - ], - source_target: t.Annotated[ - str, - typer.Argument(help="The source and target environments separated by a colon."), - ], - show_sample: bool = typer.Option( - False, help="Whether to show a sample of the diff." - ), -) -> None: - """:bar_chart: Compute the diff of a [b red]Model[/b red] across 2 environments. A thin wrapper around `sqlmesh table_diff` - - \f - Args: - ctx: The CLI context. - model: The model to evaluate. Can be prefixed with the gateway. - source_target: The source and target environments separated by a colon. - """ - if ":" in model: - gateway, model = model.split(":", 1) - else: - gateway = None - source, target = source_target.split(":", 1) - t.cast(WorkspaceMonad, ctx.obj).map( - lambda w: console.print( - w.get_transform_context(gateway).table_diff( - source, target, model_or_snapshot=model, show_sample=show_sample - ) - ) - ) - - -@model.command("prototype") -def model_prototype( - ctx: typer.Context, - dependencies: t.List[str] = typer.Option( - [], - "-d", - "--dependencies", - help="The dependencies to include in the prototype.", - ), - start: str = typer.Option( - "1 month ago", - help="The start time to evaluate the model from. Defaults to 1 month ago.", - ), - end: str = typer.Option( - "now", - help="The end time to evaluate the model to. Defaults to now.", - ), - limit: int = typer.Option( - 5_000_000, - help="The number of rows to limit the evaluation to.", - ), -): - """:bar_chart: Prototype a model and save the results to disk. - - \f - Args: - ctx: The CLI context. - dependencies: The dependencies to include in the prototype. - start: The start time to evaluate the model from. Defaults to 1 month ago. - end: The end time to evaluate the model to. Defaults to now. - limit: The number of rows to limit the evaluation to. 
- """ - W = t.cast(WorkspaceMonad, ctx.obj).unwrap() - sqlmesh_ctx = W.get_transform_context() - for dep in dependencies: - df = sqlmesh_ctx.evaluate( - dep, - start=start, - end=end, - execution_time="now", - limit=limit, - ) - df.to_parquet(f"{dep}.parquet", index=False) - - -app.add_typer( - proxy := typer.Typer( - rich_markup_mode="rich", - epilog="Made with [red]♥[/red] by [bold]z3z1ma[/bold].", - add_completion=False, - no_args_is_help=True, - ), - name="proxy", - help=":satellite: Proxy management commands.", - rich_help_panel="Core", -) - - -@proxy.command("mysql") -def proxy_mysql( - ctx: typer.Context, - gateway: t.Annotated[ - t.Optional[str], - typer.Argument( - help="The gateway to use for the server. Defaults to the default gateway." - ), - ] = None, -) -> None: - """:satellite: Start a SQLMesh proxy server. - - \f - Args: - ctx: The CLI context. - gateway: The gateway to use for the server. Defaults to the default gateway. - """ - t.cast(WorkspaceMonad, ctx.obj).map( - lambda w: asyncio.run(run_mysql_proxy(w.get_transform_context(gateway))) - ).unwrap() - - -@proxy.command("planner") -def proxy_planner( - ctx: typer.Context, - gateway: t.Annotated[ - t.Optional[str], - typer.Argument( - help="The gateway to use for the server. Defaults to the default gateway." - ), - ] = None, -) -> None: - """:satellite: Start a SQLMesh proxy server. - - \f - Args: - ctx: The CLI context. - gateway: The gateway to use for the server. Defaults to the default gateway. - """ - t.cast(WorkspaceMonad, ctx.obj).map( - lambda w: run_plan_server(8000, w.get_transform_context(gateway)) - ).unwrap() - - -@proxy.command("plan") -def proxy_plan( - ctx: typer.Context, - gateway: t.Annotated[ - t.Optional[str], - typer.Argument( - help="The gateway to use for the server. Defaults to the default gateway." - ), - ] = None, -): - """:satellite: Run a SQLMesh plan delegated to a running planner. - - \f - Args: - ctx: The CLI context. - gateway: The gateway to use for the server. Defaults to the default gateway. - """ - import pickle - - import requests - - W = t.cast(WorkspaceMonad, ctx.obj).unwrap() - plan = W.get_transform_context(gateway).plan("dev", no_prompts=True) - res = requests.post( - "http://localhost:8000", - headers={"Content-Type": "application/octet-stream"}, - data=pickle.dumps(plan), - ) - console.print(res.json()) - - -app.add_typer( - inspect := typer.Typer( - rich_markup_mode="rich", - epilog="Made with [red]♥[/red] by [bold]z3z1ma[/bold].", - add_completion=False, - no_args_is_help=True, - ), - name="inspect", - help=":mag: State store inspection commands.", - rich_help_panel="Utilities", -) - - -@inspect.command("events") -def inspect_events( - ctx: typer.Context, - limit: t.Annotated[ - int, - typer.Option(..., help="The number of audit logs to list. Defaults to 10."), - ] = 10, - failed_only: t.Annotated[ - bool, - typer.Option( - help="List only the audit logs with errors.", - ), - ] = False, -) -> None: - """:mag: List the audit logs for the current workspace.""" - W = t.cast(WorkspaceMonad, ctx.obj).unwrap() - import pandas as pd - - with pd.option_context("display.max_rows", limit): - console.print(W.state.fetch_audits(limit=limit, failed_only=failed_only)) - - -@inspect.command("extracted") -def inspect_extracted( - ctx: typer.Context, - load_ids: t.Annotated[ - str, - typer.Argument( - help="A comma-separated list of load ids to list. Use '*' to list all." - ), - ] = "*", - limit: t.Annotated[ - int, - typer.Option( - ..., help="The number of extracted resources to list. 
Defaults to 10." - ), - ] = 10, -) -> None: - """:mag: List the extracted resources for the current workspace.""" - W = t.cast(WorkspaceMonad, ctx.obj).unwrap() - import pandas as pd - - if load_ids == "*": - requested_ids = [] - else: - requested_ids = load_ids.split(",") - data = W.state.fetch_extracted(*requested_ids, limit=limit) - if data.empty: - console.print("\n[red]No data found.[/red]") - return - if requested_ids: - for load_id in requested_ids: - r = data.loc[data["load_id"] == load_id, "data"] - if r.empty: - console.print( - f"\n[red]No data found for requested load id {load_id}.[/red]" - ) - continue - console.print(f"\n[b]Data for load id {load_id}[/b]:") - console.print_json(r.iloc[0]) - else: - with pd.option_context("display.max_rows", limit): - console.print(data) - - -@inspect.command("normalized") -def inspect_normalized( - ctx: typer.Context, - load_ids: t.Annotated[ - str, - typer.Argument( - help="A comma-separated list of load ids to list. Use '*' to list all." - ), - ] = "*", - limit: t.Annotated[ - int, - typer.Option( - ..., help="The number of normalized resources to list. Defaults to 10." - ), - ] = 10, -) -> None: - """:mag: List the normalized resources for the current workspace.""" - W = t.cast(WorkspaceMonad, ctx.obj).unwrap() - import pandas as pd - - if load_ids == "*": - requested_ids = [] - else: - requested_ids = load_ids.split(",") - data = W.state.fetch_normalized(*requested_ids, limit=limit) - if data.empty: - console.print("\n[red]No data found.[/red]") - return - if requested_ids: - for load_id in requested_ids: - r = data.loc[data["load_id"] == load_id, "data"] - if r.empty: - console.print( - f"\n[red]No data found for requested load id {load_id}.[/red]" - ) - continue - console.print(f"\n[b]Data for load id {load_id}[/b]:") - console.print_json(r.iloc[0]) - else: - with pd.option_context("display.max_rows", limit): - console.print(data) - - -@inspect.command("loaded") -def inspect_loaded( - ctx: typer.Context, - load_ids: t.Annotated[ - str, - typer.Argument( - help="A comma-separated list of load ids to list. Use '*' to list all." - ), - ] = "*", - limit: t.Annotated[ - int, - typer.Option( - ..., help="The number of loaded resources to list. Defaults to 10." 
- ), - ] = 10, -) -> None: - """:mag: List the loaded resources for the current workspace.""" - W = t.cast(WorkspaceMonad, ctx.obj).unwrap() - import pandas as pd - - if load_ids == "*": - requested_ids = [] - else: - requested_ids = load_ids.split(",") - data = W.state.fetch_loaded(*requested_ids, limit=limit) - if data.empty: - console.print("\n[red]No data found.[/red]") - return - if requested_ids: - for load_id in requested_ids: - r = data.loc[data["load_id"] == load_id, "data"] - if r.empty: - console.print( - f"\n[red]No data found for requested load id {load_id}.[/red]" - ) - continue - console.print(f"\n[b]Data for load id {load_id}[/b]:") - console.print_json(r.iloc[0]) - else: - with pd.option_context("display.max_rows", limit): - console.print(data) - - -if __name__ == "__main__": - app() diff --git a/src/cdf/integrations/sqlmesh.py b/src/cdf/integrations/sqlmesh.py deleted file mode 100644 index b5553eb..0000000 --- a/src/cdf/integrations/sqlmesh.py +++ /dev/null @@ -1,147 +0,0 @@ -import logging -import time -import typing as t - -from sqlmesh.core.notification_target import ( - ConsoleNotificationTarget, - NotificationEvent, - NotificationStatus, -) -from sqlmesh.utils.errors import AuditError - -from cdf.legacy.project import Workspace - -logger = logging.getLogger(__name__) - - -class CDFNotificationTarget(ConsoleNotificationTarget): - """A notification target which sends notifications to the state of a CDF workspace.""" - - workspace: Workspace - notify_on: t.FrozenSet[NotificationEvent] = frozenset( - { - NotificationEvent.APPLY_START, - NotificationEvent.APPLY_END, - NotificationEvent.RUN_START, - NotificationEvent.RUN_END, - NotificationEvent.MIGRATION_START, - NotificationEvent.MIGRATION_END, - NotificationEvent.APPLY_FAILURE, - NotificationEvent.RUN_FAILURE, - NotificationEvent.AUDIT_FAILURE, - NotificationEvent.MIGRATION_FAILURE, - } - ) - - _run_start: float = 0.0 - """The time a run started""" - _apply_start: float = 0.0 - """The time an apply started""" - _migrate_start: float = 0.0 - """The time a migration started""" - - def send( - self, notification_status: NotificationStatus, msg: str, **kwargs: t.Any - ) -> None: - msg += "\n(event logged in state store)" - if notification_status.is_failure: - logger.error(msg) - elif notification_status.is_warning: - logger.warning(msg) - else: - logger.info(msg) - - def notify_run_start(self, environment: str) -> None: - """Notify the workspace of a run start""" - self._run_start = time.time() - self.workspace.state.audit( - "sqlmesh_run_start", - success=True, - environment=environment, - ) - - def notify_run_end(self, environment: str) -> None: - """Notify the workspace of a run end""" - self.workspace.state.audit( - "sqlmesh_run_end", - success=True, - environment=environment, - elapsed=time.time() - self._run_start, - ) - - def notify_run_failure(self, exc: str) -> None: - """Notify the workspace of a run failure""" - self.workspace.state.audit( - "sqlmesh_run_failure", - success=False, - error=exc, - elapsed=time.time() - self._run_start, - ) - - def notify_apply_start(self, environment: str, plan_id: str) -> None: - """Notify the workspace of an apply start""" - self._apply_start = time.time() - self.workspace.state.audit( - "sqlmesh_apply_start", - success=True, - environment=environment, - plan_id=plan_id, - ) - - def notify_apply_end(self, environment: str, plan_id: str) -> None: - """Notify the workspace of an apply end""" - self.workspace.state.audit( - "sqlmesh_apply_end", - success=True, - 
environment=environment, - plan_id=plan_id, - elapsed=time.time() - self._apply_start, - ) - - def notify_apply_failure(self, environment: str, plan_id: str, exc: str) -> None: - """Notify the workspace of an apply failure""" - self.workspace.state.audit( - "sqlmesh_apply_failure", - success=False, - environment=environment, - plan_id=plan_id, - error=exc, - elapsed=time.time() - self._apply_start, - ) - - def notify_migration_start(self) -> None: - """Notify the workspace of a migration start""" - self._migrate_start = time.time() - self.workspace.state.audit( - "sqlmesh_migration_start", - success=True, - ) - - def notify_migration_end(self) -> None: - """Notify the workspace of a migration end""" - self.workspace.state.audit( - "sqlmesh_migration_end", - success=True, - elapsed=time.time() - self._migrate_start, - ) - - def notify_migration_failure(self, exc: str) -> None: - """Notify the workspace of a migration failure""" - self.workspace.state.audit( - "sqlmesh_migration_failure", - success=False, - error=exc, - elapsed=time.time() - self._migrate_start, - ) - - def notify_audit_failure(self, audit_error: AuditError) -> None: - """Notify the workspace of an audit failure""" - self.workspace.state.audit( - "sqlmesh_audit_failure", - success=False, - sql=audit_error.sql(), - name=audit_error.audit_name, - model=audit_error.model_name, # type: ignore - err_msg=str(audit_error), - elapsed=1.0, - ) diff --git a/src/cdf/legacy/__init__.py b/src/cdf/legacy/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/cdf/legacy/config.py b/src/cdf/legacy/config.py deleted file mode 100644 index 50263fe..0000000 --- a/src/cdf/legacy/config.py +++ /dev/null @@ -1,151 +0,0 @@ -"""The config module provides a configuration provider for CDF scoped settings. - -This allows for the configuration to be accessed and modified in a consistent manner across -the codebase leveraging dlt's configuration provider interface. It also makes all of dlt's -semantics which depend on the configuration providers seamlessly work with CDF's configuration. -""" - -import typing as t -from collections import ChainMap -from contextlib import contextmanager - -import dynaconf -from dlt.common.configuration.container import Container -from dlt.common.configuration.providers import ConfigProvider as _ConfigProvider -from dlt.common.configuration.providers import EnvironProvider -from dlt.common.configuration.specs.config_providers_context import ( - ConfigProvidersContext, -) -from dlt.common.utils import update_dict_nested - - -class CdfConfigProvider(_ConfigProvider): - """A configuration provider for CDF scoped settings.""" - - def __init__(self, scope: t.ChainMap[str, t.Any], secret: bool = False) -> None: - """Initialize the provider. - - Args: - config: The configuration ChainMap. 
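A hedged sketch of how the `CDFNotificationTarget` removed above was meant to be wired up: SQLMesh notification targets are attached through the project `Config`, and each audited run/apply/migration event lands in the workspace state store. The `build_sqlmesh_config` helper is hypothetical, and this assumes the SQLMesh versions targeted here accept a `notification_targets` list on `Config`:

```python
from sqlmesh.core.config import Config

from cdf.integrations.sqlmesh import CDFNotificationTarget  # removed in this patch


def build_sqlmesh_config(workspace) -> Config:
    # `workspace` is assumed to be a cdf Workspace instance; events audited by
    # the target are written to workspace.state.
    return Config(
        notification_targets=[CDFNotificationTarget(workspace=workspace)],
    )
```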
- """ - if not isinstance(scope, ChainMap): - scope = ChainMap(scope) - self._scope = scope - self._secret = secret - - def get_value( - self, key: str, hint: t.Type[t.Any], pipeline_name: str, *sections: str - ) -> t.Tuple[t.Optional[t.Any], str]: - """Get a value from the configuration.""" - _ = hint - if pipeline_name: - sections = ("pipelines", pipeline_name, "options", *sections) - parts = (*sections, key) - fqn = ".".join(parts) - - try: - return self._scope[fqn], fqn - except KeyError: - return None, fqn - - def set_value( - self, key: str, value: t.Any, pipeline_name: str, *sections: str - ) -> None: - """Set a value in the configuration.""" - if pipeline_name: - sections = ("pipelines", pipeline_name, "options", *sections) - parts = (*sections, key) - fqn = ".".join(parts) - if isinstance(value, dynaconf.Dynaconf): - if key is None: - self._scope.maps[-1] = t.cast(dict, value) - else: - self._scope.maps[-1][fqn].update(value) - return None - else: - if key is None: - if isinstance(value, dict): - self._scope.update(value) - return None - else: - raise ValueError("Cannot set a value without a key") - this = self._scope - for key in parts[:-1]: - if key not in this: - this[key] = {} - this = this[key] - if isinstance(value, dict) and isinstance(this[parts[-1]], dict): - update_dict_nested(this[parts[-1]], value) - else: - this[parts[-1]] = value - - @property - def name(self) -> str: - """The name of the provider""" - return "CDF Configuration Provider" - - @property - def supports_sections(self) -> bool: - """This provider supports sections""" - return True - - @property - def supports_secrets(self) -> bool: - """There is no differentiation between secrets and non-secrets for the cdf provider. - - Nothing is persisted. Data is available in memory and backed by the dynaconf settings object. - """ - return self._secret - - @property - def is_writable(self) -> bool: - """Whether the provider is writable""" - return True - - -@t.overload -def get_config_providers( - scope: t.ChainMap[str, t.Any], /, include_env: bool = False -) -> t.Tuple[CdfConfigProvider, CdfConfigProvider]: ... - - -@t.overload -def get_config_providers( - scope: t.ChainMap[str, t.Any], /, include_env: bool = True -) -> t.Tuple[EnvironProvider, CdfConfigProvider, CdfConfigProvider]: ... - - -def get_config_providers( - scope: t.ChainMap[str, t.Any], /, include_env: bool = True -) -> t.Union[ - t.Tuple[CdfConfigProvider, CdfConfigProvider], - t.Tuple[EnvironProvider, CdfConfigProvider, CdfConfigProvider], -]: - """Get the configuration providers for the given scope.""" - cdf_providers = ( - CdfConfigProvider(scope), - CdfConfigProvider(scope, secret=True), - ) - if include_env: - return (EnvironProvider(), *cdf_providers) - return cdf_providers - - -@contextmanager -def inject_configuration( - scope: t.ChainMap[str, t.Any], /, include_env: bool = True -) -> t.Iterator[t.Mapping[str, t.Any]]: - """Inject the configuration provider into the context - - This allows dlt.config and dlt.secrets to access the scope configuration. Furthermore - it makes the scope configuration available throughout dlt where things such as extract, - normalize, and load settings can be specified. 
- """ - ctx = Container()[ConfigProvidersContext] - prior = ctx.providers.copy() - ctx.providers = list(get_config_providers(scope, include_env=include_env)) - yield scope - ctx.providers = prior - - -__all__ = ["CdfConfigProvider", "get_config_providers", "inject_configuration"] diff --git a/src/cdf/legacy/constants.py b/src/cdf/legacy/constants.py deleted file mode 100644 index 7727056..0000000 --- a/src/cdf/legacy/constants.py +++ /dev/null @@ -1,13 +0,0 @@ -"""Constants used by CDF.""" - -CDF_ENVIRONMENT = "CDF_ENVIRONMENT" -"""Environment variable to set the environment of the project.""" - -DEFAULT_ENVIRONMENT = "dev" -"""Default environment for the project.""" - -CDF_MAIN = "__cdf_main__" -"""A sentinel value that will match the __name__ attribute of a module being executed by CDF.""" - -CDF_LOG_LEVEL = "RUNTIME__LOG_LEVEL" -"""Environment variable to set the log level of the project.""" diff --git a/src/cdf/legacy/context.py b/src/cdf/legacy/context.py deleted file mode 100644 index a7e6daa..0000000 --- a/src/cdf/legacy/context.py +++ /dev/null @@ -1,47 +0,0 @@ -"""The context module provides thread-safe context variables and injection mechanisms. - -It facilitates communication between specifications and runtime modules. -""" - -import typing as t -import uuid -from contextvars import ContextVar - -import dlt - -if t.TYPE_CHECKING: - from cdf.legacy.project import Project - - -active_project: ContextVar["Project"] = ContextVar("active_project") -"""The active workspace context variable. - -The allows the active workspace to be passed to user-defined scripts. The workspace -has a reference to the project configuration and filesystem. -""" - -active_pipeline: ContextVar[dlt.Pipeline] = ContextVar("active_pipeline") -"""Stores the active pipeline. - -This is the primary mechanism to pass a configured pipeline to user-defined scripts. -""" - -debug_mode: ContextVar[bool] = ContextVar("debug_mode", default=False) -"""The debug mode context variable. - -Allows us to mutate certain behaviors in the runtime based on the debug mode. User can -optionally introspect this. -""" - -extract_limit: ContextVar[int] = ContextVar("extract_limit", default=0) -"""The extract limit context variable. - -Lets us set a limit on the number of items to extract from a source. This variable -can be introspected by user-defined scripts to optimize for partial extraction. -""" - -execution_id: ContextVar[str] = ContextVar("execution_id", default=str(uuid.uuid4())) -"""The execution ID context variable.""" - - -__all__ = ["active_project", "active_pipeline", "debug_mode", "extract_limit"] diff --git a/src/cdf/legacy/filesystem.py b/src/cdf/legacy/filesystem.py deleted file mode 100644 index a96e11f..0000000 --- a/src/cdf/legacy/filesystem.py +++ /dev/null @@ -1,106 +0,0 @@ -"""A central interface for filesystems thinly wrapping fsspec.""" - -import posixpath -import typing as t -from pathlib import Path - -import dlt -import fsspec -from dlt.common.configuration import with_config -from fsspec.core import strip_protocol -from fsspec.implementations.dirfs import DirFileSystem -from fsspec.utils import get_protocol - -from cdf.types import PathLike - - -# TODO: Add UPath integration... -class FilesystemAdapter: - """Wraps an fsspec filesystem. - - The filesystem is lazily loaded. Certain methods are intercepted to include cdf-specific logic. Helper - methods are provided for specific operations. 
- """ - - @with_config(sections=("filesystem",)) - def __init__( - self, - uri: PathLike = dlt.config.value, - root: t.Optional[PathLike] = None, - options: t.Optional[t.Dict[str, t.Any]] = None, - ) -> None: - """Load a filesystem from a provider and kwargs. - - Args: - uri: The filesystem URI. - options: The filesystem provider kwargs. - """ - uri = _resolve_local_uri(uri, root) - if isinstance(uri, Path): - uri = uri.resolve().as_uri() - options = options or {} - options.setdefault("auto_mkdir", True) - CdfFs = type("CdfFs", (DirFileSystem,), {"protocol": "cdf"}) - self.wrapped = CdfFs( - path=posixpath.join(strip_protocol(uri), "x")[:-1], - fs=fsspec.filesystem(get_protocol(uri), **options), - auto_mkdir=True, - ) - self.uri = uri - self.mapper = self.wrapped.get_mapper() - - def __repr__(self) -> str: - return f"{type(self).__name__}(uri={self.uri!r})" - - def __str__(self) -> str: - return self.uri - - def __getattr__(self, name: str) -> t.Any: - """Proxy attribute access to the wrapped filesystem.""" - return getattr(self.wrapped, name) - - def __getitem__(self, value: str) -> t.Any: - """Get a path from the filesystem.""" - return self.mapper[value] - - def __setitem__(self, key: str, value: t.Any) -> None: - """Set a path in the filesystem.""" - self.mapper[key] = value - - def open(self, path: PathLike, mode: str = "r", **kwargs: t.Any) -> t.Any: - """Open a file from the filesystem. - - Args: - path: The path to the file. - mode: The file mode. - kwargs: Additional kwargs. - - Returns: - The file handle. - """ - return self.wrapped.open(path, mode, **kwargs) - - -def _resolve_local_uri(uri: PathLike, root: t.Optional[PathLike] = None) -> PathLike: - """Resolve a local URI to an absolute path. If the URI is already absolute, it is returned as-is. - - URIs with protocols other than "file" are returned as-is. - - Args: - uri: The URI to resolve. - root: The root path to use. - - Returns: - The resolved URI. - """ - uri_str = str(uri) - proto = get_protocol(uri_str) - root_proto = "file" - if root and proto == root_proto: - uri_str = uri_str.replace(f"{root_proto}://", "") - if not Path(uri_str).is_absolute(): - uri = Path(root, uri_str).resolve().as_uri() - return uri - - -__all__ = ["FilesystemAdapter"] diff --git a/src/cdf/legacy/logger.py b/src/cdf/legacy/logger.py deleted file mode 100644 index bfdad6a..0000000 --- a/src/cdf/legacy/logger.py +++ /dev/null @@ -1,169 +0,0 @@ -"""Logger for CDF""" - -from __future__ import annotations - -import contextlib -import logging -import typing as t -import warnings - -from rich.logging import RichHandler - -if t.TYPE_CHECKING: - - class Representable(t.Protocol): - def __str__(self) -> str: ... - - class LogMethod(t.Protocol): - """Protocol for logger methods.""" - - def __call__( - self, msg: Representable, *args: t.Any, **kwargs: t.Any - ) -> None: ... - - -__all__ = [ - "configure", - "create", - "set_level", - "LOG_LEVEL", - "LOGGER", -] - - -class CDFLoggerAdapter(logging.LoggerAdapter): - extra: t.Dict[str, t.Any] - logger: logging.Logger - - -LOGGER = CDFLoggerAdapter(logging.getLogger("cdf"), {}) -"""CDF logger instance.""" - -LOG_LEVEL = logging.INFO -"""The active log level for CDF.""" - - -def configure(level: int | str = logging.INFO) -> None: - """Configure logging. - - Args: - level (int, optional): Logging level. Defaults to logging.INFO. 
- """ - if LOGGER.extra.get("configured"): - return - LOGGER.setLevel(LOG_LEVEL := level) - console_handler = RichHandler( - LOG_LEVEL, - markup=True, - rich_tracebacks=True, - omit_repeated_times=False, - ) - LOGGER.logger.addHandler(console_handler) - LOGGER.extra["configured"] = True - - -@t.overload -def create(name: t.Literal["cdf"] | None) -> CDFLoggerAdapter: ... - - -@t.overload -def create(name: str) -> logging.Logger: ... - - -def create(name: str | None = None) -> CDFLoggerAdapter | logging.Logger: - """Get or create a logger. - - Args: - name (str, optional): The name of the logger. If None, the package logger is - returned. Defaults to None. If a name is provided, a child logger is - created. - - Returns: - The logger. - """ - if name is None: - return LOGGER - return LOGGER.logger.getChild(name) - - -def log_level() -> str: - """Returns current log level""" - return logging.getLevelName(LOGGER.logger.level) - - -def set_level(level: int | str) -> None: - """Set the package log level. - - Args: - level (int | str): The new log level. - - Raises: - ValueError: If the log level is not valid. - """ - global LOG_LEVEL - - if not LOGGER.extra.get("configured"): - configure(LOG_LEVEL := level) - else: - LOGGER.setLevel(LOG_LEVEL := level) - - -@contextlib.contextmanager -def suppress_and_warn() -> t.Iterator[None]: - """Suppresses exception and logs it as warning""" - try: - yield - except Exception: - LOGGER.warning("Suppressed exception", exc_info=True) - - -@contextlib.contextmanager -def mute() -> t.Iterator[None]: - """Mute the logger.""" - LOGGER.logger.disabled = True - try: - yield - finally: - LOGGER.logger.disabled = False - - -def __getattr__(name: str) -> "LogMethod": - """Get a logger method from the package logger.""" - if not LOGGER.extra.get("configured"): - configure() - - def wrapper(msg: "Representable", *args: t.Any, **kwargs: t.Any) -> None: - stacklevel = 3 if name == "exception" else 2 - getattr(LOGGER, name)(msg, *args, **kwargs, stacklevel=stacklevel) - - return wrapper - - -def _monkeypatch_dlt() -> None: - """Monkeypatch the dlt logging module.""" - from dlt.common import logger - - patched = create("dlt") - setattr(logger, "_init_logging", lambda *a, **kw: patched) - setattr(logger, "LOGGER", patched) - - -def _monkeypatch_sqlglot() -> None: - """Monkeypatch the sqlglot logging module.""" - logger = logging.getLogger("sqlglot") - patched = create("sqlglot") - logger.handlers = patched.handlers - logger.setLevel(logging.ERROR) - logger.propagate = False - warnings.filterwarnings( - "ignore", - message=r"^Possible nested set .*", - category=FutureWarning, - module="sqlglot", - ) - - -def apply_patches() -> None: - """Apply logger patches.""" - _monkeypatch_dlt() - _monkeypatch_sqlglot() diff --git a/src/cdf/legacy/project.py b/src/cdf/legacy/project.py deleted file mode 100644 index 3abcd88..0000000 --- a/src/cdf/legacy/project.py +++ /dev/null @@ -1,989 +0,0 @@ -"""The project module provides a way to define a project and its workspaces. - -Everything in CDF is described via a simple configuration structure. We parse this configuration -using dynaconf which provides a simple way to load configuration from various sources such as -environment variables, YAML, TOML, JSON, and Python files. It also provides many other features -such as loading .env files, env-specific configuration, templating via @ tokens, and more. The -configuration is then validated with pydantic to ensure it is correct and to give us well defined -types to work with. 
The underlying dynaconf settings object is stored in the `wrapped` attribute -of the Project and Workspace settings objects. This allows us to access the raw configuration -values if needed. ChainMaps are used to provide a scoped view of the configuration. This enables -a powerful layering mechanism where we can override configuration values at different levels. -Finally, we provide a context manager to inject the project configuration into the dlt context -which allows us to access the configuration throughout the dlt codebase and in data pipelines. - -Example: - -```toml -# cdf.toml -[default] -name = "cdf-example" -version = "0.1.0" -workspaces = ["alex"] -filesystem.uri = "file://_storage" -feature_flags.provider = "filesystem" -feature_flags.filename = "feature_flags.json" - -[prod] -filesystem.uri = "gcs://bucket/path" -``` - -```toml -# alex/cdf.toml -[pipelines.us_cities] # alex/pipelines/us_cities_pipeline.py -version = 1 -dataset_name = "us_cities_v0_{version}" -description = "Get US city data" -options.full_refresh = false -options.runtime.dlthub_telemetry = false -``` -""" - -import itertools -import os -import time -import typing as t -from collections import ChainMap -from contextlib import contextmanager, suppress -from enum import Enum -from functools import cached_property, lru_cache -from pathlib import Path - -import duckdb -import dynaconf -import pydantic -from dynaconf.utils.boxing import DynaBox -from dynaconf.vendor.box import Box - -import cdf.legacy.constants as c -import cdf.legacy.specification as spec -from cdf.integrations.feature_flag import ( - AbstractFeatureFlagAdapter, - get_feature_flag_adapter_cls, -) -from cdf.legacy.config import inject_configuration -from cdf.legacy.filesystem import FilesystemAdapter -from cdf.legacy.state import StateStore -from cdf.types import M, PathLike - -if t.TYPE_CHECKING: - from sqlmesh.core.config import GatewayConfig - -T = t.TypeVar("T") - - -class _BaseSettings(pydantic.BaseModel): - """A base model for CDF settings""" - - model_config = pydantic.ConfigDict( - frozen=True, - use_attribute_docstrings=True, - from_attributes=True, - populate_by_name=True, - ) - - _generation: float = pydantic.PrivateAttr(default_factory=time.monotonic) - """A monotonic timestamp of when the model was generated""" - - def __hash__(self) -> int: - return hash(self.model_dump_json()) - - def __eq__(self, other: t.Any) -> bool: - if not isinstance(other, type(self)): - return False - return self.model_dump() == other.model_dump() - - def is_newer_than(self, other: "Project") -> bool: - """Check if the model is newer than another model""" - return self._generation > other._generation - - def is_older_than(self, other: "Project") -> bool: - """Check if the model is older than another model""" - return self._generation < other._generation - - def model_dump(self, **kwargs: t.Any) -> t.Dict[str, t.Any]: - """Dump the model to a dictionary""" - kwargs.setdefault("by_alias", True) - return super().model_dump(**kwargs) - - -class FilesystemConfig(_BaseSettings): - """Configuration for a filesystem provider""" - - uri: str = "_storage" - """The filesystem URI - - This is based on fsspec. See https://filesystem-spec.readthedocs.io/en/latest/index.html - This supports all filesystems supported by fsspec as well as filesystem chaining. - """ - options_: t.Annotated[ - t.Tuple[t.Tuple[str, t.Any], ...], pydantic.Field(alias="options") - ] = () - """The filesystem options - - Options are passed to the filesystem provider as keyword arguments. 
- """ - - _project: t.Optional["Project"] = None - """The project this configuration belongs to""" - - @pydantic.field_validator("options_", mode="before") - @classmethod - def _options_validator(cls, value: t.Any) -> t.Any: - """Convert the options to an immutable tuple of tuples""" - if isinstance(value, dict): - value = tuple(value.items()) - return value - - @property - def options(self) -> t.Dict[str, t.Any]: - """Get the filesystem options as a dictionary""" - return dict(self.options_) - - @property - def project(self) -> "Project": - """Get the project this configuration belongs to""" - if self._project is None: - raise ValueError("Filesystem configuration not associated with a project") - return self._project - - @property - def has_project_association(self) -> bool: - """Check if the configuration is associated with a project""" - return self._project is not None - - def get_adapter(self) -> M.Result[FilesystemAdapter, Exception]: - """Get a filesystem adapter""" - if self.has_project_association: - root = self.project.path - else: - root = None - try: - return M.ok(FilesystemAdapter(self.uri, root, self.options)) - except Exception as e: - return M.error(e) - - -class FeatureFlagProviderType(str, Enum): - """The feature flag provider""" - - FILESYSTEM = "filesystem" - HARNESS = "harness" - LAUNCHDARKLY = "launchdarkly" - SPLIT = "split" - NOOP = "noop" - - -class BaseFeatureFlagConfig(_BaseSettings): - """Base configuration for a feature flags provider""" - - provider: FeatureFlagProviderType - """The feature flags provider""" - - _project: t.Optional["Project"] = None - """The project this configuration belongs to""" - - @property - def project(self) -> "Project": - """Get the project this configuration belongs to""" - if self._project is None: - raise ValueError("Feature flag configuration not associated with a project") - return self._project - - @property - def has_project_association(self) -> bool: - """Check if the configuration is associated with a project""" - return self._project is not None - - def get_adapter( - self, **kwargs: t.Any - ) -> M.Result[AbstractFeatureFlagAdapter, Exception]: - """Get a handle to the feature flag adapter""" - options = self.model_dump() - provider = str(options.pop("provider").value) - options.update(kwargs) - return get_feature_flag_adapter_cls(provider).map( - lambda cls: cls(**options, filesystem=self.project.fs_adapter.wrapped) - ) - - -class FilesystemFeatureFlagConfig(BaseFeatureFlagConfig): - """Configuration for a feature flags provider that uses the configured filesystem""" - - provider: t.Literal[FeatureFlagProviderType.FILESYSTEM] = ( - FeatureFlagProviderType.FILESYSTEM - ) - """The feature flags provider""" - filename: str = "feature_flags.json" - """The feature flags filename. - - This is a format string that can include the following variables: - - `name`: The project name - - `workspace`: The workspace name - - `environment`: The environment name - - `source`: The source name - - `resource`: The resource name - - `version`: The version number of the component - """ - - -class HarnessFeatureFlagConfig(BaseFeatureFlagConfig): - """Configuration for a feature flags provider that uses the Harness API""" - - provider: t.Literal[FeatureFlagProviderType.HARNESS] = ( - FeatureFlagProviderType.HARNESS - ) - """The feature flags provider""" - api_key: str = pydantic.Field( - os.getenv("HARNESS_API_KEY", ...), - pattern=r"^[ps]at\.[a-zA-Z0-9_\-]+\.[a-fA-F0-9]+\.[a-zA-Z0-9_\-]+$", - ) - """The harness API key. 
Get it from your user settings""" - sdk_key: pydantic.UUID4 = pydantic.Field(os.getenv("HARNESS_SDK_KEY", ...)) - """The harness SDK key. Get it from the environment management page of the FF module""" - account: str = pydantic.Field( - os.getenv("HARNESS_ACCOUNT_ID", ...), - min_length=22, - max_length=22, - pattern=r"^[a-zA-Z0-9_\-]+$", - ) - """The harness account ID. We will attempt to read it from the environment if not provided.""" - organization: str = pydantic.Field(os.getenv("HARNESS_ORG_ID", "default")) - """The harness organization ID. We will attempt to read it from the environment if not provided.""" - project_: str = pydantic.Field( - os.getenv("HARNESS_PROJECT_ID", ...), alias="project" - ) - """The harness project ID. We will attempt to read it from the environment if not provided.""" - - -class LaunchDarklyFeatureFlagSettings(BaseFeatureFlagConfig): - """Configuration for a feature flags provider that uses the LaunchDarkly API""" - - provider: t.Literal[FeatureFlagProviderType.LAUNCHDARKLY] = ( - FeatureFlagProviderType.LAUNCHDARKLY - ) - """The feature flags provider""" - api_key: str = pydantic.Field( - os.getenv("LAUNCHDARKLY_API_KEY", ...), - pattern=r"^[a-zA-Z0-9_\-]+$", - ) - """The LaunchDarkly API key. Get it from your user settings""" - - -class SplitFeatureFlagSettings(BaseFeatureFlagConfig): - """Configuration for a feature flags provider that uses the Split API""" - - provider: t.Literal[FeatureFlagProviderType.SPLIT] = FeatureFlagProviderType.SPLIT - """The feature flags provider""" - api_key: str = pydantic.Field( - os.getenv("SPLIT_API_KEY", ...), - pattern=r"^[a-zA-Z0-9_\-]+$", - ) - """The Split API key. Get it from your user settings""" - - -class NoopFeatureFlagSettings(BaseFeatureFlagConfig): - """Configuration for a feature flags provider that does nothing""" - - provider: t.Literal[FeatureFlagProviderType.NOOP] = FeatureFlagProviderType.NOOP - """The feature flags provider""" - - -FeatureFlagConfig = t.Union[ - FilesystemFeatureFlagConfig, - HarnessFeatureFlagConfig, - LaunchDarklyFeatureFlagSettings, - SplitFeatureFlagSettings, - NoopFeatureFlagSettings, -] -"""A union of all feature flag provider configurations""" - - -class Workspace(_BaseSettings): - """A workspace is a collection of pipelines, sinks, publishers, scripts, and notebooks in a subdirectory of the project""" - - workspace_path: t.Annotated[Path, pydantic.Field(alias="path")] = Path(".") - """The path to the workspace within the project path""" - project_path: Path = Path(".") - """The path to the project""" - name: t.Annotated[ - str, pydantic.Field(pattern=r"^[a-zA-Z0-9_\-]+$", min_length=3, max_length=32) - ] = "default" - """The name of the workspace""" - owner: t.Optional[str] = None - """The owner of the workspace""" - pipelines: t.Tuple[spec.PipelineSpecification, ...] = () - """Pipelines move data from sources to sinks""" - sinks: t.Tuple[spec.SinkSpecification, ...] = () - """A sink is a destination for data""" - publishers: t.Tuple[spec.PublisherSpecification, ...] = () - """Publishers send data to external systems""" - scripts: t.Tuple[spec.ScriptSpecification, ...] = () - """Scripts are used to automate tasks""" - notebooks: t.Tuple[spec.NotebookSpecification, ...] = () - """Notebooks are used for data analysis and reporting""" - - _project: t.Optional["Project"] = None - """The project this workspace belongs to. 
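A brief sketch of how the provider-discriminated `FeatureFlagConfig` union removed above turns raw settings into a typed model, using pydantic v2's `TypeAdapter` (the deleted code already relies on pydantic v2). The filename value is illustrative:

```python
import typing as t

import pydantic

from cdf.legacy.project import FeatureFlagConfig  # removed in this patch

adapter = pydantic.TypeAdapter(
    t.Annotated[FeatureFlagConfig, pydantic.Field(discriminator="provider")]
)
ff_settings = adapter.validate_python(
    {"provider": "filesystem", "filename": "flags/{workspace}.json"}
)
print(type(ff_settings).__name__)  # -> FilesystemFeatureFlagConfig
```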
Set by the project model validator.""" - - @pydantic.field_validator( - "pipelines", "sinks", "publishers", "scripts", "notebooks", mode="before" - ) - @classmethod - def _workspace_component_validator( - cls, value: t.Any, info: pydantic.ValidationInfo - ): - """Parse component dictionaries into an array of components inject the workspace path""" - if isinstance(value, dict): - # name : {config} - cmps = [] - for key, cmp in value.items(): - if isinstance(cmp, (str, Path)): - # name : path - cmp = {"path": cmp} - cmp.setdefault("name", key) - cmps.append(cmp) - value = cmps - elif hasattr(value, "__iter__") and not isinstance(value, (str, bytes)): - # [{configA}, ...] - value = list(value) - else: - raise ValueError( - "Invalid workspace component configuration, must be either a dict or a list" - ) - for cmp in value: - # TODO: gut check this, its interesting how the tree-like structure - # of project -> workspace -> component requires us to bubble down - # the accumulated path since each layer is a separate model validator - # and component validator ultimately relies on a fully resolvable path - cmp["root_path"] = Path( - info.data["project_path"], info.data["workspace_path"] - ) - return value - - @pydantic.model_validator(mode="after") - def _associate_components_with_workspace(self): - """Associate the components with the workspace""" - for cmp in ( - self.pipelines - + self.sinks - + self.publishers - + self.scripts - + self.notebooks - ): - cmp._workspace = self - return self - - @pydantic.field_serializer( - "pipelines", "sinks", "publishers", "scripts", "notebooks" - ) - @classmethod - def _workspace_component_serializer(cls, value: t.Any) -> t.Dict[str, t.Any]: - """Serialize component arrays back to dictionaries""" - return {cmp.name: cmp.model_dump() for cmp in value} - - @property - def path(self) -> Path: - """Get the path to the workspace""" - return self.project_path / self.workspace_path - - def __getitem__(self, key: str) -> t.Any: - """Get a component by name""" - try: - if "." 
in key: - parts = key.split(".") - if ( - parts[0] - in ("pipelines", "sinks", "publishers", "scripts", "notebooks") - and len(parts) > 1 - ): - obj = getattr(self, parts[0]) - obj = next(filter(lambda cmp: cmp.name == parts[1], obj)) - parts = parts[2:] - else: - obj = self - for part in parts: - if hasattr(obj, "__getitem__"): - obj = obj[part] - else: - obj = getattr(obj, part) - return obj - else: - return getattr(self, key) - except AttributeError: - pass - raise KeyError(f"Component not found: {key}") - - def __setitem__(self, key: str, value: t.Any) -> None: - """Set a component by name""" - raise NotImplementedError("Cannot set components") - - def __delitem__(self, key: str) -> None: - """Delete a component by name""" - raise NotImplementedError("Cannot delete components") - - def __len__(self) -> int: - """Get the number of components""" - return ( - len(self.pipelines) - + len(self.sinks) - + len(self.publishers) - + len(self.scripts) - + len(self.notebooks) - ) - - def __iter__( - self, - ) -> t.Iterator[spec.CoreSpecification]: - """Iterate over the components""" - return itertools.chain( - self.pipelines, - self.sinks, - self.publishers, - self.scripts, - self.notebooks, - ) - - def __contains__(self, key: str) -> bool: - """Check if a component exists""" - return key in self.get_component_names() - - def get_component_names(self) -> t.List[str]: - """Get the component names""" - return list( - itertools.chain( - (f"pipelines.{cmp.name}" for cmp in self.pipelines), - (f"sinks.{cmp.name}" for cmp in self.sinks), - (f"publishers.{cmp.name}" for cmp in self.publishers), - (f"scripts.{cmp.name}" for cmp in self.scripts), - (f"notebooks.{cmp.name}" for cmp in self.notebooks), - ) - ) - - keys = get_component_names - values = __iter__ - - def items(self) -> t.Iterator[t.Tuple[str, spec.CoreSpecification]]: - """Iterate over the components""" - return ((cmp, self[cmp]) for cmp in self.get_component_names()) - - def _get_spec( - self, name: str, kind: str - ) -> M.Result[spec.CoreSpecification, KeyError]: - """Get a component spec by name""" - for cmp in getattr(self, kind): - if cmp.name == name: - return M.ok(cmp) - return M.error(KeyError(f"{kind[:-1].title()} not found: {name}")) - - def get_pipeline_spec( - self, name: str - ) -> M.Result[spec.PipelineSpecification, Exception]: - """Get a pipeline by name""" - return t.cast( - M.Result[spec.PipelineSpecification, Exception], - self._get_spec(name, "pipelines"), - ) - - def get_sink_spec(self, name: str) -> M.Result[spec.SinkSpecification, Exception]: - """Get a sink by name""" - return t.cast( - M.Result[spec.SinkSpecification, Exception], - self._get_spec(name, "sinks"), - ) - - def get_publisher_spec( - self, name: str - ) -> M.Result[spec.PublisherSpecification, Exception]: - """Get a publisher by name""" - return t.cast( - M.Result[spec.PublisherSpecification, Exception], - self._get_spec(name, "publishers"), - ) - - def get_script_spec( - self, name: str - ) -> M.Result[spec.ScriptSpecification, Exception]: - """Get a script by name""" - return t.cast( - M.Result[spec.ScriptSpecification, Exception], - self._get_spec(name, "scripts"), - ) - - def get_notebook_spec( - self, name: str - ) -> M.Result[spec.NotebookSpecification, Exception]: - """Get a notebook by name""" - return t.cast( - M.Result[spec.NotebookSpecification, Exception], - self._get_spec(name, "notebooks"), - ) - - @property - def project(self) -> "Project": - """Get the project this workspace belongs to""" - if self._project is None: - raise 
ValueError("Workspace not associated with a project") - return self._project - - @property - def has_project_association(self) -> bool: - """Check if the workspace is associated with a project""" - return self._project is not None - - @contextmanager - def inject_configuration(self) -> t.Iterator[None]: - """Inject the workspace configuration into the context""" - with self.project.inject_configuration(self.name): - yield - - @property - def fs_adapter(self) -> FilesystemAdapter: - """Get a handle to the project filesystem adapter""" - return self.project.fs_adapter - - @property - def ff_adapter(self) -> AbstractFeatureFlagAdapter: - """Get a handle to the project feature flag adapter""" - return self.project.ff_adapter - - @property - def state(self) -> StateStore: - """Get a handle to the project state store""" - return self.project.state - - def get_transform_gateways(self) -> t.Iterator[t.Tuple[str, "GatewayConfig"]]: - """Get the SQLMesh gateway configurations""" - for sink in self.sinks: - with suppress(KeyError): - yield sink.name, sink.get_transform_config() - - def get_transform_context(self, name: t.Optional[str] = None): - """Get the SQLMesh context for the workspace - - We expect a config.py file in the workspace directory that uses the - `get_transform_gateways` method to populate the SQLMesh Config.gateways key. - - Args: - name: The name of the gateway to use. - - Returns: - The SQLMesh context. - """ - import sqlmesh - - return sqlmesh.Context(paths=self.path, gateway=name) - - -class Project(_BaseSettings): - """A project is a collection of workspaces and configuration settings""" - - path: Path = Path(".") - """The path to the project""" - name: str = pydantic.Field( - pattern=r"^[a-zA-Z0-9_\-]+$", - min_length=3, - max_length=32, - default_factory=lambda: "CDF-" + os.urandom(4).hex(sep="-", bytes_per_sep=2), - ) - """The name of the project""" - version: str - """The version of the project, this discriminates between project and workspace config""" - owner: t.Optional[str] = None - """The owner of the project""" - documentation: t.Optional[str] = None - """The project documentation""" - workspaces: t.Tuple[Workspace, ...] = (Workspace(),) - """The project workspaces""" - fs: t.Annotated[ - FilesystemConfig, - pydantic.Field(alias="filesystem"), - ] = FilesystemConfig() - """The project filesystem settings""" - ff: t.Annotated[ - FeatureFlagConfig, - pydantic.Field(discriminator="provider", alias="feature_flags"), - ] = FilesystemFeatureFlagConfig() - """The project feature flags provider settings""" - state: StateStore = StateStore() - """The project state connection settings""" - - _wrapped_config: t.Any = {} - """Store a reference to the wrapped configuration""" - - _extra: t.Dict[str, t.Any] = {} - """Stored information set via __setitem__ which is included in scoped dictionaries""" - - @pydantic.field_validator("path", mode="before") - @classmethod - def _path_validator(cls, value: t.Any): - """Resolve the project path - - The project path must be a directory. If it is a string, it will be converted to a Path object. 
- """ - if isinstance(value, str): - value = Path(value) - if not isinstance(value, Path): - raise ValueError("Path must be a string or a Path object") - elif not value.is_dir(): - raise FileNotFoundError(f"Project not found: {value}") - return value.resolve() - - @pydantic.field_validator("workspaces", mode="before") - @classmethod - def _workspaces_validator(cls, value: t.Any, info: pydantic.ValidationInfo): - """Hydrate the workspaces if they are paths. Convert a dict to a list of workspaces. - - If the workspace is a path, load the configuration from the path. - """ - if isinstance(value, str): - # pathA; pathB; pathC - value = list(map(lambda s: s.strip(), value.split(";"))) - elif isinstance(value, dict): - # name : {config} - workspaces = [] - for name, config in value.items(): - config.setdefault("name", name) - workspaces.append(config) - value = workspaces - if isinstance(value, (list, tuple)): - # [{configA} | pathA, {configB}, ...] - workspaces = [] - project_path = Path(info.data["path"]) - for obj in value: - if isinstance(obj, (str, Path)): - # pathA - path = Path(obj) - if path.is_absolute(): - path = path.relative_to(project_path) - config = _load_config(project_path / path) - config["path"] = path - config["project_path"] = project_path - workspaces.append(config) - elif isinstance(obj, dict): - # {configA} - # NOTE: in the component validator, we have heuristics for getting a path - # from a name but we seem to demand a path here, we should be consistent - path = Path(obj.pop("path", None) or obj.pop("workspace_path")) - if path.is_absolute(): - path = path.relative_to(project_path) - obj["path"] = path - obj["project_path"] = project_path - workspaces.append(obj) - else: - raise ValueError("Invalid workspace configuration") - value = workspaces - if not (hasattr(value, "__iter__") and not isinstance(value, (str, bytes))): - raise ValueError("Invalid workspaces configuration, must be an iterable") - return value - - @pydantic.model_validator(mode="after") - def _project_workspaces_validator(self): - """Validate the workspaces - - Workspaces must have unique names and paths. - Workspaces must be subdirectories of the project path. - Workspaces must not be subdirectories of other workspaces. 
- """ - workspace_names = [workspace.name for workspace in self.workspaces] - if len(workspace_names) != len(set(workspace_names)): - raise ValueError("Workspace names must be unique") - workspace_paths = [workspace.path for workspace in self.workspaces] - if len(workspace_paths) != len(set(workspace_paths)): - raise ValueError("Workspace paths must be unique") - if not all(map(lambda path: path.is_relative_to(self.path), workspace_paths)): - raise ValueError( - "Workspace paths must be subdirectories of the project path" - ) - if not all( - map( - lambda path: all( - not other_path.is_relative_to(path) - for other_path in workspace_paths - if other_path != path - ), - workspace_paths, - ) - ): - raise ValueError( - "Workspace paths must not be subdirectories of other workspaces" - ) - return self - - @pydantic.model_validator(mode="after") - def _associate_project_with_children(self): - """Bind the project to the workspaces, filesystem, and feature flags""" - for workspace in self.workspaces: - workspace._project = self - self.ff._project = self - self.fs._project = self - return self - - @pydantic.field_serializer("workspaces") - @classmethod - def _workspace_serializer(cls, value: t.Any) -> t.Dict[str, t.Any]: - """Serialize the workspaces""" - return {workspace.name: workspace.model_dump() for workspace in value} - - def __getitem__(self, key: str) -> t.Any: - """Get an item from the configuration""" - try: - if "." in key: - parts = key.split(".") - if parts[0] == "workspaces" and len(parts) > 1: - obj = self.get_workspace(parts[1]).unwrap() - parts = parts[2:] - else: - obj = self - for i, part in enumerate(parts): - if isinstance(obj, Workspace): - return obj[".".join(parts[i:])] - if hasattr(obj, "__getitem__"): - obj = obj[part] - else: - obj = getattr(obj, part) - return obj - if key == "name": - return self.name - if key in self.model_fields: - return getattr(self, key) - except AttributeError: - pass - return self._wrapped_config[key] - - def __setitem__(self, key: str, value: t.Any) -> None: - """Set an item in the configuration""" - if key in self.model_fields: - raise KeyError( - f"Cannot set `{key}` via string accessor, set the attribute directly instead" - ) - self._extra[key] = value - - def __delitem__(self, key: str) -> None: - """Delete a workspace""" - raise NotImplementedError("Cannot delete workspaces") - - def __len__(self) -> int: - """Get the number of workspaces""" - return len(self.workspaces) - - def __iter__(self) -> t.Iterator[Workspace]: - """Iterate over the workspaces""" - return iter(self.workspaces) - - def __contains__(self, key: str) -> bool: - """Check if a workspace exists""" - return key in self.get_workspace_names() - - def get_workspace_names(self) -> t.List[str]: - """Get the workspace names""" - return [workspace.name for workspace in self.workspaces] - - keys = get_workspace_names - values = __iter__ - - def items(self) -> t.Iterator[t.Tuple[str, Workspace]]: - """Iterate over the workspaces""" - return zip(self.get_workspace_names(), self.workspaces) - - def get_workspace( - self, name: t.Optional[str] = None - ) -> M.Result[Workspace, Exception]: - """Get a workspace by name, if no name is provided, return the default workspace""" - if name is None: - return M.ok(self.workspaces[0]) - for workspace in self.workspaces: - if workspace.name == name: - return M.ok(workspace) - return M.error(KeyError(f"Workspace not found: {name}")) - - def get_workspace_from_path(self, path: PathLike) -> M.Result[Workspace, Exception]: - """Get a workspace by 
path.""" - path = Path(path).resolve() - for workspace in self.workspaces: - if path.is_relative_to(workspace.path): - return M.ok(workspace) - return M.error(ValueError(f"No workspace found at {path}.")) - - def to_scoped_dict(self, workspace: t.Optional[str] = None) -> ChainMap[str, t.Any]: - """Convert the project settings to a scoped dictionary - - Lookups are performed in the following order: - - The extra configuration, holding data set via __setitem__. - - The workspace configuration, if passed. - - The project configuration. - - The wrapped configuration, if available. Typically a dynaconf settings object. - - Boxing allows us to access nested values using dot notation. This is doubly useful - since ChainMaps will move to the next map in the chain if the dotted key is not - fully resolved in the current map. - """ - - def to_box(obj: t.Any) -> Box: - return DynaBox(obj, box_dots=True) - - if workspace: - return ( - self.get_workspace(workspace) - .map( - lambda ws: ChainMap( - to_box(self._extra), - to_box(ws.model_dump()), - to_box(self.model_dump()), - self._wrapped_config, - ) - ) - .unwrap() - ) - return ChainMap( - to_box(self._extra), - to_box(self.model_dump()), - self._wrapped_config, - ) - - @contextmanager - def inject_configuration( - self, workspace: t.Optional[str] = None - ) -> t.Iterator[None]: - """Inject the project configuration into the context""" - with inject_configuration(self.to_scoped_dict(workspace)): - yield - - @cached_property - def fs_adapter(self) -> FilesystemAdapter: - """Get a configured filesystem adapter""" - return self.fs.get_adapter().unwrap() - - @cached_property - def ff_adapter(self) -> AbstractFeatureFlagAdapter: - """Get a handle to the project's configured feature flag adapter""" - return self.ff.get_adapter().unwrap() - - @cached_property - def duckdb(self) -> duckdb.DuckDBPyConnection: - """Get a handle to the project's DuckDB connection""" - conn = duckdb.connect(":memory:") - conn.install_extension("httpfs") - conn.install_extension("json") - conn.register_filesystem(self.fs_adapter.wrapped) - conn.execute("CREATE TABLE workspaces (name TEXT PRIMARY KEY, path TEXT)") - for workspace in self.workspaces: - conn.execute( - "INSERT INTO workspaces (name, path) VALUES (?, ?)", - (workspace.name, workspace.path.as_posix()), - ) - return conn - - def get_workspace_path(self, name: str) -> M.Result[Path, Exception]: - """Get the path to a workspace by name""" - return self.get_workspace(name).map(lambda ws: ws.path) - - @classmethod - def from_path(cls, root: PathLike): - """Load configuration data from a project root path using dynaconf. - - Args: - root: The root path to the project. - - Returns: - A Project object. 
- """ - root_path = Path(root).resolve() - if root_path.is_file(): - root_path = root_path.parent - config = _load_config(root_path) - config["path"] = root_path - project = cls.model_validate(config) - project._wrapped_config = config - return project - - def activate(self) -> t.Callable[[], None]: - """Activate the project and return a deactivation function""" - from cdf.legacy.context import active_project - - token = active_project.set(self) - ctx = self.inject_configuration() - ctx.__enter__() - - def _deactivate() -> None: - """Deactivate the project""" - active_project.reset(token) - ctx.__exit__(None, None, None) - - return _deactivate - - @contextmanager - def activated(self) -> t.Iterator[None]: - """Activate the project for the duration of the context""" - deactivate = self.activate() - yield - deactivate() - - -def _load_config( - path: Path, extensions: t.Optional[t.List[str]] = None -) -> dynaconf.LazySettings: - """Load raw configuration data from a file path using dynaconf. - - Args: - path: The path to the project or workspace directory - - Returns: - A dynaconf.LazySettings object. - """ - extensions = extensions or ["toml", "yaml", "yml", "json", "py"] - if not any(map(lambda ext: path.joinpath(f"cdf.{ext}").is_file(), extensions)): - raise FileNotFoundError(f"No cdf configuration file found: {path}") - - config = dynaconf.LazySettings( - root_path=path, - settings_files=[f"cdf.{ext}" for ext in extensions], - environments=True, - envvar_prefix="CDF", - env_switcher=c.CDF_ENVIRONMENT, - env=c.DEFAULT_ENVIRONMENT, - load_dotenv=True, - ) - - def _eval_lazy(value: t.Any) -> t.Any: - """Evaluate lazy values in the configuration""" - if isinstance(value, dict): - for key, val in value.items(): - value[key] = _eval_lazy(val) - return value - elif isinstance(value, list): - for i, val in enumerate(value): - value[i] = _eval_lazy(val) - return value - if getattr(value, "_dynaconf_lazy_format", None): - value = value(config) - return value - - for key, value in config.items(): - config[key] = _eval_lazy(value) - - return config - - -load_project = M.result(Project.from_path) -"""Load configuration data from a project root path using dynaconf. - -Args: - root: The root path to the project. - -Returns: - A Result monad with a Project object if successful. Otherwise, a Result monad with an error. 
-""" - -if not t.TYPE_CHECKING: - # type checker seems to not like the lru_cache decorator wrapping a monadic lift - # so we can safely hide this from the type checker - load_project = lru_cache(maxsize=25)(load_project) - -__all__ = [ - "load_project", - "Project", - "Workspace", - "FeatureFlagConfig", - "FilesystemConfig", -] diff --git a/src/cdf/legacy/runtime/__init__.py b/src/cdf/legacy/runtime/__init__.py deleted file mode 100644 index 39be86f..0000000 --- a/src/cdf/legacy/runtime/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -from cdf.legacy.runtime.notebook import execute_notebook_specification -from cdf.legacy.runtime.pipeline import execute_pipeline_specification, pipeline -from cdf.legacy.runtime.publisher import execute_publisher_specification -from cdf.legacy.runtime.script import execute_script_specification - -__all__ = [ - "execute_notebook_specification", - "execute_pipeline_specification", - "execute_publisher_specification", - "execute_script_specification", - "pipeline", -] diff --git a/src/cdf/legacy/runtime/common.py b/src/cdf/legacy/runtime/common.py deleted file mode 100644 index 2734111..0000000 --- a/src/cdf/legacy/runtime/common.py +++ /dev/null @@ -1,43 +0,0 @@ -import functools -import typing as t - -import cdf.legacy.logger as logger -from cdf.legacy.project import Project, Workspace -from cdf.legacy.specification.base import BaseComponent -from cdf.types import P - -T = t.TypeVar("T") - - -def _get_project(obj: t.Any) -> Project: - """Get the project associated with the object.""" - if isinstance(obj, Project): - return obj - if isinstance(obj, Workspace): - return obj.project - if isinstance(obj, BaseComponent): - return obj.workspace.project - raise TypeError(f"Expected a Project, Workspace or Component, got {type(obj)}") - - -def with_activate_project(func: t.Callable[P, T]) -> t.Callable[P, T]: - """Attempt to inject the Project associated with the first argument into cdf.context. - - Args: - func: The function to decorate. - - Returns: - The decorated function. - """ - - @functools.wraps(func) - def wrapper(*args: P.args, **kwargs: P.kwargs) -> T: - try: - project = _get_project(args[0]) - except TypeError: - logger.warning(f"Could not get project from {type(args[0])}") - return func(*args, **kwargs) - with project.activated(): - return func(*args, **kwargs) - - return wrapper diff --git a/src/cdf/legacy/runtime/notebook.py b/src/cdf/legacy/runtime/notebook.py deleted file mode 100644 index 2348096..0000000 --- a/src/cdf/legacy/runtime/notebook.py +++ /dev/null @@ -1,109 +0,0 @@ -"""The runtime notebook module is responsible for executing notebooks from notebook specifications. - -It performs the following functions: -- Executes the notebook. -- Writes the output to a designated location in a storage provider. -- Cleans up the rendered notebook if required. 
-""" - -import re -import sys -import time -import typing as t -from contextlib import nullcontext -from datetime import date, datetime -from pathlib import Path - -import papermill - -import cdf.legacy.logger as logger -from cdf.legacy.runtime.common import with_activate_project -from cdf.legacy.specification import NotebookSpecification -from cdf.legacy.state import with_audit -from cdf.types import M - -if t.TYPE_CHECKING: - from nbformat import NotebookNode - - -@with_activate_project -@with_audit( - "execute_notebook", - lambda spec, **params: { - "name": spec.name, - "owner": spec.owner, - "workspace": spec.workspace.name, - "project": spec.project.name, - }, -) -def execute_notebook_specification( - spec: NotebookSpecification, - **params: t.Any, -) -> M.Result["NotebookNode", Exception]: - """Execute a notebook specification. - - Args: - spec: The notebook specification to execute. - storage: The filesystem to use for persisting the output. - **params: The parameters to pass to the notebook. Overrides the notebook spec parameters. - """ - origpath = sys.path[:] - sys.path = [ - str(spec.root_path), - *sys.path, - str(spec.root_path.parent), - ] - try: - merged_params = {**spec.parameters, **params} - output = spec.path.parent.joinpath( - "_rendered", f"{spec.name}.{int(time.time())}.ipynb" - ) - output.parent.mkdir(parents=True, exist_ok=True) - if spec.has_workspace_association: - workspace_context = spec.workspace.inject_configuration() - else: - workspace_context = nullcontext() - with spec._lock, workspace_context: - rv: "NotebookNode" = papermill.execute_notebook( - spec.path, - output, - merged_params, - cwd=spec.root_path, - ) - logger.info( - f"Successfully ran notebook {spec.path} with params {merged_params} rendered into {output}" - ) - storage = spec.workspace.fs_adapter - if storage and spec.storage_path: - storage_path = spec.storage_path.format( - name=spec.name, - date=date.today(), - timestamp=datetime.now().isoformat(timespec="seconds"), - epoch=time.time(), - params=merged_params, - ext=spec.path.suffix, - ) - logger.info(f"Persisting output to {storage_path} with {storage}") - storage.put_file(output, storage_path) - if spec.gc_duration >= 0: - _gc_rendered(output.parent, spec.name, spec.gc_duration) - return M.ok(rv) - except Exception as e: - logger.error(f"Error running notebook {spec.path}: {e}") - return M.error(e) - finally: - sys.path = origpath - - -def _gc_rendered(path: Path, name: str, max_ttl: int) -> None: - """Garbage collect rendered notebooks.""" - now = time.time() - for nb in path.glob(f"{name}.*.ipynb"): - ts_str = re.search(r"\d{10}", nb.stem) - if ts_str: - ts = int(ts_str.group()) - if now - ts > max_ttl: - nb.unlink() - - -__all__ = ["execute_notebook_specification"] diff --git a/src/cdf/legacy/runtime/pipeline.py b/src/cdf/legacy/runtime/pipeline.py deleted file mode 100644 index c40b39f..0000000 --- a/src/cdf/legacy/runtime/pipeline.py +++ /dev/null @@ -1,488 +0,0 @@ -"""The runtime pipeline module is responsible for executing pipelines from pipeline specifications. - -It performs the following functions: -- Injects the runtime context into the pipeline. -- Executes the pipeline. -- Captures metrics during extract. -- Intercepts sources during extract. (if specified, this makes the pipeline a no-op) -- Applies transformations to sources during extract. -- Stages data if a staging location is provided and enabled in the runtime context. -- Forces replace disposition if specified in the runtime context. 
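A sketch of driving the notebook runtime removed above from a workspace. The notebook name (`daily_report`) and `run_date` parameter are hypothetical; execution returns a `Result` wrapping the executed nbformat `NotebookNode`:

```python
from cdf.legacy.project import Project  # removed in this patch
from cdf.legacy.runtime import execute_notebook_specification  # removed in this patch


def run_report_notebook(project: Project) -> None:
    workspace = project.get_workspace("alex").unwrap()
    spec = workspace.get_notebook_spec("daily_report").unwrap()
    node = execute_notebook_specification(spec, run_date="2024-08-19").unwrap()
    print(f"executed {len(node.cells)} cells")
```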
-- Filters resources based on glob patterns. -- Logs a warning if dataset_name is provided in the runtime context. (since we want to manage it) -- Creates a cdf pipeline from a dlt pipeline. -""" - -import fnmatch -import os -import shutil -import typing as t -from contextlib import nullcontext, redirect_stdout, suppress -from pathlib import Path - -import dlt -from dlt.common.destination import TDestinationReferenceArg, TLoaderFileFormat -from dlt.common.pipeline import ExtractInfo, LoadInfo, NormalizeInfo -from dlt.common.schema.typing import ( - TAnySchemaColumns, - TColumnNames, - TSchemaContract, - TWriteDisposition, -) -from dlt.extract.extract import Extract, data_to_sources -from dlt.pipeline.exceptions import SqlClientNotAvailable -from dlt.pipeline.pipeline import Pipeline - -import cdf.legacy.context as context -import cdf.legacy.logger as logger -from cdf.legacy.runtime.common import with_activate_project -from cdf.legacy.specification import PipelineSpecification, SinkSpecification -from cdf.legacy.state import with_audit -from cdf.types import M, P - -T = t.TypeVar("T") - -TPipeline = t.TypeVar("TPipeline", bound=dlt.Pipeline) - - -def _wrap_pipeline(default_factory: t.Callable[P, TPipeline]): - """Wraps dlt.pipeline such that it sources the active pipeline from the context.""" - - def wrapper(*args: P.args, **kwargs: P.kwargs) -> TPipeline: - try: - pipe = context.active_pipeline.get() - pipe.activate() - if kwargs: - logger.warning("CDF runtime detected, ignoring pipeline arguments") - return t.cast(TPipeline, pipe) - except LookupError: - return default_factory(*args, **kwargs) - - return wrapper - - -pipeline = _wrap_pipeline(dlt.pipeline) -"""Gets the active pipeline or creates a new one with the given arguments.""" - - -def _apply_filters( - source: dlt.sources.DltSource, resource_patterns: t.List[str], invert: bool -) -> dlt.sources.DltSource: - """Filters resources in a source based on a list of patterns.""" - return source.with_resources( - *[ - r - for r in source.selected_resources - if any(fnmatch.fnmatch(r, patt) for patt in resource_patterns) ^ invert - ] - ) - - -class RuntimePipeline(Pipeline): - """Overrides certain methods of the dlt pipeline to allow for cdf specific behavior.""" - - specification: PipelineSpecification - - def __init__(self, *args: t.Any, **kwargs: t.Any) -> None: - super().__init__(*args, **kwargs) - - self._force_replace = False - self._dry_run = False - self._metric_accumulator = {} - self._tracked_sources = set() - self._source_hooks = [] - - def configure( - self, - dry_run: bool = False, - force_replace: bool = False, - select: t.Optional[t.List[str]] = None, - exclude: t.Optional[t.List[str]] = None, - ) -> "RuntimePipeline": - """Configures options which affect the behavior of the pipeline at runtime. - - Args: - dry_run: Whether to run the pipeline in dry run mode. - force_replace: Whether to force replace disposition. - select: A list of glob patterns to select resources. - exclude: A list of glob patterns to exclude resources. - - Returns: - RuntimePipeline: The pipeline with source hooks configured. 
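What the `pipeline` wrapper above means for a user pipeline script, sketched under the assumption of a standalone run: inside the cdf runtime the call returns the pre-configured active pipeline (and ignores the arguments with a warning), while outside it falls back to a plain `dlt.pipeline`. The resource and pipeline names are illustrative:

```python
import dlt

from cdf.legacy.runtime import pipeline  # removed in this patch


@dlt.resource
def cities():
    yield from [{"name": "Phoenix"}, {"name": "Denver"}]


# Under the cdf runtime this returns the active RuntimePipeline; standalone it
# behaves exactly like dlt.pipeline(...).
pipe = pipeline(pipeline_name="us_cities", destination="duckdb")
pipe.run(cities())
```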
- """ - S = self.specification - - self._force_replace = force_replace - self._dry_run = dry_run - - def inject_metrics_and_filters( - source: dlt.sources.DltSource, - ) -> dlt.sources.DltSource: - """Injects metrics and filters into the source.""" - return S.inject_metrics_and_filters(source, self._metric_accumulator) - - def apply_selection(source: dlt.sources.DltSource) -> dlt.sources.DltSource: - """Applies selection filters to the source.""" - if not select: - return source - return _apply_filters(source, select, invert=False) - - def apply_exclusion(source: dlt.sources.DltSource) -> dlt.sources.DltSource: - """Applies exclusion filters to the source.""" - if not exclude: - return source - return _apply_filters(source, exclude, invert=True) - - def apply_feature_flags(source: dlt.sources.DltSource) -> dlt.sources.DltSource: - """Applies feature flags to the source. User-defined selection takes precedence.""" - if select: - return source - return S.workspace.ff_adapter.apply_source( - source, - S.workspace.project.name, - S.workspace.name, - ) - - self._source_hooks = [ - inject_metrics_and_filters, - apply_selection, - apply_feature_flags, - apply_exclusion, - ] - return self - - @property - def force_replace(self) -> bool: - """Whether to force replace disposition.""" - return self._force_replace - - @property - def dry_run(self) -> bool: - """Dry run mode.""" - return self._dry_run - - @property - def metric_accumulator(self) -> t.Mapping[str, t.Any]: - """A container for accumulating metrics during extract.""" - return self._metric_accumulator - - @property - def source_hooks( - self, - ) -> t.List[t.Callable[[dlt.sources.DltSource], dlt.sources.DltSource]]: - """The source hooks for the pipeline.""" - return self._source_hooks - - @property - def tracked_sources(self) -> t.Set[dlt.sources.DltSource]: - """The sources tracked by the pipeline.""" - return self._tracked_sources - - def extract( - self, - data: t.Any, - *, - table_name: str = None, # type: ignore[arg-type] - parent_table_name: str = None, # type: ignore[arg-type] - write_disposition: TWriteDisposition = None, # type: ignore[arg-type] - columns: TAnySchemaColumns = None, # type: ignore[arg-type] - primary_key: TColumnNames = None, # type: ignore[arg-type] - schema: dlt.Schema = None, # type: ignore[arg-type] - max_parallel_items: int = None, # type: ignore[arg-type] - workers: int = None, # type: ignore[arg-type] - schema_contract: TSchemaContract = None, # type: ignore[arg-type] - **kwargs: t.Any, - ) -> ExtractInfo: - _ = kwargs - with self._maybe_destination_capabilities(): - sources = data_to_sources( - data, - self, - schema, - table_name, - parent_table_name, - write_disposition, - columns, - primary_key, - schema_contract, - ) - - for i in range(len(sources)): - for hook in self._source_hooks: - sources[i] = hook(sources[i]) - self._tracked_sources.add(sources[i]) - - if self.dry_run: - return self._get_step_info( - step=Extract( - self._schema_storage, - self._normalize_storage_config(), - self.collector, - original_data=data, - ) - ) - - if self.force_replace: - write_disposition = "replace" - - info = self.specification.state_adapter.with_audit( - "extract", - { - "pipeline": self.pipeline_name, - "destination": self.destination.destination_name, - }, - )(super().extract)( - sources, - table_name=table_name, - parent_table_name=parent_table_name, - write_disposition=write_disposition, - columns=columns, - primary_key=primary_key, - schema=schema, - max_parallel_items=max_parallel_items, - workers=workers, - 
schema_contract=schema_contract, - ) - - if self.metric_accumulator: - logger.info( - "Metrics captured during %s extract, sideloading to destination...", - info.pipeline.pipeline_name, - ) - self.specification.state_adapter.with_audit( - "captured_metrics", - { - "load_ids": info.loads_ids, - "pipeline": self.pipeline_name, - "destination": self.destination.destination_name, - }, # type: ignore[arg-type] - )(super().extract)( - dlt.resource( - [ - { - "load_id": load_id, - "metrics": dict(self.metric_accumulator), - } - for load_id in info.loads_ids - ], - name="cdf_runtime_metrics", - write_disposition="append", - columns=[ - {"name": "load_id", "data_type": "text"}, - {"name": "metrics", "data_type": "complex"}, - ], - table_name="_cdf_metrics", - ) - ) - - if self.specification.persist_extract_package: - logger.info( - "Persisting extract package for %s...", info.pipeline.pipeline_name - ) - for package in info.load_packages: - # TODO: move this to a top-level function - root = Path(self.pipelines_dir) - base = Path(package.package_path).relative_to(root) - path = shutil.make_archive( - base_name=package.load_id, - format="gztar", - root_dir=root, - base_dir=base, - logger=logger, - ) - logger.info("Extract package staged at %s", path) - target = f"extracted/{package.load_id}.tar.gz" - self.specification.workspace.fs_adapter.put(path, target) - logger.info("Package uploaded to %s using project fs", target) - Path(path).unlink() - logger.info("Cleaned up staged package") - # TODO: listing and manipulating these should be first-class - # this will enable us to "replay" a pipeline - # logger.info(self.specification.workspace.filesystem.ls("extracted")) - - self.specification.state_adapter.capture_extract_info(info) - return info - - def normalize( - self, - workers: int = 1, - loader_file_format: TLoaderFileFormat = None, # type: ignore[arg-type] - ) -> NormalizeInfo: - info = self.specification.state_adapter.with_audit( - "normalize", - { - "pipeline": self.pipeline_name, - "destination": self.destination.destination_name, - }, - )(super().normalize)(workers, loader_file_format) - self.specification.state_adapter.capture_normalize_info(info) - return info - - def load( - self, - destination: TDestinationReferenceArg = None, # type: ignore[arg-type] - dataset_name: str = None, # type: ignore[arg-type] - credentials: t.Any = None, # type: ignore[arg-type] - *, - workers: int = 20, - raise_on_failed_jobs: bool = False, - ) -> LoadInfo: - info = self.specification.state_adapter.with_audit( - "load", - { - "pipeline": self.pipeline_name, - "destination": self.destination.destination_name, - }, - )(super().load)( - destination, - dataset_name, - credentials, - workers=workers, - raise_on_failed_jobs=raise_on_failed_jobs, - ) - self.specification.state_adapter.capture_load_info(info) - return info - - def run( - self, - data: t.Any = None, - *, - table_name: str = None, # type: ignore[arg-type] - write_disposition: TWriteDisposition = None, # type: ignore[arg-type] - columns: TAnySchemaColumns = None, # type: ignore[arg-type] - primary_key: TColumnNames = None, # type: ignore[arg-type] - schema: dlt.Schema = None, # type: ignore[arg-type] - loader_file_format: TLoaderFileFormat = None, # type: ignore[arg-type] - schema_contract: TSchemaContract = None, # type: ignore[arg-type] - **kwargs: t.Any, - ) -> LoadInfo: - _ = kwargs - if self._force_replace: - write_disposition = "replace" - - return super().run( - data, - table_name=table_name, - write_disposition=write_disposition, - columns=columns, 
- primary_key=primary_key, - schema=schema, - loader_file_format=loader_file_format, - schema_contract=schema_contract, - ) - - -class PipelineResult(t.NamedTuple): - """The result of executing a pipeline specification.""" - - exports: t.Dict[str, t.Any] - pipeline: RuntimePipeline - - -def _audit_props( - pipe_spec: PipelineSpecification, - sink_spec: t.Union[ - TDestinationReferenceArg, - t.Tuple[TDestinationReferenceArg, t.Optional[TDestinationReferenceArg]], - SinkSpecification, - ], - select: t.Optional[t.List[str]] = None, - exclude: t.Optional[t.List[str]] = None, - force_replace: bool = False, - dry_run: bool = False, - enable_stage: bool = True, - quiet: bool = False, - **pipeline_options: t.Any, -) -> t.Dict[str, t.Any]: - """The audit function for executing a pipeline specification.""" - return { - "name": pipe_spec.name, - "owner": pipe_spec.owner, - "sink": getattr(sink_spec, "name", sink_spec), - "select": select, - "exclude": exclude, - "force_replace": force_replace, - "dry_run": dry_run, - "enable_stage": enable_stage, - "quiet": quiet, - "pipeline_options": pipeline_options, - "workspace": pipe_spec.workspace.name, - "project": pipe_spec.project.name, - } - - -@with_activate_project -@with_audit("execute_pipeline", _audit_props) -def execute_pipeline_specification( - pipe_spec: PipelineSpecification, - sink_spec: t.Union[ - TDestinationReferenceArg, - t.Tuple[TDestinationReferenceArg, t.Optional[TDestinationReferenceArg]], - SinkSpecification, - ], - select: t.Optional[t.List[str]] = None, - exclude: t.Optional[t.List[str]] = None, - force_replace: bool = False, - dry_run: bool = False, - enable_stage: bool = True, - quiet: bool = False, - **pipeline_options: t.Any, -) -> M.Result[PipelineResult, Exception]: - """Executes a pipeline specification. - - Args: - pipe_spec: The pipeline specification. - sink_spec: The destination where the pipeline will write data. - select: A list of glob patterns to select resources. - exclude: A list of glob patterns to exclude resources. - force_replace: Whether to force replace disposition. - dry_run: Whether to run the pipeline in dry run mode. - enable_stage: Whether to enable staging. If disabled, staging will be ignored. - quiet: Whether to suppress output. - pipeline_options: Additional dlt.pipeline constructor arguments. - - Returns: - M.Result[PipelineResult, Exception]: The result of executing the pipeline specification. 
- """ - if isinstance(sink_spec, SinkSpecification): - destination, staging = sink_spec.get_ingest_config() - elif isinstance(sink_spec, tuple): - destination, staging = sink_spec - else: - destination, staging = sink_spec, None - - pipeline_options.update( - {"destination": destination, "staging": staging if enable_stage else None} - ) - pipe_reference = pipe_spec.create_pipeline( - RuntimePipeline, **pipeline_options - ).configure(dry_run, force_replace, select, exclude) - token = context.active_pipeline.set(pipe_reference) - - null = open(os.devnull, "w") - maybe_redirect = redirect_stdout(null) if quiet else nullcontext() - try: - with maybe_redirect: - result = PipelineResult(exports=pipe_spec(), pipeline=pipe_reference) - if dry_run: - return M.ok(result) - with ( - suppress(KeyError, SqlClientNotAvailable), - pipe_reference.sql_client() as client, - client.with_staging_dataset(staging=True) as client_staging, - ): - strategy = dlt.config["destination.replace_strategy"] - if strategy in ("insert-from-staging",) and client_staging.has_dataset(): - logger.info( - f"Cleaning up staging dataset {client_staging.dataset_name}" - ) - client_staging.drop_dataset() - return M.ok(result) - except Exception as e: - return M.error(e) - finally: - context.active_pipeline.reset(token) - null.close() - - -__all__ = ["execute_pipeline_specification"] diff --git a/src/cdf/legacy/runtime/publisher.py b/src/cdf/legacy/runtime/publisher.py deleted file mode 100644 index 01810c3..0000000 --- a/src/cdf/legacy/runtime/publisher.py +++ /dev/null @@ -1,83 +0,0 @@ -"""The runtime publisher module is responsible for executing publishers from publisher specifications. - -It performs the following functions: -- Validates the dependencies of the publisher exist. -- Verifies the dependencies are up-to-date. -- Executes the publisher script. -""" - -import datetime -import logging -import typing as t - -import sqlmesh -from sqlmesh.core.dialect import normalize_model_name - -from cdf.legacy.runtime.common import with_activate_project -from cdf.legacy.specification import PublisherSpecification -from cdf.legacy.state import with_audit -from cdf.types import M - -logger = logging.getLogger(__name__) - - -@with_activate_project -@with_audit( - "execute_publisher", - lambda spec, transform_ctx, skip_verification=False: { - "name": spec.name, - "owner": spec.owner, - "depends_on": spec.depends_on, - "skipped_verification": skip_verification, - "gateway": transform_ctx.gateway, - "workspace": spec.workspace.name, - "project": spec.project.name, - }, -) -def execute_publisher_specification( - spec: PublisherSpecification, - transform_ctx: sqlmesh.Context, - skip_verification: bool = False, -) -> M.Result[t.Dict[str, t.Any], Exception]: - """Execute a publisher specification. - - Args: - spec: The publisher specification to execute. - transform_ctx: The SQLMesh context to use for execution. - skip_verification: Whether to skip the verification of the publisher dependencies. - """ - if not skip_verification: - models = transform_ctx.models - for dependency in spec.depends_on: - normalized_name = normalize_model_name( - dependency, transform_ctx.default_catalog, transform_ctx.default_dialect - ) - if normalized_name not in models: - return M.error( - ValueError( - f"Cannot find tracked dependency {dependency} in models." 
- ) - ) - model = models[normalized_name] - snapshot = transform_ctx.get_snapshot(normalized_name) - if not snapshot: - return M.error(ValueError(f"Snapshot not found for {normalized_name}")) - if snapshot.missing_intervals( - datetime.date.today() - datetime.timedelta(days=7), - datetime.date.today() - datetime.timedelta(days=1), - ): - return M.error( - ValueError(f"Model {model} has missing intervals. Cannot publish.") - ) - logger.info(f"Model {model} has no missing intervals.") - logger.info("All tracked dependencies passed interval check.") - else: - logger.warning("Skipping dependency verification.") - try: - return M.ok(spec()) - except Exception as e: - logger.error(f"Error running publisher script {spec.path}: {e}") - return M.error(e) - - -__all__ = ["execute_publisher_specification"] diff --git a/src/cdf/legacy/runtime/script.py b/src/cdf/legacy/runtime/script.py deleted file mode 100644 index 88bff6a..0000000 --- a/src/cdf/legacy/runtime/script.py +++ /dev/null @@ -1,65 +0,0 @@ -"""The runtime script module is responsible for executing scripts from script specifications. - -It performs the following functions: -- Executes the script. -- Optionally captures stdout and returns it as a string. -""" - -import io -import typing as t -from contextlib import nullcontext, redirect_stdout - -import cdf.legacy.logger as logger -from cdf.legacy.runtime.common import with_activate_project -from cdf.legacy.specification import ScriptSpecification -from cdf.legacy.state import with_audit -from cdf.types import M - - -@t.overload -def execute_script_specification( - spec: ScriptSpecification, - capture_stdout: bool = False, -) -> M.Result[t.Dict[str, t.Any], Exception]: ... - - -@t.overload -def execute_script_specification( - spec: ScriptSpecification, - capture_stdout: bool = True, -) -> M.Result[str, Exception]: ... - - -@with_activate_project -@with_audit( - "execute_script", - lambda spec, capture_stdout=False: { - "name": spec.name, - "owner": spec.owner, - "workspace": spec.workspace.name, - "project": spec.project.name, - }, -) -def execute_script_specification( - spec: ScriptSpecification, - capture_stdout: bool = False, -) -> t.Union[M.Result[t.Dict[str, t.Any], Exception], M.Result[str, Exception]]: - """Execute a script specification. - - Args: - spec: The script specification to execute. - capture_stdout: Whether to capture stdout and return it. False returns an empty string. 
- """ - try: - buf = io.StringIO() - maybe_redirect = redirect_stdout(buf) if capture_stdout else nullcontext() - logger.info(f"Running script {spec.path}") - with maybe_redirect: - exports = spec() - return M.ok(buf.getvalue() if capture_stdout else exports) # type: ignore - except Exception as e: - logger.error(f"Error running script {spec.path}: {e}") - return M.error(e) - - -__all__ = ["execute_script_specification"] diff --git a/src/cdf/legacy/specification/__init__.py b/src/cdf/legacy/specification/__init__.py deleted file mode 100644 index d35f5da..0000000 --- a/src/cdf/legacy/specification/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -import typing as t - -from cdf.legacy.specification.notebook import NotebookSpecification -from cdf.legacy.specification.pipeline import PipelineSpecification -from cdf.legacy.specification.publisher import PublisherSpecification -from cdf.legacy.specification.script import ScriptSpecification -from cdf.legacy.specification.sink import SinkSpecification - -CoreSpecification = t.Union[ - NotebookSpecification, - PipelineSpecification, - PublisherSpecification, - ScriptSpecification, - SinkSpecification, -] - -__all__ = [ - "NotebookSpecification", - "PipelineSpecification", - "PublisherSpecification", - "ScriptSpecification", - "SinkSpecification", - "CoreSpecification", -] diff --git a/src/cdf/legacy/specification/base.py b/src/cdf/legacy/specification/base.py deleted file mode 100644 index ca14604..0000000 --- a/src/cdf/legacy/specification/base.py +++ /dev/null @@ -1,523 +0,0 @@ -"""Base specification classes for continuous data framework components""" - -from __future__ import annotations - -import ast -import importlib -import inspect -import operator -import os -import runpy -import sys -import time -import typing as t -from contextlib import nullcontext, suppress -from pathlib import Path -from threading import Lock - -import dlt -import pydantic -from croniter import croniter - -import cdf.legacy.constants as c -import cdf.legacy.logger as logger - -if t.TYPE_CHECKING: - from cdf.legacy.project import Project, Workspace - -T = t.TypeVar("T") - -_NO_DESCRIPTION = "No description provided." -"""A default description for components if not provided or parsed.""" - - -def _gen_anon_name() -> str: - """Generate an anonymous name for a component.""" - return f"anon_{os.urandom(8).hex()}" - - -def _getmodulename(name: str) -> str: - """Wraps inspect.getmodulename to ensure a module name is returned.""" - rv = inspect.getmodulename(name) - return rv or name - - -class BaseComponent( - pydantic.BaseModel, use_attribute_docstrings=True, from_attributes=True -): - """ - A component specification. - - Components are the building blocks of a data platform. They declaratively describe - the functions within a workspace which extract, load, transform, and publish data. - """ - - name: t.Annotated[ - str, - pydantic.Field( - ..., - default_factory=_gen_anon_name, - pattern=r"^[a-zA-Z0-9_\-\/]+$", - max_length=64, - ), - ] - """The name of the component. Must be unique within the workspace.""" - version: t.Annotated[int, pydantic.Field(1, ge=1, le=999, frozen=True)] = 1 - """The version of the component. - - Used internally to version datasets and serves as an external signal to dependees that something - has changed in a breaking way. All components are versioned. - """ - owner: t.Optional[str] = None - """The owners of the component.""" - description: str = _NO_DESCRIPTION - """The description of the component. 
- - This should help users understand the purpose of the component. For scripts and entrypoints, we - will attempt to extract the relevant docstring. - """ - tags: t.List[str] = [] - """Tags for this component used for component queries and integrations.""" - enabled: bool = True - """Whether this component is enabled. Respected in cdf operations.""" - meta: t.Dict[str, t.Any] = {} - """Arbitrary user-defined metadata for this component. - - Used for user-specific integrations and automation. - """ - - _workspace: t.Optional["Workspace"] = None - """The workspace containing the component. Set by the workspace model validator.""" - - _generation: float = pydantic.PrivateAttr(default_factory=time.monotonic) - """The generation time of the component. Used for ordering components.""" - - def __eq__(self, other: t.Any) -> bool: - """Check if two components are equal.""" - if not isinstance(other, BaseComponent): - return False - same_name_and_version = ( - self.name == other.name and self.version == other.version - ) - if self.has_workspace_association and other.has_workspace_association: - same_workspace = self.workspace.name == other.workspace.name - if ( - self.workspace.has_project_association - and other.workspace.has_project_association - ): - same_project = ( - self.workspace.project.name == other.workspace.project.name - ) - return same_name_and_version and same_workspace and same_project - return same_name_and_version and same_workspace - return same_name_and_version - - def __hash__(self) -> int: - """Hash the component.""" - if not self.has_workspace_association: - if self.workspace.has_project_association: - return hash( - ( - self.workspace.project.name, - self.workspace.name, - self.name, - self.version, - ) - ) - return hash((self.workspace.name, self.name, self.version)) - return hash((self.name, self.version)) - - @property - def workspace(self) -> "Workspace": - """Get the workspace containing the component.""" - if self._workspace is None: - raise ValueError("Component not associated with a workspace.") - return self._workspace - - @property - def has_workspace_association(self) -> bool: - """Check if the component has a workspace association.""" - return self._workspace is not None - - @property - def project(self) -> "Project": - """Get the project containing the component.""" - return self.workspace.project - - @property - def state_adapter(self) -> t.Any: - """Get the state adapter for the component.""" - return self.workspace.state - - @property - def versioned_name(self) -> str: - """Get the versioned name of the component.""" - return f"{self.name}_v{self.version}" - - @property - def owners(self) -> t.List[str]: - """Get the owners.""" - if not self.owner: - return [] - return self.owner.split(",") - - @pydantic.field_validator("tags", mode="before") - @classmethod - def _tags_validator(cls, tags: t.Any) -> t.Sequence[str]: - """Wrap tags in a list.""" - if isinstance(tags, str): - tags = tags.split(",") - return tags - - @pydantic.field_validator("owner", mode="before") - @classmethod - def _owner_validator(cls, owner: t.Any) -> str: - """Ensure owner is a string.""" - if isinstance(owner, (list, tuple)): - owner = ",".join(owner) - return owner - - @pydantic.field_validator("description", mode="after") - @classmethod - def _description_validator(cls, description: str) -> str: - """Ensure the description has no leading whitespace.""" - return inspect.cleandoc(description) - - @pydantic.model_validator(mode="before") # type: ignore - @classmethod - def 
_spec_validator(cls, data: t.Any) -> t.Any: - """Perform validation on the spec ensuring forward compatibility.""" - if isinstance(data, dict): - owners = data.pop("owners", None) - if owners is not None: - data["owner"] = ",".join(owners) - return data - - def __getitem__(self, key: str) -> t.Any: - """Get a field from the component.""" - if key not in self.model_fields: - raise KeyError(f"No attribute {key} found in component {self.name}") - try: - return getattr(self, key) - except AttributeError as e: - raise KeyError(f"Attribute {key} not found in component {self.name}") from e - - -class WorkspaceComponent(BaseComponent): - """A component within a workspace.""" - - component_path: t.Annotated[Path, pydantic.Field(alias="path", frozen=True)] - """The path to the component within the workspace folder.""" - root_path: t.Annotated[Path, pydantic.Field(frozen=True, exclude=True)] = Path(".") - """The base path from which to resolve the component path. - - This is typically the union of the project path and the workspace path but - for standalone components (components created programmatically outside the - context of the cdf taxonomy), it should be set to either the current working - directory (default) or the system root. It is excluded from serialization. - """ - - _folder: str = "." - """The folder within the workspace where components are stored.""" - _extension: str = "py" - """The extension for components of this type.""" - - @property - def path(self) -> Path: - """Get the path to the component.""" - return self.root_path / self.component_path - - @pydantic.model_validator(mode="before") - @classmethod - def _path_from_name_validator(cls, values: t.Any) -> t.Any: - """Infer the path from the name if component_path is not provided. - - Given a name, we apply certain heuristics to infer the path of the component if a - path is not explicitly provided. The heuristics are as follows: - - If the name ends with the component extension (.py), we use the name as the path. - - If the name does NOT end with the component extension, we append the component type - if not present. So a pipeline name like `darksky` would become `darksky_pipeline`. - - We then append the component extension and set the path. So `darksky_pipeline.py` - - The _component_path_validator validator is uniformly responsible for prefixing the - folder name to the path. 
- """ - if isinstance(values, (str, Path)): - values = {"path": values} - elif isinstance(values, dict): - name = values.get("name") - if not name: - return values - if name.endswith((".py", ".ipynb")): - values.setdefault("path", name) - else: - ext = getattr(cls._extension, "default") - typ = getattr(cls._folder, "default")[:-1] - if name.endswith(f"_{typ}"): - p = f"{name}.{ext}" - else: - p = f"{name}_{typ}.{ext}" - values.setdefault("path", p) - return values - - @pydantic.field_validator("name", mode="before") - @classmethod - def _component_name_validator(cls, name: t.Any) -> t.Any: - """Strip the extension from the name.""" - if isinstance(name, str): - return name.rsplit(".", 1)[0] - return name - - @pydantic.field_validator("component_path", mode="before") - @classmethod - def _component_path_validator(cls, component_path: t.Any) -> Path: - """Ensure the component path is a Path and that its a child of the expected folder.""" - path = Path(component_path) - if path.is_absolute(): - raise ValueError("Component path must be a relative path.") - ns = getattr(cls._folder, "default") - if path.parts[0] != ns: - path = Path(ns) / path - return path - - -class Schedulable(pydantic.BaseModel): - """A mixin for schedulable components.""" - - cron_string: t.Annotated[ - str, pydantic.Field(serialization_alias="cron", frozen=True) - ] = "@daily" - """A cron expression for scheduling the primary action associated with the component. - - This is intended to be leveraged by libraries like Airflow. - """ - - @property - def cron(self) -> t.Optional[croniter]: - """Get the croniter instance.""" - if self.cron_string is None: - return None - return croniter(self.cron_string) # TODO: add start time here based on last run - - def next_run(self) -> t.Optional[int]: - """Get the next run time for the component.""" - if self.cron is None: - return None - return self.cron.get_next() - - def is_due(self) -> bool: - """Check if the component is due to run.""" - if self.cron is None: - return False - return self.cron.get_next() <= self.cron.get_current() - - @pydantic.field_validator("cron_string", mode="before") - @classmethod - def _cron_validator(cls, cron_string: t.Any) -> str: - """Ensure the cron expression is valid.""" - if isinstance(cron_string, croniter): - return " ".join(cron_string.expressions) - elif isinstance(cron_string, str): - try: - croniter(cron_string) - except Exception as e: - raise ValueError(f"Invalid cron expression: {cron_string}") from e - else: - return cron_string - raise TypeError( - f"Invalid cron type: {type(cron_string)} is not str or croniter." - ) - - -class InstallableRequirements(pydantic.BaseModel): - """A mixin for components that support installation of requirements.""" - - requirements: t.Annotated[t.List[str], pydantic.Field(frozen=True)] = [] - """The requirements for the component.""" - - @pydantic.field_validator("requirements", mode="before") - @classmethod - def _requirements_validator(cls, requirements: t.Any) -> t.Sequence[str]: - """Wrap requirements in a list.""" - if isinstance(requirements, str): - requirements = requirements.split(",") - return requirements - - def install_requirements(self) -> None: - """Install the component.""" - if not self.requirements: - return - name = getattr(self, "name", self.__class__.__name__) - logger.info(f"Installing requirements for {name}: {self.requirements}") - try: - import pip - except ImportError: - raise ImportError( - "Pip was not found. Please install pip or recreate the virtual environment." 
- ) - pip.main(["install", *self.requirements]) - - -class PythonScript(WorkspaceComponent, InstallableRequirements): - """A python script component.""" - - auto_install: bool = False - """Whether to automatically install the requirements for the script. - - Useful for leaner Docker images which defer certain component dep installs to runtime. - """ - - _lock: Lock = pydantic.PrivateAttr(default_factory=Lock) - """A lock for ensuring thread safety.""" - - @pydantic.model_validator(mode="after") - def _setup_script(self): - """Import the entrypoint and register the component.""" - if self.name.startswith("anon_"): - self.name = self.name.replace("anon_", self.path.stem) - if self.description == _NO_DESCRIPTION: - tree = ast.parse(self.path.read_text()) - with suppress(TypeError): - self.description = ast.get_docstring(tree) or _NO_DESCRIPTION - return self - - def package(self, outputdir: str) -> None: - """Package the component.""" - from pex.bin import pex - - name = getattr(self, "name", self.__class__.__name__) - logger.info(f"Packaging {name}...") - - output = os.path.join(outputdir, f"{name}.pex") - try: - # --inject-env in pex can add the c.CDF_MAIN variable? - # or really any other variable that should be injected - pex.main(["-o", output, ".", *self.requirements]) - except SystemExit as e: - # A failed pex build will exit with a non-zero code - # Successfully built pexes will exit with either 0 or None - if e.code is not None and e.code != 0: - # If the pex fails to build, delete the compromised pex - with suppress(FileNotFoundError): - os.remove(output) - raise - - @property - def main(self) -> t.Callable[[], t.Dict[str, t.Any]]: - """Get the entrypoint function.""" - - def _run() -> t.Any: - """Run the script""" - origpath = sys.path[:] - sys.path = [ - str(self.root_path), - *sys.path, - str(self.root_path.parent), - ] - parts = map( - _getmodulename, - self.path.relative_to(self.root_path).parts, - ) - run_name = ".".join(parts) - if self.has_workspace_association: - workspace_context = self.workspace.inject_configuration() - else: - workspace_context = nullcontext() - try: - with self._lock, workspace_context: - maybe_log_level = dlt.config.get("runtime.log_level", str) - if maybe_log_level: - logger.set_level(maybe_log_level.upper()) - if self.auto_install: - self.install_requirements() - return runpy.run_path( - str(self.path), - run_name=run_name, - init_globals={ - "__file__": str(self.path), - c.CDF_MAIN: run_name, - }, - ) - except SystemExit as e: - if e.code != 0: - raise - return {} - except Exception as e: - logger.exception(f"Error running script {self.name}: {e}") - raise - finally: - sys.path = origpath - - return _run - - def __call__(self) -> t.Dict[str, t.Any]: - """Run the script.""" - return self.main() - - -class PythonEntrypoint(BaseComponent, InstallableRequirements): - """A python entrypoint component.""" - - entrypoint: t.Annotated[ - str, - pydantic.Field( - ..., - frozen=True, - pattern=r"^[a-zA-Z][a-zA-Z0-9_\.]*:[a-zA-Z][a-zA-Z0-9_\.]*$", - ), - ] - """The entrypoint of the component in the format module:func.""" - - @pydantic.model_validator(mode="after") - def _setup_entrypoint(self): - """Import the entrypoint and register the component.""" - if self.name.startswith("anon_"): - mod, func = self.entrypoint.split(":", 1) - self.name = mod.replace(".", "_") + "_" + func.replace(".", "_") - if self.description == _NO_DESCRIPTION: - with logger.suppress_and_warn(): - self.description = self.main(__return_func=1).__doc__ or _NO_DESCRIPTION - return self 
-
-    @property
-    def main(self) -> t.Callable[..., t.Any]:
-        """Get the entrypoint function."""
-        module, func = self.entrypoint.split(":")
-
-        def _run(*args: t.Any, **kwargs: t.Any) -> t.Any:
-            """Execute the entrypoint."""
-            if self.has_workspace_association:
-                workspace_context = self.workspace.inject_configuration()
-            else:
-                workspace_context = nullcontext()
-            with workspace_context:
-                mod = importlib.import_module(module)
-                fn = operator.attrgetter(func)(mod)
-                if kwargs.pop("__return_func", 0):
-                    return fn
-                return fn(*args, **kwargs)
-
-        return _run
-
-    def __call__(self, *args: t.Any, **kwargs: t.Any) -> t.Any:
-        """Run the entrypoint."""
-        return self.main(*args, **kwargs)
-
-
-class CanExecute(t.Protocol):
-    """A protocol specifying the minimum interface executable components satisfy."""
-
-    @property
-    def main(self) -> t.Callable[..., t.Any]: ...
-
-    def __call__(self, *args: t.Any, **kwargs: t.Any) -> t.Any: ...
-
-
-__all__ = [
-    "BaseComponent",
-    "Schedulable",
-    "PythonScript",
-    "PythonEntrypoint",
-    "WorkspaceComponent",
-    "CanExecute",
-]
diff --git a/src/cdf/legacy/specification/notebook.py b/src/cdf/legacy/specification/notebook.py
deleted file mode 100644
index 2d738d9..0000000
--- a/src/cdf/legacy/specification/notebook.py
+++ /dev/null
@@ -1,45 +0,0 @@
-import typing as t
-from threading import Lock
-
-import pydantic
-
-from cdf.legacy.specification.base import InstallableRequirements, WorkspaceComponent
-
-
-class NotebookSpecification(WorkspaceComponent, InstallableRequirements):
-    """A notebook specification."""
-
-    storage_path: t.Optional[str] = None
-    """The path to write the output notebook to for long term storage.
-
-    Uses the configured Project fs provider. This may be gcs, s3, etc.
-
-    This is a format string which will be formatted with the following variables:
-    - name: The name of the notebook.
-    - date: The current date.
-    - timestamp: An ISO formatted timestamp.
-    - epoch: The current epoch time.
-    - params: A dict of the resolved parameters passed to the notebook.
-    """
-
-    parameters: t.Dict[str, t.Any] = {}
-    """Parameters to pass to the notebook when running."""
-    gc_duration: int = 86400 * 3
-    """The duration in seconds to keep the locally rendered notebook in the `_rendered` folder.
-
-    Rendered notebooks are written to the `_rendered` folder of the notebook's parent directory.
-    That folder is not intended to be a permanent storage location. This setting controls how long
-    rendered notebooks are kept before being garbage collected. The default is 3 days. Set to 0 to
-    clean up immediately after execution. Set to -1 to never clean up.
- """ - - _folder: str = "notebooks" - """The folder where notebooks are stored.""" - _extension: str = "ipynb" - """The default extension for notebooks.""" - - _lock: Lock = pydantic.PrivateAttr(default_factory=Lock) - """A lock to ensure the notebook is thread safe.""" - - -__all__ = ["NotebookSpecification"] diff --git a/src/cdf/legacy/specification/pipeline.py b/src/cdf/legacy/specification/pipeline.py deleted file mode 100644 index 5393d99..0000000 --- a/src/cdf/legacy/specification/pipeline.py +++ /dev/null @@ -1,223 +0,0 @@ -"""The spec classes for continuous data framework pipelines.""" - -import atexit -import decimal -import fnmatch -import time -import typing as t - -import dlt -import pydantic -from dlt.common.destination.exceptions import DestinationLoadingViaStagingNotSupported -from dlt.common.typing import TDataItem - -import cdf.legacy.logger as logger -from cdf.legacy.specification.base import PythonEntrypoint, PythonScript, Schedulable - -T = t.TypeVar("T") -TPipeline = t.TypeVar("TPipeline", bound=dlt.Pipeline) - -Metric = t.Union[float, int, decimal.Decimal] -MetricStateContainer = t.MutableMapping[str, t.MutableMapping[str, Metric]] - - -class MetricInterface(t.Protocol): - def __call__( - self, item: TDataItem, metric: t.Optional[t.Any] = None, / - ) -> Metric: ... - - -class PipelineMetricSpecification(PythonEntrypoint): - """Defines metrics which can be captured during pipeline execution""" - - options: t.Dict[str, t.Any] = {} - """Kwargs to pass to the metric function. - - This assumes the metric is a callable which accepts kwargs and returns a metric - interface. If the metric is not parameterized, this should be left empty. - """ - - @property - def func(self) -> MetricInterface: - """A typed property to return the metric function""" - if self.options: - return self.main(**self.options) - return self.main - - def __call__( - self, resource: dlt.sources.DltResource, state: MetricStateContainer - ) -> None: - """Adds a metric aggregator to a resource""" - func = self.func - first = True - resource_name = resource.name - metric_name = self.name - elapsed = 0.0 - - def _aggregator(item: T) -> T: - nonlocal first, elapsed - compstart = time.perf_counter() - if first: - state[resource_name][metric_name] = func(item) - first = False - return item - state[resource_name][metric_name] = func( - item, - state[resource_name][metric_name], - ) - compend = time.perf_counter() - elapsed += compend - compstart - return item - - state.setdefault(resource_name, {}) - resource.add_map(_aggregator) - - def _timing_stats(): - logger.debug( - f"Collecting metric {metric_name} for {resource_name} took {elapsed} seconds" - ) - - atexit.register(_timing_stats) - - -InlineMetricSpecifications = t.Dict[str, t.List[PipelineMetricSpecification]] -"""Mapping of resource name glob patterns to metric specs""" - - -class FilterInterface(t.Protocol): - def __call__(self, item: TDataItem) -> bool: ... - - -class PipelineFilterSpecification(PythonEntrypoint): - """Defines filters which can be applied to pipeline execution""" - - options: t.Dict[str, t.Any] = {} - """Kwargs to pass to the filter function. - - This assumes the filter is a callable which accepts kwargs and returns a filter - interface. If the filter is already a filter interface, this should be left empty. 
- """ - - @property - def func(self) -> FilterInterface: - """A typed property to return the filter function""" - if self.options: - return self.main(**self.options) - return self.main - - def __call__(self, resource: dlt.sources.DltResource) -> None: - """Adds a filter to a resource""" - resource.add_filter(self.func) - - -InlineFilterSpecifications = t.Dict[str, t.List[PipelineFilterSpecification]] -"""Mapping of resource name glob patterns to filter specs""" - - -class PipelineSpecification(PythonScript, Schedulable): - """A pipeline specification.""" - - metrics: InlineMetricSpecifications = {} - """A dict of resource name glob patterns to metric definitions. - - Metrics are captured on a per resource basis during pipeline execution and are - accumulated into the metric_state dict. The metric definitions are callables that - take the current item and the current metric value and return the new metric value. - """ - filters: InlineFilterSpecifications = {} - """A dict of resource name glob patterns to filter definitions. - - Filters are applied on a per resource basis during pipeline execution. The filter - definitions are callables that take the current item and return a boolean indicating - whether the item should be filtered out. - """ - dataset_name: str = "{name}_v{version}" - """The name of the dataset associated with the pipeline. - - Defaults to the versioned name. This string is formatted with the pipeline name, version, meta, and tags. - """ - options: t.Dict[str, t.Any] = {} - """Options available in pipeline scoped dlt config resolution.""" - persist_extract_package: bool = True - """Whether to persist the extract package in the project filesystem.""" - - _folder = "pipelines" - """The folder where pipeline scripts are stored.""" - - @pydantic.model_validator(mode="after") - def _validate_dataset(self: "PipelineSpecification") -> "PipelineSpecification": - """Validate the dataset name and apply formatting.""" - name = self.dataset_name.format( - name=self.name, version=self.version, meta=self.meta, tags=self.tags - ).strip() - self.dataset_name = name or self.versioned_name - return self - - def inject_metrics_and_filters( - self, source: dlt.sources.DltSource, container: MetricStateContainer - ) -> dlt.sources.DltSource: - """Apply metrics and filters defined by the specification to a source. - - For a source to conform to the specification, it must have this method applied to it. You - can manipulate sources without this method, but the metrics and filters will not be applied. - - Args: - source: The source to apply metrics and filters to. - container: The container to store metric state in. This is mutated during execution. - - Returns: - dlt.sources.DltSource: The source with metrics and filters applied. - """ - for resource in source.selected_resources.values(): - for patt, metric in self.metrics.items(): - if fnmatch.fnmatch(resource.name, patt): - for applicator in metric: - applicator(resource, container) - for patt, filter_ in self.filters.items(): - if fnmatch.fnmatch(resource.name, patt): - for applicator in filter_: - applicator(resource) - return source - - def create_pipeline( - self, - klass: t.Type[TPipeline] = dlt.Pipeline, - /, - **kwargs: t.Any, - ) -> TPipeline: - """Convert the pipeline specification to a dlt pipeline object. - - This is a convenience method to create a dlt pipeline object from the specification. The - dlt pipeline is expected to use the name and dataset name from the specification. 
This - is what allows declarative definitions to be associated with runtime artifacts. - - Args: - klass (t.Type[TPipeline], optional): The pipeline class to use. Defaults to dlt.Pipeline. - **kwargs: Additional keyword arguments to pass to the dlt.pipeline constructor. - - Returns: - TPipeline: The dlt pipeline object. - """ - try: - pipe = dlt.pipeline( - pipeline_name=self.name, - dataset_name=self.dataset_name, - **kwargs, - _impl_cls=klass, - ) - except DestinationLoadingViaStagingNotSupported: - logger.warning( - "Destination does not support loading via staging. Disabling staging." - ) - kwargs.pop("staging", None) - pipe = dlt.pipeline( - pipeline_name=self.name, - dataset_name=self.dataset_name, - **kwargs, - _impl_cls=klass, - ) - setattr(pipe, "specification", self) - return pipe - - -__all__ = ["PipelineSpecification"] diff --git a/src/cdf/legacy/specification/publisher.py b/src/cdf/legacy/specification/publisher.py deleted file mode 100644 index 7ab5292..0000000 --- a/src/cdf/legacy/specification/publisher.py +++ /dev/null @@ -1,16 +0,0 @@ -import typing as t - -from cdf.legacy.specification.base import PythonScript, Schedulable - - -class PublisherSpecification(PythonScript, Schedulable): - """A publisher specification.""" - - depends_on: t.List = [] - """The dependencies of the publisher expressed as fully qualified names of SQLMesh tables.""" - - _folder = "publishers" - """The folder where publisher scripts are stored.""" - - -__all__ = ["PublisherSpecification"] diff --git a/src/cdf/legacy/specification/script.py b/src/cdf/legacy/specification/script.py deleted file mode 100644 index 7e3e78b..0000000 --- a/src/cdf/legacy/specification/script.py +++ /dev/null @@ -1,11 +0,0 @@ -from cdf.legacy.specification.base import PythonScript, Schedulable - - -class ScriptSpecification(PythonScript, Schedulable): - """A script specification.""" - - _folder = "scripts" - """The folder where generic scripts are stored.""" - - -__all__ = ["ScriptSpecification"] diff --git a/src/cdf/legacy/specification/sink.py b/src/cdf/legacy/specification/sink.py deleted file mode 100644 index 3c36501..0000000 --- a/src/cdf/legacy/specification/sink.py +++ /dev/null @@ -1,70 +0,0 @@ -import typing as t - -from dlt.common.destination.reference import Destination -from sqlmesh.core.config import GatewayConfig - -from cdf.legacy.specification.base import PythonScript -from cdf.legacy.state import with_audit - - -class SinkSpecification(PythonScript): - """A sink specification.""" - - ingest_config: str = "ingest" - """The variable which holds the ingest configuration (a dlt destination).""" - stage_config: str = "stage" - """The variable which holds the staging configuration (a dlt destination).""" - transform_config: str = "transform" - """The variable which holds the transform configuration (a sqlmesh config).""" - - _exports: t.Optional[t.Dict[str, t.Any]] = None - """Caches the exports from the sink script.""" - - _folder: str = "sinks" - """The folder where sink scripts are stored.""" - - @property - def main(self) -> t.Callable[..., t.Dict[str, t.Any]]: - """Run the sink script.""" - loader = t.cast(t.Callable[..., t.Dict[str, t.Any]], super().main) - return with_audit( - "load_sink", - lambda self=self: { - "name": self.name, - "owner": self.owner, - "workspace": self.workspace.name, - "project": self.project.name, - }, - )(loader) - - def get_ingest_config( - self, - ) -> t.Tuple[Destination, t.Optional[Destination]]: - """Get the ingest configuration.""" - if self._exports is None: - 
-            self._exports = self.main()
-        return self._exports[self.ingest_config], self._exports.get(self.stage_config)
-
-    def get_transform_config(self) -> GatewayConfig:
-        """Get the transform configuration."""
-        if self._exports is None:
-            self._exports = self.main()
-        return GatewayConfig.model_validate(self._exports[self.transform_config])
-
-    @property
-    def ingest(self) -> Destination:
-        """The ingest destination."""
-        return self.get_ingest_config()[0]
-
-    @property
-    def stage(self) -> t.Optional[Destination]:
-        """The stage destination."""
-        return self.get_ingest_config()[1]
-
-    @property
-    def transform(self) -> GatewayConfig:
-        """The transform configuration."""
-        return self.get_transform_config()
-
-
-__all__ = ["SinkSpecification"]
diff --git a/src/cdf/legacy/state.py b/src/cdf/legacy/state.py
deleted file mode 100644
index b54b818..0000000
--- a/src/cdf/legacy/state.py
+++ /dev/null
@@ -1,407 +0,0 @@
-"""The state module is responsible for providing an adapter through which we can persist data"""
-
-import json
-import time
-import typing as t
-from datetime import timedelta, timezone
-
-import pandas as pd
-import pydantic
-from dlt.common.pipeline import ExtractInfo, LoadInfo, NormalizeInfo
-from sqlglot import exp
-from sqlmesh.core.config.connection import (
-    DuckDBConnectionConfig,
-    MySQLConnectionConfig,
-    PostgresConnectionConfig,
-)
-from sqlmesh.core.engine_adapter import EngineAdapter
-
-import cdf.legacy.logger as logger
-from cdf.legacy.context import active_project, execution_id
-from cdf.types import M, P
-
-T = t.TypeVar("T")
-JSON = t.Union[bool, int, float, str, t.List["JSON"], t.Dict[str, "JSON"]]
-
-KV_SCHEMA = {"key": exp.DataType.build("TEXT"), "value": exp.DataType.build("TEXT")}
-"""The schema for the key-value store"""
-
-_PIPELINE_SCHEMA = {
-    "load_id": exp.DataType.build("TEXT"),
-    "timestamp": exp.DataType.build("INT64"),
-    "pipeline": exp.DataType.build("TEXT"),
-    "dataset": exp.DataType.build("TEXT"),
-    "destination_name": exp.DataType.build("TEXT"),
-    "destination_type": exp.DataType.build("TEXT"),
-    "data": exp.DataType.build("TEXT"),
-    "success": exp.DataType.build("BOOLEAN"),
-    "elapsed": exp.DataType.build("FLOAT"),
-    "execution_id": exp.DataType.build("TEXT"),
-}
-
-EXTRACT_SCHEMA = _PIPELINE_SCHEMA.copy()
-"""The schema for the extract store"""
-NORMALIZE_SCHEMA = _PIPELINE_SCHEMA.copy()
-"""The schema for the normalize store"""
-LOAD_SCHEMA = _PIPELINE_SCHEMA.copy()
-"""The schema for the load store"""
-
-AUDIT_SCHEMA = {
-    "event": exp.DataType.build("TEXT"),
-    "timestamp": exp.DataType.build("INT64"),
-    "elapsed": exp.DataType.build("FLOAT"),
-    "success": exp.DataType.build("BOOLEAN"),
-    "properties": exp.DataType.build("TEXT"),
-    "execution_id": exp.DataType.build("TEXT"),
-}
-"""The schema for the audit store"""
-
-
-def _no_props(*args: t.Any, **kwargs: t.Any) -> t.Dict[str, JSON]:
-    """Empty properties"""
-    return {}
-
-
-class StateStore(pydantic.BaseModel):
-    """The state store is responsible for persisting data"""
-
-    model_config = {"frozen": True, "from_attributes": True}
-
-    schema_: t.Annotated[str, pydantic.Field(alias="schema")] = "cdf_state"
-    """The schema in which to store data"""
-    protected: bool = True
-    """Whether the state store is protected, i.e. should never be torn down
-
-    A safety measure to prevent accidental data loss when users are consuming the cdf API
-    directly. This should be set to False when running tests or you know what you're doing.
- """ - - connection: t.Union[ - DuckDBConnectionConfig, - MySQLConnectionConfig, - PostgresConnectionConfig, - ] = DuckDBConnectionConfig(database=".cdf.db") - """The connection configuration to the state store""" - - _adapter: t.Optional[EngineAdapter] = None - """Lazy loaded adapter to the state store""" - - @property - def kv_table(self) -> str: - """The key-value table name""" - return f"{self.schema_}.json_store" - - @property - def extract_table(self) -> str: - """The extract table name""" - return f"{self.schema_}.extract_store" - - @property - def normalize_table(self) -> str: - """The normalize table name""" - return f"{self.schema_}.normalize_store" - - @property - def load_table(self) -> str: - """The load table name""" - return f"{self.schema_}.load_store" - - @property - def audit_table(self) -> str: - """The audit table name""" - return f"{self.schema_}.audit_store" - - @property - def adapter(self) -> EngineAdapter: - """The adapter to the state store""" - if self._adapter is None: - adapter = self.connection.create_engine_adapter() - adapter.create_schema(self.schema_) - adapter.create_state_table(self.kv_table, KV_SCHEMA) - adapter.create_state_table(self.extract_table, EXTRACT_SCHEMA) - adapter.create_state_table(self.normalize_table, NORMALIZE_SCHEMA) - adapter.create_state_table(self.load_table, LOAD_SCHEMA) - adapter.create_state_table(self.audit_table, AUDIT_SCHEMA) - self._adapter = adapter - return self._adapter - - def _execute(self, sql: str) -> None: - """Execute a SQL statement""" - self.adapter.execute(sql) - - def store_json(self, key: str, value: JSON) -> None: - """Store a JSON value""" - with self.adapter.transaction(value is not None), logger.suppress_and_warn(): - self.adapter.delete_from(self.kv_table, f"key = '{key}'") - if value is not None: - self.adapter.insert_append( - self.kv_table, - pd.DataFrame([{"key": key, "value": json.dumps(value)}]), - ) - - def load_json(self, key: str) -> JSON: - """Load a JSON value""" - return json.loads( - self.adapter.fetchone( - exp.select("value").from_(self.kv_table).where(f"key = '{key}'") - )[0] - ) - - __getitem__ = load_json - __setitem__ = store_json - - def __enter__(self, condition: bool = True) -> "StateStore": - """Proxies to the transaction context manager""" - self.__trans = self.adapter.transaction(condition) - return self - - def __exit__(self, exc_type, exc_value, traceback) -> None: - """Proxies to the transaction context manager""" - self.__trans.__exit__(exc_type, exc_value, traceback) - - def __del__(self) -> None: - """Close the connection to the state store""" - if self._adapter is not None: - self.adapter.close() - - def with_audit( - self, - event: str, - input_props: t.Union[t.Callable[P, JSON], t.Dict[str, JSON]] = _no_props, - output_props: t.Callable[[T], t.Dict[str, JSON]] = _no_props, - ) -> t.Callable[[t.Callable[P, T]], t.Callable[P, T]]: - """Decorator to add audit logging to a function - - Args: - event (str): The event name - input_props (Union[Callable[[P], JSON], Dict[str, JSON], optional): A callable that takes the function arguments - and returns a dictionary of properties to log. Alternatively, static props are accepted as a dictionary. - output_props (Callable[[T], Dict[str, JSON], optional): A callable that takes the function return value - and returns a dictionary of properties to log. 
- """ - - def decorator(func: t.Callable[P, T]) -> t.Callable[P, T]: - def wrapper(*args: P.args, **kwargs: P.kwargs) -> T: - audit_event = { - "event": event, - "timestamp": time.time(), - "elapsed": 0, - "success": False, - "properties": ( - input_props(*args, **kwargs) - if callable(input_props) - else input_props - ), - "execution_id": execution_id.get(), - } - start = time.perf_counter() - try: - rv = func(*args, **kwargs) - except Exception as e: - audit_event["elapsed"] = time.perf_counter() - start - with self.adapter.transaction(), logger.suppress_and_warn(): - self.adapter.insert_append( - self.audit_table, - pd.DataFrame([audit_event]), - ) - raise e - audit_event["elapsed"] = time.perf_counter() - start - audit_event["success"] = not isinstance(rv, M.Err) - audit_event["properties"].update(output_props(rv)) - audit_event["properties"] = json.dumps(audit_event["properties"]) - with self.adapter.transaction(), logger.suppress_and_warn(): - self.adapter.insert_append( - self.audit_table, - pd.DataFrame([audit_event]), - ) - return rv - - return wrapper - - return decorator - - def audit( - self, event: str, success: bool = True, elapsed: float = 0.0, **properties: JSON - ) -> None: - """Audit an event""" - payload = { - "event": event, - "timestamp": time.time(), - "elapsed": elapsed, - "success": success, - "properties": json.dumps(properties), - "execution_id": execution_id.get(), - } - with self.adapter.transaction(), logger.suppress_and_warn(): - self.adapter.insert_append( - self.audit_table, - pd.DataFrame([payload]), - ) - - def fetch_audits( - self, *event_names: str, limit: int = 100, failed_only: bool = False - ): - """List all audit events""" - assert limit > 0 and limit < 1000, "Limit must be between 1 and 1000" - q = ( - exp.select("*") - .from_(self.audit_table) - .order_by("timestamp DESC") - .limit(limit) - ) - if failed_only: - q = q.where("success = false") - if event_names: - q = q.where(f"event IN {tuple(event_names)}") - df = self.adapter.fetchdf(q).sort_values("timestamp", ascending=True) - df["timestamp"] = pd.to_datetime(df["timestamp"], unit="s", utc=True) - localtz = timezone(timedelta(seconds=-time.timezone)) - df["timestamp"] = df["timestamp"].dt.tz_convert(localtz) - return df - - def clear_audits(self): - """Clear all audit events""" - self.adapter.delete_from(self.audit_table, "1 = 1") - - def capture_extract_info(self, info: ExtractInfo) -> None: - """Capture extract information""" - d = self._info_to_payload(info) - if not d: - return - with self.adapter.transaction(), logger.suppress_and_warn(): - self.adapter.insert_append(self.extract_table, pd.DataFrame(d)) - - def capture_normalize_info(self, info: NormalizeInfo) -> None: - """Capture normalize information""" - d = self._info_to_payload(info) - if not d: - return - with self.adapter.transaction(), logger.suppress_and_warn(): - self.adapter.insert_append(self.normalize_table, pd.DataFrame(d)) - - def capture_load_info(self, info: LoadInfo) -> None: - """Capture load information""" - d = self._info_to_payload(info) - if not d: - return - with self.adapter.transaction(), logger.suppress_and_warn(): - self.adapter.insert_append(self.load_table, pd.DataFrame(d)) - - @staticmethod - def _info_to_payload( - info: t.Union[ExtractInfo, NormalizeInfo, LoadInfo], - ) -> t.List[t.Dict[str, t.Any]]: - """Convert an info object to a payload""" - payload = [] - for pkg in info.load_packages: - payload.append( - { - "load_id": pkg.load_id, - "timestamp": int(time.time()), - "pipeline": 
info.pipeline.pipeline_name, - "dataset": info.pipeline.dataset_name, - "destination_name": info.pipeline.destination.destination_name, - "destination_type": info.pipeline.destination.destination_type, - "data": json.dumps(pkg.asdict(), default=str), - "success": pkg.state != "aborted", - "elapsed": sum( - [j.elapsed for k in pkg.jobs.keys() for j in pkg.jobs[k]] - ), - "execution_id": execution_id.get(), - } - ) - return payload - - def fetch_extracted( - self, *load_ids: str, limit: int = 100, failed_only: bool = False - ): - """List all extracted data""" - assert limit > 0 and limit < 1000, "Limit must be between 1 and 1000" - q = ( - exp.select("*") - .from_(self.extract_table) - .order_by("timestamp DESC") - .limit(limit) - ) - if failed_only: - q = q.where("success = false") - if load_ids: - q = q.where(f"load_id IN {tuple(load_ids)}") - df = self.adapter.fetchdf(q).sort_values("timestamp", ascending=True) - df["timestamp"] = pd.to_datetime(df["timestamp"], unit="s", utc=True) - localtz = timezone(timedelta(seconds=-time.timezone)) - df["timestamp"] = df["timestamp"].dt.tz_convert(localtz) - return df - - def fetch_normalized( - self, *load_ids: str, limit: int = 100, failed_only: bool = False - ): - """List all normalized data""" - assert limit > 0 and limit < 1000, "Limit must be between 1 and 1000" - q = ( - exp.select("*") - .from_(self.normalize_table) - .order_by("timestamp DESC") - .limit(limit) - ) - if failed_only: - q = q.where("success = false") - if load_ids: - q = q.where(f"load_id IN {tuple(load_ids)}") - df = self.adapter.fetchdf(q).sort_values("timestamp", ascending=True) - df["timestamp"] = pd.to_datetime(df["timestamp"], unit="s", utc=True) - localtz = timezone(timedelta(seconds=-time.timezone)) - df["timestamp"] = df["timestamp"].dt.tz_convert(localtz) - return df - - def fetch_loaded(self, *load_ids: str, limit: int = 100, failed_only: bool = False): - """List all loaded data""" - assert limit > 0 and limit < 1000, "Limit must be between 1 and 1000" - q = ( - exp.select("*") - .from_(self.load_table) - .order_by("timestamp DESC") - .limit(limit) - ) - if failed_only: - q = q.where("success = false") - if load_ids: - q = q.where(f"load_id IN {tuple(load_ids)}") - df = self.adapter.fetchdf(q) - df["timestamp"] = pd.to_datetime(df["timestamp"], unit="s", utc=True) - localtz = timezone(timedelta(seconds=-time.timezone)) - df["timestamp"] = df["timestamp"].dt.tz_convert(localtz) - return df - - -def with_audit( - event: str, - input_props: t.Union[t.Callable[P, JSON], t.Dict[str, JSON]] = _no_props, - output_props: t.Callable[[T], t.Dict[str, JSON]] = _no_props, -) -> t.Callable[[t.Callable[P, T]], t.Callable[P, T]]: - """Decorator to add audit logging to a function given an active project""" - - def decorator(func: t.Callable[P, T]) -> t.Callable[P, T]: - def wrapper(*args: P.args, **kwargs: P.kwargs) -> T: - project = active_project.get(None) - if project is None: - return func(*args, **kwargs) - return project.state.with_audit( - event, - input_props, - output_props, - )(func)(*args, **kwargs) - - return wrapper - - return decorator - - -def audit( - event: str, success: bool = True, elapsed: float = 0.0, **properties: JSON -) -> None: - """Audit an event given an active project""" - properties.setdefault("execution_id", execution_id.get()) - project = active_project.get(None) - if project is not None: - project.state.audit(event, success, elapsed, **properties) diff --git a/src/cdf/legacy/utility/__init__.py b/src/cdf/legacy/utility/__init__.py deleted file mode 
100644 index c279ee7..0000000 --- a/src/cdf/legacy/utility/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -import typing as t -from operator import itemgetter - -TDict = t.TypeVar("TDict", bound=t.Dict[str, t.Any]) - - -def find_item( - lst: t.List[TDict], key: t.Union[t.Callable[[TDict], t.Any], str], value: t.Any -) -> TDict: - """Find an item in a list by a key-value pair. - - Example: - >>> find_item([{"name": "Alice"}, {"name": "Bob"}], "name", "Bob") - {"name": "Bob"} - - Args: - lst: The list to search. - key: The key function to extract the value from an item or the key name. - value: The value to find. - - Returns: - The item with the matching value. - """ - fn = itemgetter(key) if isinstance(key, str) else key - return next((item for item in lst if fn(item) == value)) diff --git a/src/cdf/legacy/utility/file.py b/src/cdf/legacy/utility/file.py deleted file mode 100644 index 6808c0d..0000000 --- a/src/cdf/legacy/utility/file.py +++ /dev/null @@ -1,75 +0,0 @@ -import json -import typing as t -from pathlib import Path - -import ruamel.yaml as yaml -import tomlkit - -from cdf.types import M - - -def load_file(path: Path) -> M.Result[t.Dict[str, t.Any], Exception]: - """Load a configuration from a file path. - - Args: - path: The file path. - - Returns: - A Result monad with the configuration dictionary if the file format is JSON, YAML or TOML. - Otherwise, a Result monad with an error. - """ - if path.suffix == ".json": - return _load_json(path) - if path.suffix in (".yaml", ".yml"): - return _load_yaml(path) - if path.suffix == ".toml": - return _load_toml(path) - return M.error(ValueError("Invalid file format, must be JSON, YAML or TOML")) - - -def _load_json(path: Path) -> M.Result[t.Dict[str, t.Any], Exception]: - """Load a configuration from a JSON file. - - Args: - path: The file path to a valid JSON document. - - Returns: - A Result monad with the configuration dictionary if the file format is JSON. Otherwise, a - Result monad with an error. - """ - try: - return M.ok(json.loads(path.read_text())) - except Exception as e: - return M.error(e) - - -def _load_yaml(path: Path) -> M.Result[t.Dict[str, t.Any], Exception]: - """Load a configuration from a YAML file. - - Args: - path: The file path to a valid YAML document. - - Returns: - A Result monad with the configuration dictionary if the file format is YAML. Otherwise, a - Result monad with an error. - """ - try: - return M.ok(yaml.round_trip_load(path, preserve_quotes=True)) - except Exception as e: - return M.error(e) - - -def _load_toml(path: Path) -> M.Result[t.Dict[str, t.Any], Exception]: - """Load a configuration from a TOML file. - - Args: - path: The file path to a valid TOML document. - - Returns: - A Result monad with the configuration dictionary if the file format is TOML. Otherwise, a - Result monad with an error. 
- """ - try: - return M.ok(tomlkit.loads(path.read_text()).unwrap()) - except Exception as e: - return M.error(e) diff --git a/tests/legacy/specification/test_notebook.py b/tests/legacy/specification/test_notebook.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/legacy/specification/test_pipeline.py b/tests/legacy/specification/test_pipeline.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/legacy/specification/test_publisher.py b/tests/legacy/specification/test_publisher.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/legacy/specification/test_script.py b/tests/legacy/specification/test_script.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/legacy/specification/test_sink.py b/tests/legacy/specification/test_sink.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/legacy/test_context.py b/tests/legacy/test_context.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/legacy/test_filesystem.py b/tests/legacy/test_filesystem.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/legacy/test_packaging.py b/tests/legacy/test_packaging.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/legacy/test_project.py b/tests/legacy/test_project.py deleted file mode 100644 index 86e00f1..0000000 --- a/tests/legacy/test_project.py +++ /dev/null @@ -1,268 +0,0 @@ -"""Tests for the core.project module.""" - -from pathlib import Path - -import dlt -import pytest - -from cdf.legacy.project import Project, load_project - - -def test_load_project(): - """Test the load_project function.""" - project = load_project("examples/sandbox") - assert project.is_ok() - - err_monad = load_project("examples/idontexist") - with pytest.raises(FileNotFoundError): - err_monad.unwrap() - - project = project.unwrap() - assert isinstance(project, Project) - - -@pytest.fixture -def project(): - """Load the project for testing.""" - return load_project("examples/sandbox").unwrap() - - -def test_project_indexing(project: Project): - """Ensure the project can be indexed. - - The project is a dictionary-like object. It exposes its own configuration, - and it exposes workspaces through the `workspaces` key. Dot notation is also - supported. Dunder methods like `__contains__` and `__len__` apply to - the workspace collection. The project is read-only. Indexing into a Workspace - object will invoke the workspace's __getitem__ method which also supports dot - notation. Hence the project is a tree-like structure. 
- """ - assert project["name"] == "cdf-example" - assert project["version"] == "0.1.0" - assert project["feature_flags.provider"] == "filesystem" - assert project["workspaces.alex"] is project.get_workspace("alex").unwrap() - assert len(project) == 1 - assert len(project["workspaces.alex.pipelines"]) == 3 - assert "alex" in project - assert "jane" not in project - with pytest.raises(KeyError): - project["workspaces.jane"] - with pytest.raises(NotImplementedError): - del project["name"] - assert list(project)[0] is project["workspaces.alex"] - assert project["workspaces.alex.pipelines.us_cities.version"] == 1 - - -def test_project_get_spec(project: Project): - """Ensure the project can get a spec and that we get the same spec each time.""" - spec = ( - project.get_workspace("alex") - .bind(lambda workspace: workspace.get_pipeline_spec("us_cities")) - .unwrap() - ) - assert spec["name"] == "us_cities" - assert callable(spec) - assert spec is ( - project.get_workspace("alex") - .bind(lambda workspace: workspace.get_pipeline_spec("us_cities")) - .unwrap() - ) - - -def test_inject_configuration(project: Project): - """Ensure keys are persisted while injecting configuration.""" - with project.inject_configuration(): - assert dlt.config["something"] == "ok" - dlt.config["other"] = "cool" - assert dlt.config["other"] == "cool" - dlt.secrets["ok.nice.cool"] = "wow" - assert dlt.secrets["ok.nice.cool"] == "wow" - - -def test_round_trip_serialization(project: Project): - """Test that the project can be serialized and deserialized.""" - obj = project.model_dump() - roundtrip = Project.model_validate(obj) - assert roundtrip == project - assert roundtrip.is_newer_than(project) - assert ( - project["workspaces.alex.scripts.nested/hello"] - == roundtrip["workspaces.alex.scripts.nested/hello"] - ) - - -def test_init_ff(project: Project): - """Test that the feature flag adapter is initialized.""" - assert project.ff_adapter is not None - assert project.ff.provider == "filesystem" - # The example project _storage is not committed to git currently - # assert project.ff_adapter["cdf-example.alex.us_cities.us_cities"].to_bool() is True - - -def test_init_fs(project: Project): - """Test that the filesystem adapter is initialized.""" - assert project.fs_adapter is not None - assert project.fs_adapter.protocol == "cdf" - - -def test_init_state(project: Project): - """Test that the state adapter is initialized.""" - # from sqlglot import exp - - adapter = project.state.adapter - assert adapter is not None - # adapter.create_schema("test") - # adapter.create_table("test1", {"name": exp.DataType.build("text")}) - # assert adapter.table_exists("test1") - # project.state.store_json("test", {"name": "alex"}) - # adapter.close() - - -@pytest.fixture -def python_project(): - city_spec = { - "path": Path("pipelines/us_cities_pipeline.py"), - "cron_string": "@daily", - "description": "Get US city data", - "metrics": { - "*": [ - { - "name": "cdf_builtin_metrics_count", - "description": "Counts the number of items in a dataset", - "entrypoint": "cdf.builtin.metrics:count", - }, - { - "name": "cdf_builtin_metrics_max_value", - "description": "Returns the maximum value of a key in a dataset", - "entrypoint": "cdf.builtin.metrics:max_value", - "options": {"key": "zip_code"}, - }, - ] - }, - "filters": {}, - "dataset_name": "test_city", - "options": { - "progress": None, - "full_refresh": False, - "loader_file_format": "insert_values", - "runtime": {"dlthub_telemetry": False}, - }, - } - dota_spec = { - "cron_string": "@daily", 
- "name": "dota2", - "description": "Dota2 is a Massive Online Battle Arena game based on Warcraft.", - "path": Path("pipelines/dota2_pipeline.py"), - } - local_spec = { - "name": "local", - "description": "No description provided.", - "path": Path("sinks/local_sink.py"), - } - httpbin_spec = { - "cron_string": "@daily", - "name": "httpbin", - "description": "A publisher that pushes data to httpbin.org", - "path": Path("publishers/httpbin_publisher.py"), - "depends_on": ["mart.zips"], - } - hello_spec = { - "cron_string": "@daily", - "name": "hello", - "description": "No description provided.", - "path": Path("scripts/hello_script.py"), - } - return Project.model_validate( - { - "path": Path("examples/sandbox").resolve(), - "name": "data-platform", - "version": "0.2.0", - "workspaces": { - "datateam": { - "path": "alex", - "pipelines": {"cities": city_spec, "dota": dota_spec}, - "sinks": {"local": local_spec}, - "publishers": {"httpbin": httpbin_spec}, - "scripts": {"hello": hello_spec}, - } - }, - "filesystem": {"uri": "file://_storage", "options": {}}, - "feature_flags": { - "provider": "filesystem", - "filename": "@jinja dev_flags_{{ 1 + 1}}.json", - }, - } - ) - - -def test_custom_project(python_project: Project): - """Test creating a project programmatically. - - This project has a custom structure and is not loaded from a file. Components - are still ultimately based on python files, however the configuration wrapping - these components is done in code which offers more flexibility. - """ - assert python_project.name == "data-platform" - - -@pytest.fixture -def barebones_project(): - return Project.model_validate( - { - "path": "examples/sandbox", - "name": "data-platform", - "version": "0.2.0", - "workspaces": { - "datateam": { - "path": "alex", - "pipelines": { - "cities": "pipelines/us_cities_pipeline.py", - "dota": {"path": "pipelines/dota2_pipeline.py"}, - }, - "sinks": {"local": "sinks/local_sink.py"}, - "publishers": { - "httpbin": { - "path": "publishers/httpbin_publisher.py", - "depends_on": ["mart.zips"], - } - }, - "scripts": {"hello": "scripts/hello_script.py"}, - } - }, - } - ) - - -def test_barebones_project(barebones_project: Project): - """Test creating a project programmatically with minimal configuration. - - This asserts that certain heuristics are applied to the configuration to - make it more user-friendly. 
- """ - assert barebones_project.name == "data-platform" - assert barebones_project["workspaces.datateam.pipelines.cities"] is not None - assert barebones_project["workspaces.datateam.publishers.httpbin.depends_on"] == [ - "mart.zips" - ] - assert barebones_project["workspaces.datateam.sinks.local.component_path"] == Path( - "sinks/local_sink.py" - ) - assert barebones_project[ - "workspaces.datateam.scripts.hello.component_path" - ] == Path("scripts/hello_script.py") - assert barebones_project[ - "workspaces.datateam.pipelines.cities.component_path" - ] == Path("pipelines/us_cities_pipeline.py") - assert len(barebones_project["workspaces.datateam.pipelines"]) == 2 - assert len(barebones_project["workspaces.datateam.sinks"]) == 1 - assert len(barebones_project["workspaces.datateam.publishers"]) == 1 - assert len(barebones_project["workspaces.datateam.scripts"]) == 1 - assert len(barebones_project["workspaces.datateam"]) == 5 - assert len(barebones_project) == 1 - assert "datateam" in barebones_project - assert "jane" not in barebones_project - with pytest.raises(KeyError): - barebones_project["workspaces.jane"] - with pytest.raises(NotImplementedError): - del barebones_project["name"] - assert list(barebones_project)[0] is barebones_project["workspaces.datateam"] diff --git a/tests/legacy/utility/test_file_.py b/tests/legacy/utility/test_file_.py deleted file mode 100644 index e69de29..0000000 diff --git a/tests/test_cli.py b/tests/test_cli.py deleted file mode 100644 index c9a5130..0000000 --- a/tests/test_cli.py +++ /dev/null @@ -1,12 +0,0 @@ -from typer.testing import CliRunner - -from cdf.cli import app - -runner = CliRunner() - - -def test_index(): - result = runner.invoke(app, ["-p", "examples/sandbox", "-w", "alex", "index"]) - assert result.exit_code == 0 - assert "Pipelines" in result.stdout - assert "Sinks" in result.stdout