From f3bc82a31da17715472d3cca4e815c71017cc430 Mon Sep 17 00:00:00 2001
From: Marco Edward Gorelli <marcogorelli@protonmail.com>
Date: Tue, 3 Sep 2024 15:17:04 +0100
Subject: [PATCH 1/5] fix: compatiblity with Python3.8 for Ibis (#906)

---
 .github/workflows/pytest.yml             |  2 +-
 narwhals/dependencies.py                 | 32 ++++++------
 tests/frame/arrow_c_stream_test.py       |  9 ++++
 tests/frame/interchange_schema_test.py   | 65 +++++++++++++++++-------
 tests/series_only/arrow_c_stream_test.py |  9 ++++
 5 files changed, 81 insertions(+), 36 deletions(-)

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index ee5ba37ec..265442e9f 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -25,7 +25,7 @@ jobs:
         if: runner.os == 'Windows'
         run: powershell -c "irm https://astral.sh/uv/install.ps1 | iex"
       - name: install-reqs
-        run: uv pip install --upgrade tox virtualenv setuptools -r requirements-dev.txt --system
+        run: uv pip install --upgrade tox virtualenv setuptools -r requirements-dev.txt ibis-framework[duckdb] --system
       - name: show-deps
         run: uv pip freeze
       - name: Run pytest
diff --git a/narwhals/dependencies.py b/narwhals/dependencies.py
index 24b435858..2cd9f0983 100644
--- a/narwhals/dependencies.py
+++ b/narwhals/dependencies.py
@@ -83,79 +83,79 @@ def get_ibis() -> Any:
 
 def is_pandas_dataframe(df: Any) -> TypeGuard[pd.DataFrame]:
     """Check whether `df` is a pandas DataFrame without importing pandas."""
-    return bool((pd := get_pandas()) is not None and isinstance(df, pd.DataFrame))
+    return (pd := get_pandas()) is not None and isinstance(df, pd.DataFrame)
 
 
 def is_pandas_series(ser: Any) -> TypeGuard[pd.Series[Any]]:
     """Check whether `ser` is a pandas Series without importing pandas."""
-    return bool((pd := get_pandas()) is not None and isinstance(ser, pd.Series))
+    return (pd := get_pandas()) is not None and isinstance(ser, pd.Series)
 
 
 def is_modin_dataframe(df: Any) -> TypeGuard[mpd.DataFrame]:
     """Check whether `df` is a modin DataFrame without importing modin."""
-    return bool((pd := get_modin()) is not None and isinstance(df, pd.DataFrame))
+    return (pd := get_modin()) is not None and isinstance(df, pd.DataFrame)
 
 
 def is_modin_series(ser: Any) -> TypeGuard[mpd.Series]:
     """Check whether `ser` is a modin Series without importing modin."""
-    return bool((pd := get_modin()) is not None and isinstance(ser, pd.Series))
+    return (pd := get_modin()) is not None and isinstance(ser, pd.Series)
 
 
 def is_cudf_dataframe(df: Any) -> TypeGuard[cudf.DataFrame]:
     """Check whether `df` is a cudf DataFrame without importing cudf."""
-    return bool((pd := get_cudf()) is not None and isinstance(df, pd.DataFrame))
+    return (pd := get_cudf()) is not None and isinstance(df, pd.DataFrame)
 
 
 def is_cudf_series(ser: Any) -> TypeGuard[pd.Series[Any]]:
     """Check whether `ser` is a cudf Series without importing cudf."""
-    return bool((pd := get_cudf()) is not None and isinstance(ser, pd.Series))
+    return (pd := get_cudf()) is not None and isinstance(ser, pd.Series)
 
 
 def is_dask_dataframe(df: Any) -> TypeGuard[dd.DataFrame]:
     """Check whether `df` is a Dask DataFrame without importing Dask."""
-    return bool((dd := get_dask_dataframe()) is not None and isinstance(df, dd.DataFrame))
+    return (dd := get_dask_dataframe()) is not None and isinstance(df, dd.DataFrame)
 
 
 def is_duckdb_relation(df: Any) -> TypeGuard[duckdb.DuckDBPyRelation]:
     """Check whether `df` is a DuckDB Relation without importing DuckDB."""
-    return bool(
-        (duckdb := get_duckdb()) is not None and isinstance(df, duckdb.DuckDBPyRelation)
+    return (duckdb := get_duckdb()) is not None and isinstance(
+        df, duckdb.DuckDBPyRelation
     )
 
 
 def is_ibis_table(df: Any) -> TypeGuard[ibis.Table]:
     """Check whether `df` is a Ibis Table without importing Ibis."""
-    return bool((ibis := get_ibis()) is not None and isinstance(df, ibis.Table))
+    return (ibis := get_ibis()) is not None and isinstance(df, ibis.expr.types.Table)
 
 
 def is_polars_dataframe(df: Any) -> TypeGuard[pl.DataFrame]:
     """Check whether `df` is a Polars DataFrame without importing Polars."""
-    return bool((pl := get_polars()) is not None and isinstance(df, pl.DataFrame))
+    return (pl := get_polars()) is not None and isinstance(df, pl.DataFrame)
 
 
 def is_polars_lazyframe(df: Any) -> TypeGuard[pl.LazyFrame]:
     """Check whether `df` is a Polars LazyFrame without importing Polars."""
-    return bool((pl := get_polars()) is not None and isinstance(df, pl.LazyFrame))
+    return (pl := get_polars()) is not None and isinstance(df, pl.LazyFrame)
 
 
 def is_polars_series(ser: Any) -> TypeGuard[pl.Series]:
     """Check whether `ser` is a Polars Series without importing Polars."""
-    return bool((pl := get_polars()) is not None and isinstance(ser, pl.Series))
+    return (pl := get_polars()) is not None and isinstance(ser, pl.Series)
 
 
 def is_pyarrow_chunked_array(ser: Any) -> TypeGuard[pa.ChunkedArray]:
     """Check whether `ser` is a PyArrow ChunkedArray without importing PyArrow."""
-    return bool((pa := get_pyarrow()) is not None and isinstance(ser, pa.ChunkedArray))
+    return (pa := get_pyarrow()) is not None and isinstance(ser, pa.ChunkedArray)
 
 
 def is_pyarrow_table(df: Any) -> TypeGuard[pa.Table]:
     """Check whether `df` is a PyArrow Table without importing PyArrow."""
-    return bool((pa := get_pyarrow()) is not None and isinstance(df, pa.Table))
+    return (pa := get_pyarrow()) is not None and isinstance(df, pa.Table)
 
 
 def is_numpy_array(arr: Any) -> TypeGuard[np.ndarray]:
     """Check whether `arr` is a NumPy Array without importing NumPy."""
-    return bool((np := get_numpy()) is not None and isinstance(arr, np.ndarray))
+    return (np := get_numpy()) is not None and isinstance(arr, np.ndarray)
 
 
 def is_pandas_like_dataframe(df: Any) -> bool:
diff --git a/tests/frame/arrow_c_stream_test.py b/tests/frame/arrow_c_stream_test.py
index 7a3403f69..cb856adf9 100644
--- a/tests/frame/arrow_c_stream_test.py
+++ b/tests/frame/arrow_c_stream_test.py
@@ -10,6 +10,9 @@
 @pytest.mark.skipif(
     parse_version(pl.__version__) < (1, 3), reason="too old for pycapsule in Polars"
 )
+@pytest.mark.skipif(
+    parse_version(pa.__version__) < (16, 0, 0), reason="too old for pycapsule in PyArrow"
+)
 def test_arrow_c_stream_test() -> None:
     df = nw.from_native(pl.Series([1, 2, 3]).to_frame("a"), eager_only=True)
     result = pa.table(df)
@@ -20,6 +23,9 @@ def test_arrow_c_stream_test() -> None:
 @pytest.mark.skipif(
     parse_version(pl.__version__) < (1, 3), reason="too old for pycapsule in Polars"
 )
+@pytest.mark.skipif(
+    parse_version(pa.__version__) < (16, 0, 0), reason="too old for pycapsule in PyArrow"
+)
 def test_arrow_c_stream_test_invalid(monkeypatch: pytest.MonkeyPatch) -> None:
     # "poison" the dunder method to make sure it actually got called above
     monkeypatch.setattr(
@@ -33,6 +39,9 @@ def test_arrow_c_stream_test_invalid(monkeypatch: pytest.MonkeyPatch) -> None:
 @pytest.mark.skipif(
     parse_version(pl.__version__) < (1, 3), reason="too old for pycapsule in Polars"
 )
+@pytest.mark.skipif(
+    parse_version(pa.__version__) < (16, 0, 0), reason="too old for pycapsule in PyArrow"
+)
 def test_arrow_c_stream_test_fallback(monkeypatch: pytest.MonkeyPatch) -> None:
     # Check that fallback to PyArrow works
     monkeypatch.delattr("polars.DataFrame.__arrow_c_stream__")
diff --git a/tests/frame/interchange_schema_test.py b/tests/frame/interchange_schema_test.py
index a73ca6259..afec06831 100644
--- a/tests/frame/interchange_schema_test.py
+++ b/tests/frame/interchange_schema_test.py
@@ -8,6 +8,7 @@
 import pytest
 
 import narwhals.stable.v1 as nw
+from narwhals.utils import parse_version
 
 
 def test_interchange_schema() -> None:
@@ -67,7 +68,10 @@ def test_interchange_schema() -> None:
     assert df["a"].dtype == nw.Int64
 
 
-def test_interchange_schema_ibis() -> None:  # pragma: no cover
+@pytest.mark.filterwarnings("ignore:.*locale specific date formats")
+def test_interchange_schema_ibis(
+    tmpdir: pytest.TempdirFactory,
+) -> None:  # pragma: no cover
     ibis = pytest.importorskip("ibis")
     df_pl = pl.DataFrame(
         {
@@ -105,26 +109,49 @@ def test_interchange_schema_ibis() -> None:  # pragma: no cover
             "o": pl.Boolean,
         },
     )
-    tbl = ibis.memtable(df_pl)
+    filepath = str(tmpdir / "file.parquet")  # type: ignore[operator]
+    df_pl.write_parquet(filepath)
+    tbl = ibis.read_parquet(filepath)
     df = nw.from_native(tbl, eager_or_interchange_only=True)
     result = df.schema
-    expected = {
-        "a": nw.Int64,
-        "b": nw.Int32,
-        "c": nw.Int16,
-        "d": nw.Int8,
-        "e": nw.UInt64,
-        "f": nw.UInt32,
-        "g": nw.UInt16,
-        "h": nw.UInt8,
-        "i": nw.Float64,
-        "j": nw.Float32,
-        "k": nw.String,
-        "l": nw.String,
-        "m": nw.Date,
-        "n": nw.Datetime,
-        "o": nw.Boolean,
-    }
+    if parse_version(ibis.__version__) > (6, 0, 0):
+        expected = {
+            "a": nw.Int64,
+            "b": nw.Int32,
+            "c": nw.Int16,
+            "d": nw.Int8,
+            "e": nw.UInt64,
+            "f": nw.UInt32,
+            "g": nw.UInt16,
+            "h": nw.UInt8,
+            "i": nw.Float64,
+            "j": nw.Float32,
+            "k": nw.String,
+            "l": nw.String,
+            "m": nw.Date,
+            "n": nw.Datetime,
+            "o": nw.Boolean,
+        }
+    else:
+        # Old versions of Ibis would read the file in
+        # with different data types
+        expected = {
+            "a": nw.Int64,
+            "b": nw.Int32,
+            "c": nw.Int16,
+            "d": nw.Int32,
+            "e": nw.Int32,
+            "f": nw.Int32,
+            "g": nw.Int32,
+            "h": nw.Int32,
+            "i": nw.Float64,
+            "j": nw.Float64,
+            "k": nw.String,
+            "l": nw.String,
+            "m": nw.Date,
+            "n": nw.Datetime,
+            "o": nw.Boolean,
+        }
     assert result == expected
     assert df["a"].dtype == nw.Int64
 
diff --git a/tests/series_only/arrow_c_stream_test.py b/tests/series_only/arrow_c_stream_test.py
index 9964d7408..9d2ebc8d0 100644
--- a/tests/series_only/arrow_c_stream_test.py
+++ b/tests/series_only/arrow_c_stream_test.py
@@ -10,6 +10,9 @@
 @pytest.mark.skipif(
     parse_version(pl.__version__) < (1, 3), reason="too old for pycapsule in Polars"
 )
+@pytest.mark.skipif(
+    parse_version(pa.__version__) < (16, 0, 0), reason="too old for pycapsule in PyArrow"
+)
 def test_arrow_c_stream_test() -> None:
     s = nw.from_native(pl.Series([1, 2, 3]), series_only=True)
     result = pa.chunked_array(s)
@@ -20,6 +23,9 @@ def test_arrow_c_stream_test() -> None:
 @pytest.mark.skipif(
     parse_version(pl.__version__) < (1, 3), reason="too old for pycapsule in Polars"
 )
+@pytest.mark.skipif(
+    parse_version(pa.__version__) < (16, 0, 0), reason="too old for pycapsule in PyArrow"
+)
 def test_arrow_c_stream_test_invalid(monkeypatch: pytest.MonkeyPatch) -> None:
     # "poison" the dunder method to make sure it actually got called above
     monkeypatch.setattr("narwhals.series.Series.__arrow_c_stream__", lambda *_: 1 / 0)
@@ -31,6 +37,9 @@ def test_arrow_c_stream_test_invalid(monkeypatch: pytest.MonkeyPatch) -> None:
 @pytest.mark.skipif(
     parse_version(pl.__version__) < (1, 3), reason="too old for pycapsule in Polars"
 )
+@pytest.mark.skipif(
+    parse_version(pa.__version__) < (16, 0, 0), reason="too old for pycapsule in PyArrow"
+)
 def test_arrow_c_stream_test_fallback(monkeypatch: pytest.MonkeyPatch) -> None:
     # Check that fallback to PyArrow works
     monkeypatch.delattr("polars.Series.__arrow_c_stream__")

From 100735229e90aaed83f2e470b243573a36e02a2b Mon Sep 17 00:00:00 2001
From: Marco Edward Gorelli <marcogorelli@protonmail.com>
Date: Tue, 3 Sep 2024 15:22:38 +0100
Subject: [PATCH 2/5] release: Bump version to 1.6.2 (#907)

---
 docs/installation.md | 2 +-
 narwhals/__init__.py | 2 +-
 pyproject.toml       | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/installation.md b/docs/installation.md
index 38da4db9f..796cd8708 100644
--- a/docs/installation.md
+++ b/docs/installation.md
@@ -11,6 +11,6 @@ Then, if you start the Python REPL and see the following:
 ```python
 >>> import narwhals
 >>> narwhals.__version__
-'1.6.1'
+'1.6.2'
 ```
 then installation worked correctly!
diff --git a/narwhals/__init__.py b/narwhals/__init__.py
index 0cd5cb8a0..56c638e84 100644
--- a/narwhals/__init__.py
+++ b/narwhals/__init__.py
@@ -53,7 +53,7 @@
 from narwhals.utils import maybe_get_index
 from narwhals.utils import maybe_set_index
 
-__version__ = "1.6.1"
+__version__ = "1.6.2"
 
 __all__ = [
     "dependencies",
diff --git a/pyproject.toml b/pyproject.toml
index 262c7cdc5..a279280bf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 
 [project]
 name = "narwhals"
-version = "1.6.1"
+version = "1.6.2"
 authors = [
   { name="Marco Gorelli", email="33491632+MarcoGorelli@users.noreply.github.com" },
 ]

From 9eb5f46aa58672763d3309537416eef1bdfb2273 Mon Sep 17 00:00:00 2001
From: Isaias Gutierrez-Cruz
 <64386035+IsaiasGutierrezCruz@users.noreply.github.com>
Date: Wed, 4 Sep 2024 09:47:58 -0600
Subject: [PATCH 3/5] ci: add tests for the queries of TPC-H  (#899)

---
 .github/workflows/check_tpch_queries.yml | 30 ++++++++++++++++++++++++
 pyproject.toml                           |  1 +
 requirements-dev.txt                     |  1 +
 tpch/tests/__init__.py                   |  0
 tpch/tests/test_queries.py               | 29 +++++++++++++++++++++++
 5 files changed, 61 insertions(+)
 create mode 100644 .github/workflows/check_tpch_queries.yml
 create mode 100644 tpch/tests/__init__.py
 create mode 100644 tpch/tests/test_queries.py

diff --git a/.github/workflows/check_tpch_queries.yml b/.github/workflows/check_tpch_queries.yml
new file mode 100644
index 000000000..397163091
--- /dev/null
+++ b/.github/workflows/check_tpch_queries.yml
@@ -0,0 +1,30 @@
+name: Tests for TPCH Queries
+
+on:
+  pull_request:
+    types: [labeled]
+
+jobs:
+  validate-queries:
+    if: ${{ github.event.label.name == 'full-test' }}
+    strategy:
+      matrix:
+        python-version: ["3.12"]
+        os: [ubuntu-latest]
+
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install uv 
+        run: curl -LsSf https://astral.sh/uv/install.sh | sh
+      - name: install-reqs
+        run: uv pip install --upgrade -r requirements-dev.txt --system
+      - name: local-install
+        run: uv pip install -e . --system
+      - name: generate-data
+        run: cd tpch && python generate_data.py
+      - name: tpch-tests 
+        run: python -m unittest discover -s 'tpch/tests'
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index a279280bf..b3a2a0c28 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -76,6 +76,7 @@ lint.ignore = [
 
 [tool.ruff.lint.per-file-ignores]
 "tests/*" = ["S101"]
+"tpch/tests/*" = ["S101"]
 "utils/*" = ["S311", "PTH123"]
 "tpch/execute/*" = ["T201"]
 
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 213fcdcb8..23ff1757e 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -1,3 +1,4 @@
+tqdm
 covdefaults
 duckdb
 pandas
diff --git a/tpch/tests/__init__.py b/tpch/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tpch/tests/test_queries.py b/tpch/tests/test_queries.py
new file mode 100644
index 000000000..4b7cdd866
--- /dev/null
+++ b/tpch/tests/test_queries.py
@@ -0,0 +1,29 @@
+import os
+import subprocess
+import sys
+import unittest
+from pathlib import Path
+
+
+class TestQueries(unittest.TestCase):
+    def test_execute_scripts(self) -> None:
+        root = Path(__file__).resolve().parent.parent
+        # directory containing all the queries
+        execute_dir = root / "execute"
+
+        env = os.environ.copy()
+        env["PYTHONPATH"] = str(root)
+
+        for script_path in execute_dir.glob("q[1-9]*.py"):
+            result = subprocess.run(  # noqa: S603
+                [sys.executable, str(script_path)],
+                capture_output=True,
+                text=True,
+                env=env,
+                cwd=root,
+                check=False,
+                shell=False,
+            )
+            assert (
+                result.returncode == 0
+            ), f"Script {script_path} failed with error: {result.stderr}"

From 69da133bbf86906b7787c95db403fcd4a626e78a Mon Sep 17 00:00:00 2001
From: Zhengbo Wang <lw7205675@gmail.com>
Date: Wed, 4 Sep 2024 23:50:39 +0800
Subject: [PATCH 4/5] feat: Add more queries of tpch (#898)

---
 tpch/execute/__init__.py | 30 +++++++++++++++++++++++
 tpch/execute/q1.py       | 23 ++----------------
 tpch/execute/q10.py      | 25 ++++----------------
 tpch/execute/q11.py      | 23 ++++--------------
 tpch/execute/q15.py      | 21 +++++++++++++++++
 tpch/execute/q17.py      | 21 +++++++++++++++++
 tpch/execute/q18.py      | 22 +++++++++++++++++
 tpch/execute/q19.py      | 17 ++++++++++++++
 tpch/execute/q2.py       | 32 +++++--------------------
 tpch/execute/q20.py      | 20 ++++++++++++++++
 tpch/execute/q21.py      | 19 +++++++++++++++
 tpch/execute/q3.py       | 25 ++++----------------
 tpch/execute/q4.py       | 21 +++--------------
 tpch/execute/q5.py       | 29 ++++++-----------------
 tpch/execute/q6.py       | 15 ++----------
 tpch/execute/q7.py       | 27 +++++++++++++++++++++
 tpch/execute/q9.py       | 35 +++++++++++++++++++++++++++
 tpch/queries/q15.py      | 33 ++++++++++++++++++++++++++
 tpch/queries/q17.py      | 23 ++++++++++++++++++
 tpch/queries/q18.py      | 31 ++++++++++++++++++++++++
 tpch/queries/q19.py      | 39 ++++++++++++++++++++++++++++++
 tpch/queries/q20.py      | 43 +++++++++++++++++++++++++++++++++
 tpch/queries/q21.py      | 43 +++++++++++++++++++++++++++++++++
 tpch/queries/q6.py       |  4 ----
 tpch/queries/q7.py       | 51 ++++++++++++++++++++++++++++++++++++++++
 tpch/queries/q9.py       | 36 ++++++++++++++++++++++++++++
 26 files changed, 544 insertions(+), 164 deletions(-)
 create mode 100644 tpch/execute/q15.py
 create mode 100644 tpch/execute/q17.py
 create mode 100644 tpch/execute/q18.py
 create mode 100644 tpch/execute/q19.py
 create mode 100644 tpch/execute/q20.py
 create mode 100644 tpch/execute/q21.py
 create mode 100644 tpch/execute/q7.py
 create mode 100644 tpch/execute/q9.py
 create mode 100644 tpch/queries/q15.py
 create mode 100644 tpch/queries/q17.py
 create mode 100644 tpch/queries/q18.py
 create mode 100644 tpch/queries/q19.py
 create mode 100644 tpch/queries/q20.py
 create mode 100644 tpch/queries/q21.py
 create mode 100644 tpch/queries/q7.py
 create mode 100644 tpch/queries/q9.py

diff --git a/tpch/execute/__init__.py b/tpch/execute/__init__.py
index e69de29bb..e0c448649 100644
--- a/tpch/execute/__init__.py
+++ b/tpch/execute/__init__.py
@@ -0,0 +1,30 @@
+from pathlib import Path
+
+import dask.dataframe as dd
+import pandas as pd
+import polars as pl
+import pyarrow.parquet as pq
+
+pd.options.mode.copy_on_write = True
+pd.options.future.infer_string = True
+
+lineitem = Path("data") / "lineitem.parquet"
+region = Path("data") / "region.parquet"
+nation = Path("data") / "nation.parquet"
+supplier = Path("data") / "supplier.parquet"
+part = Path("data") / "part.parquet"
+partsupp = Path("data") / "partsupp.parquet"
+orders = Path("data") / "orders.parquet"
+customer = Path("data") / "customer.parquet"
+line_item = Path("data") / "lineitem.parquet"
+
+IO_FUNCS = {
+    "pandas": lambda x: pd.read_parquet(x, engine="pyarrow"),
+    "pandas[pyarrow]": lambda x: pd.read_parquet(
+        x, engine="pyarrow", dtype_backend="pyarrow"
+    ),
+    "polars[eager]": lambda x: pl.read_parquet(x),
+    "polars[lazy]": lambda x: pl.scan_parquet(x),
+    "pyarrow": lambda x: pq.read_table(x),
+    "dask": lambda x: dd.read_parquet(x, engine="pyarrow", dtype_backend="pyarrow"),
+}
diff --git a/tpch/execute/q1.py b/tpch/execute/q1.py
index dd839b292..9889c3af0 100644
--- a/tpch/execute/q1.py
+++ b/tpch/execute/q1.py
@@ -1,26 +1,7 @@
-from pathlib import Path
-
-import dask.dataframe as dd
-import pandas as pd
-import polars as pl
-import pyarrow.parquet as pq
 from queries import q1
 
-pd.options.mode.copy_on_write = True
-pd.options.future.infer_string = True
-
-lineitem = Path("data") / "lineitem.parquet"
-
-IO_FUNCS = {
-    "pandas": lambda x: pd.read_parquet(x, engine="pyarrow"),
-    "pandas[pyarrow]": lambda x: pd.read_parquet(
-        x, engine="pyarrow", dtype_backend="pyarrow"
-    ),
-    "polars[eager]": lambda x: pl.read_parquet(x),
-    "polars[lazy]": lambda x: pl.scan_parquet(x),
-    "pyarrow": lambda x: pq.read_table(x),
-    "dask": lambda x: dd.read_parquet(x, engine="pyarrow", dtype_backend="pyarrow"),
-}
+from . import IO_FUNCS
+from . import lineitem
 
 print(q1.query(IO_FUNCS["pandas[pyarrow]"](lineitem)))
 print(q1.query(IO_FUNCS["polars[lazy]"](lineitem)).collect())
diff --git a/tpch/execute/q10.py b/tpch/execute/q10.py
index 19e2e7ce0..9876f2aa9 100644
--- a/tpch/execute/q10.py
+++ b/tpch/execute/q10.py
@@ -1,25 +1,10 @@
-from pathlib import Path
-
-import pandas as pd
-import polars as pl
 from queries import q10
 
-pd.options.mode.copy_on_write = True
-pd.options.future.infer_string = True
-
-customer = Path("data") / "customer.parquet"
-nation = Path("data") / "nation.parquet"
-lineitem = Path("data") / "lineitem.parquet"
-orders = Path("data") / "orders.parquet"
-
-IO_FUNCS = {
-    "pandas": lambda x: pd.read_parquet(x, engine="pyarrow"),
-    "pandas[pyarrow]": lambda x: pd.read_parquet(
-        x, engine="pyarrow", dtype_backend="pyarrow"
-    ),
-    "polars[eager]": lambda x: pl.read_parquet(x),
-    "polars[lazy]": lambda x: pl.scan_parquet(x),
-}
+from . import IO_FUNCS
+from . import customer
+from . import lineitem
+from . import nation
+from . import orders
 
 tool = "pandas"
 fn = IO_FUNCS[tool]
diff --git a/tpch/execute/q11.py b/tpch/execute/q11.py
index 55161ae6b..82b1936aa 100644
--- a/tpch/execute/q11.py
+++ b/tpch/execute/q11.py
@@ -1,24 +1,9 @@
-from pathlib import Path
-
-import pandas as pd
-import polars as pl
 from queries import q11
 
-pd.options.mode.copy_on_write = True
-pd.options.future.infer_string = True
-
-nation = Path("data") / "nation.parquet"
-partsupp = Path("data") / "partsupp.parquet"
-supplier = Path("data") / "supplier.parquet"
-
-IO_FUNCS = {
-    "pandas": lambda x: pd.read_parquet(x, engine="pyarrow"),
-    "pandas[pyarrow]": lambda x: pd.read_parquet(
-        x, engine="pyarrow", dtype_backend="pyarrow"
-    ),
-    "polars[eager]": lambda x: pl.read_parquet(x),
-    "polars[lazy]": lambda x: pl.scan_parquet(x),
-}
+from . import IO_FUNCS
+from . import nation
+from . import partsupp
+from . import supplier
 
 tool = "pandas"
 fn = IO_FUNCS[tool]
diff --git a/tpch/execute/q15.py b/tpch/execute/q15.py
new file mode 100644
index 000000000..8fdaf2ab1
--- /dev/null
+++ b/tpch/execute/q15.py
@@ -0,0 +1,21 @@
+from queries import q15
+
+from . import IO_FUNCS
+from . import lineitem
+from . import supplier
+
+tool = "pandas"
+fn = IO_FUNCS[tool]
+print(q15.query(fn(lineitem), fn(supplier)))
+
+tool = "pandas[pyarrow]"
+fn = IO_FUNCS[tool]
+print(q15.query(fn(lineitem), fn(supplier)))
+
+tool = "polars[eager]"
+fn = IO_FUNCS[tool]
+print(q15.query(fn(lineitem), fn(supplier)))
+
+tool = "polars[lazy]"
+fn = IO_FUNCS[tool]
+print(q15.query(fn(lineitem), fn(supplier)).collect())
diff --git a/tpch/execute/q17.py b/tpch/execute/q17.py
new file mode 100644
index 000000000..5f2228012
--- /dev/null
+++ b/tpch/execute/q17.py
@@ -0,0 +1,21 @@
+from queries import q17
+
+from . import IO_FUNCS
+from . import lineitem
+from . import part
+
+tool = "pandas"
+fn = IO_FUNCS[tool]
+print(q17.query(fn(lineitem), fn(part)))
+
+tool = "pandas[pyarrow]"
+fn = IO_FUNCS[tool]
+print(q17.query(fn(lineitem), fn(part)))
+
+tool = "polars[eager]"
+fn = IO_FUNCS[tool]
+print(q17.query(fn(lineitem), fn(part)))
+
+tool = "polars[lazy]"
+fn = IO_FUNCS[tool]
+print(q17.query(fn(lineitem), fn(part)).collect())
diff --git a/tpch/execute/q18.py b/tpch/execute/q18.py
new file mode 100644
index 000000000..5a59f0e5e
--- /dev/null
+++ b/tpch/execute/q18.py
@@ -0,0 +1,22 @@
+from queries import q18
+
+from . import IO_FUNCS
+from . import customer
+from . import lineitem
+from . import orders
+
+tool = "pandas"
+fn = IO_FUNCS[tool]
+print(q18.query(fn(customer), fn(lineitem), fn(orders)))
+
+tool = "pandas[pyarrow]"
+fn = IO_FUNCS[tool]
+print(q18.query(fn(customer), fn(lineitem), fn(orders)))
+
+tool = "polars[eager]"
+fn = IO_FUNCS[tool]
+print(q18.query(fn(customer), fn(lineitem), fn(orders)))
+
+tool = "polars[lazy]"
+fn = IO_FUNCS[tool]
+print(q18.query(fn(customer), fn(lineitem), fn(orders)).collect())
diff --git a/tpch/execute/q19.py b/tpch/execute/q19.py
new file mode 100644
index 000000000..87467064c
--- /dev/null
+++ b/tpch/execute/q19.py
@@ -0,0 +1,17 @@
+from queries import q19
+
+from . import IO_FUNCS
+from . import lineitem
+from . import part
+
+fn = IO_FUNCS["pandas"]
+print(q19.query(fn(lineitem), fn(part)))
+
+fn = IO_FUNCS["pandas[pyarrow]"]
+print(q19.query(fn(lineitem), fn(part)))
+
+fn = IO_FUNCS["polars[eager]"]
+print(q19.query(fn(lineitem), fn(part)))
+
+fn = IO_FUNCS["polars[lazy]"]
+print(q19.query(fn(lineitem), fn(part)).collect())
diff --git a/tpch/execute/q2.py b/tpch/execute/q2.py
index 22a7f4317..cd82a9047 100644
--- a/tpch/execute/q2.py
+++ b/tpch/execute/q2.py
@@ -1,31 +1,11 @@
-from pathlib import Path
-
-import dask.dataframe as dd
-import pandas as pd
-import polars as pl
-import pyarrow.parquet as pq
 from queries import q2
 
-pd.options.mode.copy_on_write = True
-pd.options.future.infer_string = True
-
-region = Path("data") / "region.parquet"
-nation = Path("data") / "nation.parquet"
-supplier = Path("data") / "supplier.parquet"
-part = Path("data") / "part.parquet"
-partsupp = Path("data") / "partsupp.parquet"
-
-IO_FUNCS = {
-    "pandas": lambda x: pd.read_parquet(x, engine="pyarrow"),
-    "pandas[pyarrow]": lambda x: pd.read_parquet(
-        x, engine="pyarrow", dtype_backend="pyarrow"
-    ),
-    "polars[eager]": lambda x: pl.read_parquet(x),
-    "polars[lazy]": lambda x: pl.scan_parquet(x),
-    "pyarrow": lambda x: pq.read_table(x),
-    "dask": lambda x: dd.read_parquet(x, engine="pyarrow", dtype_backend="pyarrow"),
-}
-
+from . import IO_FUNCS
+from . import nation
+from . import part
+from . import partsupp
+from . import region
+from . import supplier
 
 tool = "pandas[pyarrow]"
 fn = IO_FUNCS[tool]
diff --git a/tpch/execute/q20.py b/tpch/execute/q20.py
new file mode 100644
index 000000000..68d18a6b5
--- /dev/null
+++ b/tpch/execute/q20.py
@@ -0,0 +1,20 @@
+from queries import q20
+
+from . import IO_FUNCS
+from . import lineitem
+from . import nation
+from . import part
+from . import partsupp
+from . import supplier
+
+fn = IO_FUNCS["pandas"]
+print(q20.query(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(supplier)))
+
+fn = IO_FUNCS["pandas[pyarrow]"]
+print(q20.query(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(supplier)))
+
+fn = IO_FUNCS["polars[eager]"]
+print(q20.query(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(supplier)))
+
+fn = IO_FUNCS["polars[lazy]"]
+print(q20.query(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(supplier)).collect())
diff --git a/tpch/execute/q21.py b/tpch/execute/q21.py
new file mode 100644
index 000000000..693953870
--- /dev/null
+++ b/tpch/execute/q21.py
@@ -0,0 +1,19 @@
+from queries import q21
+
+from . import IO_FUNCS
+from . import lineitem
+from . import nation
+from . import orders
+from . import supplier
+
+fn = IO_FUNCS["pandas"]
+print(q21.query(fn(lineitem), fn(nation), fn(orders), fn(supplier)))
+
+fn = IO_FUNCS["pandas[pyarrow]"]
+print(q21.query(fn(lineitem), fn(nation), fn(orders), fn(supplier)))
+
+fn = IO_FUNCS["polars[eager]"]
+print(q21.query(fn(lineitem), fn(nation), fn(orders), fn(supplier)))
+
+fn = IO_FUNCS["polars[lazy]"]
+print(q21.query(fn(lineitem), fn(nation), fn(orders), fn(supplier)).collect())
diff --git a/tpch/execute/q3.py b/tpch/execute/q3.py
index 30194b5da..8602bb3d0 100644
--- a/tpch/execute/q3.py
+++ b/tpch/execute/q3.py
@@ -1,26 +1,9 @@
-from pathlib import Path
-
-import pandas as pd
-import polars as pl
 from queries import q3
 
-pd.options.mode.copy_on_write = True
-pd.options.future.infer_string = True
-
-
-customer = Path("data") / "customer.parquet"
-lineitem = Path("data") / "lineitem.parquet"
-orders = Path("data") / "orders.parquet"
-
-IO_FUNCS = {
-    "pandas": lambda x: pd.read_parquet(x, engine="pyarrow"),
-    "pandas[pyarrow]": lambda x: pd.read_parquet(
-        x, engine="pyarrow", dtype_backend="pyarrow"
-    ),
-    "polars[eager]": lambda x: pl.read_parquet(x),
-    "polars[lazy]": lambda x: pl.scan_parquet(x),
-}
-
+from . import IO_FUNCS
+from . import customer
+from . import lineitem
+from . import orders
 
 tool = "pandas"
 fn = IO_FUNCS[tool]
diff --git a/tpch/execute/q4.py b/tpch/execute/q4.py
index 672a43e17..3e67a9c87 100644
--- a/tpch/execute/q4.py
+++ b/tpch/execute/q4.py
@@ -1,23 +1,8 @@
-from pathlib import Path
-
-import pandas as pd
-import polars as pl
 from queries import q4
 
-pd.options.mode.copy_on_write = True
-pd.options.future.infer_string = True
-
-line_item = Path("data") / "lineitem.parquet"
-orders = Path("data") / "orders.parquet"
-
-IO_FUNCS = {
-    "pandas": lambda x: pd.read_parquet(x, engine="pyarrow"),
-    "pandas[pyarrow]": lambda x: pd.read_parquet(
-        x, engine="pyarrow", dtype_backend="pyarrow"
-    ),
-    "polars[eager]": lambda x: pl.read_parquet(x),
-    "polars[lazy]": lambda x: pl.scan_parquet(x),
-}
+from . import IO_FUNCS
+from . import line_item
+from . import orders
 
 tool = "pandas"
 fn = IO_FUNCS[tool]
diff --git a/tpch/execute/q5.py b/tpch/execute/q5.py
index b77f740d8..317b15fc7 100644
--- a/tpch/execute/q5.py
+++ b/tpch/execute/q5.py
@@ -1,27 +1,12 @@
-from pathlib import Path
-
-import pandas as pd
-import polars as pl
 from queries import q5
 
-pd.options.mode.copy_on_write = True
-pd.options.future.infer_string = True
-
-region = Path("data") / "region.parquet"
-nation = Path("data") / "nation.parquet"
-customer = Path("data") / "customer.parquet"
-line_item = Path("data") / "lineitem.parquet"
-orders = Path("data") / "orders.parquet"
-supplier = Path("data") / "supplier.parquet"
-
-IO_FUNCS = {
-    "pandas": lambda x: pd.read_parquet(x, engine="pyarrow"),
-    "pandas[pyarrow]": lambda x: pd.read_parquet(
-        x, engine="pyarrow", dtype_backend="pyarrow"
-    ),
-    "polars[eager]": lambda x: pl.read_parquet(x),
-    "polars[lazy]": lambda x: pl.scan_parquet(x),
-}
+from . import IO_FUNCS
+from . import customer
+from . import line_item
+from . import nation
+from . import orders
+from . import region
+from . import supplier
 
 tool = "pandas"
 fn = IO_FUNCS[tool]
diff --git a/tpch/execute/q6.py b/tpch/execute/q6.py
index 85b3d9968..adca0e26d 100644
--- a/tpch/execute/q6.py
+++ b/tpch/execute/q6.py
@@ -1,18 +1,7 @@
-from pathlib import Path
-
-import pandas as pd
-import polars as pl
 from queries import q6
 
-lineitem = Path("data") / "lineitem.parquet"
-IO_FUNCS = {
-    "pandas": lambda x: pd.read_parquet(x, engine="pyarrow"),
-    "pandas[pyarrow]": lambda x: pd.read_parquet(
-        x, engine="pyarrow", dtype_backend="pyarrow"
-    ),
-    "polars[eager]": lambda x: pl.read_parquet(x),
-    "polars[lazy]": lambda x: pl.scan_parquet(x),
-}
+from . import IO_FUNCS
+from . import lineitem
 
 tool = "pandas"
 fn = IO_FUNCS[tool]
diff --git a/tpch/execute/q7.py b/tpch/execute/q7.py
new file mode 100644
index 000000000..43e110a72
--- /dev/null
+++ b/tpch/execute/q7.py
@@ -0,0 +1,27 @@
+from queries import q7
+
+from . import IO_FUNCS
+from . import customer
+from . import lineitem
+from . import nation
+from . import orders
+from . import supplier
+
+tool = "pandas"
+fn = IO_FUNCS[tool]
+print(q7.query(fn(nation), fn(customer), fn(lineitem), fn(orders), fn(supplier)))
+
+
+tool = "pandas[pyarrow]"
+fn = IO_FUNCS[tool]
+print(q7.query(fn(nation), fn(customer), fn(lineitem), fn(orders), fn(supplier)))
+
+tool = "polars[eager]"
+fn = IO_FUNCS[tool]
+print(q7.query(fn(nation), fn(customer), fn(lineitem), fn(orders), fn(supplier)))
+
+tool = "polars[lazy]"
+fn = IO_FUNCS[tool]
+print(
+    q7.query(fn(nation), fn(customer), fn(lineitem), fn(orders), fn(supplier)).collect()
+)
diff --git a/tpch/execute/q9.py b/tpch/execute/q9.py
new file mode 100644
index 000000000..9ccbe35b7
--- /dev/null
+++ b/tpch/execute/q9.py
@@ -0,0 +1,35 @@
+from queries import q9
+
+from . import IO_FUNCS
+from . import lineitem
+from . import nation
+from . import orders
+from . import part
+from . import partsupp
+from . import supplier
+
+tool = "pandas"
+fn = IO_FUNCS[tool]
+print(
+    q9.query(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(orders), fn(supplier))
+)
+
+tool = "pandas[pyarrow]"
+fn = IO_FUNCS[tool]
+print(
+    q9.query(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(orders), fn(supplier))
+)
+
+tool = "polars[eager]"
+fn = IO_FUNCS[tool]
+print(
+    q9.query(fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(orders), fn(supplier))
+)
+
+tool = "polars[lazy]"
+fn = IO_FUNCS[tool]
+print(
+    q9.query(
+        fn(part), fn(partsupp), fn(nation), fn(lineitem), fn(orders), fn(supplier)
+    ).collect()
+)
diff --git a/tpch/queries/q15.py b/tpch/queries/q15.py
new file mode 100644
index 000000000..1ebae57d6
--- /dev/null
+++ b/tpch/queries/q15.py
@@ -0,0 +1,33 @@
+from datetime import datetime
+
+import narwhals as nw
+from narwhals.typing import FrameT
+
+
+@nw.narwhalify
+def query(
+    lineitem_ds: FrameT,
+    supplier_ds: FrameT,
+) -> FrameT:
+    var1 = datetime(1996, 1, 1)
+    var2 = datetime(1996, 4, 1)
+
+    revenue = (
+        lineitem_ds.filter(nw.col("l_shipdate").is_between(var1, var2, closed="left"))
+        .with_columns(
+            (nw.col("l_extendedprice") * (1 - nw.col("l_discount"))).alias(
+                "total_revenue"
+            )
+        )
+        .group_by("l_suppkey")
+        .agg(nw.sum("total_revenue"))
+        .select(nw.col("l_suppkey").alias("supplier_no"), nw.col("total_revenue"))
+    )
+
+    return (
+        supplier_ds.join(revenue, left_on="s_suppkey", right_on="supplier_no")
+        .filter(nw.col("total_revenue") == nw.col("total_revenue").max())
+        .with_columns(nw.col("total_revenue").round(2))
+        .select("s_suppkey", "s_name", "s_address", "s_phone", "total_revenue")
+        .sort("s_suppkey")
+    )
diff --git a/tpch/queries/q17.py b/tpch/queries/q17.py
new file mode 100644
index 000000000..5d35929d1
--- /dev/null
+++ b/tpch/queries/q17.py
@@ -0,0 +1,23 @@
+import narwhals as nw
+from narwhals.typing import FrameT
+
+
+@nw.narwhalify
+def query(lineitem_ds: FrameT, part_ds: FrameT) -> FrameT:
+    var1 = "Brand#23"
+    var2 = "MED BOX"
+
+    query1 = (
+        part_ds.filter(nw.col("p_brand") == var1)
+        .filter(nw.col("p_container") == var2)
+        .join(lineitem_ds, how="left", left_on="p_partkey", right_on="l_partkey")
+    )
+
+    return (
+        query1.group_by("p_partkey")
+        .agg((0.2 * nw.col("l_quantity").mean()).alias("avg_quantity"))
+        .select(nw.col("p_partkey").alias("key"), nw.col("avg_quantity"))
+        .join(query1, left_on="key", right_on="p_partkey")
+        .filter(nw.col("l_quantity") < nw.col("avg_quantity"))
+        .select((nw.col("l_extendedprice").sum() / 7.0).round(2).alias("avg_yearly"))
+    )
diff --git a/tpch/queries/q18.py b/tpch/queries/q18.py
new file mode 100644
index 000000000..d3d183176
--- /dev/null
+++ b/tpch/queries/q18.py
@@ -0,0 +1,31 @@
+import narwhals as nw
+from narwhals.typing import FrameT
+
+
+@nw.narwhalify
+def query(customer_ds: FrameT, lineitem_ds: FrameT, orders_ds: FrameT) -> FrameT:
+    var1 = 300
+
+    query1 = (
+        lineitem_ds.group_by("l_orderkey")
+        .agg(nw.col("l_quantity").sum().alias("sum_quantity"))
+        .filter(nw.col("sum_quantity") > var1)
+    )
+
+    return (
+        orders_ds.join(query1, left_on="o_orderkey", right_on="l_orderkey", how="semi")
+        .join(lineitem_ds, left_on="o_orderkey", right_on="l_orderkey")
+        .join(customer_ds, left_on="o_custkey", right_on="c_custkey")
+        .group_by("c_name", "o_custkey", "o_orderkey", "o_orderdate", "o_totalprice")
+        .agg(nw.col("l_quantity").sum().alias("col6"))
+        .select(
+            nw.col("c_name"),
+            nw.col("o_custkey").alias("c_custkey"),
+            nw.col("o_orderkey"),
+            nw.col("o_orderdate").alias("o_orderdat"),
+            nw.col("o_totalprice"),
+            nw.col("col6"),
+        )
+        .sort(by=["o_totalprice", "o_orderdat"], descending=[True, False])
+        .head(100)
+    )
diff --git a/tpch/queries/q19.py b/tpch/queries/q19.py
new file mode 100644
index 000000000..bcab36e9a
--- /dev/null
+++ b/tpch/queries/q19.py
@@ -0,0 +1,39 @@
+import narwhals as nw
+from narwhals.typing import FrameT
+
+
+@nw.narwhalify
+def query(lineitem_ds: FrameT, part_ds: FrameT) -> FrameT:
+    return (
+        part_ds.join(lineitem_ds, left_on="p_partkey", right_on="l_partkey")
+        .filter(nw.col("l_shipmode").is_in(["AIR", "AIR REG"]))
+        .filter(nw.col("l_shipinstruct") == "DELIVER IN PERSON")
+        .filter(
+            (
+                (nw.col("p_brand") == "Brand#12")
+                & nw.col("p_container").is_in(["SM CASE", "SM BOX", "SM PACK", "SM PKG"])
+                & (nw.col("l_quantity").is_between(1, 11))
+                & (nw.col("p_size").is_between(1, 5))
+            )
+            | (
+                (nw.col("p_brand") == "Brand#23")
+                & nw.col("p_container").is_in(
+                    ["MED BAG", "MED BOX", "MED PKG", "MED PACK"]
+                )
+                & (nw.col("l_quantity").is_between(10, 20))
+                & (nw.col("p_size").is_between(1, 10))
+            )
+            | (
+                (nw.col("p_brand") == "Brand#34")
+                & nw.col("p_container").is_in(["LG CASE", "LG BOX", "LG PACK", "LG PKG"])
+                & (nw.col("l_quantity").is_between(20, 30))
+                & (nw.col("p_size").is_between(1, 15))
+            )
+        )
+        .select(
+            (nw.col("l_extendedprice") * (1 - nw.col("l_discount")))
+            .sum()
+            .round(2)
+            .alias("revenue")
+        )
+    )
diff --git a/tpch/queries/q20.py b/tpch/queries/q20.py
new file mode 100644
index 000000000..d9014f7b8
--- /dev/null
+++ b/tpch/queries/q20.py
@@ -0,0 +1,43 @@
+from datetime import datetime
+
+import narwhals as nw
+from narwhals.typing import FrameT
+
+
+@nw.narwhalify
+def query(
+    part_ds: FrameT,
+    partsupp_ds: FrameT,
+    nation_ds: FrameT,
+    lineitem_ds: FrameT,
+    supplier_ds: FrameT,
+) -> FrameT:
+    var1 = datetime(1994, 1, 1)
+    var2 = datetime(1995, 1, 1)
+    var3 = "CANADA"
+    var4 = "forest"
+
+    query1 = (
+        lineitem_ds.filter(nw.col("l_shipdate").is_between(var1, var2, closed="left"))
+        .group_by("l_partkey", "l_suppkey")
+        .agg((nw.col("l_quantity").sum()).alias("sum_quantity"))
+        .with_columns(sum_quantity=nw.col("sum_quantity") * 0.5)
+    )
+    query2 = nation_ds.filter(nw.col("n_name") == var3)
+    query3 = supplier_ds.join(query2, left_on="s_nationkey", right_on="n_nationkey")
+
+    return (
+        part_ds.filter(nw.col("p_name").str.starts_with(var4))
+        .select(nw.col("p_partkey").unique())
+        .join(partsupp_ds, left_on="p_partkey", right_on="ps_partkey")
+        .join(
+            query1,
+            left_on=["ps_suppkey", "p_partkey"],
+            right_on=["l_suppkey", "l_partkey"],
+        )
+        .filter(nw.col("ps_availqty") > nw.col("sum_quantity"))
+        .select(nw.col("ps_suppkey").unique())
+        .join(query3, left_on="ps_suppkey", right_on="s_suppkey")
+        .select("s_name", "s_address")
+        .sort("s_name")
+    )
diff --git a/tpch/queries/q21.py b/tpch/queries/q21.py
new file mode 100644
index 000000000..d10ff394f
--- /dev/null
+++ b/tpch/queries/q21.py
@@ -0,0 +1,43 @@
+import narwhals as nw
+from narwhals.typing import FrameT
+
+
+@nw.narwhalify
+def query(
+    lineitem: FrameT,
+    nation: FrameT,
+    orders: FrameT,
+    supplier: FrameT,
+) -> FrameT:
+    var1 = "SAUDI ARABIA"
+
+    q1 = (
+        lineitem.group_by("l_orderkey")
+        .agg(nw.len().alias("n_supp_by_order"))
+        .filter(nw.col("n_supp_by_order") > 1)
+        .join(
+            lineitem.filter(nw.col("l_receiptdate") > nw.col("l_commitdate")),
+            left_on="l_orderkey",
+            right_on="l_orderkey",
+        )
+    )
+
+    return (
+        q1.group_by("l_orderkey")
+        .agg(nw.len().alias("n_supp_by_order"))
+        .join(
+            q1,
+            left_on="l_orderkey",
+            right_on="l_orderkey",
+        )
+        .join(supplier, left_on="l_suppkey", right_on="s_suppkey")
+        .join(nation, left_on="s_nationkey", right_on="n_nationkey")
+        .join(orders, left_on="l_orderkey", right_on="o_orderkey")
+        .filter(nw.col("n_supp_by_order") == 1)
+        .filter(nw.col("n_name") == var1)
+        .filter(nw.col("o_orderstatus") == "F")
+        .group_by("s_name")
+        .agg(nw.len().alias("numwait"))
+        .sort(by=["numwait", "s_name"], descending=[True, False])
+        .head(100)
+    )
diff --git a/tpch/queries/q6.py b/tpch/queries/q6.py
index 6a9b5c1d2..67f0ac785 100644
--- a/tpch/queries/q6.py
+++ b/tpch/queries/q6.py
@@ -1,12 +1,8 @@
 from datetime import datetime
 
-import pandas as pd
-
 import narwhals as nw
 from narwhals.typing import FrameT
 
-pd.options.mode.copy_on_write = True
-
 
 @nw.narwhalify
 def query(line_item_ds: FrameT) -> FrameT:
diff --git a/tpch/queries/q7.py b/tpch/queries/q7.py
new file mode 100644
index 000000000..ec0946ac3
--- /dev/null
+++ b/tpch/queries/q7.py
@@ -0,0 +1,51 @@
+from datetime import datetime
+
+import narwhals as nw
+from narwhals.typing import FrameT
+
+
+@nw.narwhalify
+def query(
+    nation_ds: FrameT,
+    customer_ds: FrameT,
+    line_item_ds: FrameT,
+    orders_ds: FrameT,
+    supplier_ds: FrameT,
+) -> FrameT:
+    n1 = nation_ds.filter(nw.col("n_name") == "FRANCE")
+    n2 = nation_ds.filter(nw.col("n_name") == "GERMANY")
+
+    var_1 = datetime(1995, 1, 1)
+    var_2 = datetime(1996, 12, 31)
+
+    df1 = (
+        customer_ds.join(n1, left_on="c_nationkey", right_on="n_nationkey")
+        .join(orders_ds, left_on="c_custkey", right_on="o_custkey")
+        .rename({"n_name": "cust_nation"})
+        .join(line_item_ds, left_on="o_orderkey", right_on="l_orderkey")
+        .join(supplier_ds, left_on="l_suppkey", right_on="s_suppkey")
+        .join(n2, left_on="s_nationkey", right_on="n_nationkey")
+        .rename({"n_name": "supp_nation"})
+    )
+
+    df2 = (
+        customer_ds.join(n2, left_on="c_nationkey", right_on="n_nationkey")
+        .join(orders_ds, left_on="c_custkey", right_on="o_custkey")
+        .rename({"n_name": "cust_nation"})
+        .join(line_item_ds, left_on="o_orderkey", right_on="l_orderkey")
+        .join(supplier_ds, left_on="l_suppkey", right_on="s_suppkey")
+        .join(n1, left_on="s_nationkey", right_on="n_nationkey")
+        .rename({"n_name": "supp_nation"})
+    )
+
+    return (
+        nw.concat([df1, df2])
+        .filter(nw.col("l_shipdate").is_between(var_1, var_2))
+        .with_columns(
+            (nw.col("l_extendedprice") * (1 - nw.col("l_discount"))).alias("volume")
+        )
+        .with_columns(nw.col("l_shipdate").dt.year().alias("l_year"))
+        .group_by("supp_nation", "cust_nation", "l_year")
+        .agg(nw.sum("volume").alias("revenue"))
+        .sort(by=["supp_nation", "cust_nation", "l_year"])
+    )
diff --git a/tpch/queries/q9.py b/tpch/queries/q9.py
new file mode 100644
index 000000000..09dff4787
--- /dev/null
+++ b/tpch/queries/q9.py
@@ -0,0 +1,36 @@
+import narwhals as nw
+from narwhals.typing import FrameT
+
+
+@nw.narwhalify
+def query(
+    part_ds: FrameT,
+    partsupp_ds: FrameT,
+    nation_ds: FrameT,
+    lineitem_ds: FrameT,
+    orders_ds: FrameT,
+    supplier_ds: FrameT,
+) -> FrameT:
+    return (
+        part_ds.join(partsupp_ds, left_on="p_partkey", right_on="ps_partkey")
+        .join(supplier_ds, left_on="ps_suppkey", right_on="s_suppkey")
+        .join(
+            lineitem_ds,
+            left_on=["p_partkey", "ps_suppkey"],
+            right_on=["l_partkey", "l_suppkey"],
+        )
+        .join(orders_ds, left_on="l_orderkey", right_on="o_orderkey")
+        .join(nation_ds, left_on="s_nationkey", right_on="n_nationkey")
+        .filter(nw.col("p_name").str.contains("green"))
+        .select(
+            nw.col("n_name").alias("nation"),
+            nw.col("o_orderdate").dt.year().alias("o_year"),
+            (
+                nw.col("l_extendedprice") * (1 - nw.col("l_discount"))
+                - nw.col("ps_supplycost") * nw.col("l_quantity")
+            ).alias("amount"),
+        )
+        .group_by("nation", "o_year")
+        .agg(nw.sum("amount").alias("sum_profit"))
+        .sort(by=["nation", "o_year"], descending=[False, True])
+    )

From cb82d26b7d9d6a1aef882aa6fcbda79a612f1223 Mon Sep 17 00:00:00 2001
From: Francesco Bruzzesi <42817048+FBruzzesi@users.noreply.github.com>
Date: Wed, 4 Sep 2024 17:51:36 +0200
Subject: [PATCH 5/5] feat: dask lit with dtype (#909)

---
 narwhals/_dask/namespace.py | 15 ++++++++++++---
 tests/frame/lit_test.py     |  6 +-----
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/narwhals/_dask/namespace.py b/narwhals/_dask/namespace.py
index 89ca372ec..1668ee323 100644
--- a/narwhals/_dask/namespace.py
+++ b/narwhals/_dask/namespace.py
@@ -12,6 +12,7 @@
 from narwhals._dask.dataframe import DaskLazyFrame
 from narwhals._dask.expr import DaskExpr
 from narwhals._dask.selectors import DaskSelectorNamespace
+from narwhals._dask.utils import reverse_translate_dtype
 from narwhals._dask.utils import validate_comparand
 from narwhals._expression_parsing import parse_into_exprs
 
@@ -19,6 +20,7 @@
     import dask_expr
 
     from narwhals._dask.typing import IntoDaskExpr
+    from narwhals.dtypes import DType
 
 
 class DaskNamespace:
@@ -70,10 +72,17 @@ def col(self, *column_names: str) -> DaskExpr:
         )
 
     def lit(self, value: Any, dtype: dtypes.DType | None) -> DaskExpr:
-        # TODO @FBruzzesi: cast to dtype once `narwhals_to_native_dtype` is implemented.
-        # It should be enough to add `.astype(narwhals_to_native_dtype(dtype))`
+        def convert_if_dtype(
+            series: dask_expr.Series, dtype: DType | type[DType]
+        ) -> dask_expr.Series:
+            return series.astype(reverse_translate_dtype(dtype)) if dtype else series
+
         return DaskExpr(
-            lambda df: [df._native_frame.assign(lit=value).loc[:, "lit"]],
+            lambda df: [
+                df._native_frame.assign(lit=value)
+                .loc[:, "lit"]
+                .pipe(convert_if_dtype, dtype)
+            ],
             depth=0,
             function_name="lit",
             root_names=None,
diff --git a/tests/frame/lit_test.py b/tests/frame/lit_test.py
index 328e4d8e0..e5756e035 100644
--- a/tests/frame/lit_test.py
+++ b/tests/frame/lit_test.py
@@ -17,11 +17,7 @@
     ("dtype", "expected_lit"),
     [(None, [2, 2, 2]), (nw.String, ["2", "2", "2"]), (nw.Float32, [2.0, 2.0, 2.0])],
 )
-def test_lit(
-    constructor: Any, dtype: DType | None, expected_lit: list[Any], request: Any
-) -> None:
-    if "dask" in str(constructor) and dtype == nw.String:
-        request.applymarker(pytest.mark.xfail)
+def test_lit(constructor: Any, dtype: DType | None, expected_lit: list[Any]) -> None:
     data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
     df_raw = constructor(data)
     df = nw.from_native(df_raw).lazy()