Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into add-joinasof
Browse files Browse the repository at this point in the history
  • Loading branch information
raisadz committed Sep 5, 2024
2 parents 01a4d14 + cb82d26 commit 627ff76
Show file tree
Hide file tree
Showing 40 changed files with 702 additions and 211 deletions.
30 changes: 30 additions & 0 deletions .github/workflows/check_tpch_queries.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Workflow: run the TPC-H query test-suite.
# Triggered only when a maintainer applies the 'full-test' label to a PR,
# so the (comparatively slow) data generation + query validation does not
# run on every push.
name: Tests for TPCH Queries

on:
  pull_request:
    types: [labeled]

jobs:
  validate-queries:
    # Gate on the label name so other labels don't start this job.
    if: ${{ github.event.label.name == 'full-test' }}
    strategy:
      matrix:
        python-version: ["3.12"]
        os: [ubuntu-latest]

    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install uv
        run: curl -LsSf https://astral.sh/uv/install.sh | sh
      - name: install-reqs
        # --system: install into the runner's Python rather than a venv.
        run: uv pip install --upgrade -r requirements-dev.txt --system
      - name: local-install
        # Editable install of the package under test.
        run: uv pip install -e . --system
      - name: generate-data
        # Produce the TPC-H input tables the queries read.
        run: cd tpch && python generate_data.py
      - name: tpch-tests
        run: python -m unittest discover -s 'tpch/tests'
2 changes: 1 addition & 1 deletion .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ jobs:
if: runner.os == 'Windows'
run: powershell -c "irm https://astral.sh/uv/install.ps1 | iex"
- name: install-reqs
run: uv pip install --upgrade tox virtualenv setuptools -r requirements-dev.txt --system
run: uv pip install --upgrade tox virtualenv setuptools -r requirements-dev.txt ibis-framework[duckdb] --system
- name: show-deps
run: uv pip freeze
- name: Run pytest
Expand Down
2 changes: 1 addition & 1 deletion docs/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ Then, if you start the Python REPL and see the following:
```python
>>> import narwhals
>>> narwhals.__version__
'1.6.1'
'1.6.2'
```
then installation worked correctly!
2 changes: 1 addition & 1 deletion narwhals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
from narwhals.utils import maybe_get_index
from narwhals.utils import maybe_set_index

__version__ = "1.6.1"
__version__ = "1.6.2"

__all__ = [
"dependencies",
Expand Down
15 changes: 12 additions & 3 deletions narwhals/_dask/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@
from narwhals._dask.dataframe import DaskLazyFrame
from narwhals._dask.expr import DaskExpr
from narwhals._dask.selectors import DaskSelectorNamespace
from narwhals._dask.utils import reverse_translate_dtype
from narwhals._dask.utils import validate_comparand
from narwhals._expression_parsing import parse_into_exprs

if TYPE_CHECKING:
import dask_expr

from narwhals._dask.typing import IntoDaskExpr
from narwhals.dtypes import DType


class DaskNamespace:
Expand Down Expand Up @@ -70,10 +72,17 @@ def col(self, *column_names: str) -> DaskExpr:
)

def lit(self, value: Any, dtype: dtypes.DType | None) -> DaskExpr:
# TODO @FBruzzesi: cast to dtype once `narwhals_to_native_dtype` is implemented.
# It should be enough to add `.astype(narwhals_to_native_dtype(dtype))`
def convert_if_dtype(
series: dask_expr.Series, dtype: DType | type[DType]
) -> dask_expr.Series:
return series.astype(reverse_translate_dtype(dtype)) if dtype else series

return DaskExpr(
lambda df: [df._native_frame.assign(lit=value).loc[:, "lit"]],
lambda df: [
df._native_frame.assign(lit=value)
.loc[:, "lit"]
.pipe(convert_if_dtype, dtype)
],
depth=0,
function_name="lit",
root_names=None,
Expand Down
32 changes: 16 additions & 16 deletions narwhals/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,79 +83,79 @@ def get_ibis() -> Any:

def is_pandas_dataframe(df: Any) -> TypeGuard[pd.DataFrame]:
"""Check whether `df` is a pandas DataFrame without importing pandas."""
return bool((pd := get_pandas()) is not None and isinstance(df, pd.DataFrame))
return (pd := get_pandas()) is not None and isinstance(df, pd.DataFrame)


def is_pandas_series(ser: Any) -> TypeGuard[pd.Series[Any]]:
"""Check whether `ser` is a pandas Series without importing pandas."""
return bool((pd := get_pandas()) is not None and isinstance(ser, pd.Series))
return (pd := get_pandas()) is not None and isinstance(ser, pd.Series)


def is_modin_dataframe(df: Any) -> TypeGuard[mpd.DataFrame]:
"""Check whether `df` is a modin DataFrame without importing modin."""
return bool((pd := get_modin()) is not None and isinstance(df, pd.DataFrame))
return (pd := get_modin()) is not None and isinstance(df, pd.DataFrame)


def is_modin_series(ser: Any) -> TypeGuard[mpd.Series]:
"""Check whether `ser` is a modin Series without importing modin."""
return bool((pd := get_modin()) is not None and isinstance(ser, pd.Series))
return (pd := get_modin()) is not None and isinstance(ser, pd.Series)


def is_cudf_dataframe(df: Any) -> TypeGuard[cudf.DataFrame]:
"""Check whether `df` is a cudf DataFrame without importing cudf."""
return bool((pd := get_cudf()) is not None and isinstance(df, pd.DataFrame))
return (pd := get_cudf()) is not None and isinstance(df, pd.DataFrame)


def is_cudf_series(ser: Any) -> TypeGuard[pd.Series[Any]]:
"""Check whether `ser` is a cudf Series without importing cudf."""
return bool((pd := get_cudf()) is not None and isinstance(ser, pd.Series))
return (pd := get_cudf()) is not None and isinstance(ser, pd.Series)


def is_dask_dataframe(df: Any) -> TypeGuard[dd.DataFrame]:
"""Check whether `df` is a Dask DataFrame without importing Dask."""
return bool((dd := get_dask_dataframe()) is not None and isinstance(df, dd.DataFrame))
return (dd := get_dask_dataframe()) is not None and isinstance(df, dd.DataFrame)


def is_duckdb_relation(df: Any) -> TypeGuard[duckdb.DuckDBPyRelation]:
"""Check whether `df` is a DuckDB Relation without importing DuckDB."""
return bool(
(duckdb := get_duckdb()) is not None and isinstance(df, duckdb.DuckDBPyRelation)
return (duckdb := get_duckdb()) is not None and isinstance(
df, duckdb.DuckDBPyRelation
)


def is_ibis_table(df: Any) -> TypeGuard[ibis.Table]:
"""Check whether `df` is a Ibis Table without importing Ibis."""
return bool((ibis := get_ibis()) is not None and isinstance(df, ibis.Table))
return (ibis := get_ibis()) is not None and isinstance(df, ibis.expr.types.Table)


def is_polars_dataframe(df: Any) -> TypeGuard[pl.DataFrame]:
"""Check whether `df` is a Polars DataFrame without importing Polars."""
return bool((pl := get_polars()) is not None and isinstance(df, pl.DataFrame))
return (pl := get_polars()) is not None and isinstance(df, pl.DataFrame)


def is_polars_lazyframe(df: Any) -> TypeGuard[pl.LazyFrame]:
"""Check whether `df` is a Polars LazyFrame without importing Polars."""
return bool((pl := get_polars()) is not None and isinstance(df, pl.LazyFrame))
return (pl := get_polars()) is not None and isinstance(df, pl.LazyFrame)


def is_polars_series(ser: Any) -> TypeGuard[pl.Series]:
"""Check whether `ser` is a Polars Series without importing Polars."""
return bool((pl := get_polars()) is not None and isinstance(ser, pl.Series))
return (pl := get_polars()) is not None and isinstance(ser, pl.Series)


def is_pyarrow_chunked_array(ser: Any) -> TypeGuard[pa.ChunkedArray]:
"""Check whether `ser` is a PyArrow ChunkedArray without importing PyArrow."""
return bool((pa := get_pyarrow()) is not None and isinstance(ser, pa.ChunkedArray))
return (pa := get_pyarrow()) is not None and isinstance(ser, pa.ChunkedArray)


def is_pyarrow_table(df: Any) -> TypeGuard[pa.Table]:
"""Check whether `df` is a PyArrow Table without importing PyArrow."""
return bool((pa := get_pyarrow()) is not None and isinstance(df, pa.Table))
return (pa := get_pyarrow()) is not None and isinstance(df, pa.Table)


def is_numpy_array(arr: Any) -> TypeGuard[np.ndarray]:
"""Check whether `arr` is a NumPy Array without importing NumPy."""
return bool((np := get_numpy()) is not None and isinstance(arr, np.ndarray))
return (np := get_numpy()) is not None and isinstance(arr, np.ndarray)


def is_pandas_like_dataframe(df: Any) -> bool:
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "narwhals"
version = "1.6.1"
version = "1.6.2"
authors = [
{ name="Marco Gorelli", email="[email protected]" },
]
Expand Down Expand Up @@ -76,6 +76,7 @@ lint.ignore = [

[tool.ruff.lint.per-file-ignores]
"tests/*" = ["S101"]
"tpch/tests/*" = ["S101"]
"utils/*" = ["S311", "PTH123"]
"tpch/execute/*" = ["T201"]

Expand Down
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
tqdm
covdefaults
duckdb
pandas
Expand Down
9 changes: 9 additions & 0 deletions tests/frame/arrow_c_stream_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
@pytest.mark.skipif(
parse_version(pl.__version__) < (1, 3), reason="too old for pycapsule in Polars"
)
@pytest.mark.skipif(
parse_version(pa.__version__) < (16, 0, 0), reason="too old for pycapsule in PyArrow"
)
def test_arrow_c_stream_test() -> None:
df = nw.from_native(pl.Series([1, 2, 3]).to_frame("a"), eager_only=True)
result = pa.table(df)
Expand All @@ -20,6 +23,9 @@ def test_arrow_c_stream_test() -> None:
@pytest.mark.skipif(
parse_version(pl.__version__) < (1, 3), reason="too old for pycapsule in Polars"
)
@pytest.mark.skipif(
parse_version(pa.__version__) < (16, 0, 0), reason="too old for pycapsule in PyArrow"
)
def test_arrow_c_stream_test_invalid(monkeypatch: pytest.MonkeyPatch) -> None:
# "poison" the dunder method to make sure it actually got called above
monkeypatch.setattr(
Expand All @@ -33,6 +39,9 @@ def test_arrow_c_stream_test_invalid(monkeypatch: pytest.MonkeyPatch) -> None:
@pytest.mark.skipif(
parse_version(pl.__version__) < (1, 3), reason="too old for pycapsule in Polars"
)
@pytest.mark.skipif(
parse_version(pa.__version__) < (16, 0, 0), reason="too old for pycapsule in PyArrow"
)
def test_arrow_c_stream_test_fallback(monkeypatch: pytest.MonkeyPatch) -> None:
# Check that fallback to PyArrow works
monkeypatch.delattr("polars.DataFrame.__arrow_c_stream__")
Expand Down
65 changes: 46 additions & 19 deletions tests/frame/interchange_schema_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import pytest

import narwhals.stable.v1 as nw
from narwhals.utils import parse_version


def test_interchange_schema() -> None:
Expand Down Expand Up @@ -67,7 +68,10 @@ def test_interchange_schema() -> None:
assert df["a"].dtype == nw.Int64


def test_interchange_schema_ibis() -> None: # pragma: no cover
@pytest.mark.filterwarnings("ignore:.*locale specific date formats")
def test_interchange_schema_ibis(
tmpdir: pytest.TempdirFactory,
) -> None: # pragma: no cover
ibis = pytest.importorskip("ibis")
df_pl = pl.DataFrame(
{
Expand Down Expand Up @@ -105,26 +109,49 @@ def test_interchange_schema_ibis() -> None: # pragma: no cover
"o": pl.Boolean,
},
)
tbl = ibis.memtable(df_pl)
filepath = str(tmpdir / "file.parquet") # type: ignore[operator]
df_pl.write_parquet(filepath)
tbl = ibis.read_parquet(filepath)
df = nw.from_native(tbl, eager_or_interchange_only=True)
result = df.schema
expected = {
"a": nw.Int64,
"b": nw.Int32,
"c": nw.Int16,
"d": nw.Int8,
"e": nw.UInt64,
"f": nw.UInt32,
"g": nw.UInt16,
"h": nw.UInt8,
"i": nw.Float64,
"j": nw.Float32,
"k": nw.String,
"l": nw.String,
"m": nw.Date,
"n": nw.Datetime,
"o": nw.Boolean,
}
if parse_version(ibis.__version__) > (6, 0, 0):
expected = {
"a": nw.Int64,
"b": nw.Int32,
"c": nw.Int16,
"d": nw.Int8,
"e": nw.UInt64,
"f": nw.UInt32,
"g": nw.UInt16,
"h": nw.UInt8,
"i": nw.Float64,
"j": nw.Float32,
"k": nw.String,
"l": nw.String,
"m": nw.Date,
"n": nw.Datetime,
"o": nw.Boolean,
}
else:
# Old versions of Ibis would read the file in
# with different data types
expected = {
"a": nw.Int64,
"b": nw.Int32,
"c": nw.Int16,
"d": nw.Int32,
"e": nw.Int32,
"f": nw.Int32,
"g": nw.Int32,
"h": nw.Int32,
"i": nw.Float64,
"j": nw.Float64,
"k": nw.String,
"l": nw.String,
"m": nw.Date,
"n": nw.Datetime,
"o": nw.Boolean,
}
assert result == expected
assert df["a"].dtype == nw.Int64

Expand Down
6 changes: 1 addition & 5 deletions tests/frame/lit_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,7 @@
("dtype", "expected_lit"),
[(None, [2, 2, 2]), (nw.String, ["2", "2", "2"]), (nw.Float32, [2.0, 2.0, 2.0])],
)
def test_lit(
constructor: Any, dtype: DType | None, expected_lit: list[Any], request: Any
) -> None:
if "dask" in str(constructor) and dtype == nw.String:
request.applymarker(pytest.mark.xfail)
def test_lit(constructor: Any, dtype: DType | None, expected_lit: list[Any]) -> None:
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8, 9]}
df_raw = constructor(data)
df = nw.from_native(df_raw).lazy()
Expand Down
9 changes: 9 additions & 0 deletions tests/series_only/arrow_c_stream_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
@pytest.mark.skipif(
parse_version(pl.__version__) < (1, 3), reason="too old for pycapsule in Polars"
)
@pytest.mark.skipif(
parse_version(pa.__version__) < (16, 0, 0), reason="too old for pycapsule in PyArrow"
)
def test_arrow_c_stream_test() -> None:
s = nw.from_native(pl.Series([1, 2, 3]), series_only=True)
result = pa.chunked_array(s)
Expand All @@ -20,6 +23,9 @@ def test_arrow_c_stream_test() -> None:
@pytest.mark.skipif(
parse_version(pl.__version__) < (1, 3), reason="too old for pycapsule in Polars"
)
@pytest.mark.skipif(
parse_version(pa.__version__) < (16, 0, 0), reason="too old for pycapsule in PyArrow"
)
def test_arrow_c_stream_test_invalid(monkeypatch: pytest.MonkeyPatch) -> None:
# "poison" the dunder method to make sure it actually got called above
monkeypatch.setattr("narwhals.series.Series.__arrow_c_stream__", lambda *_: 1 / 0)
Expand All @@ -31,6 +37,9 @@ def test_arrow_c_stream_test_invalid(monkeypatch: pytest.MonkeyPatch) -> None:
@pytest.mark.skipif(
parse_version(pl.__version__) < (1, 3), reason="too old for pycapsule in Polars"
)
@pytest.mark.skipif(
parse_version(pa.__version__) < (16, 0, 0), reason="too old for pycapsule in PyArrow"
)
def test_arrow_c_stream_test_fallback(monkeypatch: pytest.MonkeyPatch) -> None:
# Check that fallback to PyArrow works
monkeypatch.delattr("polars.Series.__arrow_c_stream__")
Expand Down
Loading

0 comments on commit 627ff76

Please sign in to comment.