Commit 75372a6: resolve merge conflict

lucas-nelson-uiuc committed Jan 9, 2025
2 parents 6e59b1b + 20eb53b

Showing 163 changed files with 5,775 additions and 2,149 deletions.
.github/workflows/check_tpch_queries.yml (2 changes: 1 addition & 1 deletion)

@@ -25,7 +25,7 @@ jobs:
cache-suffix: ${{ matrix.python-version }}
cache-dependency-glob: "pyproject.toml"
- name: local-install
-run: uv pip install -e ".[dev, core, dask]" --system
+run: uv pip install -U --pre -e ".[dev, core, dask]" --system
- name: generate-data
run: cd tpch && python generate_data.py
- name: tpch-tests
.github/workflows/downstream_tests.yml (2 changes: 1 addition & 1 deletion)

@@ -220,7 +220,7 @@ jobs:
run: |
cd tea-tasting
pdm remove narwhals
-pdm add ./..
+pdm add ./..[dev]
- name: show-deps
run: |
cd tea-tasting
.github/workflows/extremes.yml (30 changes: 16 additions & 14 deletions)

@@ -61,7 +61,7 @@ jobs:
cache-suffix: ${{ matrix.python-version }}
cache-dependency-glob: "pyproject.toml"
- name: install-pretty-old-versions
-run: uv pip install pipdeptree tox virtualenv setuptools pandas==1.1.5 polars==0.20.3 numpy==1.17.5 pyarrow==11.0.0 "pyarrow-stubs<17" pyspark==3.3.0 scipy==1.5.0 scikit-learn==1.1.0 tzdata --system
+run: uv pip install pipdeptree tox virtualenv setuptools pandas==1.1.5 polars==0.20.3 numpy==1.17.5 pyarrow==11.0.0 "pyarrow-stubs<17" pyspark==3.5.0 scipy==1.5.0 scikit-learn==1.1.0 tzdata --system
- name: install-reqs
run: uv pip install -e ".[dev]" --system
- name: show-deps
@@ -75,7 +75,7 @@ jobs:
echo "$DEPS" | grep 'polars==0.20.3'
echo "$DEPS" | grep 'numpy==1.17.5'
echo "$DEPS" | grep 'pyarrow==11.0.0'
echo "$DEPS" | grep 'pyspark==3.3.0'
echo "$DEPS" | grep 'pyspark==3.5.0'
echo "$DEPS" | grep 'scipy==1.5.0'
echo "$DEPS" | grep 'scikit-learn==1.1.0'
- name: Run pytest
@@ -84,7 +84,7 @@
not_so_old_versions:
strategy:
matrix:
python-version: ["3.9"]
python-version: ["3.10"]
os: [ubuntu-latest]
runs-on: ${{ matrix.os }}
steps:
@@ -99,7 +99,7 @@
cache-suffix: ${{ matrix.python-version }}
cache-dependency-glob: "pyproject.toml"
- name: install-not-so-old-versions
-run: uv pip install tox virtualenv setuptools pandas==2.0.3 polars==0.20.8 numpy==1.24.4 pyarrow==14.0.0 "pyarrow-stubs<17" pyspark==3.4.0 scipy==1.8.0 scikit-learn==1.3.0 dask[dataframe]==2024.7 tzdata --system
+run: uv pip install tox virtualenv setuptools pandas==2.0.3 polars==0.20.8 numpy==1.24.4 pyarrow==15.0.0 "pyarrow-stubs<17" pyspark==3.5.0 scipy==1.8.0 scikit-learn==1.3.0 dask[dataframe]==2024.10 tzdata --system
- name: install-reqs
run: uv pip install -e ".[dev]" --system
- name: show-deps
@@ -110,11 +110,11 @@
echo "$DEPS" | grep 'pandas==2.0.3'
echo "$DEPS" | grep 'polars==0.20.8'
echo "$DEPS" | grep 'numpy==1.24.4'
echo "$DEPS" | grep 'pyarrow==14.0.0'
echo "$DEPS" | grep 'pyspark==3.4.0'
echo "$DEPS" | grep 'pyarrow==15.0.0'
echo "$DEPS" | grep 'pyspark==3.5.0'
echo "$DEPS" | grep 'scipy==1.8.0'
echo "$DEPS" | grep 'scikit-learn==1.3.0'
echo "$DEPS" | grep 'dask==2024.7'
echo "$DEPS" | grep 'dask==2024.10'
- name: Run pytest
run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow --constructors=pandas,pyarrow,polars[eager],polars[lazy],dask

@@ -158,27 +158,29 @@ jobs:
run: |
uv pip uninstall pyarrow --system
uv pip install --extra-index-url https://pypi.fury.io/arrow-nightlies/ --pre pyarrow --system
-- name: show-deps
-run: uv pip freeze
- name: install numpy nightly
run: |
uv pip uninstall numpy --system
uv pip install --pre --extra-index-url https://pypi.anaconda.org/scientific-python-nightly-wheels/simple numpy --system
- name: install dask
run: |
uv pip uninstall dask dask-expr --system
-python -m pip install git+https://github.com/dask/distributed git+https://github.com/dask/dask git+https://github.com/dask/dask-expr
+python -m pip install git+https://github.com/dask/distributed git+https://github.com/dask/dask
+- name: install duckdb
+run: |
+python -m pip install -U --pre duckdb
+- name: show-deps
+run: uv pip freeze
- name: Assert nightlies dependencies
run: |
DEPS=$(uv pip freeze)
echo "$DEPS" | grep 'polars'
echo "$DEPS" | grep 'polars.*@'
echo "$DEPS" | grep 'pandas.*dev'
echo "$DEPS" | grep 'pyarrow.*dev'
echo "$DEPS" | grep 'numpy'
echo "$DEPS" | grep 'dask'
echo "$DEPS" | grep 'numpy.*dev'
echo "$DEPS" | grep 'dask.*@'
echo "$DEPS" | grep 'duckdb.*dev'
- name: Run pytest
run: |
pytest tests --cov=narwhals --cov=tests --cov-fail-under=50 --runslow \
-  --constructors=pandas,pandas[nullable],pandas[pyarrow],pyarrow,polars[eager],polars[lazy],dask
+  --constructors=pandas,pandas[nullable],pandas[pyarrow],pyarrow,polars[eager],polars[lazy],dask,duckdb
.github/workflows/pytest.yml (8 changes: 6 additions & 2 deletions)

@@ -50,6 +50,10 @@ jobs:
cache-dependency-glob: "pyproject.toml"
- name: install-reqs
run: uv pip install -e ".[dev, core, extra, dask, modin]" --system
+- name: install pyspark
+run: uv pip install -e ".[pyspark]" --system
+# PySpark is not yet available on Python3.12+
+if: matrix.python-version != '3.12'
- name: show-deps
run: uv pip freeze
- name: Run pytest
@@ -59,7 +63,7 @@
pytest-full-coverage:
strategy:
matrix:
python-version: ["3.9", "3.11", "3.13"]
python-version: ["3.11", "3.13"]
os: [ubuntu-latest]
runs-on: ${{ matrix.os }}
steps:
@@ -78,7 +82,7 @@
- name: install pyspark
run: uv pip install -e ".[pyspark]" --system
# PySpark is not yet available on Python3.12+
-if: matrix.python-version == '3.9' || matrix.python-version == '3.11'
+if: matrix.python-version != '3.13'
- name: install ibis
run: uv pip install -e ".[ibis]" --system
# Ibis puts upper bounds on dependencies, and requires Python3.10+,
.pre-commit-config.yaml (4 changes: 2 additions & 2 deletions)

@@ -3,7 +3,7 @@ ci:
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
-rev: 'v0.8.1'
+rev: 'v0.8.6'
hooks:
# Run the formatter.
- id: ruff-format
@@ -14,7 +14,7 @@ repos:
alias: check-docstrings
entry: python utils/check_docstrings.py
- repo: https://github.com/pre-commit/mirrors-mypy
-rev: 'v1.13.0'
+rev: 'v1.14.1'
hooks:
- id: mypy
additional_dependencies: ['polars==1.4.1', 'pytest==8.3.2']
CONTRIBUTING.md (4 changes: 4 additions & 0 deletions)

@@ -78,6 +78,10 @@ where `YOUR-GITHUB-USERNAME` will be your GitHub user name.

Here's how you can set up your local development environment to contribute.

+#### Prerequisites for PySpark tests
+
+If you want to run PySpark-related tests, you'll need to have Java installed. Refer to the [Spark documentation](https://spark.apache.org/docs/latest/#downloading) for more information.

#### Option 1: Use UV (recommended)

1. Make sure you have Python3.12 installed, create a virtual environment,
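
The Java prerequisite added above can be verified before running the PySpark test suite. Here is a minimal, hypothetical pre-flight check (not part of the repository; the error message is illustrative):

```python
# Hypothetical pre-flight check for the Java prerequisite described above.
# shutil.which simply looks for a `java` executable on the PATH.
import shutil

if shutil.which("java") is None:
    raise RuntimeError(
        "Java not found on PATH - install a JRE/JDK before running PySpark-related tests"
    )
```
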
README.md (3 changes: 1 addition & 2 deletions)

@@ -14,8 +14,7 @@
Extremely lightweight and extensible compatibility layer between dataframe libraries!

- **Full API support**: cuDF, Modin, pandas, Polars, PyArrow
-- **Lazy-only support**: Dask
-- **Interchange-level support**: DuckDB, Ibis, Vaex, anything which implements the DataFrame Interchange Protocol
+- **Lazy-only support**: Dask. Work in progress: DuckDB, Ibis, PySpark.

Seamlessly support all, without depending on any!

docs/api-reference/expr.md (1 change: 1 addition & 0 deletions)

@@ -47,6 +47,7 @@
- over
- pipe
- quantile
+- rank
- replace_strict
- rolling_mean
- rolling_std
docs/api-reference/series.md (1 change: 1 addition & 0 deletions)

@@ -54,6 +54,7 @@
- null_count
- pipe
- quantile
+- rank
- rename
- replace_strict
- rolling_mean
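
The two API-reference additions above document the newly listed `rank` method on `Expr` and `Series`. A minimal sketch with the pandas backend, assuming the defaults mirror Polars' `method='average'` (the data is illustrative):

```python
import pandas as pd
import narwhals as nw

df = nw.from_native(pd.DataFrame({"a": [3, 1, 2, 2]}), eager_only=True)

# Expr.rank: average rank of each value within the column
print(df.with_columns(a_rank=nw.col("a").rank()).to_native())

# Series.rank: the same operation on a single column
print(df["a"].rank().to_native())
```
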
docs/backcompat.md (4 changes: 4 additions & 0 deletions)

@@ -111,6 +111,10 @@ before making any change.

### After `stable.v1`


+- Since Narwhals 1.21, passing a `DuckDBPyRelation` to `from_native` returns a `LazyFrame`. In
+  `narwhals.stable.v1`, it returns a `DataFrame` with `level='interchange'`.

- Since Narwhals 1.15, `Series` is generic in the native Series, meaning that you can
write:
```python
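
A short sketch of the behavior change documented above (the query is illustrative; both namespaces are imported to contrast the two results):

```python
import duckdb
import narwhals as nw
import narwhals.stable.v1 as nw_v1

rel = duckdb.sql("SELECT 1 AS a")  # a DuckDBPyRelation

# Since Narwhals 1.21, the main namespace wraps the relation as a LazyFrame.
print(type(nw.from_native(rel)))

# narwhals.stable.v1 keeps the old behavior: an interchange-level DataFrame.
print(type(nw_v1.from_native(rel)))
```
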
docs/basics/dataframe_conversion.md (18 changes: 13 additions & 5 deletions)

@@ -14,6 +14,7 @@ To illustrate, we create dataframes in various formats:
```python exec="1" source="above" session="conversion"
import narwhals as nw
from narwhals.typing import IntoDataFrame
+from typing import Any

import duckdb
import polars as pl
@@ -45,11 +46,17 @@ print(df_to_pandas(df_polars))

### Via PyCapsule Interface

-Similarly, if your library uses Polars internally, you can convert any user-supplied dataframe to Polars format using Narwhals.
+Similarly, if your library uses Polars internally, you can convert any user-supplied dataframe
+which implements `__arrow_c_stream__`:

```python exec="1" source="above" session="conversion" result="python"
-def df_to_polars(df: IntoDataFrame) -> pl.DataFrame:
-    return nw.from_arrow(nw.from_native(df), native_namespace=pl).to_native()
+def df_to_polars(df_native: Any) -> pl.DataFrame:
+    if hasattr(df_native, "__arrow_c_stream__"):
+        return nw.from_arrow(df_native, native_namespace=pl).to_native()
+    msg = (
+        f"Expected object which implements '__arrow_c_stream__' got: {type(df_native)}"
+    )
+    raise TypeError(msg)


print(df_to_polars(df_duckdb)) # You can only execute this line of code once.
@@ -66,8 +73,9 @@ If you need to ingest the same dataframe multiple times, then you may want to go
This may be less efficient than the PyCapsule approach above (and always requires PyArrow!), but is more forgiving:

```python exec="1" source="above" session="conversion" result="python"
-def df_to_polars(df: IntoDataFrame) -> pl.DataFrame:
-    return pl.DataFrame(nw.from_native(df).to_arrow())
+def df_to_polars(df_native: IntoDataFrame) -> pl.DataFrame:
+    df = nw.from_native(df_native).lazy().collect()
+    return pl.DataFrame(nw.from_native(df, eager_only=True).to_arrow())


df_duckdb = duckdb.sql("SELECT * FROM df_polars")
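
As a usage sketch for the converters above (the pandas input is illustrative): the PyArrow-based `df_to_polars` materializes via `to_arrow()`, so it can be called repeatedly on the same object, unlike consuming a DuckDB relation's `__arrow_c_stream__`, which works only once:

```python
import pandas as pd

df_pandas = pd.DataFrame({"a": [1, 2, 3]})

# Repeated calls are safe with the PyArrow route.
print(df_to_polars(df_pandas))
print(df_to_polars(df_pandas))
```
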
docs/css/extra.css (4 changes: 4 additions & 0 deletions)

@@ -2,3 +2,7 @@
.md-typeset ul li {
margin-bottom: 0.1em !important;
}
+.md-main__inner.md-grid {
+  max-width: initial;
+  margin-left: 5vw;
+}
docs/extending.md (7 changes: 3 additions & 4 deletions)

@@ -15,17 +15,16 @@ Currently, Narwhals has **full API** support for the following libraries:
It also has **lazy-only** support for [Dask](https://github.com/dask/dask), and **interchange** support
for [DuckDB](https://github.com/duckdb/duckdb) and [Ibis](https://github.com/ibis-project/ibis).

+We are working towards full "lazy-only" support for DuckDB, Ibis, and PySpark.

### Levels of support

Narwhals comes with three levels of support:

- **Full API support**: cuDF, Modin, pandas, Polars, PyArrow
-- **Lazy-only support**: Dask
+- **Lazy-only support**: Dask. Work in progress: DuckDB, Ibis, PySpark.
- **Interchange-level support**: DuckDB, Ibis, Vaex, anything which implements the DataFrame Interchange Protocol

-The lazy-only layer is a major item on our 2025 roadmap, and hope to be able to bring libraries currently in
-the "interchange" level into that one.

Libraries for which we have full support can benefit from the whole
[Narwhals API](./api-reference/index.md).

docs/installation.md (2 changes: 1 addition & 1 deletion)

@@ -30,7 +30,7 @@ To verify the installation, start the Python REPL and execute:
```python
>>> import narwhals
>>> narwhals.__version__
-'1.20.1'
+'1.21.1'
```

If you see the version number, then the installation was successful!
narwhals/__init__.py (2 changes: 1 addition & 1 deletion)

@@ -79,7 +79,7 @@
from narwhals.utils import maybe_reset_index
from narwhals.utils import maybe_set_index

-__version__ = "1.20.1"
+__version__ = "1.21.1"

__all__ = [
"Array",
[Diff truncated: the remaining changed files are not shown.]