Skip to content

Commit

Permalink
Merge branch 'main' into add-when-chaining
Browse files Browse the repository at this point in the history
  • Loading branch information
aivanoved committed Sep 16, 2024
2 parents fbb6c26 + 33f102e commit 4c872b9
Show file tree
Hide file tree
Showing 164 changed files with 990 additions and 502 deletions.
8 changes: 6 additions & 2 deletions .github/workflows/check_docs_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,12 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install uv (Unix)
run: curl -LsSf https://astral.sh/uv/install.sh | sh
- name: Install uv
uses: astral-sh/setup-uv@v2
with:
enable-cache: "true"
cache-suffix: ${{ matrix.python-version }}
cache-dependency-glob: "**requirements*.txt"
- name: install-reqs
run: uv pip install --upgrade tox virtualenv setuptools pip -r requirements-dev.txt --system
- name: install-docs-reqs
Expand Down
10 changes: 7 additions & 3 deletions .github/workflows/check_tpch_queries.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,17 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install uv
run: curl -LsSf https://astral.sh/uv/install.sh | sh
- name: Install uv
uses: astral-sh/setup-uv@v2
with:
enable-cache: "true"
cache-suffix: ${{ matrix.python-version }}
cache-dependency-glob: "**requirements*.txt"
- name: install-reqs
run: uv pip install --upgrade -r requirements-dev.txt --system
- name: local-install
run: uv pip install -e . --system
- name: generate-data
run: cd tpch && python generate_data.py
- name: tpch-tests
run: cd tpch && pytest tests
run: cd tpch && pytest tests
16 changes: 12 additions & 4 deletions .github/workflows/downstream_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,12 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install uv (Unix)
run: curl -LsSf https://astral.sh/uv/install.sh | sh
- name: Install uv
uses: astral-sh/setup-uv@v2
with:
enable-cache: "true"
cache-suffix: ${{ matrix.python-version }}
cache-dependency-glob: "**requirements*.txt"
- name: clone-altair
run: |
git clone https://github.com/vega/altair.git --depth=1
Expand Down Expand Up @@ -58,8 +62,12 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install uv (Unix)
run: curl -LsSf https://astral.sh/uv/install.sh | sh
- name: Install uv
uses: astral-sh/setup-uv@v2
with:
enable-cache: "true"
cache-suffix: ${{ matrix.python-version }}
cache-dependency-glob: "**requirements*.txt"
- name: clone-scikit-lego
run: git clone https://github.com/koaning/scikit-lego.git --depth 1
- name: install-basics
Expand Down
87 changes: 36 additions & 51 deletions .github/workflows/extremes.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,12 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install uv (Unix)
run: curl -LsSf https://astral.sh/uv/install.sh | sh
- name: Install uv
uses: astral-sh/setup-uv@v2
with:
enable-cache: "true"
cache-suffix: ${{ matrix.python-version }}
cache-dependency-glob: "**requirements*.txt"
- name: install-minimum-versions
run: uv pip install tox virtualenv setuptools pandas==0.25.3 polars==0.20.3 numpy==1.17.5 pyarrow==11.0.0 scipy==1.5.0 scikit-learn==1.1.0 tzdata --system
- name: install-reqs
Expand All @@ -41,8 +45,12 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install uv (Unix)
run: curl -LsSf https://astral.sh/uv/install.sh | sh
- name: Install uv
uses: astral-sh/setup-uv@v2
with:
enable-cache: "true"
cache-suffix: ${{ matrix.python-version }}
cache-dependency-glob: "**requirements*.txt"
- name: install-minimum-versions
run: uv pip install tox virtualenv setuptools pandas==1.1.5 polars==0.20.3 numpy==1.17.5 pyarrow==11.0.0 scipy==1.5.0 scikit-learn==1.1.0 tzdata --system
- name: install-reqs
Expand All @@ -66,8 +74,12 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install uv (Unix)
run: curl -LsSf https://astral.sh/uv/install.sh | sh
- name: Install uv
uses: astral-sh/setup-uv@v2
with:
enable-cache: "true"
cache-suffix: ${{ matrix.python-version }}
cache-dependency-glob: "**requirements*.txt"
- name: install-minimum-versions
run: uv pip install tox virtualenv setuptools pandas==2.0.3 polars==0.20.8 numpy==1.24.4 pyarrow==14.0.0 scipy==1.8.0 scikit-learn==1.3.0 dask[dataframe]==2024.7 tzdata --system
- name: install-reqs
Expand All @@ -79,22 +91,34 @@ jobs:
- name: Run doctests
run: pytest narwhals --doctest-modules

pandas-nightly-and-dask:
nightlies:
strategy:
matrix:
python-version: ["3.12"]
python-version: ["3.11"]
os: [ubuntu-latest]

if: github.event.pull_request.head.repo.full_name == github.repository
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install uv (Unix)
run: curl -LsSf https://astral.sh/uv/install.sh | sh
- name: Install uv
uses: astral-sh/setup-uv@v2
with:
enable-cache: "true"
cache-suffix: ${{ matrix.python-version }}
cache-dependency-glob: "**requirements*.txt"
- name: install-kaggle
run: uv pip install kaggle --system
- name: Download Kaggle notebook artifact
env:
KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }}
KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }}
run: |
kaggle kernels output "marcogorelli/variable-brink-glacier"
- name: install-polars
run: uv pip install polars --system
run: python -m pip install *.whl
- name: install-reqs
run: uv pip install --upgrade tox virtualenv setuptools pip -r requirements-dev.txt --system
- name: uninstall pyarrow
Expand All @@ -119,43 +143,4 @@ jobs:
- name: Run doctests
run: pytest narwhals --doctest-modules

# polars-nightly:
# if: github.ref == 'refs/heads/main'
# strategy:
# matrix:
# python-version: ["3.12"]
# os: [ubuntu-latest]

# runs-on: ${{ matrix.os }}
# steps:
# - uses: actions/checkout@v4
# - uses: actions/setup-python@v5
# with:
# python-version: ${{ matrix.python-version }}
# - name: Cache multiple paths
# uses: actions/cache@v4
# with:
# path: |
# ~/.cache/pip
# $RUNNER_TOOL_CACHE/Python/*
# ~\AppData\Local\pip\Cache
# key: ${{ runner.os }}-build-${{ matrix.python-version }}
# - name: install-kaggle
# run: python -m pip install kaggle
# - name: Download Kaggle notebook artifact
# env:
# KAGGLE_USERNAME: ${{ secrets.KAGGLE_USERNAME }}
# KAGGLE_KEY: ${{ secrets.KAGGLE_KEY }}
# run: kaggle kernels output marcogorelli/polars-nightly
# - name: install-reqs
# run: python -m pip install --upgrade tox virtualenv setuptools pip -r requirements-dev.txt
# - name: uninstall polars
# run: python -m pip uninstall polars -y
# - name: install-modin-pandas
# run: pip install modin[dask] pandas
# - name: install-polars-nightly
# run: python -m pip install *.whl
# - name: Run pytest
# run: pytest tests --cov=narwhals --cov=tests --cov-fail-under=50
# - name: Run doctests
# run: pytest narwhals --doctest-modules
28 changes: 18 additions & 10 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,12 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install uv (Unix)
if: runner.os != 'Windows'
run: curl -LsSf https://astral.sh/uv/install.sh | sh
- name: Install uv (Windows)
if: runner.os == 'Windows'
run: powershell -c "irm https://astral.sh/uv/install.ps1 | iex"
- name: Install uv
uses: astral-sh/setup-uv@v2
with:
enable-cache: "true"
cache-suffix: ${{ matrix.python-version }}
cache-dependency-glob: "**requirements*.txt"
- name: install-reqs
run: uv pip install --upgrade tox virtualenv setuptools -r requirements-dev.txt ibis-framework[duckdb] --system
- name: show-deps
Expand All @@ -45,8 +45,12 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install uv (Windows)
run: powershell -c "irm https://astral.sh/uv/install.ps1 | iex"
- name: Install uv
uses: astral-sh/setup-uv@v2
with:
enable-cache: "true"
cache-suffix: ${{ matrix.python-version }}
cache-dependency-glob: "**requirements*.txt"
- name: install-reqs
run: uv pip install --upgrade tox virtualenv setuptools -r requirements-dev.txt --system
- name: install-modin
Expand All @@ -70,8 +74,12 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install uv (Unix)
run: curl -LsSf https://astral.sh/uv/install.sh | sh
- name: Install uv
uses: astral-sh/setup-uv@v2
with:
enable-cache: "true"
cache-suffix: ${{ matrix.python-version }}
cache-dependency-glob: "**requirements*.txt"
- name: install-reqs
run: uv pip install --upgrade tox virtualenv setuptools -r requirements-dev.txt --system
- name: install-modin
Expand Down
8 changes: 6 additions & 2 deletions .github/workflows/random_ci_pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,12 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install uv (Unix)
run: curl -LsSf https://astral.sh/uv/install.sh | sh
- name: Install uv
uses: astral-sh/setup-uv@v2
with:
enable-cache: "true"
cache-suffix: ${{ matrix.python-version }}
cache-dependency-glob: "**requirements*.txt"
- name: install package
run: uv pip install -e . --system
- name: generate-random-versions
Expand Down
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: 'v0.6.3'
rev: 'v0.6.4'
hooks:
# Run the formatter.
- id: ruff-format
Expand Down
3 changes: 0 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,6 @@ See the [tutorial](https://narwhals-dev.github.io/narwhals/basics/dataframe/) fo

If you said yes to both, we'd love to hear from you!

**Note**: You might suspect that this is a secret ploy to infiltrate the Polars API everywhere.
Indeed, you may suspect that.

## Sponsors and institutional partners

Narwhals is 100% independent, community-driven, and community-owned.
Expand Down
1 change: 1 addition & 0 deletions docs/api-reference/dataframe.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
- rename
- row
- rows
- sample
- schema
- select
- shape
Expand Down
4 changes: 0 additions & 4 deletions docs/basics/complete_example.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,6 @@ We'll need to write two methods:
- `transform`: scale a given dataset with the means and standard deviations calculated
during `fit`.

The `fit` method is a bit complicated, so let's start with `transform`.
Suppose we've already calculated the mean and standard deviation of each column, and have
stored them in attributes `self.means` and `self.std_devs`.

## Fit method

Unlike the `transform` method, which we'll write below, `fit` cannot stay lazy,
Expand Down
2 changes: 1 addition & 1 deletion docs/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ Then, if you start the Python REPL and see the following:
```python
>>> import narwhals
>>> narwhals.__version__
'1.7.0'
'1.8.1'
```
then installation worked correctly!
2 changes: 1 addition & 1 deletion narwhals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
from narwhals.utils import maybe_get_index
from narwhals.utils import maybe_set_index

__version__ = "1.7.0"
__version__ = "1.8.1"

__all__ = [
"dependencies",
Expand Down
46 changes: 42 additions & 4 deletions narwhals/_arrow/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from typing import overload

from narwhals._arrow.utils import broadcast_series
from narwhals._arrow.utils import convert_slice_to_nparray
from narwhals._arrow.utils import translate_dtype
from narwhals._arrow.utils import validate_dataframe_comparand
from narwhals._expression_parsing import evaluate_into_exprs
Expand Down Expand Up @@ -126,7 +127,8 @@ def __getitem__(
| slice
| Sequence[int]
| Sequence[str]
| tuple[Sequence[int], str | int],
| tuple[Sequence[int], str | int]
| tuple[slice, str | int],
) -> ArrowSeries | ArrowDataFrame:
if isinstance(item, str):
from narwhals._arrow.series import ArrowSeries
Expand All @@ -144,7 +146,10 @@ def __getitem__(
if item[0] == slice(None):
selected_rows = self._native_frame
else:
selected_rows = self._native_frame.take(item[0])
range_ = convert_slice_to_nparray(
num_rows=len(self._native_frame), rows_slice=item[0]
)
selected_rows = self._native_frame.take(range_)

return self._from_native_frame(selected_rows.select(item[1]))

Expand Down Expand Up @@ -174,13 +179,24 @@ def __getitem__(
)
msg = f"Expected slice of integers or strings, got: {type(item[1])}" # pragma: no cover
raise TypeError(msg) # pragma: no cover

from narwhals._arrow.series import ArrowSeries

# PyArrow columns are always strings
col_name = item[1] if isinstance(item[1], str) else self.columns[item[1]]
if isinstance(item[0], str): # pragma: no cover
msg = "Can not slice with tuple with the first element as a str"
raise TypeError(msg)
if (isinstance(item[0], slice)) and (item[0] == slice(None)):
return ArrowSeries(
self._native_frame[col_name],
name=col_name,
backend_version=self._backend_version,
)
range_ = convert_slice_to_nparray(
num_rows=len(self._native_frame), rows_slice=item[0]
)
return ArrowSeries(
self._native_frame[col_name].take(item[0]),
self._native_frame[col_name].take(range_),
name=col_name,
backend_version=self._backend_version,
)
Expand Down Expand Up @@ -572,3 +588,25 @@ def gather_every(self: Self, n: int, offset: int = 0) -> Self:

def to_arrow(self: Self) -> Any:
return self._native_frame

def sample(
    self: Self,
    n: int | None = None,
    *,
    fraction: float | None = None,
    with_replacement: bool = False,
    seed: int | None = None,
) -> Self:
    """Return a random sample of rows from this frame.

    Arguments:
        n: Number of rows to draw. When both `n` and `fraction` are None,
            a single row is drawn.
        fraction: Fraction of rows to draw. Only consulted when `n` is None;
            the row count is `int(num_rows * fraction)`.
        with_replacement: Whether the same row may be drawn more than once.
        seed: Seed passed to `numpy.random.default_rng`; None means the
            sample is not reproducible.

    Returns:
        A new frame wrapping the sampled rows of the native frame.
    """
    import numpy as np  # ignore-banned-import
    import pyarrow.compute as pc  # ignore-banned-import()

    frame = self._native_frame
    num_rows = len(self)
    if n is None:
        # With neither `n` nor `fraction`, `size=None` would make
        # `rng.choice` return a bare scalar instead of an index array.
        # Fall back to a single row in that case.
        n = int(num_rows * fraction) if fraction is not None else 1

    rng = np.random.default_rng(seed=seed)
    # Draw from an explicit index array so a given seed keeps producing the
    # same row selection as before.
    idx = np.arange(0, num_rows)
    row_indices = rng.choice(idx, size=n, replace=with_replacement)

    return self._from_native_frame(pc.take(frame, row_indices))
Loading

0 comments on commit 4c872b9

Please sign in to comment.